def __main__(): # Parse Command Line options, args = doc_optparse.parse(__doc__) mincols = 0 strand_col = -1 if options.dbkey: primary_species = options.dbkey else: primary_species = None if primary_species in [None, "?", "None"]: stop_err( "You must specify a proper build in order to extract alignments. You can specify your genome build by clicking on the pencil icon associated with your interval file." ) include_primary = True secondary_species = maf_utilities.parse_species_option(options.species) if secondary_species: species = list(secondary_species) # make copy of species list if primary_species in secondary_species: secondary_species.remove(primary_species) else: include_primary = False else: species = None if options.interval_file: interval_file = options.interval_file else: stop_err("Input interval file has not been specified.") if options.output_file: output_file = options.output_file else: stop_err("Output file has not been specified.") if not options.geneBED: if options.chromCol: chr_col = int(options.chromCol) - 1 else: stop_err( "Chromosome column not set, click the pencil icon in the history item to set the metadata attributes." ) if options.startCol: start_col = int(options.startCol) - 1 else: stop_err( "Start column not set, click the pencil icon in the history item to set the metadata attributes." ) if options.endCol: end_col = int(options.endCol) - 1 else: stop_err( "End column not set, click the pencil icon in the history item to set the metadata attributes." 
) if options.strandCol: strand_col = int(options.strandCol) - 1 mafIndexFile = "%s/maf_index.loc" % options.mafIndexFileDir overwrite_with_gaps = True if options.overwrite_with_gaps and options.overwrite_with_gaps.lower( ) == 'false': overwrite_with_gaps = False # Finish parsing command line # get index for mafs based on type index = index_filename = None # using specified uid for locally cached if options.mafSourceType.lower() in ["cached"]: index = maf_utilities.maf_index_by_uid(options.mafSource, mafIndexFile) if index is None: stop_err("The MAF source specified (%s) appears to be invalid." % (options.mafSource)) elif options.mafSourceType.lower() in ["user"]: # index maf for use here, need to remove index_file when finished index, index_filename = maf_utilities.open_or_build_maf_index( options.mafSource, options.mafIndex, species=[primary_species]) if index is None: stop_err("Your MAF file appears to be malformed.") else: stop_err("Invalid MAF source type specified.") # open output file output = open(output_file, "w") if options.geneBED: region_enumerator = maf_utilities.line_enumerator( open(interval_file, "r").readlines()) else: region_enumerator = enumerate( bx.intervals.io.NiceReaderWrapper(open(interval_file, 'r'), chrom_col=chr_col, start_col=start_col, end_col=end_col, strand_col=strand_col, fix_strand=True, return_header=False, return_comments=False)) # Step through intervals regions_extracted = 0 line_count = 0 for line_count, line in region_enumerator: try: if options.geneBED: # Process as Gene BED try: starts, ends, fields = maf_utilities.get_starts_ends_fields_from_gene_bed( line) # create spliced alignment object alignment = maf_utilities.get_spliced_region_alignment( index, primary_species, fields[0], starts, ends, strand='+', species=species, mincols=mincols, overwrite_with_gaps=overwrite_with_gaps) primary_name = secondary_name = fields[3] alignment_strand = fields[5] except Exception, e: print "Error loading exon positions from input line %i: 
%s" % ( line_count, e) continue else: # Process as standard intervals try: # create spliced alignment object alignment = maf_utilities.get_region_alignment( index, primary_species, line.chrom, line.start, line.end, strand='+', species=species, mincols=mincols, overwrite_with_gaps=overwrite_with_gaps) primary_name = "%s(%s):%s-%s" % (line.chrom, line.strand, line.start, line.end) secondary_name = "" alignment_strand = line.strand except Exception, e: print "Error loading region positions from input line %i: %s" % ( line_count, e) continue
def __main__():
    """Report per-species MAF coverage for the regions of an interval file.

    Arguments are read positionally from ``sys.argv``:

    1. MAF source type, ``"user"`` or ``"cached"``
    2. MAF file name (``user``) or MAF uid (``cached``)
    3. input interval file name
    4. output file name
    5. dbkey of the primary genome
    6-8. 1-based chromosome, start and end column numbers
    9. ``"true"`` to write a single summary table instead of one row per
       interval and species
    10. directory that contains ``maf_index.loc``
    11. optional index file name for a user supplied MAF

    Unusable arguments or an unusable MAF source print a message and end the
    run through ``sys.exit()``.
    """
    maf_source_type = sys.argv.pop(1)
    input_maf_filename = sys.argv[1].strip()
    input_interval_filename = sys.argv[2].strip()
    output_filename = sys.argv[3].strip()
    dbkey = sys.argv[4].strip()
    try:
        chr_col = int(sys.argv[5].strip()) - 1
        start_col = int(sys.argv[6].strip()) - 1
        end_col = int(sys.argv[7].strip()) - 1
    except (IndexError, ValueError):
        # Missing or non-numeric column metadata.
        sys.stderr.write("You appear to be missing metadata. You can specify your metadata by clicking on the pencil icon associated with your interval file.\n")
        sys.exit()
    summary = sys.argv[8].strip().lower() == "true"
    mafIndexFile = "%s/maf_index.loc" % sys.argv[9]
    try:
        maf_index_filename = sys.argv[10].strip()
    except IndexError:
        # The index file name is optional.
        maf_index_filename = None

    index = index_filename = None
    if maf_source_type == "user":
        # index maf for use here
        index, index_filename = maf_utilities.open_or_build_maf_index(input_maf_filename, maf_index_filename, species=[dbkey])
        if index is None:
            sys.stderr.write("Your MAF file appears to be malformed.\n")
            sys.exit()
    elif maf_source_type == "cached":
        # access existing indexes
        index = maf_utilities.maf_index_by_uid(input_maf_filename, mafIndexFile)
        if index is None:
            sys.stderr.write("The MAF source specified (%s) appears to be invalid.\n" % input_maf_filename)
            sys.exit()
    else:
        print('Invalid source type specified: %s' % maf_source_type)
        sys.exit()

    num_regions = 0
    species_summary = {}
    total_length = 0
    try:
        with open(output_filename, 'w') as out, open(input_interval_filename, 'r') as interval_fh:
            regions = bx.intervals.io.NiceReaderWrapper(
                interval_fh,
                chrom_col=chr_col,
                start_col=start_col,
                end_col=end_col,
                fix_strand=True,
                return_header=False,
                return_comments=False)
            # loop through interval file
            for region in regions:
                num_regions += 1
                src = "%s.%s" % (dbkey, region.chrom)
                region_length = region.end - region.start
                total_length += region_length
                coverage = {dbkey: BitSet(region_length)}

                for block in maf_utilities.get_chopped_blocks_for_region(index, src, region, force_strand='+'):
                    # make sure all species are known
                    for component in block.components:
                        spec = component.src.split('.')[0]
                        if spec not in coverage:
                            coverage[spec] = BitSet(region_length)
                    start_offset, alignment = maf_utilities.reduce_block_by_primary_genome(block, dbkey, region.chrom, region.start)
                    # Mark every column where a species has a non-gap character.
                    for i in range(len(alignment[dbkey])):
                        for spec, text in alignment.items():
                            if text[i] != '-':
                                coverage[spec].set(start_offset + i)

                if summary:
                    # record summary
                    for key in coverage:
                        species_summary[key] = species_summary.get(key, 0) + coverage[key].count_range()
                else:
                    # print coverage for interval: primary genome first, the
                    # remaining species in sorted order
                    row_prefix = "\t".join(region.fields)
                    for key in [dbkey] + sorted(k for k in coverage if k != dbkey):
                        coverage_sum = coverage[key].count_range()
                        out.write("%s\t%s\t%s\t%s\n" % (row_prefix, key, coverage_sum, region_length - coverage_sum))

            if summary:
                out.write("#species\tnucleotides\tcoverage\n")
                for spec in species_summary:
                    # Guard the ratio so zero-length input cannot divide by zero.
                    ratio = float(species_summary[spec]) / total_length if total_length else 0.0
                    out.write("%s\t%s\t%.4f\n" % (spec, species_summary[spec], ratio))

        # Report the number of regions read, not the last loop index.
        print("%i regions were processed with a total length of %i." % (num_regions, total_length))
    finally:
        # Remove the index file if one was created during this run, even
        # when processing failed part-way through.
        maf_utilities.remove_temp_index_file(index_filename)
def __main__():
    """Write FASTA alignments extracted from a MAF source for input regions.

    Options are parsed by ``doc_optparse`` from the module docstring.  Each
    input region is either a gene-BED line (when ``options.geneBED`` is set,
    producing a spliced alignment) or a plain interval read through
    ``NiceReaderWrapper``.  For every region the primary species sequence is
    written first (when requested), followed by the remaining species, then a
    blank line.  Sequences are reverse complemented for ``-`` strand regions.

    Lines that fail to load or to write are reported on stdout and skipped.
    Missing or invalid options are reported through ``stop_err``.
    """
    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)
    mincols = 0
    strand_col = -1

    primary_species = options.dbkey or None
    if primary_species in [None, "?", "None"]:
        stop_err("You must specify a proper build in order to extract alignments. You can specify your genome build by clicking on the pencil icon associated with your interval file.")

    include_primary = True
    secondary_species = maf_utilities.parse_species_option(options.species)
    if secondary_species:
        species = list(secondary_species)  # make copy of species list
        if primary_species in secondary_species:
            secondary_species.remove(primary_species)
        else:
            include_primary = False
    else:
        species = None

    if options.interval_file:
        interval_file = options.interval_file
    else:
        stop_err("Input interval file has not been specified.")

    if options.output_file:
        output_file = options.output_file
    else:
        stop_err("Output file has not been specified.")

    # Column metadata is only needed for plain intervals.
    if not options.geneBED:
        if options.chromCol:
            chr_col = int(options.chromCol) - 1
        else:
            stop_err("Chromosome column not set, click the pencil icon in the history item to set the metadata attributes.")

        if options.startCol:
            start_col = int(options.startCol) - 1
        else:
            stop_err("Start column not set, click the pencil icon in the history item to set the metadata attributes.")

        if options.endCol:
            end_col = int(options.endCol) - 1
        else:
            stop_err("End column not set, click the pencil icon in the history item to set the metadata attributes.")

        if options.strandCol:
            strand_col = int(options.strandCol) - 1

    mafIndexFile = "%s/maf_index.loc" % options.mafIndexFileDir

    # Gap overwriting stays enabled unless the option is literally "false".
    overwrite_with_gaps = True
    if options.overwrite_with_gaps and options.overwrite_with_gaps.lower() == 'false':
        overwrite_with_gaps = False
    # Finish parsing command line

    # get index for mafs based on type
    index = index_filename = None
    source_type = options.mafSourceType.lower()
    if source_type in ["cached"]:
        # using specified uid for locally cached
        index = maf_utilities.maf_index_by_uid(options.mafSource, mafIndexFile)
        if index is None:
            stop_err("The MAF source specified (%s) appears to be invalid." % (options.mafSource))
    elif source_type in ["user"]:
        # index maf for use here, need to remove index_file when finished
        index, index_filename = maf_utilities.open_or_build_maf_index(options.mafSource, options.mafIndex, species=[primary_species])
        if index is None:
            stop_err("Your MAF file appears to be malformed.")
    else:
        stop_err("Invalid MAF source type specified.")

    regions_extracted = 0
    line_count = 0
    try:
        # Both handles are closed when the block is left, including on error.
        with open(output_file, "w") as output, open(interval_file, "r") as interval_fh:
            if options.geneBED:
                region_enumerator = maf_utilities.line_enumerator(interval_fh.readlines())
            else:
                region_enumerator = enumerate(bx.intervals.io.NiceReaderWrapper(
                    interval_fh,
                    chrom_col=chr_col,
                    start_col=start_col,
                    end_col=end_col,
                    strand_col=strand_col,
                    fix_strand=True,
                    return_header=False,
                    return_comments=False))

            # Step through intervals
            for line_count, line in region_enumerator:
                try:
                    if options.geneBED:  # Process as Gene BED
                        try:
                            starts, ends, fields = maf_utilities.get_starts_ends_fields_from_gene_bed(line)
                            # create spliced alignment object
                            alignment = maf_utilities.get_spliced_region_alignment(
                                index, primary_species, fields[0], starts, ends,
                                strand='+', species=species, mincols=mincols,
                                overwrite_with_gaps=overwrite_with_gaps)
                            primary_name = secondary_name = fields[3]
                            alignment_strand = fields[5]
                        except Exception as e:
                            print("Error loading exon positions from input line %i: %s" % (line_count, e))
                            continue
                    else:  # Process as standard intervals
                        try:
                            # create alignment object for the region
                            alignment = maf_utilities.get_region_alignment(
                                index, primary_species, line.chrom, line.start, line.end,
                                strand='+', species=species, mincols=mincols,
                                overwrite_with_gaps=overwrite_with_gaps)
                            primary_name = "%s(%s):%s-%s" % (line.chrom, line.strand, line.start, line.end)
                            secondary_name = ""
                            alignment_strand = line.strand
                        except Exception as e:
                            print("Error loading region positions from input line %i: %s" % (line_count, e))
                            continue

                    # Write alignment to output file
                    # Output primary species first, if requested
                    if include_primary:
                        output.write(">%s.%s\n" % (primary_species, primary_name))
                        if alignment_strand == "-":
                            output.write(alignment.get_sequence_reverse_complement(primary_species))
                        else:
                            output.write(alignment.get_sequence(primary_species))
                        output.write("\n")
                    # Output all remaining species
                    for spec in secondary_species or alignment.get_species_names(skip=primary_species):
                        if secondary_name:
                            output.write(">%s.%s\n" % (spec, secondary_name))
                        else:
                            output.write(">%s\n" % (spec))
                        if alignment_strand == "-":
                            output.write(alignment.get_sequence_reverse_complement(spec))
                        else:
                            output.write(alignment.get_sequence(spec))
                        output.write("\n")

                    output.write("\n")
                    regions_extracted += 1
                except Exception as e:
                    # Best effort: report the failing line and keep going.
                    print("Unexpected error from input line %i: %s" % (line_count, e))
                    continue
    finally:
        # remove index file if created during run, even after a failure
        maf_utilities.remove_temp_index_file(index_filename)

    # Print message about success for user
    if regions_extracted > 0:
        print("%i regions were processed successfully." % (regions_extracted))
    else:
        print("No regions were processed successfully.")
        if line_count > 0 and options.geneBED:
            print("This tool requires your input file to conform to the 12 column BED standard.")
def __main__():
    """Extract the MAF blocks that overlap each region of an interval file.

    Options are parsed by ``doc_optparse`` from the module docstring.  The
    MAF index comes either from a cached source (``options.mafType``) or is
    opened/built for a user supplied file (``options.mafFile``).  Every block
    overlapping a region is optionally split by species, chopped to the
    region, optionally limited to the requested species, oriented by the
    region and written to the output MAF.

    Missing or invalid options are reported through
    ``maf_utilities.tool_fail``.  A one-line summary is printed at the end.
    """
    index = index_filename = None

    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)

    dbkey = options.dbkey or None
    if dbkey in [None, "?"]:
        maf_utilities.tool_fail("You must specify a proper build in order to extract alignments. You can specify your genome build by clicking on the pencil icon associated with your interval file.")

    species = maf_utilities.parse_species_option(options.species)

    if options.chromCol:
        chromCol = int(options.chromCol) - 1
    else:
        maf_utilities.tool_fail("Chromosome column not set, click the pencil icon in the history item to set the metadata attributes.")

    if options.startCol:
        startCol = int(options.startCol) - 1
    else:
        maf_utilities.tool_fail("Start column not set, click the pencil icon in the history item to set the metadata attributes.")

    if options.endCol:
        endCol = int(options.endCol) - 1
    else:
        maf_utilities.tool_fail("End column not set, click the pencil icon in the history item to set the metadata attributes.")

    if options.strandCol:
        strandCol = int(options.strandCol) - 1
    else:
        strandCol = -1

    if options.interval_file:
        interval_file = options.interval_file
    else:
        maf_utilities.tool_fail("Input interval file has not been specified.")

    if options.output_file:
        output_file = options.output_file
    else:
        maf_utilities.tool_fail("Output file has not been specified.")

    # Gap-only columns are always removed when blocks are not split; when
    # they are split, removal happens only if explicitly requested.
    split_blocks_by_species = remove_all_gap_columns = False
    if options.split_blocks_by_species and options.split_blocks_by_species == 'split_blocks_by_species':
        split_blocks_by_species = True
        if options.remove_all_gap_columns and options.remove_all_gap_columns == 'remove_all_gap_columns':
            remove_all_gap_columns = True
    else:
        remove_all_gap_columns = True
    # Finish parsing command line

    # Open indexed access to MAFs
    if options.mafType:
        if options.indexLocation:
            index = maf_utilities.maf_index_by_uid(options.mafType, options.indexLocation)
        else:
            index = maf_utilities.maf_index_by_uid(options.mafType, options.mafIndexFile)
        if index is None:
            maf_utilities.tool_fail("The MAF source specified (%s) appears to be invalid." % (options.mafType))
    elif options.mafFile:
        index, index_filename = maf_utilities.open_or_build_maf_index(options.mafFile, options.mafIndex, species=[dbkey])
        if index is None:
            maf_utilities.tool_fail("Your MAF file appears to be malformed.")
    else:
        maf_utilities.tool_fail("Desired source MAF type has not been specified.")

    num_blocks = 0
    num_regions = 0
    try:
        # Create MAF writer
        out = bx.align.maf.Writer(open(output_file, "w"))
        try:
            with open(interval_file) as interval_fh:
                regions = bx.intervals.io.NiceReaderWrapper(
                    interval_fh,
                    chrom_col=chromCol,
                    start_col=startCol,
                    end_col=endCol,
                    strand_col=strandCol,
                    fix_strand=True,
                    return_header=False,
                    return_comments=False)
                # Iterate over input regions
                for region in regions:
                    num_regions += 1
                    src = maf_utilities.src_merge(dbkey, region.chrom)
                    for block in index.get_as_iterator(src, region.start, region.end):
                        if split_blocks_by_species:
                            # Keep only the split blocks whose primary
                            # component still overlaps the region.
                            candidates = [
                                new_block
                                for new_block in maf_utilities.iter_blocks_split_by_species(block)
                                if maf_utilities.component_overlaps_region(
                                    new_block.get_component_by_src_start(dbkey), region)
                            ]
                        else:
                            candidates = [block]
                        for candidate in candidates:
                            chopped = maf_utilities.chop_block_by_region(candidate, src, region)
                            if chopped is None:
                                continue
                            if species is not None:
                                chopped = chopped.limit_to_species(species)
                            chopped = maf_utilities.orient_block_by_region(chopped, src, region)
                            if remove_all_gap_columns:
                                chopped.remove_all_gap_columns()
                            out.write(chopped)
                            num_blocks += 1
        finally:
            # Close output MAF, also when extraction fails part-way through
            out.close()
    finally:
        # remove index file if created during run
        maf_utilities.remove_temp_index_file(index_filename)

    if num_blocks:
        print("%i MAF blocks extracted for %i regions." % (num_blocks, num_regions))
    elif num_regions:
        print("No MAF blocks could be extracted for %i regions." % num_regions)
    else:
        print("No valid regions have been provided.")
def __main__():
    """Report per-species MAF coverage for the regions of an interval file.

    Arguments are read positionally from ``sys.argv``:

    1. MAF source type, ``"user"`` or ``"cached"``
    2. MAF file name (``user``) or MAF uid (``cached``)
    3. input interval file name
    4. output file name
    5. dbkey of the primary genome
    6-8. 1-based chromosome, start and end column numbers
    9. ``"true"`` to write a single summary table instead of one row per
       interval and species
    10. directory that contains ``maf_index.loc``
    11. optional index file name for a user supplied MAF

    Regions shorter than one base are counted as invalid and skipped.
    Unusable arguments or an unusable MAF source print a message and end the
    run through ``sys.exit()``.
    """
    maf_source_type = sys.argv.pop(1)
    input_maf_filename = sys.argv[1].strip()
    input_interval_filename = sys.argv[2].strip()
    output_filename = sys.argv[3].strip()
    dbkey = sys.argv[4].strip()
    try:
        chr_col = int(sys.argv[5].strip()) - 1
        start_col = int(sys.argv[6].strip()) - 1
        end_col = int(sys.argv[7].strip()) - 1
    except (IndexError, ValueError):
        # Missing or non-numeric column metadata.
        print("You appear to be missing metadata. You can specify your metadata by clicking on the pencil icon associated with your interval file.", file=sys.stderr)
        sys.exit()
    summary = sys.argv[8].strip().lower() == "true"
    mafIndexFile = "%s/maf_index.loc" % sys.argv[9]
    try:
        maf_index_filename = sys.argv[10].strip()
    except IndexError:
        # The index file name is optional.
        maf_index_filename = None

    index = index_filename = None
    if maf_source_type == "user":
        # index maf for use here
        index, index_filename = maf_utilities.open_or_build_maf_index(input_maf_filename, maf_index_filename, species=[dbkey])
        if index is None:
            print("Your MAF file appears to be malformed.", file=sys.stderr)
            sys.exit()
    elif maf_source_type == "cached":
        # access existing indexes
        index = maf_utilities.maf_index_by_uid(input_maf_filename, mafIndexFile)
        if index is None:
            print("The MAF source specified (%s) appears to be invalid." % (input_maf_filename), file=sys.stderr)
            sys.exit()
    else:
        print('Invalid source type specified: %s' % maf_source_type, file=sys.stdout)
        sys.exit()

    num_regions = 0
    num_bad_region = 0
    species_summary = {}
    total_length = 0
    try:
        with open(output_filename, 'w') as out, open(input_interval_filename, 'r') as interval_fh:
            regions = bx.intervals.io.NiceReaderWrapper(
                interval_fh,
                chrom_col=chr_col,
                start_col=start_col,
                end_col=end_col,
                fix_strand=True,
                return_header=False,
                return_comments=False)
            # loop through interval file
            for region in regions:
                num_regions += 1
                src = "%s.%s" % (dbkey, region.chrom)
                region_length = region.end - region.start
                if region_length < 1:
                    num_bad_region += 1
                    continue
                total_length += region_length
                coverage = {dbkey: BitSet(region_length)}

                for block in index.get_as_iterator(src, region.start, region.end):
                    # make sure all species of the block are known
                    for spec in maf_utilities.get_species_in_block(block):
                        if spec not in coverage:
                            coverage[spec] = BitSet(region_length)
                    for split_block in maf_utilities.iter_blocks_split_by_species(block):
                        if not maf_utilities.component_overlaps_region(split_block.get_component_by_src(src), region):
                            continue
                        # need to chop and orient the block
                        oriented = maf_utilities.orient_block_by_region(
                            maf_utilities.chop_block_by_region(split_block, src, region),
                            src, region, force_strand='+')
                        start_offset, alignment = maf_utilities.reduce_block_by_primary_genome(oriented, dbkey, region.chrom, region.start)
                        # Mark every column where a species has a non-gap character.
                        for i in range(len(alignment[dbkey])):
                            for spec, text in alignment.items():
                                if text[i] != '-':
                                    coverage[spec].set(start_offset + i)

                if summary:
                    # record summary
                    for key in coverage:
                        species_summary[key] = species_summary.get(key, 0) + coverage[key].count_range()
                else:
                    # print coverage for interval: primary genome first, the
                    # remaining species in sorted order
                    row_prefix = "\t".join(region.fields)
                    for key in [dbkey] + sorted(k for k in coverage if k != dbkey):
                        coverage_sum = coverage[key].count_range()
                        out.write("%s\t%s\t%s\t%s\n" % (row_prefix, key, coverage_sum, region_length - coverage_sum))

            if summary:
                out.write("#species\tnucleotides\tcoverage\n")
                for spec in species_summary:
                    out.write("%s\t%s\t%.4f\n" % (spec, species_summary[spec], float(species_summary[spec]) / total_length))

        if num_regions:
            print("%i regions were processed with a total length of %i." % (num_regions, total_length))
        if num_bad_region:
            print("%i regions were invalid." % (num_bad_region))
    finally:
        # Remove the index file if one was created during this run, even
        # when processing failed part-way through.
        maf_utilities.remove_temp_index_file(index_filename)
def __main__(): # Parse Command Line options, args = doc_optparse.parse(__doc__) mincols = 0 strand_col = -1 if options.dbkey: primary_species = options.dbkey else: primary_species = None if primary_species in [None, "?", "None"]: stop_err( "You must specify a proper build in order to extract alignments. You can specify your genome build by clicking on the pencil icon associated with your interval file." ) include_primary = True secondary_species = maf_utilities.parse_species_option(options.species) if secondary_species: species = list(secondary_species) # make copy of species list if primary_species in secondary_species: secondary_species.remove(primary_species) else: include_primary = False else: species = None if options.interval_file: interval_file = options.interval_file else: stop_err("Input interval file has not been specified.") if options.output_file: output_file = options.output_file else: stop_err("Output file has not been specified.") if not options.geneBED: if options.chromCol: chr_col = int(options.chromCol) - 1 else: stop_err( "Chromosome column not set, click the pencil icon in the history item to set the metadata attributes." ) if options.startCol: start_col = int(options.startCol) - 1 else: stop_err("Start column not set, click the pencil icon in the history item to set the metadata attributes.") if options.endCol: end_col = int(options.endCol) - 1 else: stop_err("End column not set, click the pencil icon in the history item to set the metadata attributes.") if options.strandCol: strand_col = int(options.strandCol) - 1 mafIndexFile = "%s/maf_index.loc" % options.mafIndexFileDir # Finish parsing command line # get index for mafs based on type index = index_filename = None # using specified uid for locally cached if options.mafSourceType.lower() in ["cached"]: index = maf_utilities.maf_index_by_uid(options.mafSource, mafIndexFile) if index is None: stop_err("The MAF source specified (%s) appears to be invalid." 
% (options.mafSource)) elif options.mafSourceType.lower() in ["user"]: # index maf for use here, need to remove index_file when finished index, index_filename = maf_utilities.open_or_build_maf_index( options.mafSource, options.mafIndex, species=[primary_species] ) if index is None: stop_err("Your MAF file appears to be malformed.") else: stop_err("Invalid MAF source type specified.") # open output file output = open(output_file, "w") if options.geneBED: region_enumerator = maf_utilities.line_enumerator(open(interval_file, "r").readlines()) else: region_enumerator = enumerate( bx.intervals.io.NiceReaderWrapper( open(interval_file, "r"), chrom_col=chr_col, start_col=start_col, end_col=end_col, strand_col=strand_col, fix_strand=True, return_header=False, return_comments=False, ) ) # Step through intervals regions_extracted = 0 line_count = 0 for line_count, line in region_enumerator: try: if options.geneBED: # Process as Gene BED try: starts, ends, fields = maf_utilities.get_starts_ends_fields_from_gene_bed(line) # create spliced alignment object alignment = maf_utilities.get_spliced_region_alignment( index, primary_species, fields[0], starts, ends, strand="+", species=species, mincols=mincols ) primary_name = secondary_name = fields[3] alignment_strand = fields[5] except Exception, e: print "Error loading exon positions from input line %i: %s" % (line_count, e) continue else: # Process as standard intervals try: # create spliced alignment object alignment = maf_utilities.get_region_alignment( index, primary_species, line.chrom, line.start, line.end, strand="+", species=species, mincols=mincols, ) primary_name = "%s(%s):%s-%s" % (line.chrom, line.strand, line.start, line.end) secondary_name = "" alignment_strand = line.strand except Exception, e: print "Error loading region positions from input line %i: %s" % (line_count, e) continue
def __main__():
    """Write the MAF blocks overlapping every region of an interval file.

    Command-line options are parsed by ``doc_optparse`` from the module
    docstring.  The MAF index is looked up by uid when ``options.mafType`` is
    given, otherwise opened or built for ``options.mafFile``.  Each block that
    overlaps a region is optionally split by species, chopped to the region,
    optionally restricted to the requested species, oriented by the region
    and written out.

    Missing or invalid options are reported through
    ``maf_utilities.tool_fail``.  A one-line summary is printed at the end.
    """
    index = index_filename = None

    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)

    dbkey = options.dbkey if options.dbkey else None
    if dbkey in [None, "?"]:
        maf_utilities.tool_fail("You must specify a proper build in order to extract alignments. You can specify your genome build by clicking on the pencil icon associated with your interval file.")

    species = maf_utilities.parse_species_option(options.species)

    if options.chromCol:
        chromCol = int(options.chromCol) - 1
    else:
        maf_utilities.tool_fail("Chromosome column not set, click the pencil icon in the history item to set the metadata attributes.")

    if options.startCol:
        startCol = int(options.startCol) - 1
    else:
        maf_utilities.tool_fail("Start column not set, click the pencil icon in the history item to set the metadata attributes.")

    if options.endCol:
        endCol = int(options.endCol) - 1
    else:
        maf_utilities.tool_fail("End column not set, click the pencil icon in the history item to set the metadata attributes.")

    # The strand column is optional; -1 is passed on when it is not set.
    strandCol = int(options.strandCol) - 1 if options.strandCol else -1

    if options.interval_file:
        interval_file = options.interval_file
    else:
        maf_utilities.tool_fail("Input interval file has not been specified.")

    if options.output_file:
        output_file = options.output_file
    else:
        maf_utilities.tool_fail("Output file has not been specified.")

    # Without splitting, gap-only columns are always removed; with
    # splitting, they are removed only on explicit request.
    split_blocks_by_species = remove_all_gap_columns = False
    if options.split_blocks_by_species and options.split_blocks_by_species == 'split_blocks_by_species':
        split_blocks_by_species = True
        if options.remove_all_gap_columns and options.remove_all_gap_columns == 'remove_all_gap_columns':
            remove_all_gap_columns = True
    else:
        remove_all_gap_columns = True
    # Finish parsing command line

    # Open indexed access to MAFs
    if options.mafType:
        if options.indexLocation:
            index = maf_utilities.maf_index_by_uid(options.mafType, options.indexLocation)
        else:
            index = maf_utilities.maf_index_by_uid(options.mafType, options.mafIndexFile)
        if index is None:
            maf_utilities.tool_fail("The MAF source specified (%s) appears to be invalid." % (options.mafType))
    elif options.mafFile:
        index, index_filename = maf_utilities.open_or_build_maf_index(options.mafFile, options.mafIndex, species=[dbkey])
        if index is None:
            maf_utilities.tool_fail("Your MAF file appears to be malformed.")
    else:
        maf_utilities.tool_fail("Desired source MAF type has not been specified.")

    num_blocks = 0
    num_regions = 0
    try:
        # Create MAF writer
        out = bx.align.maf.Writer(open(output_file, "w"))
        try:
            with open(interval_file, 'r') as interval_fh:
                reader = bx.intervals.io.NiceReaderWrapper(
                    interval_fh,
                    chrom_col=chromCol,
                    start_col=startCol,
                    end_col=endCol,
                    strand_col=strandCol,
                    fix_strand=True,
                    return_header=False,
                    return_comments=False)
                # Iterate over input regions
                for region in reader:
                    num_regions += 1
                    src = maf_utilities.src_merge(dbkey, region.chrom)
                    for block in index.get_as_iterator(src, region.start, region.end):
                        if split_blocks_by_species:
                            # Drop split blocks whose primary component no
                            # longer overlaps the region.
                            pieces = [
                                piece
                                for piece in maf_utilities.iter_blocks_split_by_species(block)
                                if maf_utilities.component_overlaps_region(
                                    piece.get_component_by_src_start(dbkey), region)
                            ]
                        else:
                            pieces = [block]
                        for piece in pieces:
                            trimmed = maf_utilities.chop_block_by_region(piece, src, region)
                            if trimmed is None:
                                continue
                            if species is not None:
                                trimmed = trimmed.limit_to_species(species)
                            trimmed = maf_utilities.orient_block_by_region(trimmed, src, region)
                            if remove_all_gap_columns:
                                trimmed.remove_all_gap_columns()
                            out.write(trimmed)
                            num_blocks += 1
        finally:
            # Close output MAF, also when extraction fails part-way through
            out.close()
    finally:
        # remove index file if created during run
        maf_utilities.remove_temp_index_file(index_filename)

    if num_blocks:
        print("%i MAF blocks extracted for %i regions." % (num_blocks, num_regions))
    elif num_regions:
        print("No MAF blocks could be extracted for %i regions." % num_regions)
    else:
        print("No valid regions have been provided.")
def __main__():
    """Extract the MAF blocks that overlap each region of an interval file.

    Options are parsed by ``doc_optparse`` from the module docstring.  The
    MAF index is looked up by uid when ``options.mafType`` is given,
    otherwise opened or built for ``options.mafFile``.  The chopped blocks
    returned by ``maf_utilities.get_chopped_blocks_for_region`` for every
    region are written to the output MAF.

    Missing or invalid options print a message to stderr and end the run
    through ``sys.exit()``.  A one-line summary is printed at the end.
    """
    index = index_filename = None
    mincols = 0

    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)

    dbkey = options.dbkey or None
    if dbkey in [None, "?"]:
        sys.stderr.write("You must specify a proper build in order to extract alignments. You can specify your genome build by clicking on the pencil icon associated with your interval file.\n")
        sys.exit()

    species = maf_utilities.parse_species_option(options.species)

    if options.chromCol:
        chromCol = int(options.chromCol) - 1
    else:
        sys.stderr.write("Chromosome column not set, click the pencil icon in the history item to set the metadata attributes.\n")
        sys.exit()

    if options.startCol:
        startCol = int(options.startCol) - 1
    else:
        sys.stderr.write("Start column not set, click the pencil icon in the history item to set the metadata attributes.\n")
        sys.exit()

    if options.endCol:
        endCol = int(options.endCol) - 1
    else:
        sys.stderr.write("End column not set, click the pencil icon in the history item to set the metadata attributes.\n")
        sys.exit()

    # The strand column is optional; -1 is passed on when it is not set.
    if options.strandCol:
        strandCol = int(options.strandCol) - 1
    else:
        strandCol = -1

    if options.interval_file:
        interval_file = options.interval_file
    else:
        sys.stderr.write("Input interval file has not been specified.\n")
        sys.exit()

    if options.output_file:
        output_file = options.output_file
    else:
        sys.stderr.write("Output file has not been specified.\n")
        sys.exit()
    # Finish parsing command line

    # Open indexed access to MAFs
    if options.mafType:
        if options.indexLocation:
            index = maf_utilities.maf_index_by_uid(options.mafType, options.indexLocation)
        else:
            index = maf_utilities.maf_index_by_uid(options.mafType, options.mafIndexFile)
        if index is None:
            sys.stderr.write("The MAF source specified (%s) appears to be invalid.\n" % (options.mafType))
            sys.exit()
    elif options.mafFile:
        index, index_filename = maf_utilities.open_or_build_maf_index(options.mafFile, options.mafIndex, species=[dbkey])
        if index is None:
            sys.stderr.write("Your MAF file appears to be malformed.\n")
            sys.exit()
    else:
        sys.stderr.write("Desired source MAF type has not been specified.\n")
        sys.exit()

    num_blocks = 0
    num_regions = 0
    try:
        # Create MAF writer
        out = bx.align.maf.Writer(open(output_file, "w"))
        try:
            with open(interval_file, 'r') as interval_fh:
                regions = bx.intervals.io.NiceReaderWrapper(
                    interval_fh,
                    chrom_col=chromCol,
                    start_col=startCol,
                    end_col=endCol,
                    strand_col=strandCol,
                    fix_strand=True,
                    return_header=False,
                    return_comments=False)
                # Iterate over input regions
                for region in regions:
                    num_regions += 1
                    src = "%s.%s" % (dbkey, region.chrom)
                    for block in maf_utilities.get_chopped_blocks_for_region(index, src, region, species, mincols):
                        out.write(block)
                        num_blocks += 1
        finally:
            # Close output MAF, also when extraction fails part-way through
            out.close()
    finally:
        # remove index file if created during run
        maf_utilities.remove_temp_index_file(index_filename)

    # Single-argument print() calls behave the same on Python 2 and 3.
    if num_blocks:
        print("%i MAF blocks extracted for %i regions." % (num_blocks, num_regions))
    elif num_regions:
        print("No MAF blocks could be extracted for %i regions." % num_regions)
    else:
        print("No valid regions have been provided.")