def __main__():
    """Extract MAF blocks by index, in the order listed in a tabular file.

    Command-line arguments (read from ``sys.argv``):
        1. tabular file whose rows name the blocks to extract
        2. input MAF file
        3. output MAF file
        4. 1-based column of the tabular file that holds the block index
        5. species option string, handed to
           ``maf_utilities.parse_species_option``

    The index read from each row is compared against the 0-based position
    of each block in the MAF file.  Rows whose index cannot be parsed are
    collected and reported on stdout at the end; an index that matches no
    block writes nothing.
    """
    input_block_filename = sys.argv[1].strip()
    input_maf_filename = sys.argv[2].strip()
    output_filename1 = sys.argv[3].strip()
    block_col = int(sys.argv[4].strip()) - 1
    if block_col < 0:
        sys.stderr.write("Invalid column specified\n")
        # Exit status is left at 0, exactly as before.
        sys.exit(0)
    species = maf_utilities.parse_species_option(sys.argv[5].strip())

    maf_writer = bx.align.maf.Writer(open(output_filename1, 'w'))
    # we want to maintain order of block file and write blocks as many times as they are listed
    failed_lines = []
    try:
        with open(input_block_filename, 'r') as block_file:
            for ctr, line in enumerate(block_file):
                try:
                    block_wanted = int(line.split("\t")[block_col].strip())
                except (IndexError, ValueError):
                    # Too few columns, or the cell is not an integer.
                    failed_lines.append(str(ctr))
                    continue
                try:
                    # The MAF is re-scanned from the start for every row so
                    # that output order follows the tabular file.
                    with open(input_maf_filename, 'r') as maf_file:
                        for count, block in enumerate(bx.align.maf.Reader(maf_file)):
                            if count == block_wanted:
                                if species:
                                    block = block.limit_to_species(species)
                                maf_writer.write(block)
                                break
                except Exception:
                    sys.stderr.write("Your MAF file appears to be malformed.\n")
                    sys.exit()
    finally:
        # Close the writer on every path, including the early exit above.
        maf_writer.close()

    if failed_lines:
        print("Failed to extract from %i lines (%s)." % (len(failed_lines), ",".join(failed_lines)))
def main():
    """Restrict every block of a MAF file to a set of species.

    Command-line arguments (read from ``sys.argv``):
        1. species option string, handed to
           ``maf_utilities.parse_species_option``
        2. input MAF file
        3. output MAF file
        4. integer flag; non-zero keeps blocks that lack some of the
           requested species
        5. integer threshold; a block is kept only when the number of
           species it contains is strictly greater than this value

    All-gap columns are removed from every block before it is evaluated.
    A summary is printed to stdout once all blocks are processed.
    """
    species = maf_utilities.parse_species_option(sys.argv[1])
    spec_len = len(species) if species else 0
    try:
        maf_reader = bx.align.maf.Reader(open(sys.argv[2], 'r'))
        maf_writer = bx.align.maf.Writer(open(sys.argv[3], 'w'))
    except Exception:
        print("Your MAF file appears to be malformed.", file=sys.stderr)
        sys.exit()
    allow_partial = bool(int(sys.argv[4]))
    min_species_per_block = int(sys.argv[5])

    maf_blocks_kept = 0
    for m in maf_reader:
        if species:
            m = m.limit_to_species(species)
        m.remove_all_gap_columns()
        spec_in_block_len = len(maf_utilities.get_species_in_block(m))
        # With no species restriction, or with partial blocks allowed, only
        # the minimum-species threshold applies.
        if (not species or allow_partial or spec_in_block_len == spec_len) and spec_in_block_len > min_species_per_block:
            maf_writer.write(m)
            maf_blocks_kept += 1

    maf_reader.close()
    maf_writer.close()

    # `species` is falsy when no restriction was requested; joining it
    # unconditionally would fail (or print an empty list of names).
    if species:
        print("Restricted to species: %s." % ", ".join(species))
    else:
        print("Not restricted to species.")
    print("%i MAF blocks have been kept." % maf_blocks_kept)
def __main__():
    """Write selected MAF blocks, following the order of a block-list file.

    Command-line arguments (read from ``sys.argv``):
        1. tabular block-list file
        2. input MAF file
        3. output MAF file
        4. 1-based column of the block-list file holding the block index
        5. species option string, handed to
           ``maf_utilities.parse_species_option``

    Each row's index is matched against the 0-based position of the blocks
    in the MAF file, and a block is written once per row that names it.
    Rows that cannot be parsed are reported on stdout at the end.
    """
    input_block_filename = sys.argv[1].strip()
    input_maf_filename = sys.argv[2].strip()
    output_filename1 = sys.argv[3].strip()
    block_col = int(sys.argv[4].strip()) - 1
    if block_col < 0:
        sys.stderr.write("Invalid column specified\n")
        # Exit status is left at 0, exactly as before.
        sys.exit(0)
    species = maf_utilities.parse_species_option(sys.argv[5].strip())

    maf_writer = bx.align.maf.Writer(open(output_filename1, 'w'))
    # we want to maintain order of block file and write blocks as many times as they are listed
    failed_lines = []
    try:
        with open(input_block_filename, 'r') as block_file:
            for ctr, line in enumerate(block_file):
                try:
                    block_wanted = int(line.split("\t")[block_col].strip())
                except (IndexError, ValueError):
                    # Row is too short, or the cell is not an integer.
                    failed_lines.append(str(ctr))
                    continue
                try:
                    # Re-read the MAF for each row; the handle is released
                    # as soon as the wanted block has been found.
                    with open(input_maf_filename, 'r') as maf_file:
                        for count, block in enumerate(bx.align.maf.Reader(maf_file)):
                            if count != block_wanted:
                                continue
                            if species:
                                block = block.limit_to_species(species)
                            maf_writer.write(block)
                            break
                except Exception:
                    sys.stderr.write("Your MAF file appears to be malformed.\n")
                    sys.exit()
    finally:
        # Make sure buffered output reaches disk on every exit path.
        maf_writer.close()

    if failed_lines:
        print("Failed to extract from %i lines (%s)." % (len(failed_lines), ",".join(failed_lines)))
def main():
    """Limit the blocks of a MAF file to the requested species.

    Command-line arguments (read from ``sys.argv``):
        1. species option string, handed to
           ``maf_utilities.parse_species_option``
        2. input MAF file
        3. output MAF file
        4. integer flag; non-zero allows blocks that are missing some of
           the requested species
        5. integer threshold; a block must contain strictly more species
           than this value to be written

    All-gap columns are stripped from each block before it is evaluated,
    and a short summary is printed to stdout at the end.
    """
    species = maf_utilities.parse_species_option(sys.argv[1])
    if species:
        spec_len = len(species)
    else:
        spec_len = 0
    try:
        maf_reader = bx.align.maf.Reader(open(sys.argv[2], 'r'))
        maf_writer = bx.align.maf.Writer(open(sys.argv[3], 'w'))
    except Exception:
        # `except Exception` (not a bare except) so that KeyboardInterrupt
        # and SystemExit are not reported as a malformed file.
        print("Your MAF file appears to be malformed.", file=sys.stderr)
        sys.exit()
    allow_partial = False
    if int(sys.argv[4]):
        allow_partial = True
    min_species_per_block = int(sys.argv[5])

    maf_blocks_kept = 0
    for m in maf_reader:
        if species:
            m = m.limit_to_species(species)
        m.remove_all_gap_columns()
        spec_in_block_len = len(maf_utilities.get_species_in_block(m))
        complete_enough = not species or allow_partial or spec_in_block_len == spec_len
        if complete_enough and spec_in_block_len > min_species_per_block:
            maf_writer.write(m)
            maf_blocks_kept += 1

    maf_reader.close()
    maf_writer.close()

    # Guard the join: `species` is falsy when no restriction was requested.
    if species:
        print("Restricted to species: %s." % ", ".join(species))
    else:
        print("Not restricted to species.")
    print("%i MAF blocks have been kept." % maf_blocks_kept)
def main():
    """Filter the blocks of a MAF file with a user-supplied Python script.

    Command-line arguments (popped from ``sys.argv`` in this order):
    filter script, input MAF, output MAF, directory for debug files,
    species option string, minimum block size, maximum block size
    (values below 1 mean "no limit"), minimum species per block, an
    integer "exclude incomplete blocks" flag and, only when no species
    restriction is given, a comma-separated species list whose length is
    used as the expected species count.

    The filter script is executed once per block whose ``text_size`` lies
    within the size bounds.  It sees the block as ``maf_block`` and must
    set ``ret_val`` to True to keep it.  A summary is printed to stdout.
    """
    #Read command line arguments
    try:
        script_file = sys.argv.pop(1)
        maf_file = sys.argv.pop(1)
        out_file = sys.argv.pop(1)
        additional_files_path = sys.argv.pop(1)
        species = maf_utilities.parse_species_option(sys.argv.pop(1))
        min_size = int(sys.argv.pop(1))
        max_size = int(sys.argv.pop(1))
        if max_size < 1:
            max_size = sys.maxint  # NOTE: Python 2 only attribute
        min_species_per_block = int(sys.argv.pop(1))
        exclude_incomplete_blocks = int(sys.argv.pop(1))
        if species:
            num_species = len(species)
        else:
            num_species = len(sys.argv.pop(1).split(','))
    except Exception:
        sys.stderr.write("One or more arguments is missing.\nUsage: maf_filter.py maf_filter_file input_maf output_maf path_to_save_debug species_to_keep\n")
        sys.exit()

    #Open input and output MAF files
    try:
        maf_reader = bx.align.maf.Reader(open(maf_file, 'r'))
        maf_writer = bx.align.maf.Writer(open(out_file, 'w'))
    except Exception:
        sys.stderr.write("Your MAF file appears to be malformed.\n")
        sys.exit()

    #Save script file for debuging/verification info later
    os.mkdir(additional_files_path)
    shutil.copy(script_file, os.path.join(additional_files_path, 'debug.txt'))

    #Loop through blocks, running filter on each
    #'maf_block' and 'ret_val' are used/shared in the provided code file
    #'ret_val' should be set to True if the block is to be kept
    blocks_seen = 0  # 1-based count, so 0 really means "no blocks at all"
    blocks_kept = 0
    for blocks_seen, maf_block in enumerate(maf_reader, 1):
        if min_size <= maf_block.text_size <= max_size:
            local = {'maf_block': maf_block, 'ret_val': False}
            # SECURITY NOTE: this runs arbitrary code from script_file; the
            # script must come from a trusted source.  (execfile is Python 2.)
            execfile(script_file, {}, local)
            if local['ret_val']:
                #Species limiting must be done after filters as filters could be run on non-requested output species
                if species:
                    maf_block = maf_block.limit_to_species(species)
                if len(maf_block.components) >= min_species_per_block and (
                        not exclude_incomplete_blocks or len(maf_block.components) >= num_species):
                    maf_writer.write(maf_block)
                    blocks_kept += 1
    maf_writer.close()
    maf_reader.close()

    # The old test (`i == 0` on a 0-based index) also fired for a file
    # holding exactly one block; counting from 1 separates the two cases.
    if blocks_seen == 0:
        print("Your file contains no valid maf_blocks.")
    else:
        print('Kept %s of %s blocks (%.2f%%).' % (
            blocks_kept, blocks_seen, float(blocks_kept) / float(blocks_seen) * 100.0))
def main():
    """Run a user-supplied filter script over every block of a MAF file.

    Arguments are popped from ``sys.argv`` in this order: filter script,
    input MAF, output MAF, debug directory, species option string,
    minimum size, maximum size (below 1 disables the upper bound),
    minimum species per block, "exclude incomplete blocks" flag and, when
    no species restriction is given, a comma-separated species list used
    only for its length.

    For each block whose ``text_size`` is within bounds the script is
    executed with ``maf_block`` and ``ret_val`` in its local namespace;
    the block is a candidate for output when the script leaves ``ret_val``
    truthy.  A summary line is printed to stdout at the end.
    """
    # Read command line arguments
    try:
        script_file = sys.argv.pop(1)
        maf_file = sys.argv.pop(1)
        out_file = sys.argv.pop(1)
        additional_files_path = sys.argv.pop(1)
        species = maf_utilities.parse_species_option(sys.argv.pop(1))
        min_size = int(sys.argv.pop(1))
        max_size = int(sys.argv.pop(1))
        if max_size < 1:
            max_size = sys.maxint  # NOTE: Python 2 only attribute
        min_species_per_block = int(sys.argv.pop(1))
        exclude_incomplete_blocks = int(sys.argv.pop(1))
        if species:
            num_species = len(species)
        else:
            num_species = len(sys.argv.pop(1).split(","))
    except Exception:
        sys.stderr.write(
            "One or more arguments is missing.\nUsage: maf_filter.py maf_filter_file input_maf output_maf path_to_save_debug species_to_keep\n"
        )
        sys.exit()

    # Open input and output MAF files
    try:
        maf_reader = bx.align.maf.Reader(open(maf_file, "r"))
        maf_writer = bx.align.maf.Writer(open(out_file, "w"))
    except Exception:
        sys.stderr.write("Your MAF file appears to be malformed.\n")
        sys.exit()

    # Save script file for debuging/verification info later
    os.mkdir(additional_files_path)
    shutil.copy(script_file, os.path.join(additional_files_path, "debug.txt"))

    # Loop through blocks, running filter on each
    # 'maf_block' and 'ret_val' are used/shared in the provided code file
    # 'ret_val' should be set to True if the block is to be kept
    total_blocks = 0  # counted from 1 so that 0 means the reader was empty
    blocks_kept = 0
    for total_blocks, maf_block in enumerate(maf_reader, 1):
        if not (min_size <= maf_block.text_size <= max_size):
            continue
        local = {"maf_block": maf_block, "ret_val": False}
        # SECURITY NOTE: executes arbitrary code from script_file; only
        # trusted scripts may be passed in.  (execfile is Python 2 only.)
        execfile(script_file, {}, local)
        if not local["ret_val"]:
            continue
        # Species limiting must be done after filters as filters could be run on non-requested output species
        if species:
            maf_block = maf_block.limit_to_species(species)
        component_count = len(maf_block.components)
        if component_count >= min_species_per_block and (
            not exclude_incomplete_blocks or component_count >= num_species
        ):
            maf_writer.write(maf_block)
            blocks_kept += 1
    maf_writer.close()
    maf_reader.close()

    # Previously a 0-based index was compared with 0, which misreported a
    # single-block file as empty.
    if total_blocks == 0:
        print("Your file contains no valid maf_blocks.")
    else:
        print(
            "Kept %s of %s blocks (%.2f%%)."
            % (blocks_kept, total_blocks, float(blocks_kept) / float(total_blocks) * 100.0)
        )
def __main__():
    """Convert a MAF file to FASTA, emitting one group of records per block.

    Command-line arguments (read from ``sys.argv``): input MAF, output
    FASTA, species option string, and a "partial" mode string.  When the
    mode is ``"partial_disallowed"``, blocks that contain fewer species
    than requested are skipped.  Records of one block are followed by an
    empty line.
    """
    try:
        maf_reader = maf.Reader(open(sys.argv[1]))
    except Exception as e:
        maf_utilities.tool_fail("Error opening input MAF: %s" % e)

    try:
        file_out = open(sys.argv[2], 'w')
    except Exception as e:
        maf_utilities.tool_fail("Error opening file for output: %s" % e)

    try:
        species = maf_utilities.parse_species_option(sys.argv[3])
        num_species = len(species) if species else 0
    except Exception as e:
        maf_utilities.tool_fail("Error determining species value: %s" % e)

    try:
        partial = sys.argv[4]
    except Exception as e:
        maf_utilities.tool_fail("Error determining keep partial value: %s" % e)

    if species:
        print("Restricted to species: %s" % ', '.join(species))
    else:
        print("Not restricted to species.")

    for block_index, alignment in enumerate(maf_reader):
        if species:
            alignment = alignment.limit_to_species(species)
            too_few = len(maf_utilities.get_species_in_block(alignment)) < num_species
            if too_few and partial == "partial_disallowed":
                continue

        # Per-block counter: 0 for the first component of a species,
        # 1 for the second, and so on.
        occurrences = {}
        for comp in alignment.components:
            spec, _chrom = maf_utilities.src_split(comp.src)
            seq_index = occurrences.get(spec, -1) + 1
            occurrences[spec] = seq_index
            header = maf_utilities.get_fasta_header(
                comp,
                {'block_index': block_index, 'species': spec, 'sequence_index': seq_index},
                suffix="%s_%i_%i" % (spec, block_index, seq_index))
            file_out.write("%s\n" % header)
            file_out.write("%s\n" % comp.text)
        file_out.write("\n")

    file_out.close()
def __main__():
    """Convert a MAF file to FASTA with one concatenated record per species.

    Command-line arguments (read from ``sys.argv``): species option
    string, input MAF file, output FASTA file.  When no species are
    requested, the species list is taken from the MAF file itself.  For
    each species the MAF is read from the start; blocks that lack the
    species contribute a run of ``-`` as long as the block.
    """
    try:
        species = maf_utilities.parse_species_option(sys.argv[1])
    except Exception as e:
        maf_utilities.tool_fail("Error determining species value: %s" % e)

    try:
        input_filename = sys.argv[2]
    except Exception as e:
        maf_utilities.tool_fail("Error reading MAF filename: %s" % e)

    try:
        file_out = open(sys.argv[3], 'w')
    except Exception as e:
        maf_utilities.tool_fail("Error opening file for output: %s" % e)

    if species:
        print("Restricted to species: %s" % ', '.join(species))
    else:
        print("Not restricted to species.")
        # No restriction requested: use every species found in the file.
        try:
            species = maf_utilities.get_species_in_maf(input_filename)
        except Exception as e:
            maf_utilities.tool_fail("Error determining species in input MAF: %s" % e)

    for spec in species:
        file_out.write("".join((">", spec, "\n")))
        try:
            reader = maf.Reader(open(input_filename, 'r'))
            for start_block in reader:
                for piece in maf_utilities.iter_blocks_split_by_species(start_block):
                    piece.remove_all_gap_columns()  # remove extra gaps
                    # blocks only have one occurrence of a particular species, so this is safe
                    component = piece.get_component_by_src_start(spec)
                    file_out.write(component.text if component else "-" * piece.text_size)
        except Exception as e:
            maf_utilities.tool_fail("Your MAF file appears to be malformed: %s" % e)
        file_out.write("\n")

    file_out.close()
def __main__():
    """Write every block of a MAF file as a set of FASTA records.

    ``sys.argv`` supplies, in order: the input MAF, the output FASTA, the
    species option string and the partial-block mode.  With mode
    ``"partial_disallowed"``, a block holding fewer species than requested
    is dropped.  The header of each record is built by
    ``maf_utilities.get_fasta_header`` from the block index, the species
    name and the component's per-species sequence index.
    """
    try:
        maf_reader = maf.Reader(open(sys.argv[1]))
    except Exception as e:
        maf_utilities.tool_fail("Error opening input MAF: %s" % e)
    try:
        file_out = open(sys.argv[2], 'w')
    except Exception as e:
        maf_utilities.tool_fail("Error opening file for output: %s" % e)
    try:
        species = maf_utilities.parse_species_option(sys.argv[3])
        num_species = 0
        if species:
            num_species = len(species)
    except Exception as e:
        maf_utilities.tool_fail("Error determining species value: %s" % e)
    try:
        partial = sys.argv[4]
    except Exception as e:
        maf_utilities.tool_fail("Error determining keep partial value: %s" % e)

    if not species:
        print("Not restricted to species.")
    else:
        print("Restricted to species: %s" % ', '.join(species))

    for block_num, block in enumerate(maf_reader):
        if species:
            block = block.limit_to_species(species)
            if len(maf_utilities.get_species_in_block(block)) < num_species and partial == "partial_disallowed":
                continue

        spec_counts = {}
        for component in block.components:
            spec, _chrom = maf_utilities.src_split(component.src)
            # First component of a species gets index 0, later ones count up.
            spec_counts[spec] = spec_counts[spec] + 1 if spec in spec_counts else 0
            occurrence = spec_counts[spec]

            # Insertion order of the header fields is kept explicit.
            header_fields = OrderedDict()
            header_fields['block_index'] = block_num
            header_fields['species'] = spec
            header_fields['sequence_index'] = occurrence

            suffix = "%s_%i_%i" % (spec, block_num, occurrence)
            header = maf_utilities.get_fasta_header(component, header_fields, suffix=suffix)
            file_out.write("%s\n" % header)
            file_out.write("%s\n" % component.text)
        file_out.write("\n")

    file_out.close()
def __main__():
    """Convert a MAF file to FASTA with one concatenated record per species.

    Command-line arguments (read from ``sys.argv``):
        1. species option string, handed to
           ``maf_utilities.parse_species_option``
        2. input MAF file
        3. output FASTA file

    When no species are requested, the list comes from
    ``maf_utilities.get_species_in_maf``.  The MAF is re-read once per
    species; for every species-split block the species' text is appended,
    or a run of ``-`` of the block's length when the species is absent.
    """
    try:
        species = maf_utilities.parse_species_option(sys.argv[1])
    except Exception as e:
        maf_utilities.tool_fail("Error determining species value: %s" % e)
    try:
        input_filename = sys.argv[2]
    except Exception as e:
        maf_utilities.tool_fail("Error reading MAF filename: %s" % e)
    try:
        file_out = open(sys.argv[3], 'w')
    except Exception as e:
        maf_utilities.tool_fail("Error opening file for output: %s" % e)

    try:
        if species:
            print("Restricted to species: %s" % ', '.join(species))
        else:
            print("Not restricted to species.")

        if not species:
            try:
                species = maf_utilities.get_species_in_maf(input_filename)
            except Exception as e:
                maf_utilities.tool_fail("Error determining species in input MAF: %s" % e)

        for spec in species:
            file_out.write(">" + spec + "\n")
            try:
                # One pass over the MAF per species; `with` releases the
                # handle after each pass instead of leaking it.
                with open(input_filename, 'r') as maf_file:
                    for start_block in maf.Reader(maf_file):
                        for block in maf_utilities.iter_blocks_split_by_species(start_block):
                            block.remove_all_gap_columns()  # remove extra gaps
                            # blocks only have one occurrence of a particular species, so this is safe
                            component = block.get_component_by_src_start(spec)
                            if component:
                                file_out.write(component.text)
                            else:
                                file_out.write("-" * block.text_size)
            except Exception as e:
                maf_utilities.tool_fail("Your MAF file appears to be malformed: %s" % e)
            file_out.write("\n")
    finally:
        # Close (and flush) the output even when a failure aborts the run.
        file_out.close()
def __main__():
    """Reverse complement every block of a MAF file.

    Command-line arguments (popped from ``sys.argv`` in order): input MAF
    file, output MAF file, species option string.  When species are given,
    each reverse-complemented block is limited to them before it is
    written.  The number of processed blocks is printed to stdout.
    """
    #Parse Command Line
    input_file = sys.argv.pop(1)
    output_file = sys.argv.pop(1)
    species = maf_utilities.parse_species_option(sys.argv.pop(1))

    try:
        maf_writer = bx.align.maf.Writer(open(output_file, 'w'))
    except Exception:
        # Was `print sys.stderr, "..."`, which printed the stream object to
        # stdout; the message belongs on stderr.
        sys.stderr.write("Unable to open output file\n")
        sys.exit()

    count = 0
    try:
        with open(input_file) as maf_file:
            # Count from 1 so that `count` is the number of blocks, not the
            # index of the last one.
            for count, block in enumerate(bx.align.maf.Reader(maf_file), 1):
                block = block.reverse_complement()
                if species:
                    block = block.limit_to_species(species)
                maf_writer.write(block)
    except Exception:
        sys.stderr.write("Your MAF file appears to be malformed.\n")
        sys.exit()
    finally:
        # Close the writer on both the success and the failure path.
        maf_writer.close()

    print("%i regions were reverse complemented." % count)
def __main__():
    """Reverse complement every block of a MAF file.

    Command-line arguments (popped from ``sys.argv`` in order): input MAF
    file, output MAF file, species option string.  When species are given,
    each reverse-complemented block is limited to them before it is
    written.  The number of processed blocks is printed to stdout.
    """
    # Parse Command Line
    input_file = sys.argv.pop(1)
    output_file = sys.argv.pop(1)
    species = maf_utilities.parse_species_option(sys.argv.pop(1))

    try:
        maf_writer = bx.align.maf.Writer(open(output_file, 'w'))
    except Exception:
        # `print(sys.stderr, "...")` printed the stream's repr to stdout;
        # route the message to stderr instead.
        print("Unable to open output file", file=sys.stderr)
        sys.exit()

    count = 0
    try:
        with open(input_file) as maf_file:
            # start=1 makes `count` the number of blocks processed rather
            # than the index of the last block.
            for count, block in enumerate(bx.align.maf.Reader(maf_file), 1):
                block = block.reverse_complement()
                if species:
                    block = block.limit_to_species(species)
                maf_writer.write(block)
    except Exception:
        print("Your MAF file appears to be malformed.", file=sys.stderr)
        sys.exit()
    finally:
        # Close the writer whether or not reading succeeded.
        maf_writer.close()

    print("%i regions were reverse complemented." % count)
def __main__():
    """Parse the species option given as the first command-line argument.

    Any exception raised while parsing is reported through
    ``maf_utilities.tool_fail``.
    """
    # NOTE(review): this looks like a truncated excerpt - `species` is
    # assigned but never used in the visible code.  Python 2 syntax.
    try:
        species = maf_utilities.parse_species_option(sys.argv[1])
    except Exception, e:
        maf_utilities.tool_fail("Error determining species value: %s" % e)
from bx.align import maf
from galaxy.tools.util import maf_utilities

# Refuse to run on interpreters older than Python 2.4.
# NOTE(review): `sys` is used here but its import is not visible in this
# excerpt - confirm it is imported above.
assert sys.version_info[:2] >= ( 2, 4 )


def __main__():
    """Open the input MAF and output file and read the species options.

    Command-line arguments (read from ``sys.argv``): input MAF, output
    file, species option string, and a "partial" mode string.  Each step
    reports its own failure through ``maf_utilities.tool_fail``.
    """
    # NOTE(review): the visible body stops after the status message and
    # never uses maf_reader, file_out or partial - presumably the rest of
    # the function is missing from this excerpt.
    try:
        maf_reader = maf.Reader( open( sys.argv[1] ) )
    except Exception, e:
        maf_utilities.tool_fail( "Error opening input MAF: %s" % e )
    try:
        file_out = open( sys.argv[2], 'w' )
    except Exception, e:
        maf_utilities.tool_fail( "Error opening file for output: %s" % e )
    try:
        species = maf_utilities.parse_species_option( sys.argv[3] )
        # num_species is 0 when no species restriction was requested.
        if species:
            num_species = len( species )
        else:
            num_species = 0
    except Exception, e:
        maf_utilities.tool_fail( "Error determining species value: %s" % e )
    try:
        partial = sys.argv[4]
    except Exception, e:
        maf_utilities.tool_fail( "Error determining keep partial value: %s" % e )

    # Tell the user on stdout which species restriction is in effect.
    if species:
        print "Restricted to species: %s" % ', '.join( species )
    else:
        print "Not restricted to species."
def __main__():
    """Extract the MAF blocks that overlap each region of an interval file.

    Options come from ``doc_optparse.parse(__doc__)``.  Every block found
    for a region is chopped to the region, optionally split by species and
    limited to the requested species, oriented by the region, optionally
    stripped of all-gap columns, and written to the output MAF.  A summary
    is printed to stdout.
    """
    index = index_filename = None

    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)

    dbkey = options.dbkey or None
    if dbkey in [None, "?"]:
        maf_utilities.tool_fail("You must specify a proper build in order to extract alignments. You can specify your genome build by clicking on the pencil icon associated with your interval file.")

    species = maf_utilities.parse_species_option(options.species)

    # Column options are 1-based on the command line, 0-based internally.
    if options.chromCol:
        chromCol = int(options.chromCol) - 1
    else:
        maf_utilities.tool_fail("Chromosome column not set, click the pencil icon in the history item to set the metadata attributes.")

    if options.startCol:
        startCol = int(options.startCol) - 1
    else:
        maf_utilities.tool_fail("Start column not set, click the pencil icon in the history item to set the metadata attributes.")

    if options.endCol:
        endCol = int(options.endCol) - 1
    else:
        maf_utilities.tool_fail("End column not set, click the pencil icon in the history item to set the metadata attributes.")

    strandCol = int(options.strandCol) - 1 if options.strandCol else -1

    if options.interval_file:
        interval_file = options.interval_file
    else:
        maf_utilities.tool_fail("Input interval file has not been specified.")

    if options.output_file:
        output_file = options.output_file
    else:
        maf_utilities.tool_fail("Output file has not been specified.")

    # Gap-column removal is optional only when blocks are split by species;
    # otherwise it is always performed.
    split_blocks_by_species = bool(
        options.split_blocks_by_species and options.split_blocks_by_species == 'split_blocks_by_species')
    if split_blocks_by_species:
        remove_all_gap_columns = bool(
            options.remove_all_gap_columns and options.remove_all_gap_columns == 'remove_all_gap_columns')
    else:
        remove_all_gap_columns = True
    # Finish parsing command line

    # Open indexed access to MAFs
    if options.mafType:
        index_source = options.indexLocation if options.indexLocation else options.mafIndexFile
        index = maf_utilities.maf_index_by_uid(options.mafType, index_source)
        if index is None:
            maf_utilities.tool_fail("The MAF source specified (%s) appears to be invalid." % (options.mafType))
    elif options.mafFile:
        index, index_filename = maf_utilities.open_or_build_maf_index(options.mafFile, options.mafIndex, species=[dbkey])
        if index is None:
            maf_utilities.tool_fail("Your MAF file appears to be malformed.")
    else:
        maf_utilities.tool_fail("Desired source MAF type has not been specified.")

    # Create MAF writter
    out = bx.align.maf.Writer(open(output_file, "w"))

    # Iterate over input regions
    num_blocks = 0
    num_regions = None
    region_reader = bx.intervals.io.NiceReaderWrapper(
        open(interval_file, 'r'),
        chrom_col=chromCol,
        start_col=startCol,
        end_col=endCol,
        strand_col=strandCol,
        fix_strand=True,
        return_header=False,
        return_comments=False)
    for num_regions, region in enumerate(region_reader):
        src = maf_utilities.src_merge(dbkey, region.chrom)
        for found_block in index.get_as_iterator(src, region.start, region.end):
            if split_blocks_by_species:
                candidates = [
                    piece
                    for piece in maf_utilities.iter_blocks_split_by_species(found_block)
                    if maf_utilities.component_overlaps_region(piece.get_component_by_src_start(dbkey), region)
                ]
            else:
                candidates = [found_block]

            for candidate in candidates:
                chopped = maf_utilities.chop_block_by_region(candidate, src, region)
                if chopped is None:
                    continue
                if species is not None:
                    chopped = chopped.limit_to_species(species)
                chopped = maf_utilities.orient_block_by_region(chopped, src, region)
                if remove_all_gap_columns:
                    chopped.remove_all_gap_columns()
                out.write(chopped)
                num_blocks += 1

    # Close output MAF
    out.close()

    # remove index file if created during run
    maf_utilities.remove_temp_index_file(index_filename)

    if num_blocks:
        print("%i MAF blocks extracted for %i regions." % (num_blocks, (num_regions + 1)))
    elif num_regions is not None:
        print("No MAF blocks could be extracted for %i regions." % (num_regions + 1))
    else:
        print("No valid regions have been provided.")
def __main__():
    """Write one merged FASTA alignment per input interval or gene BED line.

    Options come from ``doc_optparse.parse(__doc__)``.  For every input
    line an alignment object is built from the MAF index, and one FASTA
    record per species is written to the output file, reverse complemented
    when the line's strand is ``"-"``.  Lines that fail are reported on
    stdout and skipped; a summary is printed at the end.
    """
    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)
    mincols = 0
    strand_col = -1

    if options.dbkey:
        primary_species = options.dbkey
    else:
        primary_species = None
    if primary_species in [None, "?", "None"]:
        stop_err("You must specify a proper build in order to extract alignments. You can specify your genome build by clicking on the pencil icon associated with your interval file.")

    # The primary species is written first, and only when it is among the
    # requested species (or when no species were requested at all).
    include_primary = True
    secondary_species = maf_utilities.parse_species_option(options.species)
    if secondary_species:
        species = list(secondary_species)  # make copy of species list
        if primary_species in secondary_species:
            secondary_species.remove(primary_species)
        else:
            include_primary = False
    else:
        species = None

    if options.interval_file:
        interval_file = options.interval_file
    else:
        stop_err("Input interval file has not been specified.")

    if options.output_file:
        output_file = options.output_file
    else:
        stop_err("Output file has not been specified.")

    # Column options are only read for plain interval input; they are
    # 1-based on the command line and 0-based internally.
    if not options.geneBED:
        if options.chromCol:
            chr_col = int(options.chromCol) - 1
        else:
            stop_err("Chromosome column not set, click the pencil icon in the history item to set the metadata attributes.")

        if options.startCol:
            start_col = int(options.startCol) - 1
        else:
            stop_err("Start column not set, click the pencil icon in the history item to set the metadata attributes.")

        if options.endCol:
            end_col = int(options.endCol) - 1
        else:
            stop_err("End column not set, click the pencil icon in the history item to set the metadata attributes.")

        if options.strandCol:
            strand_col = int(options.strandCol) - 1

    mafIndexFile = "%s/maf_index.loc" % options.mafIndexFileDir

    # Defaults to True; only an explicit 'false' (any case) disables it.
    overwrite_with_gaps = True
    if options.overwrite_with_gaps and options.overwrite_with_gaps.lower() == 'false':
        overwrite_with_gaps = False

    # Finish parsing command line

    # get index for mafs based on type
    index = index_filename = None
    # using specified uid for locally cached
    if options.mafSourceType.lower() in ["cached"]:
        index = maf_utilities.maf_index_by_uid(options.mafSource, mafIndexFile)
        if index is None:
            stop_err("The MAF source specified (%s) appears to be invalid." % (options.mafSource))
    elif options.mafSourceType.lower() in ["user"]:
        # index maf for use here, need to remove index_file when finished
        index, index_filename = maf_utilities.open_or_build_maf_index(options.mafSource, options.mafIndex, species=[primary_species])
        if index is None:
            stop_err("Your MAF file appears to be malformed.")
    else:
        stop_err("Invalid MAF source type specified.")

    # open output file
    output = open(output_file, "w")

    # Both enumerators yield (line_count, line) pairs.
    if options.geneBED:
        region_enumerator = maf_utilities.line_enumerator(open(interval_file, "r").readlines())
    else:
        region_enumerator = enumerate(bx.intervals.io.NiceReaderWrapper(
            open(interval_file, 'r'), chrom_col=chr_col, start_col=start_col, end_col=end_col, strand_col=strand_col, fix_strand=True, return_header=False, return_comments=False))

    # Step through intervals
    regions_extracted = 0
    line_count = 0
    for line_count, line in region_enumerator:
        # The outer try reports any failure while writing the records; the
        # inner ones report failures while building the alignment.
        try:
            if options.geneBED:  # Process as Gene BED
                try:
                    starts, ends, fields = maf_utilities.get_starts_ends_fields_from_gene_bed(line)
                    # create spliced alignment object
                    alignment = maf_utilities.get_spliced_region_alignment(
                        index, primary_species, fields[0], starts, ends,
                        strand='+', species=species, mincols=mincols,
                        overwrite_with_gaps=overwrite_with_gaps)
                    primary_name = secondary_name = fields[3]
                    alignment_strand = fields[5]
                except Exception as e:
                    print("Error loading exon positions from input line %i: %s" % (line_count, e))
                    continue
            else:  # Process as standard intervals
                try:
                    # create spliced alignment object
                    alignment = maf_utilities.get_region_alignment(
                        index, primary_species, line.chrom, line.start,
                        line.end, strand='+', species=species, mincols=mincols,
                        overwrite_with_gaps=overwrite_with_gaps)
                    primary_name = "%s(%s):%s-%s" % (line.chrom, line.strand, line.start, line.end)
                    secondary_name = ""
                    alignment_strand = line.strand
                except Exception as e:
                    print("Error loading region positions from input line %i: %s" % (line_count, e))
                    continue

            # Write alignment to output file
            # Output primary species first, if requested
            if include_primary:
                output.write(">%s.%s\n" % (primary_species, primary_name))
                if alignment_strand == "-":
                    output.write(alignment.get_sequence_reverse_complement(primary_species))
                else:
                    output.write(alignment.get_sequence(primary_species))
                output.write("\n")
            # Output all remaining species; when no species were requested,
            # take every species of the alignment except the primary one.
            for spec in secondary_species or alignment.get_species_names(skip=primary_species):
                if secondary_name:
                    output.write(">%s.%s\n" % (spec, secondary_name))
                else:
                    output.write(">%s\n" % (spec))
                if alignment_strand == "-":
                    output.write(alignment.get_sequence_reverse_complement(spec))
                else:
                    output.write(alignment.get_sequence(spec))
                output.write("\n")

            # An empty line follows the records of each region.
            output.write("\n")

            regions_extracted += 1

        except Exception as e:
            print("Unexpected error from input line %i: %s" % (line_count, e))
            continue

    # close output file
    output.close()

    # remove index file if created during run
    maf_utilities.remove_temp_index_file(index_filename)

    # Print message about success for user
    if regions_extracted > 0:
        print("%i regions were processed successfully." % (regions_extracted))
    else:
        print("No regions were processed successfully.")
        if line_count > 0 and options.geneBED:
            print("This tool requires your input file to conform to the 12 column BED standard.")
def __main__(): # Parse Command Line options, args = doc_optparse.parse(__doc__) mincols = 0 strand_col = -1 if options.dbkey: primary_species = options.dbkey else: primary_species = None if primary_species in [None, "?", "None"]: stop_err( "You must specify a proper build in order to extract alignments. You can specify your genome build by clicking on the pencil icon associated with your interval file." ) include_primary = True secondary_species = maf_utilities.parse_species_option(options.species) if secondary_species: species = list(secondary_species) # make copy of species list if primary_species in secondary_species: secondary_species.remove(primary_species) else: include_primary = False else: species = None if options.interval_file: interval_file = options.interval_file else: stop_err("Input interval file has not been specified.") if options.output_file: output_file = options.output_file else: stop_err("Output file has not been specified.") if not options.geneBED: if options.chromCol: chr_col = int(options.chromCol) - 1 else: stop_err( "Chromosome column not set, click the pencil icon in the history item to set the metadata attributes." ) if options.startCol: start_col = int(options.startCol) - 1 else: stop_err("Start column not set, click the pencil icon in the history item to set the metadata attributes.") if options.endCol: end_col = int(options.endCol) - 1 else: stop_err("End column not set, click the pencil icon in the history item to set the metadata attributes.") if options.strandCol: strand_col = int(options.strandCol) - 1 mafIndexFile = "%s/maf_index.loc" % options.mafIndexFileDir # Finish parsing command line # get index for mafs based on type index = index_filename = None # using specified uid for locally cached if options.mafSourceType.lower() in ["cached"]: index = maf_utilities.maf_index_by_uid(options.mafSource, mafIndexFile) if index is None: stop_err("The MAF source specified (%s) appears to be invalid." 
% (options.mafSource)) elif options.mafSourceType.lower() in ["user"]: # index maf for use here, need to remove index_file when finished index, index_filename = maf_utilities.open_or_build_maf_index( options.mafSource, options.mafIndex, species=[primary_species] ) if index is None: stop_err("Your MAF file appears to be malformed.") else: stop_err("Invalid MAF source type specified.") # open output file output = open(output_file, "w") if options.geneBED: region_enumerator = maf_utilities.line_enumerator(open(interval_file, "r").readlines()) else: region_enumerator = enumerate( bx.intervals.io.NiceReaderWrapper( open(interval_file, "r"), chrom_col=chr_col, start_col=start_col, end_col=end_col, strand_col=strand_col, fix_strand=True, return_header=False, return_comments=False, ) ) # Step through intervals regions_extracted = 0 line_count = 0 for line_count, line in region_enumerator: try: if options.geneBED: # Process as Gene BED try: starts, ends, fields = maf_utilities.get_starts_ends_fields_from_gene_bed(line) # create spliced alignment object alignment = maf_utilities.get_spliced_region_alignment( index, primary_species, fields[0], starts, ends, strand="+", species=species, mincols=mincols ) primary_name = secondary_name = fields[3] alignment_strand = fields[5] except Exception, e: print "Error loading exon positions from input line %i: %s" % (line_count, e) continue else: # Process as standard intervals try: # create spliced alignment object alignment = maf_utilities.get_region_alignment( index, primary_species, line.chrom, line.start, line.end, strand="+", species=species, mincols=mincols, ) primary_name = "%s(%s):%s-%s" % (line.chrom, line.strand, line.start, line.end) secondary_name = "" alignment_strand = line.strand except Exception, e: print "Error loading region positions from input line %i: %s" % (line_count, e) continue
def __main__():
    """Extract the chopped MAF blocks for every region of an interval file.

    Options come from ``doc_optparse.parse(__doc__)``.  A missing or
    invalid option writes a message to stderr and exits.  Blocks returned
    by ``maf_utilities.get_chopped_blocks_for_region`` for each region are
    written to the output MAF, and a summary is printed to stdout.
    """
    def _abort(message):
        # Report a problem on stderr and stop the tool.
        sys.stderr.write(message + "\n")
        sys.exit()

    index = index_filename = None
    mincols = 0

    #Parse Command Line
    options, args = doc_optparse.parse(__doc__)

    dbkey = options.dbkey or None
    if dbkey in [None, "?"]:
        _abort("You must specify a proper build in order to extract alignments. You can specify your genome build by clicking on the pencil icon associated with your interval file.")

    species = maf_utilities.parse_species_option(options.species)

    # Column options are 1-based on the command line, 0-based internally.
    if not options.chromCol:
        _abort("Chromosome column not set, click the pencil icon in the history item to set the metadata attributes.")
    chromCol = int(options.chromCol) - 1

    if not options.startCol:
        _abort("Start column not set, click the pencil icon in the history item to set the metadata attributes.")
    startCol = int(options.startCol) - 1

    if not options.endCol:
        _abort("End column not set, click the pencil icon in the history item to set the metadata attributes.")
    endCol = int(options.endCol) - 1

    strandCol = int(options.strandCol) - 1 if options.strandCol else -1

    if not options.interval_file:
        _abort("Input interval file has not been specified.")
    interval_file = options.interval_file

    if not options.output_file:
        _abort("Output file has not been specified.")
    output_file = options.output_file
    #Finish parsing command line

    #Open indexed access to MAFs
    if options.mafType:
        index_source = options.indexLocation if options.indexLocation else options.mafIndexFile
        index = maf_utilities.maf_index_by_uid(options.mafType, index_source)
        if index is None:
            _abort("The MAF source specified (%s) appears to be invalid." % (options.mafType))
    elif options.mafFile:
        index, index_filename = maf_utilities.open_or_build_maf_index(options.mafFile, options.mafIndex, species=[dbkey])
        if index is None:
            _abort("Your MAF file appears to be malformed.")
    else:
        _abort("Desired source MAF type has not been specified.")

    #Create MAF writter
    out = bx.align.maf.Writer(open(output_file, "w"))

    #Iterate over input regions
    num_blocks = 0
    num_regions = None
    region_reader = bx.intervals.io.NiceReaderWrapper(
        open(interval_file, 'r'),
        chrom_col=chromCol,
        start_col=startCol,
        end_col=endCol,
        strand_col=strandCol,
        fix_strand=True,
        return_header=False,
        return_comments=False)
    for num_regions, region in enumerate(region_reader):
        src = "%s.%s" % (dbkey, region.chrom)
        for chopped in maf_utilities.get_chopped_blocks_for_region(index, src, region, species, mincols):
            out.write(chopped)
            num_blocks += 1

    #Close output MAF
    out.close()

    #remove index file if created during run
    maf_utilities.remove_temp_index_file(index_filename)

    if num_blocks:
        print("%i MAF blocks extracted for %i regions." % (num_blocks, (num_regions + 1)))
    elif num_regions is not None:
        print("No MAF blocks could be extracted for %i regions." % (num_regions + 1))
    else:
        print("No valid regions have been provided.")
def __main__():
    """Build an alignment for each input region or gene BED line.

    Options come from ``doc_optparse.parse(__doc__)``. ``options.dbkey`` is
    the primary species; ``options.species`` is parsed into the species list
    handed to the alignment helpers. Depending on ``options.geneBED`` each
    input line is processed either as a Gene BED record (spliced alignment)
    or as a plain interval (region alignment).

    NOTE(review): only the start of this function is visible here. The outer
    ``try:`` inside the main loop has no handler in this text, and the opened
    ``output`` file is never written to in the visible part. Confirm against
    the complete file before relying on this description.
    """
    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)
    mincols = 0
    # -1 is the strand column value used when no strand column is given.
    strand_col = -1
    if options.dbkey:
        primary_species = options.dbkey
    else:
        primary_species = None
    if primary_species in [None, "?", "None"]:
        stop_err(
            "You must specify a proper build in order to extract alignments. You can specify your genome build by clicking on the pencil icon associated with your interval file."
        )
    # include_primary is only assigned in the visible code, never read.
    include_primary = True
    secondary_species = maf_utilities.parse_species_option(options.species)
    if secondary_species:
        species = list(secondary_species)  # make copy of species list
        # secondary_species ends up without the primary species; the copy in
        # `species` keeps the full parsed list.
        if primary_species in secondary_species:
            secondary_species.remove(primary_species)
        else:
            include_primary = False
    else:
        species = None
    if options.interval_file:
        interval_file = options.interval_file
    else:
        stop_err("Input interval file has not been specified.")
    if options.output_file:
        output_file = options.output_file
    else:
        stop_err("Output file has not been specified.")
    # Column numbers are only read when the input is not Gene BED; they are
    # 1-based on the command line and converted to 0-based here.
    if not options.geneBED:
        if options.chromCol:
            chr_col = int(options.chromCol) - 1
        else:
            stop_err(
                "Chromosome column not set, click the pencil icon in the history item to set the metadata attributes."
            )
        if options.startCol:
            start_col = int(options.startCol) - 1
        else:
            stop_err(
                "Start column not set, click the pencil icon in the history item to set the metadata attributes."
            )
        if options.endCol:
            end_col = int(options.endCol) - 1
        else:
            stop_err(
                "End column not set, click the pencil icon in the history item to set the metadata attributes."
            )
        if options.strandCol:
            strand_col = int(options.strandCol) - 1
    mafIndexFile = "%s/maf_index.loc" % options.mafIndexFileDir
    # Defaults to True; only the literal string 'false' (any case) disables it.
    overwrite_with_gaps = True
    if options.overwrite_with_gaps and options.overwrite_with_gaps.lower() == 'false':
        overwrite_with_gaps = False
    # Finish parsing command line
    # get index for mafs based on type
    index = index_filename = None
    # using specified uid for locally cached
    if options.mafSourceType.lower() in ["cached"]:
        index = maf_utilities.maf_index_by_uid(options.mafSource, mafIndexFile)
        if index is None:
            stop_err("The MAF source specified (%s) appears to be invalid." % (options.mafSource))
    elif options.mafSourceType.lower() in ["user"]:
        # index maf for use here, need to remove index_file when finished
        index, index_filename = maf_utilities.open_or_build_maf_index(
            options.mafSource, options.mafIndex, species=[primary_species])
        if index is None:
            stop_err("Your MAF file appears to be malformed.")
    else:
        stop_err("Invalid MAF source type specified.")
    # open output file
    output = open(output_file, "w")
    # Both branches produce (line number, item) pairs for the loop below:
    # raw text lines for Gene BED, parsed interval records otherwise.
    if options.geneBED:
        region_enumerator = maf_utilities.line_enumerator(
            open(interval_file, "r").readlines())
    else:
        region_enumerator = enumerate(
            bx.intervals.io.NiceReaderWrapper(open(interval_file, 'r'),
                                              chrom_col=chr_col,
                                              start_col=start_col,
                                              end_col=end_col,
                                              strand_col=strand_col,
                                              fix_strand=True,
                                              return_header=False,
                                              return_comments=False))
    # Step through intervals
    # regions_extracted is initialised here but not updated in the visible code.
    regions_extracted = 0
    line_count = 0
    for line_count, line in region_enumerator:
        try:
            if options.geneBED:  # Process as Gene BED
                try:
                    starts, ends, fields = maf_utilities.get_starts_ends_fields_from_gene_bed(
                        line)
                    # create spliced alignment object
                    # The alignment is always requested on '+'; the record's
                    # own strand is kept separately in alignment_strand.
                    alignment = maf_utilities.get_spliced_region_alignment(
                        index,
                        primary_species,
                        fields[0],
                        starts,
                        ends,
                        strand='+',
                        species=species,
                        mincols=mincols,
                        overwrite_with_gaps=overwrite_with_gaps)
                    primary_name = secondary_name = fields[3]
                    alignment_strand = fields[5]
                except Exception, e:
                    # A bad line is reported on stdout and skipped.
                    print "Error loading exon positions from input line %i: %s" % (
                        line_count, e)
                    continue
            else:  # Process as standard intervals
                try:
                    # create spliced alignment object
                    alignment = maf_utilities.get_region_alignment(
                        index,
                        primary_species,
                        line.chrom,
                        line.start,
                        line.end,
                        strand='+',
                        species=species,
                        mincols=mincols,
                        overwrite_with_gaps=overwrite_with_gaps)
                    primary_name = "%s(%s):%s-%s" % (line.chrom, line.strand,
                                                     line.start, line.end)
                    secondary_name = ""
                    alignment_strand = line.strand
                except Exception, e:
                    # A bad line is reported on stdout and skipped.
                    print "Error loading region positions from input line %i: %s" % (
                        line_count, e)
                    continue
        # NOTE(review): the outer `try:` above has no except/finally clause in
        # the visible text; the remainder of the loop body appears to be
        # missing. Confirm against the full file.
def __main__():
    """Extract the MAF blocks overlapping each region of an interval file.

    Options are read with ``doc_optparse.parse(__doc__)``. A genome build
    (``dbkey``), the chromosome/start/end column numbers (1-based on the
    command line), the interval file, the output file and a MAF source
    (either ``mafType`` or ``mafFile``) are required; a missing one is
    reported through ``maf_utilities.tool_fail``.

    For each region, every indexed block overlapping it is optionally split
    by species, chopped to the region, optionally limited to the requested
    species, oriented by the region and written to the output MAF. A summary
    line is printed to stdout at the end.
    """
    index = index_filename = None

    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)

    if options.dbkey:
        dbkey = options.dbkey
    else:
        dbkey = None
    if dbkey in [None, "?"]:
        maf_utilities.tool_fail(
            "You must specify a proper build in order to extract alignments. You can specify your genome build by clicking on the pencil icon associated with your interval file."
        )

    species = maf_utilities.parse_species_option(options.species)

    # Column options are 1-based on the command line; convert to 0-based.
    if options.chromCol:
        chromCol = int(options.chromCol) - 1
    else:
        maf_utilities.tool_fail(
            "Chromosome column not set, click the pencil icon in the history item to set the metadata attributes."
        )

    if options.startCol:
        startCol = int(options.startCol) - 1
    else:
        maf_utilities.tool_fail(
            "Start column not set, click the pencil icon in the history item to set the metadata attributes."
        )

    if options.endCol:
        endCol = int(options.endCol) - 1
    else:
        maf_utilities.tool_fail(
            "End column not set, click the pencil icon in the history item to set the metadata attributes."
        )

    # The strand column is optional; -1 is passed on when it is not set.
    if options.strandCol:
        strandCol = int(options.strandCol) - 1
    else:
        strandCol = -1

    if options.interval_file:
        interval_file = options.interval_file
    else:
        maf_utilities.tool_fail("Input interval file has not been specified.")

    if options.output_file:
        output_file = options.output_file
    else:
        maf_utilities.tool_fail("Output file has not been specified.")

    # Gap-column removal is optional only when blocks are split by species;
    # without splitting, all-gap columns are always removed.
    split_blocks_by_species = remove_all_gap_columns = False
    if options.split_blocks_by_species and options.split_blocks_by_species == 'split_blocks_by_species':
        split_blocks_by_species = True
        if options.remove_all_gap_columns and options.remove_all_gap_columns == 'remove_all_gap_columns':
            remove_all_gap_columns = True
    else:
        remove_all_gap_columns = True
    # Finish parsing command line

    # Open indexed access to MAFs
    if options.mafType:
        if options.indexLocation:
            index = maf_utilities.maf_index_by_uid(options.mafType, options.indexLocation)
        else:
            index = maf_utilities.maf_index_by_uid(options.mafType, options.mafIndexFile)
        if index is None:
            maf_utilities.tool_fail(
                "The MAF source specified (%s) appears to be invalid." % (options.mafType))
    elif options.mafFile:
        index, index_filename = maf_utilities.open_or_build_maf_index(
            options.mafFile, options.mafIndex, species=[dbkey])
        if index is None:
            maf_utilities.tool_fail("Your MAF file appears to be malformed.")
    else:
        maf_utilities.tool_fail(
            "Desired source MAF type has not been specified.")

    num_blocks = 0
    num_regions = None
    try:
        # Create MAF writer
        out = bx.align.maf.Writer(open(output_file, "w"))
        try:
            # Iterate over input regions; the interval file is closed as
            # soon as the loop finishes or fails.
            with open(interval_file) as interval_handle:
                regions = bx.intervals.io.NiceReaderWrapper(
                    interval_handle,
                    chrom_col=chromCol,
                    start_col=startCol,
                    end_col=endCol,
                    strand_col=strandCol,
                    fix_strand=True,
                    return_header=False,
                    return_comments=False)
                for num_regions, region in enumerate(regions):  # noqa: B007
                    src = maf_utilities.src_merge(dbkey, region.chrom)
                    for indexed_block in index.get_as_iterator(src, region.start, region.end):
                        if split_blocks_by_species:
                            # Keep only the split blocks whose dbkey
                            # component still overlaps the region.
                            candidates = [
                                new_block
                                for new_block in maf_utilities.iter_blocks_split_by_species(indexed_block)
                                if maf_utilities.component_overlaps_region(
                                    new_block.get_component_by_src_start(dbkey), region)
                            ]
                        else:
                            candidates = [indexed_block]
                        for candidate in candidates:
                            chopped = maf_utilities.chop_block_by_region(candidate, src, region)
                            if chopped is None:
                                continue
                            if species is not None:
                                chopped = chopped.limit_to_species(species)
                            chopped = maf_utilities.orient_block_by_region(chopped, src, region)
                            if remove_all_gap_columns:
                                chopped.remove_all_gap_columns()
                            out.write(chopped)
                            num_blocks += 1
        finally:
            # Close output MAF even when extraction fails part-way.
            out.close()
    finally:
        # remove index file if created during run
        maf_utilities.remove_temp_index_file(index_filename)

    if num_blocks:
        print("%i MAF blocks extracted for %i regions." % (num_blocks, (num_regions + 1)))
    elif num_regions is not None:
        print("No MAF blocks could be extracted for %i regions." % (num_regions + 1))
    else:
        print("No valid regions have been provided.")