def __main__():
    """Split a MAF file into one tab-separated interval file per species.

    Command-line arguments (read from ``sys.argv``):
        1. path of the input MAF file
        2. path of the output file written for the primary species
        3. output id, embedded in the names of the additional output files
        4. directory in which the additional output files are created
        5. name of the primary species
        6. comma-separated species to write output for; if the list
           contains "None" only the primary species is written
        7. comma-separated list of all species; each one becomes a
           sequence column (sorted by name) in every output file
        8. "partial_disallowed" to skip blocks that have fewer components
           than there are species
        9. "remove_gaps" to strip '-' characters from the sequence text

    Raises:
        AssertionError: if the component returned for a species reports a
            different species name in its ``src``.
    """
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    output_id = sys.argv[3]
    # where to store files that become additional output
    database_tmp_dir = sys.argv[4]
    primary_spec = sys.argv[5]
    species = sys.argv[6].split(',')
    all_species = sys.argv[7].split(',')
    partial = sys.argv[8]
    keep_gaps = sys.argv[9]

    if "None" in species:
        species = []
    if primary_spec not in species:
        species.append(primary_spec)
    if primary_spec not in all_species:
        all_species.append(primary_spec)
    all_species.sort()

    header = '#chrom\tstart\tend\tstrand\tscore\tname\t%s\n' % ('\t'.join(all_species))
    num_species = len(all_species)

    out_files = {}
    file_in = None
    try:
        for spec in species:
            if spec == primary_spec:
                path = output_filename
            else:
                path = os.path.join(database_tmp_dir, 'primary_%s_%s_visible_interval_%s' % (output_id, spec, spec))
            # Text mode: everything written below is str, which a binary-mode
            # handle rejects on Python 3.
            out_files[spec] = open(path, 'w+')
            out_files[spec].write(header)

        file_in = open(input_filename, 'r')
        maf_reader = maf.Reader(file_in)
        for i, m in enumerate(maf_reader):
            for j, block in enumerate(maf_utilities.iter_blocks_split_by_species(m)):
                if len(block.components) < num_species and partial == "partial_disallowed":
                    continue
                # Map species name -> sequence text for this block.
                text_by_species = {}
                for component in block.components:
                    comp_spec, chrom = maf_utilities.src_split(component.src)
                    if keep_gaps == 'remove_gaps':
                        text_by_species[comp_spec] = component.text.replace('-', '')
                    else:
                        text_by_species[comp_spec] = component.text
                # One column per known species; empty when the block lacks it.
                sequences = '\t'.join([text_by_species.get(name, '') for name in all_species])
                for spec in species:
                    c = block.get_component_by_src_start(spec)
                    if c is None:
                        continue
                    spec2, chrom = maf_utilities.src_split(c.src)
                    # Explicit raise instead of ``assert`` so the check
                    # survives ``python -O``; exception type is unchanged.
                    if spec2 != spec:
                        raise AssertionError('Species name inconsistancy found in component: %s != %s' % (spec, spec2))
                    out_files[spec].write("%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (chrom, c.forward_strand_start, c.forward_strand_end, c.strand, m.score, "%s_%s_%s" % (spec, i, j), sequences))
    finally:
        # Release every handle even when reading or writing fails part-way.
        if file_in is not None:
            file_in.close()
        for file_out in out_files.values():
            file_out.close()
def __main__():
    """Cross-check a MAF index location file against the MAF files it lists.

    ``sys.argv[1]`` is the path of the location file. Lines starting with
    '#' are skipped. Every other line must hold five tab-separated fields:
    display name, uid, comma-separated species the entry claims to be
    indexed for, comma-separated species it claims to contain, and
    comma-separated MAF file paths.

    For each entry the species found in the MAF indexes and in the MAF
    blocks are collected, and every disagreement with the location file is
    printed. Any exception raised while handling a line is reported as
    "Line <n> is invalid" and processing continues with the next line.
    """
    index_location_file = sys.argv[1]
    # ``with`` closes the location file handle, which was previously leaked.
    with open(index_location_file) as loc_file:
        for i, line in enumerate(loc_file):
            try:
                if line.startswith('#'):
                    continue
                display_name, uid, indexed_for_species, species_exist, maf_files = line.rstrip().split('\t')
                indexed_for_species = indexed_for_species.split(',')
                species_exist = species_exist.split(',')
                maf_files = maf_files.split(',')
                # Lists (not sets) so mismatches are reported in discovery order.
                species_indexed_in_maf = []
                species_found_in_maf = []
                for maf_file in maf_files:
                    indexed_maf = bx.align.maf.MAFIndexedAccess(maf_file, keep_open=True, parse_e_rows=False)
                    for key in indexed_maf.indexes.indexes.keys():
                        spec = maf_utilities.src_split(key)[0]
                        if spec not in species_indexed_in_maf:
                            species_indexed_in_maf.append(spec)
                    while True:  # reading entire maf set will take some time
                        block = indexed_maf.read_at_current_offset(indexed_maf.f)
                        if block is None:
                            break
                        for comp in block.components:
                            spec = maf_utilities.src_split(comp.src)[0]
                            if spec not in species_found_in_maf:
                                species_found_in_maf.append(spec)
                # indexed species
                for spec in indexed_for_species:
                    if spec not in species_indexed_in_maf:
                        print("Line %i, %s claims to be indexed for %s, but indexes do not exist." % (i, uid, spec))
                for spec in species_indexed_in_maf:
                    if spec not in indexed_for_species:
                        print("Line %i, %s is indexed for %s, but is not listed in loc file." % (i, uid, spec))
                # existing species
                for spec in species_exist:
                    if spec not in species_found_in_maf:
                        print("Line %i, %s claims to have blocks for %s, but was not found in MAF files." % (i, uid, spec))
                for spec in species_found_in_maf:
                    if spec not in species_exist:
                        print("Line %i, %s contains %s, but is not listed in loc file." % (i, uid, spec))
            except Exception as e:
                # Best-effort checker: report the bad line and keep going.
                print("Line %i is invalid: %s" % (i, e))
def __main__():
    """Validate the entries of a MAF index location file.

    The location file path is taken from ``sys.argv[1]``. Comment lines
    (starting with '#') are ignored; all other lines are split on tabs
    into display name, uid, indexed species, existing species and MAF
    files, the last three being comma-separated lists.

    The species present in the MAF indexes and in the MAF blocks are
    gathered for each entry and compared with what the location file
    claims; each mismatch is printed. A line that raises any exception is
    reported as invalid and the next line is processed.
    """
    index_location_file = sys.argv[1]
    # Context manager so the location file is closed when the scan ends.
    with open(index_location_file) as loc_handle:
        for i, line in enumerate(loc_handle):
            try:
                if line.startswith('#'):
                    continue
                display_name, uid, indexed_for_species, species_exist, maf_files = line.rstrip().split('\t')
                indexed_for_species = indexed_for_species.split(',')
                species_exist = species_exist.split(',')
                maf_files = maf_files.split(',')
                # Insertion-ordered so the report order is stable.
                species_indexed_in_maf = []
                species_found_in_maf = []
                for maf_file in maf_files:
                    indexed_maf = bx.align.maf.MAFIndexedAccess(maf_file, keep_open=True, parse_e_rows=False)
                    for key in indexed_maf.indexes.indexes.keys():
                        spec = maf_utilities.src_split(key)[0]
                        if spec not in species_indexed_in_maf:
                            species_indexed_in_maf.append(spec)
                    # reading entire maf set will take some time
                    while True:
                        block = indexed_maf.read_at_current_offset(indexed_maf.f)
                        if block is None:
                            break
                        for comp in block.components:
                            spec = maf_utilities.src_split(comp.src)[0]
                            if spec not in species_found_in_maf:
                                species_found_in_maf.append(spec)
                # indexed species
                # print(...) with one pre-formatted argument prints the same
                # text under both Python 2 and Python 3.
                for spec in indexed_for_species:
                    if spec not in species_indexed_in_maf:
                        print("Line %i, %s claims to be indexed for %s, but indexes do not exist." % (i, uid, spec))
                for spec in species_indexed_in_maf:
                    if spec not in indexed_for_species:
                        print("Line %i, %s is indexed for %s, but is not listed in loc file." % (i, uid, spec))
                # existing species
                for spec in species_exist:
                    if spec not in species_found_in_maf:
                        print("Line %i, %s claims to have blocks for %s, but was not found in MAF files." % (i, uid, spec))
                for spec in species_found_in_maf:
                    if spec not in species_exist:
                        print("Line %i, %s contains %s, but is not listed in loc file." % (i, uid, spec))
            except Exception as e:
                # Deliberately broad: one bad entry must not stop the scan.
                print("Line %i is invalid: %s" % (i, e))
def __main__():
    """Convert a MAF file to FASTA, optionally restricted to given species.

    Command-line arguments (read from ``sys.argv``):
        1. path of the input MAF file
        2. path of the FASTA output file
        3. species option, parsed by ``maf_utilities.parse_species_option``
        4. "partial_disallowed" to skip blocks that contain fewer species
           than were requested

    One FASTA record is written per component, followed by an empty line
    after each block. Setup failures are passed to
    ``maf_utilities.tool_fail``.
    """
    # NOTE(review): tool_fail is presumed to terminate the process; the code
    # below relies on the names bound inside each ``try`` - confirm.
    try:
        file_in = open(sys.argv[1])
        maf_reader = maf.Reader(file_in)
    except Exception as e:
        maf_utilities.tool_fail("Error opening input MAF: %s" % e)
    try:
        file_out = open(sys.argv[2], 'w')
    except Exception as e:
        maf_utilities.tool_fail("Error opening file for output: %s" % e)
    try:
        species = maf_utilities.parse_species_option(sys.argv[3])
        num_species = len(species) if species else 0
    except Exception as e:
        maf_utilities.tool_fail("Error determining species value: %s" % e)
    try:
        partial = sys.argv[4]
    except Exception as e:
        maf_utilities.tool_fail("Error determining keep partial value: %s" % e)

    if species:
        print("Restricted to species: %s" % ', '.join(species))
    else:
        print("Not restricted to species.")

    try:
        for block_num, block in enumerate(maf_reader):
            if species:
                block = block.limit_to_species(species)
                # With no species restriction num_species is 0, so this
                # check can only ever fire inside this branch.
                if len(maf_utilities.get_species_in_block(block)) < num_species and partial == "partial_disallowed":
                    continue
            # Per-block, zero-based occurrence index of each species.
            spec_counts = {}
            for component in block.components:
                spec, chrom = maf_utilities.src_split(component.src)
                spec_counts[spec] = spec_counts.get(spec, -1) + 1
                file_out.write("%s\n" % maf_utilities.get_fasta_header(
                    component,
                    {
                        'block_index': block_num,
                        'species': spec,
                        'sequence_index': spec_counts[spec]
                    },
                    suffix="%s_%i_%i" % (spec, block_num, spec_counts[spec])))
                file_out.write("%s\n" % component.text)
            file_out.write("\n")
    finally:
        # Close both handles even if a block fails to parse or write.
        file_out.close()
        file_in.close()
def __main__():
    """Write the intervals of one species found in a MAF file.

    Three arguments are popped from ``sys.argv`` in order: the output
    path, the input MAF path and the species name. The output starts with
    a ``#chrom start end strand`` header line, followed by one
    tab-separated row per component yielded by
    ``maf_utilities.iter_components_by_src_start`` for that species.

    Any exception raised while reading or converting the input is
    reported on stderr; rows written before the failure are kept.
    """
    output_name = sys.argv.pop(1)
    input_name = sys.argv.pop(1)
    species = sys.argv.pop(1)
    # ``with`` guarantees the output is flushed and closed; it used to be
    # left open.
    with open(output_name, 'w') as out:
        # write interval header line
        out.write("#chrom\tstart\tend\tstrand\n")
        try:
            with open(input_name, 'r') as maf_file:
                for block in bx.align.maf.Reader(maf_file):
                    for c in maf_utilities.iter_components_by_src_start(block, species):
                        if c is not None:
                            out.write("%s\t%i\t%i\t%s\n" % (maf_utilities.src_split(c.src)[-1], c.get_forward_strand_start(), c.get_forward_strand_end(), c.strand))
        except Exception as e:
            # sys.stderr.write emits the same line as ``print >> sys.stderr``
            # and is valid on both Python 2 and Python 3.
            sys.stderr.write("There was a problem processing your input: %s\n" % e)
def __main__():
    """Convert every block of a MAF file to FASTA records.

    Two arguments are popped from ``sys.argv`` in order: the output path
    and the input MAF path. Each component becomes one FASTA record whose
    header is built by ``maf_utilities.get_fasta_header``; an empty line
    is written after each block. The number of blocks converted is
    printed at the end.
    """
    output_name = sys.argv.pop(1)
    input_name = sys.argv.pop(1)
    count = 0
    with open(output_name, 'w') as out, open(input_name, 'r') as maf_file:
        for block_index, block in enumerate(bx.align.maf.Reader(maf_file)):
            # Per-block, zero-based occurrence index of each species.
            spec_counts = {}
            for c in block.components:
                spec, chrom = maf_utilities.src_split(c.src)
                spec_counts[spec] = spec_counts.get(spec, -1) + 1
                header = maf_utilities.get_fasta_header(
                    c,
                    {'block_index': block_index, 'species': spec, 'sequence_index': spec_counts[spec]},
                    suffix="%s_%i_%i" % (spec, block_index, spec_counts[spec]))
                out.write("%s\n" % header)
                out.write("%s\n" % c.text)
            out.write("\n")
            # block_index is zero-based; reporting it directly under-counted
            # the converted blocks by one.
            count = block_index + 1
    print("%i MAF blocks converted to FASTA." % (count))
def __main__():
    """Write all components of a MAF file as FASTA records.

    The output path and then the input MAF path are popped from
    ``sys.argv``. For every block, each component is written as a header
    line (from ``maf_utilities.get_fasta_header``) plus its text, and the
    block is terminated by an empty line. A summary with the number of
    blocks converted is printed when done.
    """
    output_name = sys.argv.pop(1)
    input_name = sys.argv.pop(1)
    count = 0
    # Nested ``with`` blocks close both files even if conversion fails.
    with open(output_name, 'w') as out:
        with open(input_name, 'r') as maf_in:
            for block_index, block in enumerate(bx.align.maf.Reader(maf_in)):
                # How many times each species has been seen in this block,
                # starting at 0 for the first occurrence.
                spec_counts = {}
                for c in block.components:
                    spec, chrom = maf_utilities.src_split(c.src)
                    if spec not in spec_counts:
                        spec_counts[spec] = 0
                    else:
                        spec_counts[spec] += 1
                    attributes = {'block_index': block_index, 'species': spec, 'sequence_index': spec_counts[spec]}
                    suffix = "%s_%i_%i" % (spec, block_index, spec_counts[spec])
                    out.write("%s\n" % maf_utilities.get_fasta_header(c, attributes, suffix=suffix))
                    out.write("%s\n" % c.text)
                out.write("\n")
                # Count blocks separately: the zero-based enumerate index
                # is one less than the number of blocks processed.
                count += 1
    # print(...) with a single pre-formatted argument behaves the same on
    # Python 2 and Python 3.
    print("%i MAF blocks converted to FASTA." % (count))
def __main__():
    """Convert the components of one species in a MAF file to intervals.

    The output path, the input MAF path and the species name are popped
    from ``sys.argv`` in that order. A ``#chrom start end strand`` header
    is written first, then one tab-separated row for each non-None
    component yielded by ``maf_utilities.iter_components_by_src_start``.

    Exceptions raised while processing the input are reported on stderr
    and do not abort the run; the number of rows written is printed at
    the end in either case.
    """
    output_name = sys.argv.pop(1)
    input_name = sys.argv.pop(1)
    species = sys.argv.pop(1)
    count = 0
    with open(output_name, 'w') as out:
        # write interval header line
        out.write("#chrom\tstart\tend\tstrand\n")
        try:
            # Opened inside the try so an unreadable input is reported like
            # any other processing problem, and closed when iteration ends.
            with open(input_name, 'r') as maf_file:
                for block in bx.align.maf.Reader(maf_file):
                    for c in maf_utilities.iter_components_by_src_start(block, species):
                        if c is not None:
                            out.write("%s\t%i\t%i\t%s\n" % (maf_utilities.src_split(c.src)[-1], c.get_forward_strand_start(), c.get_forward_strand_end(), c.strand))
                            count += 1
        except Exception as e:
            # Same text as the former ``print >> sys.stderr`` statement, in
            # a form that is valid on Python 2 and Python 3.
            sys.stderr.write("There was a problem processing your input: %s\n" % e)
    print("%i MAF blocks converted to Genomic Intervals for species %s." % (count, species))
def __main__():
    """Extract the intervals of a single species from a MAF file.

    Pops three values from ``sys.argv`` (output path, input MAF path,
    species name), writes a header line plus one tab-separated
    chrom/start/end/strand row per matching component, and prints how
    many rows were written. Problems while processing the input are
    reported on stderr without aborting.
    """
    # Three successive pops of index 1: output, input, species.
    output_name, input_name, species = [sys.argv.pop(1) for _ in range(3)]
    written = 0
    with open(output_name, 'w') as out:
        # write interval header line
        out.write("#chrom\tstart\tend\tstrand\n")
        try:
            with open(input_name, 'r') as fh:
                for block in bx.align.maf.Reader(fh):
                    for comp in maf_utilities.iter_components_by_src_start(block, species):
                        if comp is None:
                            continue
                        row = (
                            maf_utilities.src_split(comp.src)[-1],
                            comp.get_forward_strand_start(),
                            comp.get_forward_strand_end(),
                            comp.strand,
                        )
                        out.write("%s\t%i\t%i\t%s\n" % row)
                        written += 1
        except Exception as e:
            print("There was a problem processing your input: %s" % e, file=sys.stderr)
    print("%i MAF blocks converted to Genomic Intervals for species %s." % (written, species))
def __main__():
    """Write the blocks of a MAF file as FASTA, optionally filtered by species.

    Command-line arguments (read from ``sys.argv``):
        1. input MAF path
        2. FASTA output path
        3. species option, interpreted by
           ``maf_utilities.parse_species_option``
        4. "partial_disallowed" to drop blocks holding fewer species than
           were requested

    Header attributes are passed to ``maf_utilities.get_fasta_header`` as
    an ``OrderedDict`` (block_index, species, sequence_index). Each block
    is followed by an empty line. Setup errors are handed to
    ``maf_utilities.tool_fail``.
    """
    # NOTE(review): tool_fail is presumed not to return; later statements
    # use the names bound inside each ``try`` - confirm.
    try:
        maf_handle = open(sys.argv[1])
        maf_reader = maf.Reader(maf_handle)
    except Exception as e:
        maf_utilities.tool_fail("Error opening input MAF: %s" % e)
    try:
        file_out = open(sys.argv[2], 'w')
    except Exception as e:
        maf_utilities.tool_fail("Error opening file for output: %s" % e)
    try:
        species = maf_utilities.parse_species_option(sys.argv[3])
        if species:
            num_species = len(species)
        else:
            num_species = 0
    except Exception as e:
        maf_utilities.tool_fail("Error determining species value: %s" % e)
    try:
        partial = sys.argv[4]
    except Exception as e:
        maf_utilities.tool_fail("Error determining keep partial value: %s" % e)

    if species:
        print("Restricted to species: %s" % ', '.join(species))
    else:
        print("Not restricted to species.")

    try:
        for block_num, block in enumerate(maf_reader):
            if species:
                block = block.limit_to_species(species)
                # num_species is 0 when unrestricted, so the partial-block
                # filter only matters when a species list was given.
                if len(maf_utilities.get_species_in_block(block)) < num_species and partial == "partial_disallowed":
                    continue
            # Zero-based occurrence index of each species within this block.
            spec_counts = {}
            for component in block.components:
                spec, chrom = maf_utilities.src_split(component.src)
                if spec not in spec_counts:
                    spec_counts[spec] = 0
                else:
                    spec_counts[spec] += 1
                d = OrderedDict([('block_index', block_num), ('species', spec), ('sequence_index', spec_counts[spec])])
                file_out.write("%s\n" % maf_utilities.get_fasta_header(component, d, suffix="%s_%i_%i" % (spec, block_num, spec_counts[spec])))
                file_out.write("%s\n" % component.text)
            file_out.write("\n")
    finally:
        # Always release both files, including when a block fails.
        file_out.close()
        maf_handle.close()
num_species = 0 except Exception, e: maf_utilities.tool_fail( "Error determining species value: %s" % e ) try: partial = sys.argv[4] except Exception, e: maf_utilities.tool_fail( "Error determining keep partial value: %s" % e ) if species: print "Restricted to species: %s" % ', '.join( species ) else: print "Not restricted to species." for block_num, block in enumerate( maf_reader ): if species: block = block.limit_to_species( species ) if len( maf_utilities.get_species_in_block( block ) ) < num_species and partial == "partial_disallowed": continue spec_counts = {} for component in block.components: spec, chrom = maf_utilities.src_split( component.src ) if spec not in spec_counts: spec_counts[ spec ] = 0 else: spec_counts[ spec ] += 1 file_out.write( "%s\n" % maf_utilities.get_fasta_header( component, { 'block_index' : block_num, 'species' : spec, 'sequence_index' : spec_counts[ spec ] }, suffix = "%s_%i_%i" % ( spec, block_num, spec_counts[ spec ] ) ) ) file_out.write( "%s\n" % component.text ) file_out.write( "\n" ) file_out.close() if __name__ == "__main__": __main__()