def __main__():
    """Convert a MAF alignment file to FASTA, one record per component.

    Command-line arguments (``sys.argv``):
        1: path of the input MAF file
        2: path of the FASTA file to write
        3: comma-separated species to keep; when the list contains the
           literal "None" the blocks are not restricted
        4: when equal to "partial_disallowed", blocks with fewer components
           than the number of requested species are skipped

    Each record header has the form
    ``>src(strand):start-end|species_blocknumber`` and every block is
    followed by an empty line.
    """
    # sys.stdout.write is used instead of the print statement so the
    # function parses on both Python 2 and Python 3.
    sys.stdout.write("Restricted to species: %s\n" % sys.argv[3])
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    species = sys.argv[3].split(',')
    partial = sys.argv[4]
    num_species = len(species)
    restrict = "None" not in species

    # Context managers guarantee both handles are closed even when the
    # reader or a write raises part-way through the file.
    with open(input_filename, 'r') as file_in:
        # The reader is built before the output is opened, as before, so
        # the order in which the two files are touched is unchanged.
        maf_reader = maf.Reader(file_in)
        with open(output_filename, 'w') as file_out:
            # block_num counts every block read, including skipped ones.
            for block_num, block in enumerate(maf_reader):
                if restrict:
                    block = block.limit_to_species(species)
                components = block.components
                if len(components) < num_species and partial == "partial_disallowed":
                    continue
                for c in components:
                    spec, _chrom = maf.src_split(c.src)
                    file_out.write(">%s(%s):%s-%s|%s_%s\n" % (
                        c.src, c.strand, c.start, c.end, spec, block_num))
                    file_out.write(c.text + "\n")
                file_out.write("\n")
def __main__():
    """Convert a MAF alignment file to FASTA, one record per component.

    Command-line arguments (``sys.argv``):
        1: path of the input MAF file
        2: path of the FASTA file to write
        3: comma-separated species to keep; when the list contains the
           literal "None" the blocks are not restricted
        4: when equal to "partial_disallowed", blocks with fewer components
           than the number of requested species are skipped

    Headers are produced by ``maf_utilities.get_fasta_header`` with the
    suffix ``species_blocknumber``; every block is followed by an empty
    line. Any exception raised while reading or writing is reported on
    stderr as a malformed MAF file and the process exits.
    """
    # sys.stdout/sys.stderr.write replace the Python-2-only print
    # statements so the function parses on both Python 2 and Python 3.
    sys.stdout.write("Restricted to species: %s\n" % sys.argv[3])
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    species = sys.argv[3].split(",")
    partial = sys.argv[4]
    num_species = len(species)
    restrict = "None" not in species

    file_in = open(input_filename, "r")
    try:
        maf_reader = maf.Reader(file_in)
        file_out = open(output_filename, "w")
        try:
            # block_num counts every block read, including skipped ones.
            for block_num, block in enumerate(maf_reader):
                if restrict:
                    block = block.limit_to_species(species)
                if len(block.components) < num_species and partial == "partial_disallowed":
                    continue
                for component in block.components:
                    spec, chrom = maf.src_split(component.src)
                    # Fall back to the whole src when it does not split
                    # into both a species and a chromosome part.
                    if not spec or not chrom:
                        spec = chrom = component.src
                    header = maf_utilities.get_fasta_header(
                        component, suffix="%s_%i" % (spec, block_num))
                    file_out.write("%s\n" % header)
                    file_out.write("%s\n" % component.text)
                file_out.write("\n")
        finally:
            # The output handle used to be left open on every path.
            file_out.close()
    except Exception as e:
        sys.stderr.write("Your MAF file appears to be malformed: %s\n" % e)
        # NOTE(review): sys.exit() without an argument exits with status 0;
        # kept as-is, confirm callers rely on stderr rather than exit code.
        sys.exit()
    finally:
        # The input handle used to be closed only on the success path.
        file_in.close()
def __main__():
    """Convert a MAF alignment file to FASTA, one record per component.

    Command-line arguments (``sys.argv``):
        1: path of the input MAF file
        2: path of the FASTA file to write
        3: comma-separated species to keep; when the list contains the
           literal "None" the blocks are not restricted
        4: when equal to "partial_disallowed", blocks with fewer components
           than the number of requested species are skipped

    Each record header has the form
    ``>src(strand):start-end|species_blocknumber`` and every block is
    followed by an empty line.
    """
    # Written with sys.stdout.write rather than the print statement so the
    # function parses on both Python 2 and Python 3.
    sys.stdout.write("Restricted to species: %s\n" % sys.argv[3])
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    species = sys.argv[3].split(',')
    partial = sys.argv[4]
    num_species = len(species)
    keep_all_species = "None" in species

    # Both handles are closed by the context managers on success and on
    # error; previously an exception left them open.
    with open(input_filename, 'r') as file_in:
        maf_reader = maf.Reader(file_in)
        with open(output_filename, 'w') as file_out:
            # enumerate supplies the running block number (skipped blocks
            # still consume a number).
            for block_num, block in enumerate(maf_reader):
                if not keep_all_species:
                    block = block.limit_to_species(species)
                components = block.components
                if partial == "partial_disallowed" and len(components) < num_species:
                    continue
                for c in components:
                    spec, _chrom = maf.src_split(c.src)
                    file_out.write(">%s(%s):%s-%s|%s_%s\n" % (
                        c.src, c.strand, c.start, c.end, spec, block_num))
                    file_out.write(c.text + "\n")
                file_out.write("\n")
def get_available_species(input_filename):
    """Build the species choices offered for a MAF file.

    Returns a list of ``(label, value, selected)`` tuples. The first entry
    is ``("Include all species.", 'None', False)``; it is followed by one
    entry per species (sorted by name) whose label reports the number of
    non-gap characters and the total number of characters for that species
    summed over all blocks. A block without a component for a species
    counts as ``text_size`` gap characters for it.

    If anything goes wrong while reading the file (for example it does not
    exist yet), a single placeholder choice is returned instead.
    """
    try:
        # First pass: collect the distinct species names.
        species = set()
        with open(input_filename, 'r') as file_in:
            for block in maf.Reader(file_in):
                for c in block.components:
                    spec, _chrom = maf.src_split(c.src)
                    species.add(spec)
        # sorted() works on Python 2 and 3; dict.keys().sort() raised on
        # Python 3 and the error was hidden by the except clause below.
        species = sorted(species)

        # Second pass: keep running totals instead of concatenating the
        # full alignment text of every species in memory.
        total_bases = dict.fromkeys(species, 0)
        gap_bases = dict.fromkeys(species, 0)
        with open(input_filename, 'r') as file_in:
            for block in maf.Reader(file_in):
                for spec in species:
                    c = block.get_component_by_src_start(spec)
                    if c:
                        total_bases[spec] += len(c.text)
                        gap_bases[spec] += c.text.count("-")
                    else:
                        # Missing species are counted as an all-gap row.
                        total_bases[spec] += block.text_size
                        gap_bases[spec] += block.text_size

        rval = [("Include all species.", 'None', False)]
        for spec in species:
            label = "%s: %s nongap, %s total bases" % (
                spec, total_bases[spec] - gap_bases[spec], total_bases[spec])
            rval.append((label, spec, False))
        return rval
    except Exception:
        # Deliberate best-effort: any failure yields the placeholder, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return [(
            "Include all species. <i>If you want to limit your conversion to a set of species, you must wait for the MAF file to be created.</i>",
            'None', True)]
def get_species_names(input_filename):
    """Map each species in a MAF file to the chromosomes seen for it.

    Every component ``src`` is split with ``maf.src_split`` into a species
    and a chromosome part. Returns a dict ``{species: [chrom, ...]}`` where
    each list holds the distinct chromosome values in first-seen order.

    Errors from opening or parsing the file propagate to the caller.
    """
    species = {}
    # The context manager closes the file even if parsing fails.
    with open(input_filename, 'r') as file_in:
        for block in maf.Reader(file_in):
            for c in block.components:
                spec, chrom = maf.src_split(c.src)
                # setdefault replaces the former try/bare-except, which
                # would also have hidden unrelated errors.
                chroms = species.setdefault(spec, [])
                if chrom not in chroms:
                    chroms.append(chrom)
    return species
def get_species_names(input_filename):
    """Collect, per species, the chromosome names present in a MAF file.

    The species and chromosome of a component are whatever
    ``maf.src_split`` returns for its ``src``. The result is a dict mapping
    each species to a list of its distinct chromosome values, in the order
    they were first encountered.

    Errors from opening or parsing the file propagate to the caller.
    """
    chroms_by_species = {}
    # "with" guarantees the handle is released if the reader raises.
    with open(input_filename, 'r') as file_in:
        maf_reader = maf.Reader(file_in)
        for block in maf_reader:
            for component in block.components:
                spec, chrom = maf.src_split(component.src)
                # Explicit membership test instead of a bare except used as
                # control flow for the missing-key case.
                if spec not in chroms_by_species:
                    chroms_by_species[spec] = [chrom]
                elif chrom not in chroms_by_species[spec]:
                    chroms_by_species[spec].append(chrom)
    return chroms_by_species
def get_species(maf_filename):
    """Return the sorted list of species names found in a MAF file.

    The species of a component is the first value returned by
    ``maf.src_split`` for its ``src``. If the file cannot be opened or
    read, an empty list is returned.
    """
    try:
        species = set()
        # The context manager closes the file on success and on error.
        with open(maf_filename, 'r') as file_in:
            for block in maf.Reader(file_in):
                for c in block.components:
                    spec, _chrom = maf.src_split(c.src)
                    species.add(spec)
        # sorted() works on Python 2 and 3; dict.keys().sort() raised on
        # Python 3 and the bare except turned that into an empty result.
        return sorted(species)
    except Exception:
        # Deliberate best-effort; KeyboardInterrupt/SystemExit now pass
        # through instead of being swallowed.
        return []
def get_species(maf_filename):
    """List the distinct species of a MAF file in sorted order.

    Each component's ``src`` is split with ``maf.src_split`` and the first
    returned value is taken as the species name. Returns ``[]`` when the
    file cannot be opened or read.
    """
    found = set()
    try:
        # "with" releases the handle even if the reader raises.
        with open(maf_filename, 'r') as file_in:
            maf_reader = maf.Reader(file_in)
            for block in maf_reader:
                for component in block.components:
                    spec, _chrom = maf.src_split(component.src)
                    found.add(spec)
        # Sorting inside the try keeps the "any failure gives []" contract.
        # sorted() replaces keys()+sort(), which fails on Python 3 views.
        return sorted(found)
    except Exception:
        # Best-effort by design, but no longer catches KeyboardInterrupt
        # or SystemExit as the bare except did.
        return []
def get_available_species(input_filename):
    """Build the species choices offered for a MAF file.

    Returns a list of ``(label, value, selected)`` tuples. The first entry
    is ``("Include all species.", 'None', False)``; it is followed by one
    entry per species (sorted by name) whose label reports the number of
    non-gap characters and the total number of characters for that species
    summed over all blocks. A block without a component for a species
    counts as ``text_size`` gap characters for it.

    If anything goes wrong while reading the file (for example it does not
    exist yet), a single placeholder choice is returned instead.
    """
    try:
        # Pass 1: gather the distinct species names.
        names = set()
        with open(input_filename, 'r') as file_in:
            for block in maf.Reader(file_in):
                for component in block.components:
                    spec, _chrom = maf.src_split(component.src)
                    names.add(spec)
        # keys() followed by .sort() only works on Python 2 lists; on
        # Python 3 it raised and the except below hid the failure.
        species = sorted(names)

        # Pass 2: accumulate counts per species rather than joining the
        # whole alignment text of each species into one string.
        totals = dict.fromkeys(species, 0)
        gaps = dict.fromkeys(species, 0)
        with open(input_filename, 'r') as file_in:
            for block in maf.Reader(file_in):
                for spec in species:
                    component = block.get_component_by_src_start(spec)
                    if component:
                        totals[spec] += len(component.text)
                        gaps[spec] += component.text.count("-")
                    else:
                        # A species absent from the block is an all-gap row.
                        totals[spec] += block.text_size
                        gaps[spec] += block.text_size

        rval = [("Include all species.", 'None', False)]
        for spec in species:
            label = "%s: %s nongap, %s total bases" % (
                spec, totals[spec] - gaps[spec], totals[spec])
            rval.append((label, spec, False))
        return rval
    except Exception:
        # Best-effort on purpose; narrowed from a bare except so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        return [(
            "Include all species. <i>If you want to limit your conversion to a set of species, you must wait for the MAF file to be created.</i>",
            'None', True)]
def __main__():
    """Convert a MAF alignment file to BED, one output file per species.

    Command-line arguments (``sys.argv``):
        1: path of the input MAF file
        2: path of the primary BED output (used for the first species)
        3: comma-separated species; if the list contains the literal
           "None" the species are discovered by scanning the MAF file
        4: when equal to "partial_disallowed", blocks with fewer components
           than the number of species are skipped
        5: directory receiving the BED files of the remaining species
        6: output id embedded in the names of those additional files

    Each BED line is ``chrom, start, end, species_blocknumber, 0, strand``
    (tab separated). For "-" strand components the interval is written as
    ``src_size - end`` .. ``src_size - start``. On success a
    ``#FILE1_DBKEY`` line naming the primary species is printed.
    """
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    # where to store files that become additional output
    database_tmp_dir = sys.argv[5]
    species = sys.argv[3].split(',')
    partial = sys.argv[4]
    output_id = sys.argv[6]
    out_files = {}
    primary_spec = None

    def extra_output_path(spec):
        # Location of the BED file for a non-primary species.
        return os.path.join(
            database_tmp_dir,
            'primary_%s_%s_visible_bed_%s' % (output_id, spec, spec))

    if "None" in species:
        # Collect species in first-seen order. A plain set made the choice
        # of the primary species (index 0 below) arbitrary between runs.
        species = []
        seen = set()
        try:
            with open(input_filename, 'r') as scan_in:
                for block in maf.Reader(scan_in):
                    for c in block.components:
                        spec, chrom = maf.src_split(c.src)
                        if not spec or not chrom:
                            spec = c.src
                        if spec not in seen:
                            seen.add(spec)
                            species.append(spec)
        except Exception:
            print("Invalid MAF file specified", file=sys.stderr)
            return
    if "?" in species:
        print("Invalid dbkey specified", file=sys.stderr)
        return

    try:
        for i, spec in enumerate(species):
            if i == 0:
                out_files[spec] = open(output_filename, 'w')
                primary_spec = spec
            else:
                # Text mode: str is written below, which a binary-mode
                # handle ('wb+') rejects on Python 3.
                out_files[spec] = open(extra_output_path(spec), 'w+')
        num_species = len(species)
        print("Restricted to species:", ",".join(species))
        restrict = "None" not in species

        with open(input_filename, 'r') as file_in:
            # block_num counts every block read, including skipped ones.
            for block_num, block in enumerate(maf.Reader(file_in)):
                if restrict:
                    block = block.limit_to_species(species)
                components = block.components
                if len(components) < num_species and partial == "partial_disallowed":
                    continue
                for c in components:
                    spec, chrom = maf.src_split(c.src)
                    # Fall back to the whole src when it does not split
                    # into both a species and a chromosome part.
                    if not spec or not chrom:
                        spec = chrom = c.src
                    if spec not in out_files:
                        out_files[spec] = open(extra_output_path(spec), 'w+')
                    if c.strand == "-":
                        start, end = c.src_size - c.end, c.src_size - c.start
                    else:
                        start, end = c.start, c.end
                    out_files[spec].write("%s\t%s\t%s\t%s_%s\t0\t%s\n" % (
                        chrom, start, end, spec, block_num, c.strand))
    finally:
        # Close every output even when reading or writing failed.
        for handle in out_files.values():
            handle.close()
    print("#FILE1_DBKEY\t%s" % (primary_spec))
def __main__():
    """Convert a MAF alignment file to BED, one output file per species.

    Command-line arguments (``sys.argv``):
        1: path of the input MAF file
        2: path of the primary BED output (used for the first species)
        3: comma-separated species; if the list contains the literal
           "None" the species are discovered by scanning the MAF file,
           in first-seen order
        4: when equal to "partial_disallowed", blocks with fewer components
           than the number of species are skipped
        5: directory receiving the BED files of the remaining species
        6: output id embedded in the names of those additional files

    Each BED line is ``chrom, start, end, species_blocknumber, 0, strand``
    (tab separated). For "-" strand components the interval is written as
    ``src_size - end`` .. ``src_size - start``. On success a
    ``#FILE1_DBKEY`` line naming the primary species is printed.
    """
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    # where to store files that become additional output
    database_tmp_dir = sys.argv[5]
    species = sys.argv[3].split(',')
    partial = sys.argv[4]
    output_id = sys.argv[6]
    out_files = {}
    primary_spec = None

    def extra_output_path(spec):
        # Location of the BED file for a non-primary species.
        return os.path.join(
            database_tmp_dir,
            'primary_%s_%s_visible_bed_%s' % (output_id, spec, spec))

    if "None" in species:
        # OrderedDict keeps first-seen order, so the primary species is
        # the first one encountered in the file.
        species = OrderedDict()
        try:
            # The scan handle is now closed instead of being abandoned.
            with open(input_filename, 'r') as scan_in:
                for block in maf.Reader(scan_in):
                    for c in block.components:
                        spec, chrom = maf.src_split(c.src)
                        if not spec or not chrom:
                            spec = c.src
                        species[spec] = None
            species = list(species)
        except Exception:
            print("Invalid MAF file specified", file=sys.stderr)
            return
    if "?" in species:
        print("Invalid dbkey specified", file=sys.stderr)
        return

    try:
        for i, spec in enumerate(species):
            if i == 0:
                out_files[spec] = open(output_filename, 'w')
                primary_spec = spec
            else:
                out_files[spec] = open(extra_output_path(spec), 'w+')
        num_species = len(species)
        print("Restricted to species:", ",".join(species))
        restrict = "None" not in species

        with open(input_filename, 'r') as file_in:
            # block_num counts every block read, including skipped ones.
            for block_num, block in enumerate(maf.Reader(file_in)):
                if restrict:
                    block = block.limit_to_species(species)
                components = block.components
                if len(components) < num_species and partial == "partial_disallowed":
                    continue
                for c in components:
                    spec, chrom = maf.src_split(c.src)
                    # Fall back to the whole src when it does not split
                    # into both a species and a chromosome part.
                    if not spec or not chrom:
                        spec = chrom = c.src
                    if spec not in out_files:
                        # Text mode like the other outputs: this branch
                        # used 'wb+', and writing str to a binary handle
                        # raises TypeError on Python 3.
                        out_files[spec] = open(extra_output_path(spec), 'w+')
                    if c.strand == "-":
                        start, end = c.src_size - c.end, c.src_size - c.start
                    else:
                        start, end = c.start, c.end
                    out_files[spec].write("%s\t%s\t%s\t%s_%s\t0\t%s\n" % (
                        chrom, start, end, spec, block_num, c.strand))
    finally:
        # Close every output even when reading or writing failed.
        for handle in out_files.values():
            handle.close()
    print("#FILE1_DBKEY\t%s" % (primary_spec))
def __main__():
    """Convert a MAF alignment file to BED, one output file per species.

    Command-line arguments (``sys.argv``):
        1: path of the input MAF file
        2: path of the primary BED output (used for the first species)
        3: comma-separated species; if the list contains the literal
           "None" the species are discovered by scanning the MAF file
        4: when equal to "partial_disallowed", blocks with fewer components
           than the number of species are skipped
        5: directory in which the BED files of the remaining species are
           created (uniquely named, suffix ``.maf_to_bed``)

    Each BED line is ``chrom, start, end, species_blocknumber, 0, strand``
    (tab separated). For "-" strand components the interval is written as
    ``src_size - end`` .. ``src_size - start``. Finally one ``#FILE1`` line
    (primary species) and one ``#FILE`` line per additional species are
    printed, each giving the species and the path of its file.
    """
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    # where to store files that become additional output
    database_tmp_dir = sys.argv[5]
    species = sys.argv[3].split(',')
    partial = sys.argv[4]
    out_files = {}
    primary_spec = None

    def open_extra_output():
        # Obtain a unique name inside database_tmp_dir, then reopen that
        # name with a plain open(); closing the NamedTemporaryFile removes
        # the original temporary file.
        # NOTE(review): the close/reopen sequence is kept exactly as it
        # was - presumably so the file is an ordinary file; confirm before
        # switching to delete=False or mkstemp.
        tmp = tempfile.NamedTemporaryFile(
            mode='w', dir=database_tmp_dir, suffix='.maf_to_bed')
        filename = tmp.name
        tmp.close()
        return open(filename, 'w')

    if "None" in species:
        species = {}
        try:
            # The scan handle is now closed instead of being abandoned.
            with open(input_filename, 'r') as scan_in:
                for block in maf.Reader(scan_in):
                    for c in block.components:
                        spec, chrom = maf.src_split(c.src)
                        if not spec or not chrom:
                            spec = chrom = c.src
                        species[spec] = ""
            # list() yields the same keys as keys() and is indexable on
            # both Python 2 and Python 3.
            species = list(species)
        except Exception:
            sys.stderr.write("Invalid MAF file specified\n")
            return
    if "?" in species:
        sys.stderr.write("Invalid dbkey specified\n")
        return

    try:
        for i, spec in enumerate(species):
            if i == 0:
                out_files[spec] = open(output_filename, 'w')
                primary_spec = spec
            else:
                out_files[spec] = open_extra_output()
        num_species = len(species)
        sys.stdout.write("Restricted to species: %s\n" % ",".join(species))
        restrict = "None" not in species

        with open(input_filename, 'r') as file_in:
            # block_num counts every block read, including skipped ones.
            for block_num, block in enumerate(maf.Reader(file_in)):
                if restrict:
                    block = block.limit_to_species(species)
                components = block.components
                if len(components) < num_species and partial == "partial_disallowed":
                    continue
                for c in components:
                    spec, chrom = maf.src_split(c.src)
                    # Fall back to the whole src when it does not split
                    # into both a species and a chromosome part.
                    if not spec or not chrom:
                        spec = chrom = c.src
                    if spec not in out_files:
                        out_files[spec] = open_extra_output()
                    if c.strand == "-":
                        start, end = c.src_size - c.end, c.src_size - c.start
                    else:
                        start, end = c.start, c.end
                    out_files[spec].write("%s\t%s\t%s\t%s_%s\t0\t%s\n" % (
                        chrom, start, end, spec, block_num, c.strand))
    finally:
        # Close every output even when reading or writing failed.
        for handle in out_files.values():
            handle.close()

    # Report where each species' BED file was written.
    for spec in out_files:
        if spec != primary_spec:
            extra_path = os.path.join(
                database_tmp_dir, os.path.split(out_files[spec].name)[1])
            sys.stdout.write("#FILE\t" + spec + "\t" + extra_path + "\n")
        else:
            sys.stdout.write("#FILE1\t" + spec + "\t" + out_files[spec].name + "\n")
def __main__():
    """Convert a MAF alignment file to BED, one output file per species.

    Command-line arguments (``sys.argv``):
        1: path of the input MAF file
        2: path of the primary BED output (used for the first species)
        3: comma-separated species; if the list contains the literal
           "None" the species are discovered by scanning the MAF file
        4: when equal to "partial_disallowed", blocks with fewer components
           than the number of species are skipped
        5: directory in which the BED files of the remaining species are
           created (uniquely named, suffix ``.maf_to_bed``)

    Each BED line is ``chrom, start, end, species_blocknumber, 0, strand``
    (tab separated). For "-" strand components the interval is written as
    ``src_size - end`` .. ``src_size - start``. Finally one ``#FILE1`` line
    (primary species) and one ``#FILE`` line per additional species are
    printed, each giving the species and the path of its file.
    """
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    # where to store files that become additional output
    database_tmp_dir = sys.argv[5]
    species = sys.argv[3].split(',')
    partial = sys.argv[4]
    out_files = {}
    primary_spec = None

    def open_extra_output():
        # Obtain a unique name inside database_tmp_dir, then reopen that
        # name with a plain open(); closing the NamedTemporaryFile removes
        # the original temporary file.
        # NOTE(review): the close/reopen sequence is kept exactly as it
        # was - presumably so the file is an ordinary file; confirm before
        # switching to delete=False or mkstemp.
        tmp = tempfile.NamedTemporaryFile(
            mode='w', dir=database_tmp_dir, suffix='.maf_to_bed')
        filename = tmp.name
        tmp.close()
        return open(filename, 'w')

    if "None" in species:
        species = {}
        try:
            # The scan handle is now closed instead of being abandoned.
            with open(input_filename, 'r') as scan_in:
                for block in maf.Reader(scan_in):
                    for c in block.components:
                        spec, chrom = maf.src_split(c.src)
                        if not spec or not chrom:
                            spec = chrom = c.src
                        species[spec] = ""
            # list() yields the same keys as keys() and is indexable on
            # both Python 2 and Python 3.
            species = list(species)
        except Exception:
            sys.stderr.write("Invalid MAF file specified\n")
            return
    if "?" in species:
        sys.stderr.write("Invalid dbkey specified\n")
        return

    try:
        for i, spec in enumerate(species):
            if i == 0:
                out_files[spec] = open(output_filename, 'w')
                primary_spec = spec
            else:
                out_files[spec] = open_extra_output()
        num_species = len(species)
        sys.stdout.write("Restricted to species: %s\n" % ",".join(species))
        restrict = "None" not in species

        with open(input_filename, 'r') as file_in:
            # block_num counts every block read, including skipped ones.
            for block_num, block in enumerate(maf.Reader(file_in)):
                if restrict:
                    block = block.limit_to_species(species)
                components = block.components
                if len(components) < num_species and partial == "partial_disallowed":
                    continue
                for c in components:
                    spec, chrom = maf.src_split(c.src)
                    # Fall back to the whole src when it does not split
                    # into both a species and a chromosome part.
                    if not spec or not chrom:
                        spec = chrom = c.src
                    if spec not in out_files:
                        out_files[spec] = open_extra_output()
                    if c.strand == "-":
                        start, end = c.src_size - c.end, c.src_size - c.start
                    else:
                        start, end = c.start, c.end
                    out_files[spec].write("%s\t%s\t%s\t%s_%s\t0\t%s\n" % (
                        chrom, start, end, spec, block_num, c.strand))
    finally:
        # Close every output even when reading or writing failed.
        for handle in out_files.values():
            handle.close()

    # Report where each species' BED file was written.
    for spec in out_files:
        if spec != primary_spec:
            extra_path = os.path.join(
                database_tmp_dir, os.path.split(out_files[spec].name)[1])
            sys.stdout.write("#FILE\t" + spec + "\t" + extra_path + "\n")
        else:
            sys.stdout.write("#FILE1\t" + spec + "\t" + out_files[spec].name + "\n")