def __main__():
    """Convert a MAF alignment file into FASTA-style records.

    Command-line arguments (read from ``sys.argv``):
        1: path of the MAF file to read.
        2: path of the output file to write.
        3: comma-separated species names; if the list contains ``"None"``
           the blocks are not restricted by species.
        4: ``"partial_disallowed"`` skips every block that has fewer
           components than the number of requested species; any other
           value keeps such blocks.

    For each kept component a header line of the form
    ``>src(strand):start-end|species_blocknumber`` is written, followed by
    the component text. A blank line ends every block.
    """
    # Written through sys.stdout so the line is identical on Python 2 and 3
    # (the original used the Python 2-only print statement).
    sys.stdout.write("Restricted to species: %s\n" % sys.argv[3])

    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    species = sys.argv[3].split(',')
    partial = sys.argv[4]
    num_species = len(species)

    # Both conditions are constant for the whole run; evaluate them once.
    restrict_species = "None" not in species
    require_all_species = partial == "partial_disallowed"

    # Context managers guarantee both files are closed even if parsing fails.
    with open(input_filename, 'r') as file_in:
        maf_reader = maf.Reader(file_in)
        with open(output_filename, 'w') as file_out:
            # block_num counts every block read, including skipped ones.
            for block_num, block in enumerate(maf_reader):
                if restrict_species:
                    block = block.limit_to_species(species)
                components = block.components
                if require_all_species and len(components) < num_species:
                    continue
                for c in components:
                    spec, _chrom = maf.src_split(c.src)
                    header = "".join([
                        ">", c.src, "(", c.strand, "):",
                        str(c.start), "-", str(c.end),
                        "|", spec, "_", str(block_num), "\n",
                    ])
                    file_out.write(header)
                    file_out.write(c.text + "\n")
                file_out.write("\n")
def __main__():
    """Convert a MAF alignment file into FASTA-style records.

    Command-line arguments (read from ``sys.argv``):
        1: path of the MAF file to read.
        2: path of the output file to write.
        3: comma-separated species names; if the list contains ``"None"``
           the blocks are not restricted by species.
        4: ``"partial_disallowed"`` skips every block that has fewer
           components than the number of requested species.

    Each kept component is written as the header returned by
    ``maf_utilities.get_fasta_header`` (with a ``species_blocknumber``
    suffix) followed by the component text; a blank line ends every block.

    Any exception raised while reading or writing is reported on stderr as
    a malformed MAF file and the process exits via ``sys.exit()``.
    """
    # Written through sys.stdout so the line is identical on Python 2 and 3.
    sys.stdout.write("Restricted to species: %s\n" % sys.argv[3])

    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    species = sys.argv[3].split(",")
    partial = sys.argv[4]
    num_species = len(species)

    restrict_species = "None" not in species
    require_all_species = partial == "partial_disallowed"

    # As before, a failure to open the input is NOT reported as a malformed
    # file: the input is opened outside the try block.
    with open(input_filename, "r") as file_in:
        try:
            maf_reader = maf.Reader(file_in)

            # The original never closed the output file; the context manager
            # flushes and closes it on success and on error.
            with open(output_filename, "w") as file_out:
                for block_num, block in enumerate(maf_reader):
                    if restrict_species:
                        block = block.limit_to_species(species)
                    if require_all_species and len(block.components) < num_species:
                        continue
                    for component in block.components:
                        spec, chrom = maf.src_split(component.src)
                        if not spec or not chrom:
                            # src has no usable species/chromosome split;
                            # fall back to the full source name.
                            spec = component.src
                        file_out.write("%s\n" % maf_utilities.get_fasta_header(component, suffix="%s_%i" % (spec, block_num)))
                        file_out.write("%s\n" % component.text)
                    file_out.write("\n")
        except Exception as e:
            sys.stderr.write("Your MAF file appears to be malformed: %s\n" % (e,))
            # Exit status is left exactly as in the original (no argument).
            sys.exit()
Ejemplo n.º 3
0
def __main__():
    """Write the components of a MAF file as FASTA-style records.

    Command-line arguments (read from ``sys.argv``):
        1: MAF file to read.
        2: output file to write.
        3: comma-separated list of species; when it contains ``"None"`` no
           species restriction is applied to the blocks.
        4: when equal to ``"partial_disallowed"``, blocks with fewer
           components than requested species are skipped.

    Output per component: ``>src(strand):start-end|species_blocknumber`` on
    one line, the component text on the next. Each block is followed by an
    empty line.
    """
    # sys.stdout.write emits the same line as the former Python 2 print
    # statement and is also valid on Python 3.
    sys.stdout.write("Restricted to species: %s\n" % sys.argv[3])

    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    species = sys.argv[3].split(',')
    partial = sys.argv[4]
    num_species = len(species)

    limit_blocks = "None" not in species
    skip_partial = partial == "partial_disallowed"

    # The with-statements close both files even when the reader raises.
    with open(input_filename, 'r') as file_in:
        maf_reader = maf.Reader(file_in)
        with open(output_filename, 'w') as file_out:
            # The enumerate index numbers every block, skipped or not.
            for block_num, m in enumerate(maf_reader):
                if limit_blocks:
                    m = m.limit_to_species(species)
                components = m.components
                if skip_partial and len(components) < num_species:
                    continue
                for c in components:
                    spec, _chrom = maf.src_split(c.src)
                    file_out.write(">" + c.src + "(" + c.strand + "):" +
                                   str(c.start) + "-" + str(c.end) + "|" +
                                   spec + "_" + str(block_num) + "\n")
                    file_out.write(c.text + "\n")
                file_out.write("\n")
Ejemplo n.º 4
0
def get_available_species(input_filename):
    """Build the list of species options for a MAF file.

    The file is read twice: once to collect the species names (the first
    value returned by ``maf.src_split`` for each component) and once to
    gather, per species, the aligned text of every block.

    Returns a list of ``(label, value, flag)`` tuples. The first entry is
    ``("Include all species.", 'None', False)``; it is followed by one entry
    per species, sorted by name, whose label reports the non-gap and total
    character counts. Blocks in which a species is absent count as
    ``text_size`` gap characters.

    If anything fails (for example the file does not exist yet), a single
    fallback option with value ``'None'`` and flag ``True`` is returned.
    """
    try:
        found = set()
        with open(input_filename, 'r') as file_in:
            for block in maf.Reader(file_in):
                for c in block.components:
                    spec, _chrom = maf.src_split(c.src)
                    found.add(spec)

        # sorted() works on Python 2 and 3; dict.keys().sort() raised on
        # Python 3 and the error was silently turned into the fallback.
        species = sorted(found)

        species_sequence = dict((s, []) for s in species)
        with open(input_filename, 'r') as file_in:
            for block in maf.Reader(file_in):
                for s in species:
                    c = block.get_component_by_src_start(s)
                    if c:
                        species_sequence[s].append(c.text)
                    else:
                        # Species missing from this block: pad with gaps.
                        species_sequence[s].append("-" * block.text_size)

        rval = [("Include all species.", 'None', False)]
        for spec in species:
            sequence = "".join(species_sequence[spec])
            total = len(sequence)
            nongap = total - sequence.count("-")
            rval.append((spec + ": " + str(nongap) + " nongap, " +
                         str(total) + " total bases",
                         spec, False))

        return rval
    except Exception:
        # Best-effort by design: any failure yields the generic option.
        return [(
            "Include all species. <i>If you want to limit your conversion to a set of species, you must wait for the MAF file to be created.</i>",
            'None', True)]
Ejemplo n.º 5
0
def get_species_names(input_filename):
    """Map every species in a MAF file to the chromosomes seen for it.

    Each component's ``src`` is split with ``maf.src_split`` into a species
    and a chromosome part. The result is a dict whose keys are the species
    and whose values are lists of the distinct chromosome parts, in order
    of first appearance.

    Errors from opening or parsing the file propagate to the caller.
    """
    species = {}

    # The context manager closes the file even if the reader raises.
    with open(input_filename, 'r') as file_in:
        for block in maf.Reader(file_in):
            for c in block.components:
                spec, chrom = maf.src_split(c.src)
                # setdefault replaces the former bare try/except, which
                # also hid errors unrelated to a missing key.
                chroms = species.setdefault(spec, [])
                if chrom not in chroms:
                    chroms.append(chrom)

    return species
Ejemplo n.º 6
0
def get_species_names( input_filename ):
    """Collect, per species, the chromosome names present in a MAF file.

    ``maf.src_split`` is applied to the ``src`` of every component; the
    first value is used as the species key and the second is recorded in
    that species' list unless it is already there.

    Returns a dict ``{species: [chromosome, ...]}``. Exceptions raised while
    opening or reading the file are not caught here.
    """
    species = {}

    # "with" closes the handle on every exit path, including errors.
    with open( input_filename, 'r' ) as file_in:
        for block in maf.Reader( file_in ):
            for c in block.components:
                spec, chrom = maf.src_split( c.src )
                # Explicit lookup instead of a bare except (the original
                # also mixed tabs and spaces on these lines).
                if spec not in species:
                    species[spec] = [chrom]
                elif chrom not in species[spec]:
                    species[spec].append( chrom )

    return species
def get_species(maf_filename):
    """Return the sorted species names found in a MAF file.

    A species name is the first value returned by ``maf.src_split`` for a
    component's ``src``. If the file cannot be opened or parsed, an empty
    list is returned instead of raising.
    """
    try:
        found = set()
        # The context manager closes the file even if parsing fails.
        with open(maf_filename, 'r') as file_in:
            for block in maf.Reader( file_in ):
                for c in block.components:
                    spec, _chrom = maf.src_split( c.src )
                    found.add(spec)
        # sorted() returns a list on Python 2 and 3. The former
        # keys()/.sort() pair raised on Python 3 and the bare except turned
        # that into an always-empty result.
        return sorted(found)
    except Exception:
        # Best-effort by design: unreadable input yields no species.
        return []
def get_species(maf_filename):
    """List the species present in a MAF file, sorted by name.

    Every component's ``src`` is passed to ``maf.src_split`` and the first
    returned value is taken as the species. Returns ``[]`` when reading or
    parsing the file fails for any reason.
    """
    try:
        names = set()
        # "with" guarantees the handle is released on error as well.
        with open(maf_filename, 'r') as file_in:
            for block in maf.Reader(file_in):
                names.update(maf.src_split(c.src)[0] for c in block.components)
    except Exception:
        # Deliberately best-effort: callers get an empty list on failure.
        return []
    # sorted() replaces keys() + .sort(), which fails on Python 3 because
    # dict views have no sort() method.
    return sorted(names)
def get_available_species( input_filename ):
    """Return selectable species options for a MAF file.

    First pass: collect the species names (first value of
    ``maf.src_split`` for every component). Second pass: for each block and
    species, record the text of the matching component, or ``text_size``
    gap characters when the species is absent from the block.

    Returns a list of ``(label, value, flag)`` tuples, starting with
    ``("Include all species.", 'None', False)`` and followed by one tuple
    per species in sorted order; the label gives the non-gap and total
    character counts.

    On any failure a single fallback option (value ``'None'``, flag
    ``True``) is returned.
    """
    try:
        names = set()
        with open( input_filename, 'r' ) as file_in:
            for block in maf.Reader( file_in ):
                for c in block.components:
                    spec, _chrom = maf.src_split( c.src )
                    names.add( spec )

        # Portable replacement for keys() + .sort(), which raises on
        # Python 3 and previously forced the fallback below.
        species = sorted( names )

        species_sequence = {}
        for s in species:
            species_sequence[s] = []

        with open( input_filename, 'r' ) as file_in:
            for block in maf.Reader( file_in ):
                for s in species:
                    c = block.get_component_by_src_start( s )
                    piece = c.text if c else "-" * block.text_size
                    species_sequence[s].append( piece )

        rval = [ ("Include all species.", 'None', False) ]
        for spec in species:
            joined = "".join( species_sequence[spec] )
            total_bases = len( joined )
            nongap_bases = total_bases - joined.count( "-" )
            label = spec + ": " + str( nongap_bases ) + " nongap, " + str( total_bases ) + " total bases"
            rval.append( (label, spec, False) )

        return rval
    except Exception:
        # Best-effort: e.g. the MAF file has not been created yet.
        return [("Include all species. <i>If you want to limit your conversion to a set of species, you must wait for the MAF file to be created.</i>",'None',True)]
Ejemplo n.º 10
0
def __main__():
    """Split a MAF alignment file into one BED file per species.

    Command-line arguments (read from ``sys.argv``):
        1: MAF file to read.
        2: output path used for the first ("primary") species.
        3: comma-separated species; if it contains ``"None"`` the species
           are discovered by scanning the MAF file.
        4: ``"partial_disallowed"`` skips blocks that have fewer components
           than the number of species.
        5: directory that receives the files of all other species.
        6: output id embedded in the names of those additional files.

    Every kept component becomes one tab-separated line: chromosome, start,
    end, ``species_blocknumber``, ``0`` and strand. For ``-`` strand
    components the coordinates are computed from ``src_size``. Finally the
    primary species is reported as ``#FILE1_DBKEY``.
    """
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    # where to store files that become additional output
    database_tmp_dir = sys.argv[5]

    species = sys.argv[3].split(',')
    partial = sys.argv[4]
    output_id = sys.argv[6]
    out_files = {}
    primary_spec = None

    def extra_output_path(spec):
        # Path of the output file of a non-primary species.
        return os.path.join(
            database_tmp_dir,
            'primary_%s_%s_visible_bed_%s' % (output_id, spec, spec))

    if "None" in species:
        # Keep first-seen order so the primary species is deterministic
        # (a plain set made the choice arbitrary).
        species = []
        seen = set()
        try:
            with open(input_filename, 'r') as scan_in:
                for m in maf.Reader(scan_in):
                    for c in m.components:
                        spec, chrom = maf.src_split(c.src)
                        if not spec or not chrom:
                            spec = c.src
                        if spec not in seen:
                            seen.add(spec)
                            species.append(spec)
        except Exception:
            print("Invalid MAF file specified", file=sys.stderr)
            return

    if "?" in species:
        print("Invalid dbkey specified", file=sys.stderr)
        return

    try:
        for i, spec in enumerate(species):
            if i == 0:
                out_files[spec] = open(output_filename, 'w')
                primary_spec = spec
            else:
                # Text mode: str lines are written below, which fails on a
                # binary-mode handle under Python 3.
                out_files[spec] = open(extra_output_path(spec), 'w+')
        num_species = len(species)

        print("Restricted to species:", ",".join(species))

        restrict_species = "None" not in species
        require_all_species = partial == "partial_disallowed"

        with open(input_filename, 'r') as file_in:
            # block_num numbers every block read, including skipped ones.
            for block_num, m in enumerate(maf.Reader(file_in)):
                if restrict_species:
                    m = m.limit_to_species(species)
                components = m.components
                if require_all_species and len(components) < num_species:
                    continue
                for c in components:
                    spec, chrom = maf.src_split(c.src)
                    if not spec or not chrom:
                        spec = chrom = c.src
                    if spec not in out_files:
                        out_files[spec] = open(extra_output_path(spec), 'w+')

                    if c.strand == "-":
                        # Coordinates of "-" components are computed from
                        # the source size.
                        start = c.src_size - c.end
                        end = c.src_size - c.start
                    else:
                        start = c.start
                        end = c.end
                    out_files[spec].write(chrom + "\t" + str(start) + "\t" +
                                          str(end) + "\t" + spec + "_" +
                                          str(block_num) + "\t" + "0\t" +
                                          c.strand + "\n")
    finally:
        # Close every output, also when reading or writing failed.
        for handle in out_files.values():
            handle.close()

    print("#FILE1_DBKEY\t%s" % (primary_spec))
Ejemplo n.º 11
0
def __main__():
    """Split a MAF alignment file into one BED file per species.

    Command-line arguments (read from ``sys.argv``):
        1: MAF file to read.
        2: output path used for the first ("primary") species.
        3: comma-separated species; if it contains ``"None"`` the species
           are discovered by scanning the MAF file, in first-seen order.
        4: ``"partial_disallowed"`` skips blocks that have fewer components
           than the number of species.
        5: directory that receives the files of all other species.
        6: output id embedded in the names of those additional files.

    Every kept component becomes one tab-separated line: chromosome, start,
    end, ``species_blocknumber``, ``0`` and strand. For ``-`` strand
    components the coordinates are computed from ``src_size``. Finally the
    primary species is reported as ``#FILE1_DBKEY``.
    """
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    # where to store files that become additional output
    database_tmp_dir = sys.argv[5]

    species = sys.argv[3].split(',')
    partial = sys.argv[4]
    output_id = sys.argv[6]
    out_files = {}
    primary_spec = None

    def open_extra_output(spec):
        # Always text mode: the lazily opened files used 'wb+', which makes
        # the str writes below fail on Python 3.
        return open(os.path.join(database_tmp_dir, 'primary_%s_%s_visible_bed_%s' % (output_id, spec, spec)), 'w+')

    if "None" in species:
        species = OrderedDict()
        try:
            # The context manager closes the scan handle, which the
            # original left open.
            with open(input_filename, 'r') as scan_in:
                for m in maf.Reader(scan_in):
                    for c in m.components:
                        spec, chrom = maf.src_split(c.src)
                        if not spec or not chrom:
                            spec = c.src
                        species[spec] = None
            species = list(species)
        except Exception:
            print("Invalid MAF file specified", file=sys.stderr)
            return

    if "?" in species:
        print("Invalid dbkey specified", file=sys.stderr)
        return

    try:
        for i, spec in enumerate(species):
            if i == 0:
                out_files[spec] = open(output_filename, 'w')
                primary_spec = spec
            else:
                out_files[spec] = open_extra_output(spec)
        num_species = len(species)

        print("Restricted to species:", ",".join(species))

        restrict_species = "None" not in species
        require_all_species = partial == "partial_disallowed"

        with open(input_filename, 'r') as file_in:
            # block_num numbers every block read, including skipped ones.
            for block_num, m in enumerate(maf.Reader(file_in)):
                if restrict_species:
                    m = m.limit_to_species(species)
                components = m.components
                if require_all_species and len(components) < num_species:
                    continue
                for c in components:
                    spec, chrom = maf.src_split(c.src)
                    if not spec or not chrom:
                        spec = chrom = c.src
                    if spec not in out_files:
                        out_files[spec] = open_extra_output(spec)

                    if c.strand == "-":
                        out_files[spec].write(chrom + "\t" + str(c.src_size - c.end) + "\t" + str(c.src_size - c.start) + "\t" + spec + "_" + str(block_num) + "\t" + "0\t" + c.strand + "\n")
                    else:
                        out_files[spec].write(chrom + "\t" + str(c.start) + "\t" + str(c.end) + "\t" + spec + "_" + str(block_num) + "\t" + "0\t" + c.strand + "\n")
    finally:
        # Close every output, also when reading or writing failed.
        for handle in out_files.values():
            handle.close()

    print("#FILE1_DBKEY\t%s" % (primary_spec))
Ejemplo n.º 12
0
def __main__():
    """Split a MAF alignment file into one BED file per species.

    Command-line arguments (read from ``sys.argv``):
        1: MAF file to read.
        2: output path used for the first ("primary") species.
        3: comma-separated species; if it contains ``"None"`` the species
           are discovered by scanning the MAF file.
        4: ``"partial_disallowed"`` skips blocks that have fewer components
           than the number of species.
        5: directory in which the files of all other species are created.

    Every kept component becomes one tab-separated line: chromosome, start,
    end, ``species_blocknumber``, ``0`` and strand. For ``-`` strand
    components the coordinates are computed from ``src_size``. At the end
    one ``#FILE1`` line (primary species) or ``#FILE`` line (other species)
    with the species and file path is printed per output.
    """
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    #where to store files that become additional output
    database_tmp_dir = sys.argv[5]

    species = sys.argv[3].split(',')
    partial = sys.argv[4]
    out_files = {}
    primary_spec = None

    def open_extra_output():
        # Reserve a unique name in database_tmp_dir, then reopen it as a
        # regular file: NamedTemporaryFile deletes its file on close().
        reserved = tempfile.NamedTemporaryFile( mode = 'w', dir = database_tmp_dir, suffix = '.maf_to_bed' )
        filename = reserved.name
        reserved.close()
        return open( filename, 'w' )

    if "None" in species:
        # A list in first-seen order is indexable on Python 2 and 3 and
        # makes the choice of the primary species deterministic.
        species = []
        seen = set()
        try:
            with open( input_filename, 'r' ) as scan_in:
                for m in maf.Reader( scan_in ):
                    for c in m.components:
                        spec, chrom = maf.src_split( c.src )
                        if not spec or not chrom:
                            spec = c.src
                        if spec not in seen:
                            seen.add( spec )
                            species.append( spec )
        except Exception:
            sys.stderr.write( "Invalid MAF file specified\n" )
            return

    if "?" in species:
        sys.stderr.write( "Invalid dbkey specified\n" )
        return

    try:
        for i, spec in enumerate( species ):
            if i == 0:
                out_files[spec] = open( output_filename, 'w' )
                primary_spec = spec
            else:
                out_files[spec] = open_extra_output()
        num_species = len( species )

        # Same text as the former Python 2 print statement.
        sys.stdout.write( "Restricted to species: %s\n" % ",".join( species ) )

        restrict_species = "None" not in species
        require_all_species = partial == "partial_disallowed"

        with open( input_filename, 'r' ) as file_in:
            # block_num numbers every block read, including skipped ones.
            for block_num, m in enumerate( maf.Reader( file_in ) ):
                if restrict_species:
                    m = m.limit_to_species( species )
                components = m.components
                if require_all_species and len( components ) < num_species:
                    continue
                for c in components:
                    spec, chrom = maf.src_split( c.src )
                    if not spec or not chrom:
                        spec = chrom = c.src
                    if spec not in out_files:
                        out_files[spec] = open_extra_output()

                    if c.strand == "-":
                        out_files[spec].write( chrom + "\t" + str( c.src_size - c.end ) + "\t" + str( c.src_size - c.start ) + "\t" + spec + "_" + str( block_num ) + "\t" + "0\t" + c.strand + "\n" )
                    else:
                        out_files[spec].write( chrom + "\t" + str( c.start ) + "\t" + str( c.end ) + "\t" + spec + "_" + str( block_num ) + "\t" + "0\t" + c.strand + "\n" )
    finally:
        # Close every output, also when reading or writing failed.
        for handle in out_files.values():
            handle.close()

    # .name stays available on closed file objects.
    for spec in out_files:
        if spec != primary_spec:
            sys.stdout.write( "#FILE\t" + spec + "\t" + os.path.join( database_tmp_dir, os.path.split( out_files[spec].name )[1] ) + "\n" )
        else:
            sys.stdout.write( "#FILE1\t" + spec + "\t" + out_files[spec].name + "\n" )
Ejemplo n.º 13
0
def __main__():
    """Convert a MAF alignment file into per-species BED files.

    Command-line arguments (read from ``sys.argv``):
        1: MAF file to read.
        2: output path for the first ("primary") species.
        3: comma-separated species list; when it contains ``"None"`` the
           species are taken from the MAF file itself.
        4: when equal to ``"partial_disallowed"``, blocks with fewer
           components than species are skipped.
        5: directory in which the files for the remaining species are
           created.

    One tab-separated line (chromosome, start, end,
    ``species_blocknumber``, ``0``, strand) is written per kept component;
    ``-`` strand coordinates are computed from ``src_size``. Afterwards a
    ``#FILE1`` line is printed for the primary species and a ``#FILE`` line
    for every other output, each with the species and the file path.
    """
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    #where to store files that become additional output
    database_tmp_dir = sys.argv[5]

    species = sys.argv[3].split(',')
    partial = sys.argv[4]
    out_files = {}
    primary_spec = None

    def new_species_file():
        # NamedTemporaryFile is used only to obtain a unique name; it
        # deletes its file on close(), so the name is reopened as a
        # regular file.
        placeholder = tempfile.NamedTemporaryFile(mode='w',
                                                  dir=database_tmp_dir,
                                                  suffix='.maf_to_bed')
        filename = placeholder.name
        placeholder.close()
        return open(filename, 'w')

    if "None" in species:
        # First-seen order: indexable on Python 2 and 3, and it makes the
        # choice of the primary species deterministic.
        species = []
        known = set()
        try:
            with open(input_filename, 'r') as scan_in:
                for m in maf.Reader(scan_in):
                    for c in m.components:
                        spec, chrom = maf.src_split(c.src)
                        if not spec or not chrom:
                            spec = c.src
                        if spec not in known:
                            known.add(spec)
                            species.append(spec)
        except Exception:
            sys.stderr.write("Invalid MAF file specified\n")
            return

    if "?" in species:
        sys.stderr.write("Invalid dbkey specified\n")
        return

    try:
        for i, spec in enumerate(species):
            if i == 0:
                out_files[spec] = open(output_filename, 'w')
                primary_spec = spec
            else:
                out_files[spec] = new_species_file()
        num_species = len(species)

        # Same output as the former Python 2 print statement.
        sys.stdout.write("Restricted to species: %s\n" % ",".join(species))

        limit_blocks = "None" not in species
        skip_partial = partial == "partial_disallowed"

        with open(input_filename, 'r') as file_in:
            # The enumerate index numbers every block, skipped or not.
            for block_num, m in enumerate(maf.Reader(file_in)):
                if limit_blocks:
                    m = m.limit_to_species(species)
                components = m.components
                if skip_partial and len(components) < num_species:
                    continue
                for c in components:
                    spec, chrom = maf.src_split(c.src)
                    if not spec or not chrom:
                        spec = chrom = c.src
                    if spec not in out_files:
                        out_files[spec] = new_species_file()

                    if c.strand == "-":
                        start = c.src_size - c.end
                        end = c.src_size - c.start
                    else:
                        start = c.start
                        end = c.end
                    out_files[spec].write(chrom + "\t" + str(start) + "\t" +
                                          str(end) + "\t" + spec + "_" +
                                          str(block_num) + "\t" + "0\t" +
                                          c.strand + "\n")
    finally:
        # Release every output handle, also on failure.
        for handle in out_files.values():
            handle.close()

    # The name attribute is still readable after close().
    for spec in out_files:
        if spec != primary_spec:
            sys.stdout.write("#FILE\t" + spec + "\t" + os.path.join(
                database_tmp_dir,
                os.path.split(out_files[spec].name)[1]) + "\n")
        else:
            sys.stdout.write("#FILE1\t" + spec + "\t" + out_files[spec].name + "\n")