Example #1
0
def __main__():
    """Convert a MAF file into per-species interval files with sequence columns.

    Command-line arguments (``sys.argv``):
        1. input MAF filename
        2. output filename used for the primary species
        3. output id, used to name the additional per-species files
        4. directory in which the additional output files are created
        5. primary species
        6. comma-separated species to write files for; if the list contains
           "None" only the primary species is written
        7. comma-separated species that each get a sequence column
        8. partial flag; "partial_disallowed" skips blocks that have fewer
           components than there are sequence-column species
        9. gap flag; "remove_gaps" strips '-' characters from sequence text

    Raises:
        AssertionError: if the component returned for a species reports a
            different species name in its ``src``.
    """
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    output_id = sys.argv[3]
    # where to store files that become additional output
    database_tmp_dir = sys.argv[4]
    primary_spec = sys.argv[5]
    species = sys.argv[6].split(',')
    all_species = sys.argv[7].split(',')
    partial = sys.argv[8]
    keep_gaps = sys.argv[9]
    out_files = {}

    if "None" in species:
        species = []

    # The primary species is always written and always has a sequence column.
    if primary_spec not in species:
        species.append(primary_spec)
    if primary_spec not in all_species:
        all_species.append(primary_spec)

    all_species.sort()
    num_species = len(all_species)
    header = '#chrom\tstart\tend\tstrand\tscore\tname\t%s\n' % '\t'.join(all_species)
    remove_gaps = keep_gaps == 'remove_gaps'
    disallow_partial = partial == "partial_disallowed"

    try:
        for spec in species:
            if spec == primary_spec:
                path = output_filename
            else:
                path = os.path.join(database_tmp_dir, 'primary_%s_%s_visible_interval_%s' % (output_id, spec, spec))
            # Text mode: everything written below is str, which a binary-mode
            # handle rejects on Python 3.
            out_files[spec] = open(path, 'w+')
            out_files[spec].write(header)

        with open(input_filename, 'r') as file_in:
            for i, m in enumerate(maf.Reader(file_in)):
                for j, block in enumerate(maf_utilities.iter_blocks_split_by_species(m)):
                    if len(block.components) < num_species and disallow_partial:
                        continue
                    sequences = {}
                    for c in block.components:
                        spec, chrom = maf_utilities.src_split(c.src)
                        if remove_gaps:
                            sequences[spec] = c.text.replace('-', '')
                        else:
                            sequences[spec] = c.text
                    # One column per species in sorted order; species missing
                    # from this block get an empty column.
                    sequence_columns = '\t'.join([sequences.get(name, '') for name in all_species])
                    for spec in species:
                        c = block.get_component_by_src_start(spec)
                        if c is None:
                            continue
                        spec2, chrom = maf_utilities.src_split(c.src)
                        # Raised explicitly (rather than via ``assert``) so the
                        # check still runs under ``python -O``.
                        if spec2 != spec:
                            raise AssertionError('Species name inconsistancy found in component: %s != %s' % (spec, spec2))
                        out_files[spec].write("%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (chrom, c.forward_strand_start, c.forward_strand_end, c.strand, m.score, "%s_%s_%s" % (spec, i, j), sequence_columns))
    finally:
        # Close every output that was opened, even if processing failed.
        for file_out in out_files.values():
            file_out.close()
Example #2
0
def __main__():
    """Cross-check a MAF index location file against the MAF files it lists.

    ``sys.argv[1]`` is the location file. Lines starting with '#' are skipped.
    Every other line must hold five tab-separated fields: display name, uid,
    comma-separated indexed species, comma-separated species claimed to exist,
    and comma-separated MAF files. Each disagreement between those claims and
    what the indexes / MAF blocks contain is printed. A line that raises while
    being processed is reported as invalid and checking moves on.
    """
    index_location_file = sys.argv[1]
    # The context manager guarantees the location file is closed on exit.
    with open(index_location_file) as loc_file:
        for i, line in enumerate(loc_file):
            try:
                if line.startswith('#'):
                    continue
                display_name, uid, indexed_for_species, species_exist, maf_files = line.rstrip(
                ).split('\t')
                indexed_for_species = indexed_for_species.split(',')
                species_exist = species_exist.split(',')
                maf_files = maf_files.split(',')
                # Lists (not sets) so mismatches are reported in discovery order.
                species_indexed_in_maf = []
                species_found_in_maf = []
                for maf_file in maf_files:
                    # NOTE(review): opened with keep_open=True and never closed
                    # here — confirm whether an explicit close is needed.
                    indexed_maf = bx.align.maf.MAFIndexedAccess(maf_file,
                                                                keep_open=True,
                                                                parse_e_rows=False)
                    for key in indexed_maf.indexes.indexes.keys():
                        spec = maf_utilities.src_split(key)[0]
                        if spec not in species_indexed_in_maf:
                            species_indexed_in_maf.append(spec)
                    while True:  # reading entire maf set will take some time
                        block = indexed_maf.read_at_current_offset(indexed_maf.f)
                        if block is None:
                            break
                        for comp in block.components:
                            spec = maf_utilities.src_split(comp.src)[0]
                            if spec not in species_found_in_maf:
                                species_found_in_maf.append(spec)
                # indexed species
                for spec in indexed_for_species:
                    if spec not in species_indexed_in_maf:
                        print(
                            "Line %i, %s claims to be indexed for %s, but indexes do not exist."
                            % (i, uid, spec))
                for spec in species_indexed_in_maf:
                    if spec not in indexed_for_species:
                        print(
                            "Line %i, %s is indexed for %s, but is not listed in loc file."
                            % (i, uid, spec))
                # existing species
                for spec in species_exist:
                    if spec not in species_found_in_maf:
                        print(
                            "Line %i, %s claims to have blocks for %s, but was not found in MAF files."
                            % (i, uid, spec))
                for spec in species_found_in_maf:
                    if spec not in species_exist:
                        print(
                            "Line %i, %s contains %s, but is not listed in loc file."
                            % (i, uid, spec))
            except Exception as e:
                # Report the bad line and keep checking the remaining ones.
                print("Line %i is invalid: %s" % (i, e))
Example #3
0
def __main__():
    """Cross-check a MAF index location file against the MAF files it lists.

    ``sys.argv[1]`` is the location file. Lines starting with '#' are skipped.
    Every other line must hold five tab-separated fields: display name, uid,
    comma-separated indexed species, comma-separated species claimed to exist,
    and comma-separated MAF files. Each disagreement between those claims and
    what the indexes / MAF blocks contain is printed. A line that raises while
    being processed is reported as invalid and checking moves on.
    """
    index_location_file = sys.argv[1]
    # The context manager guarantees the location file is closed on exit.
    with open(index_location_file) as loc_file:
        for i, line in enumerate(loc_file):
            try:
                if line.startswith('#'):
                    continue
                display_name, uid, indexed_for_species, species_exist, maf_files = line.rstrip().split('\t')
                indexed_for_species = indexed_for_species.split(',')
                species_exist = species_exist.split(',')
                maf_files = maf_files.split(',')
                # Lists (not sets) so mismatches are reported in discovery order.
                species_indexed_in_maf = []
                species_found_in_maf = []
                for maf_file in maf_files:
                    indexed_maf = bx.align.maf.MAFIndexedAccess(maf_file, keep_open=True, parse_e_rows=False)
                    for key in indexed_maf.indexes.indexes.keys():
                        spec = maf_utilities.src_split(key)[0]
                        if spec not in species_indexed_in_maf:
                            species_indexed_in_maf.append(spec)
                    while True:  # reading entire maf set will take some time
                        block = indexed_maf.read_at_current_offset(indexed_maf.f)
                        if block is None:
                            break
                        for comp in block.components:
                            spec = maf_utilities.src_split(comp.src)[0]
                            if spec not in species_found_in_maf:
                                species_found_in_maf.append(spec)
                # Each print() below takes a single pre-formatted string, which
                # behaves identically on Python 2 and Python 3.
                # indexed species
                for spec in indexed_for_species:
                    if spec not in species_indexed_in_maf:
                        print("Line %i, %s claims to be indexed for %s, but indexes do not exist." % (i, uid, spec))
                for spec in species_indexed_in_maf:
                    if spec not in indexed_for_species:
                        print("Line %i, %s is indexed for %s, but is not listed in loc file." % (i, uid, spec))
                # existing species
                for spec in species_exist:
                    if spec not in species_found_in_maf:
                        print("Line %i, %s claims to have blocks for %s, but was not found in MAF files." % (i, uid, spec))
                for spec in species_found_in_maf:
                    if spec not in species_exist:
                        print("Line %i, %s contains %s, but is not listed in loc file." % (i, uid, spec))
            except Exception as e:
                # Report the bad line and keep checking the remaining ones.
                print("Line %i is invalid: %s" % (i, e))
def __main__():
    """Convert a MAF file to FASTA, optionally restricted to chosen species.

    Command-line arguments (``sys.argv``):
        1. input MAF filename
        2. output FASTA filename
        3. species option, parsed by ``maf_utilities.parse_species_option``;
           a falsy result means blocks are not restricted
        4. partial flag; "partial_disallowed" drops restricted blocks that
           contain fewer species than were requested

    Each component is written as a header line followed by its text, and every
    written block is followed by an empty line.
    """
    # NOTE(review): the code after each tool_fail() call assumes tool_fail()
    # does not return — confirm against maf_utilities.
    try:
        maf_file = open(sys.argv[1])
        maf_reader = maf.Reader(maf_file)
    except Exception as e:
        maf_utilities.tool_fail("Error opening input MAF: %s" % e)
    try:
        file_out = open(sys.argv[2], 'w')
    except Exception as e:
        maf_utilities.tool_fail("Error opening file for output: %s" % e)
    try:
        species = maf_utilities.parse_species_option(sys.argv[3])
        if species:
            num_species = len(species)
        else:
            num_species = 0
    except Exception as e:
        maf_utilities.tool_fail("Error determining species value: %s" % e)
    try:
        partial = sys.argv[4]
    except Exception as e:
        maf_utilities.tool_fail("Error determining keep partial value: %s" % e)

    if species:
        print("Restricted to species: %s" % ', '.join(species))
    else:
        print("Not restricted to species.")

    try:
        for block_num, block in enumerate(maf_reader):
            if species:
                block = block.limit_to_species(species)
                if len(maf_utilities.get_species_in_block(
                        block)) < num_species and partial == "partial_disallowed":
                    continue
            # Zero-based running index of each species within this block.
            spec_counts = {}
            for component in block.components:
                spec, chrom = maf_utilities.src_split(component.src)
                if spec not in spec_counts:
                    spec_counts[spec] = 0
                else:
                    spec_counts[spec] += 1
                file_out.write("%s\n" % maf_utilities.get_fasta_header(
                    component, {
                        'block_index': block_num,
                        'species': spec,
                        'sequence_index': spec_counts[spec]
                    },
                    suffix="%s_%i_%i" % (spec, block_num, spec_counts[spec])))
                file_out.write("%s\n" % component.text)
            file_out.write("\n")
    finally:
        # Release both handles even when a block fails to convert.
        file_out.close()
        maf_file.close()
def __main__():
    """Write the genomic intervals of one species found in a MAF file.

    Arguments are popped from ``sys.argv`` in this order: output filename,
    input MAF filename, species. The output starts with a
    ``#chrom start end strand`` header line followed by one tab-separated line
    per matching component. Processing errors are reported on stderr instead
    of being raised, and a summary line is printed to stdout at the end.
    """
    output_name = sys.argv.pop(1)
    input_name = sys.argv.pop(1)
    species = sys.argv.pop(1)
    count = 0
    # Context managers close both files even if processing stops early.
    with open(output_name, 'w') as out:
        # write interval header line
        out.write("#chrom\tstart\tend\tstrand\n")
        try:
            with open(input_name, 'r') as fh:
                for block in bx.align.maf.Reader(fh):
                    for c in maf_utilities.iter_components_by_src_start(block, species):
                        if c is not None:
                            out.write("%s\t%i\t%i\t%s\n" % (maf_utilities.src_split(c.src)[-1], c.get_forward_strand_start(), c.get_forward_strand_end(), c.strand))
                            count += 1
        except Exception as e:
            # sys.stderr.write works unchanged on Python 2 and Python 3.
            sys.stderr.write("There was a problem processing your input: %s\n" % e)
    # Report the tally that was being counted but never shown.
    print("%i MAF blocks converted to Genomic Intervals for species %s." % (count, species))
def __main__():
    """Convert every block of a MAF file to FASTA records.

    Arguments are popped from ``sys.argv`` in this order: output filename,
    input MAF filename. Each component is written as a header line followed by
    its text, and each block is followed by an empty line. The number of
    blocks processed is printed at the end.
    """
    output_name = sys.argv.pop(1)
    input_name = sys.argv.pop(1)
    count = 0
    with open(output_name, 'w') as out:
        with open(input_name, 'r') as maf_file:
            for block_index, block in enumerate(bx.align.maf.Reader(maf_file)):
                # block_index stays zero-based for the headers; count is the
                # number of blocks seen, so the summary is not off by one.
                count = block_index + 1
                # Zero-based running index of each species within this block.
                spec_counts = {}
                for c in block.components:
                    spec, chrom = maf_utilities.src_split(c.src)
                    if spec not in spec_counts:
                        spec_counts[spec] = 0
                    else:
                        spec_counts[spec] += 1
                    out.write("%s\n" % maf_utilities.get_fasta_header(c, {'block_index': block_index, 'species': spec, 'sequence_index': spec_counts[spec]}, suffix="%s_%i_%i" % (spec, block_index, spec_counts[spec])))
                    out.write("%s\n" % c.text)
                out.write("\n")
    print("%i MAF blocks converted to FASTA." % (count))
Example #7
0
def __main__():
    """Convert every block of a MAF file to FASTA records.

    Arguments are popped from ``sys.argv`` in this order: output filename,
    input MAF filename. Each component is written as a header line followed by
    its text, and each block is followed by an empty line. The number of
    blocks processed is printed at the end.
    """
    output_name = sys.argv.pop(1)
    input_name = sys.argv.pop(1)
    count = 0
    # Context managers close both files even if a block fails to convert.
    with open(output_name, 'w') as out:
        with open(input_name, 'r') as maf_file:
            for block_index, block in enumerate(bx.align.maf.Reader(maf_file)):
                # block_index stays zero-based for the headers; count is the
                # number of blocks seen, so the summary is not off by one.
                count = block_index + 1
                # Zero-based running index of each species within this block.
                spec_counts = {}
                for c in block.components:
                    spec, chrom = maf_utilities.src_split(c.src)
                    if spec not in spec_counts:
                        spec_counts[spec] = 0
                    else:
                        spec_counts[spec] += 1
                    out.write("%s\n" % maf_utilities.get_fasta_header(c, {'block_index': block_index, 'species': spec, 'sequence_index': spec_counts[spec]}, suffix="%s_%i_%i" % (spec, block_index, spec_counts[spec])))
                    out.write("%s\n" % c.text)
                out.write("\n")
    # A single-argument print() behaves the same on Python 2 and Python 3.
    print("%i MAF blocks converted to FASTA." % (count))
Example #8
0
def __main__():
    """Write the genomic intervals of one species found in a MAF file.

    Arguments are popped from ``sys.argv`` in this order: output filename,
    input MAF filename, species. The output starts with a
    ``#chrom start end strand`` header line followed by one tab-separated line
    per matching component. Processing errors are reported on stderr instead
    of being raised; the number of intervals written is printed at the end.
    """
    output_name = sys.argv.pop(1)
    input_name = sys.argv.pop(1)
    species = sys.argv.pop(1)
    count = 0
    # Context managers close both files even if processing stops early.
    with open(output_name, 'w') as out:
        # write interval header line
        out.write("#chrom\tstart\tend\tstrand\n")
        try:
            with open(input_name, 'r') as fh:
                for block in bx.align.maf.Reader(fh):
                    for c in maf_utilities.iter_components_by_src_start(block, species):
                        if c is not None:
                            out.write("%s\t%i\t%i\t%s\n" % (maf_utilities.src_split(c.src)[-1], c.get_forward_strand_start(), c.get_forward_strand_end(), c.strand))
                            count += 1
        except Exception as e:
            # sys.stderr.write works unchanged on Python 2 and Python 3.
            sys.stderr.write("There was a problem processing your input: %s\n" % e)
    # A single-argument print() behaves the same on Python 2 and Python 3.
    print("%i MAF blocks converted to Genomic Intervals for species %s." % (count, species))
def __main__():
    """Extract one species' genomic intervals from a MAF file.

    Pops three values from ``sys.argv``: the output filename, the input MAF
    filename and the species. Writes a ``#chrom start end strand`` header and
    then one tab-separated row per component yielded for that species. Any
    exception raised while reading or writing rows is reported on stderr, and
    the number of rows written is printed to stdout afterwards.
    """
    output_name = sys.argv.pop(1)
    input_name = sys.argv.pop(1)
    species = sys.argv.pop(1)

    row_template = "%s\t%i\t%i\t%s\n"
    count = 0

    with open(output_name, 'w') as intervals:
        # Header line describing the interval columns.
        intervals.write("#chrom\tstart\tend\tstrand\n")
        try:
            with open(input_name, 'r') as maf_handle:
                reader = bx.align.maf.Reader(maf_handle)
                for alignment in reader:
                    matches = maf_utilities.iter_components_by_src_start(alignment, species)
                    for comp in matches:
                        if comp is None:
                            continue
                        # Last element of the split src is used as the chrom.
                        chrom = maf_utilities.src_split(comp.src)[-1]
                        start = comp.get_forward_strand_start()
                        end = comp.get_forward_strand_end()
                        intervals.write(row_template % (chrom, start, end, comp.strand))
                        count += 1
        except Exception as err:
            print("There was a problem processing your input: %s" % err, file=sys.stderr)

    print("%i MAF blocks converted to Genomic Intervals for species %s." % (count, species))
def __main__():
    """Convert a MAF file to FASTA, optionally restricted to chosen species.

    Command-line arguments (``sys.argv``):
        1. input MAF filename
        2. output FASTA filename
        3. species option, parsed by ``maf_utilities.parse_species_option``;
           a falsy result means blocks are not restricted
        4. partial flag; "partial_disallowed" drops restricted blocks that
           contain fewer species than were requested

    Each component is written as a header line followed by its text, and every
    written block is followed by an empty line.
    """
    # NOTE(review): the code after each tool_fail() call assumes tool_fail()
    # does not return — confirm against maf_utilities.
    try:
        maf_file = open(sys.argv[1])
        maf_reader = maf.Reader(maf_file)
    except Exception as e:
        maf_utilities.tool_fail("Error opening input MAF: %s" % e)
    try:
        file_out = open(sys.argv[2], 'w')
    except Exception as e:
        maf_utilities.tool_fail("Error opening file for output: %s" % e)
    try:
        species = maf_utilities.parse_species_option(sys.argv[3])
        if species:
            num_species = len(species)
        else:
            num_species = 0
    except Exception as e:
        maf_utilities.tool_fail("Error determining species value: %s" % e)
    try:
        partial = sys.argv[4]
    except Exception as e:
        maf_utilities.tool_fail("Error determining keep partial value: %s" % e)

    if species:
        print("Restricted to species: %s" % ', '.join(species))
    else:
        print("Not restricted to species.")

    try:
        for block_num, block in enumerate(maf_reader):
            if species:
                block = block.limit_to_species(species)
                if len(maf_utilities.get_species_in_block(block)) < num_species and partial == "partial_disallowed":
                    continue
            # Zero-based running index of each species within this block.
            spec_counts = {}
            for component in block.components:
                spec, chrom = maf_utilities.src_split(component.src)
                if spec not in spec_counts:
                    spec_counts[spec] = 0
                else:
                    spec_counts[spec] += 1
                # Ordered so the header fields are passed in a fixed order.
                d = OrderedDict([('block_index', block_num), ('species', spec), ('sequence_index', spec_counts[spec])])
                file_out.write("%s\n" % maf_utilities.get_fasta_header(component, d, suffix="%s_%i_%i" % (spec, block_num, spec_counts[spec])))
                file_out.write("%s\n" % component.text)
            file_out.write("\n")
    finally:
        # Release both handles even when a block fails to convert.
        file_out.close()
        maf_file.close()
            num_species = 0
    except Exception, e:
        maf_utilities.tool_fail( "Error determining species value: %s" % e )
    try:
        partial = sys.argv[4]
    except Exception, e:
        maf_utilities.tool_fail( "Error determining keep partial value: %s" % e )
    
    if species:
        print "Restricted to species: %s" % ', '.join( species )
    else:
        print "Not restricted to species."
    
    for block_num, block in enumerate( maf_reader ):
        if species:
            block = block.limit_to_species( species )
            if len( maf_utilities.get_species_in_block( block ) ) < num_species and partial == "partial_disallowed": continue
        spec_counts = {}
        for component in block.components:
            spec, chrom = maf_utilities.src_split( component.src )
            if spec not in spec_counts:
                spec_counts[ spec ] = 0
            else:
                spec_counts[ spec ] += 1
            file_out.write( "%s\n" % maf_utilities.get_fasta_header( component, { 'block_index' : block_num, 'species' : spec, 'sequence_index' : spec_counts[ spec ] }, suffix = "%s_%i_%i" % ( spec, block_num, spec_counts[ spec ] ) ) )
            file_out.write( "%s\n" % component.text )
        file_out.write( "\n" )
    file_out.close()

# Run the tool only when this file is executed as a script.
if __name__ == "__main__":
    __main__()