Example #1
0
def get_abundance_from_fasta(fasta, regexp=r'count=(\d+\.\d+|\d+)'):
    """Build a mapping of sequence ID to abundance parsed from FASTA headers.

    The sequence ID is the first whitespace-delimited token of each header.
    The abundance is the first capture group of ``regexp`` searched in the
    whole header, converted to ``float``.

    Args:
        fasta: Path of the FASTA file to read.
        regexp: Regular expression whose first group captures the abundance
            value. The default matches ``count=<integer or decimal>``.

    Returns:
        A dict mapping each sequence ID to its abundance as a float. When an
        ID occurs several times, the last occurrence wins.

    Raises:
        SystemExit: If a header does not match ``regexp``; a fatal message is
            logged first.
    """
    abundance = {}
    pattern = re.compile(regexp)
    # The context manager guarantees the file is closed even when sys.exit()
    # raises SystemExit or when parsing the file fails part-way through.
    with open(fasta, 'r') as in_fasta_handler:
        for header, _ in read_fasta_file_handle(in_fasta_handler):
            # split() without arguments already discards surrounding
            # whitespace, so the first token is the bare ID.
            seq_id = header.split()[0]
            match = pattern.search(header)
            if not match:
                logger.fatal("Can't retrieve abundance information:\nfasta_path:%s\nfasta_header:%s" % (fasta, header))
                sys.exit("Cant't retrieve abundance")
            abundance[seq_id] = float(match.group(1))
    return abundance
Example #2
0
def complete_fasta_with_abundance(input_fasta, output_fasta, abundance):
    """Copy a FASTA file, appending ``count=<abundance>`` to every header.

    The sequence ID used for the lookup is the first whitespace-delimited
    token of the header. IDs missing from ``abundance`` are written with a
    count of 0 and a warning is logged. Each sequence is passed through
    ``format_seq`` before being written.

    Args:
        input_fasta: Path of the FASTA file to read.
        output_fasta: Path of the FASTA file to write (opened in ``'w'``
            mode, so an existing file is overwritten).
        abundance: Mapping of sequence ID to abundance value.

    Returns:
        None. The result is written to ``output_fasta``.
    """
    # One `with` for both handles: each is closed on every exit path,
    # including when opening the output fails or the loop raises.
    with open(input_fasta, 'r') as in_fasta_handler, \
            open(output_fasta, 'w') as out_fasta_handler:
        for header, seq in read_fasta_file_handle(in_fasta_handler):
            # split() without arguments already discards surrounding
            # whitespace, so the first token is the bare ID.
            seq_id = header.split()[0]
            if seq_id in abundance:
                ab = abundance[seq_id]
            else:
                ab = 0
                logger.warning("Can't find the abundance for:%s. Set to 0 by default." % seq_id)
            header = '{header} count={abundance}'.format(header=header, abundance=ab)
            out_fasta_handler.write('>{header}\n{seq}\n'.format(header=header, seq=format_seq(seq)))