def get_abundance_from_fasta(fasta, regexp=r'count=(\d+\.\d+|\d+)'):
    """Build a mapping of sequence id to abundance from FASTA headers.

    The sequence id is the first whitespace-delimited token of each header.
    The abundance is the first capture group of ``regexp`` searched in the
    full header, converted to ``float``. If the same id appears more than
    once, the last occurrence wins.

    Args:
        fasta: Path of the FASTA file to read.
        regexp: Pattern whose first group captures the abundance value.
            The default matches ``count=<integer or decimal>``.

    Returns:
        dict mapping sequence id (str) to abundance (float).

    Raises:
        SystemExit: If a header does not match ``regexp``. The problem is
            logged at fatal level before exiting.
    """
    abundance = {}
    pattern = re.compile(regexp)
    # The context manager guarantees the handle is released even when
    # sys.exit() (SystemExit) or a reader error unwinds the loop.
    with open(fasta, 'r') as in_fasta_handler:
        for header, _ in read_fasta_file_handle(in_fasta_handler):
            seq_id = header.split()[0].strip()
            match = pattern.search(header)
            if not match:
                logger.fatal(
                    "Can't retrieve abundance information:\n"
                    "fasta_path:%s\nfasta_header:%s",
                    fasta,
                    header,
                )
                sys.exit("Cant't retrieve abundance")
            abundance[seq_id] = float(match.group(1))
    return abundance
def complete_fasta_with_abundance(input_fasta, output_fasta, abundance):
    """Copy a FASTA file, appending ``count=<abundance>`` to every header.

    The sequence id is the first whitespace-delimited token of each header.
    Ids missing from ``abundance`` are written with ``count=0`` and a
    warning is logged. Sequences are passed through ``format_seq`` before
    being written.

    Args:
        input_fasta: Path of the FASTA file to read.
        output_fasta: Path of the FASTA file to create (overwritten if it
            already exists).
        abundance: Mapping of sequence id to abundance value.
    """
    # A single ``with`` closes (and flushes) both handles even if the
    # reader, format_seq or write raises part-way through. Input is opened
    # first so a missing input does not create an empty output file.
    with open(input_fasta, 'r') as in_fasta_handler, \
            open(output_fasta, 'w') as out_fasta_handler:
        for header, seq in read_fasta_file_handle(in_fasta_handler):
            seq_id = header.split()[0].strip()
            if seq_id in abundance:
                ab = abundance[seq_id]
            else:
                ab = 0
                logger.warning(
                    "Can't find the abundance for:%s. Set to 0 by default.",
                    seq_id,
                )
            new_header = '{header} count={abundance}'.format(
                header=header, abundance=ab
            )
            out_fasta_handler.write(
                '>{header}\n{seq}\n'.format(
                    header=new_header, seq=format_seq(seq)
                )
            )