def peak2fasta(peak_ids, ref_genome):
    """
    Convert peak_id(s) into a fasta object.

    Args:
        peak_ids (str or list of str): Peak_id. e.g. "chr5_0930303_9499409"
            or it can be a list of peak_id.
            e.g. ["chr5_0930303_9499409", "chr11_123445555_123445577"]
            Any ``str`` subclass is treated as a single peak_id.
        ref_genome (str): Reference genome name. e.g. "mm9", "mm10", "hg19" etc

    Returns:
        gimmemotifs fasta object: DNA sequence in fasta format. Each entry is
        named "<name>_<start>_<end>" using the attributes of the record
        fetched from the genome.
    """
    genome_data = Genome(ref_genome)

    def peak2seq(peak_id):
        # Split the peak_id into its parts, then slice the chromosome record.
        chromosome_name, start, end = decompose_chrstr(peak_id)
        start, end = int(start), int(end)
        region = genome_data[chromosome_name][start:end]
        # The name is rebuilt from the fetched record rather than reusing
        # peak_id verbatim.
        name = f"{region.name}_{region.start}_{region.end}"
        return name, region.seq

    # isinstance (not an exact type check) so that str subclasses are wrapped
    # as one peak_id instead of being iterated character by character.
    if isinstance(peak_ids, str):
        peak_ids = [peak_ids]

    fasta = Fasta()
    for peak_id in peak_ids:
        name, seq = peak2seq(peak_id)
        fasta.add(name, seq)
    return fasta
def remove_zero_seq(fasta_object):
    """
    Build a new fasta object that keeps only the non-empty sequences.

    Args:
        fasta_object: Object exposing parallel ``seqs`` and ``ids``
            collections; entry ``ids[i]`` is used as the name of ``seqs[i]``.

    Returns:
        A new ``Fasta`` holding every (name, sequence) pair whose sequence
        is truthy, in the original order. The input object is not modified
        by this function.
    """
    kept = Fasta()
    for position, sequence in enumerate(fasta_object.seqs):
        # Skip entries whose sequence is empty (falsy).
        if not sequence:
            continue
        kept.add(fasta_object.ids[position], sequence)
    return kept