def extract_gene(uniformize_id):
    """Write the gene and protein records stored under *uniformize_id*.

    The gene record is read from the module-level ``coding_gene_base`` and
    appended to ``gene_fasta_fl``; the protein record is read from
    ``protein_base`` and appended to ``protein_fasta_fl``. Before writing,
    each record's id is replaced by ``get_id_protein(record.id)`` and its
    name and description are set to empty strings.
    """
    def _rewrite_header_and_dump(record, handle):
        # Same treatment for both record kinds: new id, empty name and
        # description (presumably so the FASTA header holds only the id).
        record.id = get_id_protein(record.id)
        record.name = ""
        record.description = ""
        SeqIO.write(record, handle, "fasta")

    # Gene first, then protein: a failed protein lookup still leaves the
    # gene record written.
    _rewrite_header_and_dump(coding_gene_base[uniformize_id], gene_fasta_fl)
    _rewrite_header_and_dump(protein_base[uniformize_id], protein_fasta_fl)
def one_head(df_row):
    """Yield the id produced for every single-token cell of *df_row*.

    Each element of *df_row* is indexed with ``[0]`` to get the cell value.
    Cells equal to ``'NA'`` (directly or after ``str()``) are skipped.
    The value is split on whitespace:

    * more than one token -> ``one2two()`` is called and nothing is yielded;
    * otherwise the first token is passed through ``get_id_protein``, the
      result is handed to ``extract_gene`` and then yielded.
    """
    for cell in df_row:
        head = cell[0]
        is_missing = head == 'NA' or (str(head) == "NA")
        if not is_missing:
            tokens = head.split()
            if len(tokens) <= 1:
                protein_key = get_id_protein(tokens[0])
                extract_gene(protein_key)
                yield protein_key
            else:
                # Multi-token cells are delegated; no id is produced here.
                one2two()
def extract_gene(protein_file, gene_base_name, pan_gene_file_name,
                 gene_protein_mapping_table):
    '''
    Write the gene record that matches each protein of a pan-protein FASTA.

    input 1 (protein_file): pan protein fasta file
    input 2 (gene_base_name): merged gene fasta file; it is indexed with
        uniformize_gene_to_protein.get_id_gene as the key function
    output 1 (pan_gene_file_name): fasta file receiving one gene record
        per protein, in the order the proteins are read
    output 2 (gene_protein_mapping_table): text file receiving one
        "gene_id<TAB>protein_id" line per protein

    Both output paths are opened in 'w+' mode, so existing content is
    truncated. The gene lookup is not guarded: a protein whose key is
    absent from the gene index propagates the lookup error.

    NOTE(review): the original docstring ended with "don't forget remove"
    without saying what has to be removed - confirm with the author.
    '''
    gene_base = SeqIO.index(
        gene_base_name, "fasta",
        key_function=uniformize_gene_to_protein.get_id_gene)
    try:
        with open(gene_protein_mapping_table, 'w+') as gene_protein_fl, \
                open(pan_gene_file_name, 'w+') as out_fl:
            # Earlier revisions skipped protein ids containing ".t1" or
            # starting with "70-15"; those filters are disabled.
            for protein_sequence in SeqIO.parse(protein_file, "fasta"):
                gene_id = uniformize_gene_to_protein.get_id_protein(
                    protein_sequence.id)
                gene_sequence = gene_base[gene_id]
                SeqIO.write(gene_sequence, out_fl, "fasta")
                gene_protein_fl.write("{}\t{}\n".format(
                    gene_sequence.id, protein_sequence.id))
    finally:
        # The index keeps the gene file open; release it even when a lookup
        # or a write fails part-way through.
        gene_base.close()
def extract_gene_two_head(protein_id):
    """Return the ``gene_base`` entry for *protein_id*.

    The lookup key is ``get_id_protein(protein_id)``; ``gene_base`` is a
    module-level mapping defined outside this function. Nothing is written.
    """
    return gene_base[get_id_protein(protein_id)]
def extract_gene(protein_id, out_fl):
    """Write the ``gene_base`` entry for *protein_id* to *out_fl* as FASTA.

    The lookup key is ``get_id_protein(protein_id)``; ``gene_base`` is a
    module-level mapping defined outside this function. *out_fl* is passed
    straight to ``SeqIO.write``.
    """
    lookup_key = get_id_protein(protein_id)
    record = gene_base[lookup_key]
    SeqIO.write(record, out_fl, "fasta")