def extract_gene(uniformize_id):
    """Write the gene and the protein record stored under one id as FASTA.

    The record found under ``uniformize_id`` in ``coding_gene_base`` is
    written to ``gene_fasta_fl``; the record found under the same key in
    ``protein_base`` is then written to ``protein_fasta_fl``.  Both records
    are modified in place before being written: the id is replaced by
    ``get_id_protein(record.id)`` and the name and description are set to
    empty strings.

    Args:
        uniformize_id: Key used for the lookup in both sequence collections.
    """

    def _write_stripped(record, handle):
        # Rewrite the id and blank the remaining header fields before
        # writing (presumably so the FASTA header holds the id only).
        record.id = get_id_protein(record.id)
        record.name = record.description = ""
        SeqIO.write(record, handle, "fasta")

    _write_stripped(coding_gene_base[uniformize_id], gene_fasta_fl)
    _write_stripped(protein_base[uniformize_id], protein_fasta_fl)
def one_head(df_row):
    """Yield the normalised id of every cell in ``df_row`` holding one id.

    The first element of each cell is inspected:

    * ``NA`` values and blank (empty or whitespace-only) values are skipped;
    * a value made of several whitespace-separated tokens triggers a call
      to ``one2two()`` and yields nothing here;
    * a single-token value is converted with ``get_id_protein``, passed to
      ``extract_gene`` and then yielded.

    Args:
        df_row: Iterable of indexable cells.  The first element of a cell
            is assumed to be a string, since ``.split()`` is called on it
            -- TODO confirm against the caller.

    Yields:
        The converted id of each single-id cell, in iteration order.
    """
    for R_cell in df_row:
        cell_value = R_cell[0]
        # Both comparisons are kept from the original; the str() form
        # presumably covers NA markers that are not plain str objects.
        if cell_value == 'NA' or str(cell_value) == "NA":
            continue
        tokens = cell_value.split()
        if not tokens:
            # Blank cell: there is no id to extract.  Without this guard
            # the tokens[0] access below raised IndexError.
            continue
        if len(tokens) > 1:
            one2two()
            continue
        uniformize_id = get_id_protein(tokens[0])
        extract_gene(uniformize_id)
        yield uniformize_id
Example #3
0
def extract_gene(protein_file, gene_base_name, pan_gene_file_name,
                 gene_protein_mapping_table):
    '''
    Write the gene record of every protein in a FASTA file, plus a
    gene/protein id mapping table.

    input 1: protein_file, pan protein fasta file
    input 2: gene_base_name, merged gene fasta file (indexed by the key
             returned from uniformize_gene_to_protein.get_id_gene)
    input 3: gene_protein_mapping_table, path of the tab-separated
             "gene id<TAB>protein id" table that is written
    output 1: pan_gene_file_name, fasta file receiving the gene records

    Raises KeyError when a protein id has no matching entry in the gene
    index.

    NOTE: the original docstring ended with an unfinished reminder
    ("don't forget remove ...") -- TODO clarify what has to be removed.
    '''
    gene_base = SeqIO.index(
        gene_base_name,
        "fasta",
        key_function=uniformize_gene_to_protein.get_id_gene)
    try:
        with open(gene_protein_mapping_table, 'w+') as gene_protein_fl, \
                open(pan_gene_file_name, 'w+') as out_fl:
            for protein_sequence in SeqIO.parse(protein_file, "fasta"):
                # Disabled filters, kept for reference:
                # if re.search("\.t1",protein_sequence.id) is not None: continue
                # if re.search("^70-15",protein_sequence.id) is not None: continue
                gene_id = uniformize_gene_to_protein.get_id_protein(
                    protein_sequence.id)
                gene_sequence = gene_base[gene_id]
                SeqIO.write(gene_sequence, out_fl, "fasta")
                gene_protein_fl.write("{}\t{}\n".format(
                    gene_sequence.id, protein_sequence.id))
    finally:
        # The index keeps the gene FASTA file open; release it even when a
        # lookup or a write fails.
        gene_base.close()
def extract_gene_two_head(protein_id):
    """Return the ``gene_base`` entry that corresponds to ``protein_id``.

    The protein id is converted with ``get_id_protein`` and the result is
    used as the key into ``gene_base`` (defined outside this function).
    """
    return gene_base[get_id_protein(protein_id)]
def extract_gene(protein_id, out_fl):
    """Write the ``gene_base`` entry matching ``protein_id`` to ``out_fl``.

    The key is obtained by passing ``protein_id`` through
    ``get_id_protein``; the record found is written in FASTA format.
    """
    matched_record = gene_base[get_id_protein(protein_id)]
    SeqIO.write(matched_record, out_fl, "fasta")