コード例 #1
0
ファイル: wgmlst.py プロジェクト: eva83531/Benga
def rename(query_dir, input_dir):
    """Rewrite every FASTA file in ``input_dir`` under a uniform naming scheme.

    Files are visited in sorted filename order and numbered from 1. Each
    record of the n-th file gets the id ``Genome_<n>::Contig_<m>`` (``m`` is
    the 1-based position of the record in its file) and the records are
    written to ``Genome_<n>.fa`` inside ``query_dir``.

    Returns a dict whose keys are ``files.replace_ext(<new file name>)`` and
    whose values are ``files.replace_ext(<original file name>)``.
    NOTE(review): ``files.replace_ext`` presumably strips the extension when
    called with one argument -- confirm against the ``files`` module.
    """
    name_by_genome = {}
    source_names = sorted(os.listdir(input_dir))
    for genome_no, source_name in enumerate(source_names, 1):
        contigs = SeqIO.parse(files.joinpath(input_dir, source_name), "fasta")
        renamed = [
            seq.new_record(
                "Genome_{i}::Contig_{j}".format(i=genome_no, j=contig_no),
                str(contig.seq),
            )
            for contig_no, contig in enumerate(contigs, 1)
        ]

        target_name = "Genome_{i}.fa".format(i=genome_no)
        SeqIO.write(renamed, files.joinpath(query_dir, target_name), "fasta")
        name_by_genome[files.replace_ext(target_name)] = \
            files.replace_ext(source_name)
    return name_by_genome
コード例 #2
0
def reference_self_blastp(output_dir, freq):
    """BLASTP the per-locus reference peptides against themselves.

    For every ``locus -> counter`` entry of ``freq`` the most common key of
    the counter is translated with ``table=11`` and wrapped in a record named
    after the locus. The records are saved to ``ref_seq.faa``, compiled into
    the ``ref_db`` BLAST database and then queried against that same
    database; all three artefacts live in ``output_dir``.

    Returns a tuple ``(blastp_out_file, ref_length)`` where ``ref_length``
    maps each record id to the length of its sequence.
    """
    # All artefacts of this step are placed side by side in output_dir.
    ref_faa = os.path.join(output_dir, "ref_seq.faa")
    ref_db = os.path.join(output_dir, "ref_db")
    blastp_out_file = os.path.join(output_dir, "ref_db.blastp.out")

    ref_recs = []
    ref_length = {}
    for locus, counter in freq.items():
        top_allele = counter.most_common(1)[0][0]
        rec = seq.new_record(locus, top_allele.translate(table=11))
        ref_recs.append(rec)
        ref_length[rec.id] = len(rec.seq)

    seq.save_records(ref_recs, ref_faa)
    seq.compile_blastpdb(ref_faa, ref_db)
    seq.query_blastpdb(ref_faa, ref_db, blastp_out_file, seq.BLAST_COLUMNS)
    return blastp_out_file, ref_length
コード例 #3
0
ファイル: profiling.py プロジェクト: yuehhua/Benga
def make_ref_blastpdb(ref_db_file, database):
    """Compile a BLASTP database from the reference allele of every locus.

    The peptide sequence of each locus's reference allele is read from
    ``database`` (join of ``loci`` and ``alleles`` on ``ref_allele``),
    written to the intermediate file ``ref_db_file + ".fasta"`` and compiled
    into the BLAST database ``ref_db_file``.

    The intermediate FASTA file is removed before returning, including when
    one of the steps raises, so a failed run does not leave it behind.

    Returns whatever ``generate_allele_len`` computes for the reference
    records (presumably a mapping of record id to sequence length -- confirm
    against ``generate_allele_len``).
    """
    query = "select loci.locus_id, alleles.peptide_seq " \
            "from loci inner join alleles " \
            "on loci.ref_allele = alleles.allele_id;"
    refs = db.from_sql(query, database=database)

    ref_recs = [
        seq.new_record(row["locus_id"], row["peptide_seq"], seqtype="protein")
        for _, row in refs.iterrows()
    ]
    ref_fasta = ref_db_file + ".fasta"
    try:
        seq.save_records(ref_recs, ref_fasta)
        ref_len = generate_allele_len(ref_recs)
        seq.compile_blastpdb(ref_fasta, ref_db_file)
    finally:
        # The FASTA file is only an input for the database build; clean it up
        # on every exit path. The existence check keeps a failure that
        # happened before the file was created from being masked by a
        # FileNotFoundError raised here.
        if os.path.exists(ref_fasta):
            os.remove(ref_fasta)
    return ref_len
コード例 #4
0
ファイル: profiling.py プロジェクト: yuehhua/Benga
def blast_for_new_alleles(candidates, alleles, ref_db, temp_dir, ref_len):
    """BLASTP candidate alleles against the reference database.

    Each candidate is written as a protein record (sequence taken from
    ``alleles[cand][1]``) to ``new_allele_candidates.fasta`` in ``temp_dir``
    and queried against ``ref_db``. The hits are passed through
    ``filter_duplicates`` with ``identity=95`` and then reduced to one row
    per ``qseqid``.

    Returns a list of ``(qseqid, sseqid)`` tuples, one per remaining hit.
    """
    stem = "new_allele_candidates"
    query_fasta = os.path.join(temp_dir, stem + ".fasta")
    hits_path = os.path.join(temp_dir, "{}.blastp.out".format(stem))

    candidate_recs = [
        seq.new_record(cand, alleles[cand][1], seqtype="protein")
        for cand in candidates
    ]
    seq.save_records(candidate_recs, query_fasta)
    candidate_len = generate_allele_len(candidate_recs)

    seq.query_blastpdb(query_fasta, ref_db, hits_path, seq.BLAST_COLUMNS)

    hits = filter_duplicates(hits_path, candidate_len, ref_len, identity=95)
    # Keep a single hit per query sequence.
    unique_hits = hits.drop_duplicates("qseqid")
    return [
        (hit["qseqid"], hit["sseqid"])
        for _, hit in unique_hits.iterrows()
    ]
コード例 #5
0
def save_locusfiles(freq, locus_dir):
    """Write one ``<locus>.fa`` file per locus into ``locus_dir``.

    ``freq`` maps a locus name to a counter of alleles. Every distinct allele
    of a locus becomes one record whose id is
    ``operations.make_seqid(str(allele))`` and whose sequence is the allele
    itself.
    """
    for locus, counter in freq.items():
        target = files.joinpath(locus_dir, locus + ".fa")
        allele_records = []
        for allele in counter:
            seqid = operations.make_seqid(str(allele))
            allele_records.append(seq.new_record(seqid, allele))
        seq.save_records(allele_records, target)
コード例 #6
0
def save_refseq(freq, refseq_file):
    """Write the most common allele of every locus to ``refseq_file``.

    ``freq`` maps a locus to a counter of alleles; the counter's most common
    key is used as that locus's reference sequence. One FASTA record per
    locus is written, with ``str(locus)`` as the record id.

    Returns a dict mapping each locus to its chosen reference sequence.
    """
    refseqs = {}
    records = []
    for locus, counter in freq.items():
        sequence = counter.most_common(1)[0][0]
        refseqs[locus] = sequence
        records.append(seq.new_record(str(locus), sequence))
    SeqIO.write(records, refseq_file, "fasta")
    return refseqs
コード例 #7
0
 def __write_new_format(self, source_file, sink_file):
     """Copy a FASTA file, replacing every record id with a generated one.

     Records are read from ``source_file`` in order; the m-th record
     (1-based) gets the id ``self.newseqid(m)`` and keeps its sequence.
     The renamed records are written to ``sink_file`` in FASTA format.
     """
     contigs = SeqIO.parse(source_file, "fasta")
     renamed = [
         seq.new_record(self.newseqid(index), str(contig.seq))
         for index, contig in enumerate(contigs, 1)
     ]
     SeqIO.write(renamed, sink_file, "fasta")