def write_in_fastafile(filename, res, min_length=50):
     """Write the significant, long-enough sequences of ``res`` to a FASTA file.

     Every item of ``res`` must unpack into exactly five values; only the
     first three are used, as ``(sequence, score, p-value)``. An item is
     kept when its sequence is strictly longer than ``min_length`` and its
     p-value is strictly below 0.05. The label of a kept sequence is ``'+'``
     when the score is positive and ``'-'`` otherwise, followed by a space
     and ``'p-val:'`` plus the p-value.

     :param filename: forwarded as the first argument of
         ``FileUtility.create_fasta_file``.
     :param res: iterable of five-item records.
     :param min_length: length a sequence has to exceed to be written.
     """
     # Single pass over ``res`` so that one-shot iterables keep working.
     kept = [
         (seq, score, pval)
         for seq, score, pval, _, _ in res
         if len(seq) > min_length and pval < 0.05
     ]
     sequences = [seq for seq, _, _ in kept]
     headers = [
         ' '.join(('+' if score > 0 else '-', 'p-val:' + str(pval)))
         for _, score, pval in kept
     ]
     FileUtility.create_fasta_file(filename, sequences, headers)
    def align_markers(self, p_value_threshold):
        """Annotate the significant markers via blastn and store them sorted by label.

        Every entry of ``self.seq_IDS`` (iterated under a ``tqdm`` progress
        bar) carries a description whose second ':'-separated field is parsed
        as the marker's p-value. Markers with a p-value
        ``<= p_value_threshold`` are written to ``temp.fasta``, searched with
        ``NcbiblastnCommandline`` against the hard-coded database (e-value
        cut-off 0.001, XML report in ``temp.xml``), and the report is parsed
        with ``NCBIXML``.

        The first HSP of the report fixes a reference score and alignment
        length. Every HSP reaching both reference values whose
        ``self.ez_taxa_dict`` entry does not contain ``'Eukarya'`` contributes
        a ``(taxonomy, e-value)`` pair. The pairs are reduced by
        ``self.lowest_certain_level``; a truthy outcome is passed through
        ``self.refine_ez_taxonomy``, otherwise the marker is labelled
        ``'ZZZNOVEL'``. In both cases ``idx[-1]`` is appended to the label.

        Side effects: sets ``self.aligned_markers`` to the list of
        ``(sequence, label, p-value)`` tuples sorted ascending by label, sets
        ``self.min_p_value`` and finally calls
        ``self.update_matrix_by_markers()``. ``temp.fasta`` and ``temp.xml``
        are overwritten for every aligned marker and are not removed.

        :param p_value_threshold: largest p-value a marker may have in order
            to be aligned; also stored as ``self.min_p_value``.
        """
        final_results = []
        for idx, (seq, description) in tqdm.tqdm(self.seq_IDS.items()):
            pval = float(description.split(':')[1])
            # Written as a negated "<=" so that a NaN p-value is skipped.
            if not pval <= p_value_threshold:
                continue

            FileUtility.create_fasta_file('temp.fasta', [seq], ['temp'])
            blastn_cline = NcbiblastnCommandline(
                query='temp.fasta',
                db=
                "/mounts/data/proj/asgari/dissertation/git_repos/16S_datasets/EZ/raw/eztaxon_qiime_full.fasta",
                evalue=0.001,
                outfmt=5,
                out="temp.xml")
            blastn_cline()

            results = []
            reference_score = None
            reference_length = None
            # NCBIXML.parse reads lazily from the handle, so the records must
            # be consumed while the file is open; the context manager closes
            # the handle even if parsing or a dictionary lookup fails.
            with open("temp.xml", 'r') as xml_handle:
                for blast_record in NCBIXML.parse(xml_handle):
                    for alignment in blast_record.alignments:
                        for hsp in alignment.hsps:
                            if reference_score is None:
                                # The very first HSP defines the reference.
                                reference_score = hsp.score
                                reference_length = hsp.align_length
                            if (hsp.score >= reference_score and
                                    hsp.align_length >= reference_length):
                                # Looked up only for qualifying HSPs, so an
                                # unknown hit id of a weaker HSP is ignored.
                                taxonomy = self.ez_taxa_dict[alignment.hit_id]
                                if 'Eukarya' not in taxonomy:
                                    results.append((taxonomy, hsp.expect))

            res = self.lowest_certain_level(results) if results else None
            if res:
                label = self.refine_ez_taxonomy(res) + idx[-1]
            else:
                # NOTE(review): the 'ZZZ' prefix presumably pushes
                # unannotated markers to the end of the sort below - confirm.
                label = 'ZZZNOVEL' + idx[-1]
            final_results.append((seq, label, pval))

        # sorted markers by the taxonomy information of the last certain level
        self.aligned_markers = sorted(final_results,
                                      key=operator.itemgetter(1),
                                      reverse=False)
        self.min_p_value = p_value_threshold
        self.update_matrix_by_markers()
    def _perform_alignment(self, idx__seq_discrpt):
        """Align a single marker with blastn and return its annotation.

        The p-value is parsed from the second ':'-separated field of the
        description. If it is not ``<= self.p_value_threshold`` nothing is
        run and an empty list is returned. Otherwise the sequence is written
        to ``../tmp/temp<idx>.fasta``, searched with
        ``NcbiblastnCommandline`` against the hard-coded database (e-value
        cut-off 0.001, XML report in ``../tmp/temp<idx>.xml``) and the report
        is parsed with ``NCBIXML``.

        The first HSP of the report fixes a reference score and alignment
        length. Every HSP reaching both reference values whose
        ``self.ez_taxa_dict`` entry does not contain ``'Eukarya'`` contributes
        a ``(taxonomy, e-value)`` pair. The pairs are reduced by
        ``self.lowest_certain_level``; a truthy outcome is passed through
        ``self.refine_ez_taxonomy``, otherwise the label is ``'ZZZNOVEL'``.
        In both cases ``idx[-1]`` is appended to the label. The temporary
        files are left in place.

        :param idx__seq_discrpt: pair ``(idx, (seq, description))``.
        :return: ``(seq, label, pval)`` for an aligned marker, or ``[]`` when
            the marker does not pass the p-value threshold.
        """
        idx, (seq, description) = idx__seq_discrpt
        pval = float(description.split(':')[1])
        # Written as a negated "<=" so that a NaN p-value is skipped.
        if not pval <= self.p_value_threshold:
            return []

        # Per-marker file names, so that several markers can be processed
        # without overwriting each other's temporary files.
        tmp_prefix = '../tmp/temp' + str(idx)
        fasta_path = tmp_prefix + '.fasta'
        xml_path = tmp_prefix + '.xml'

        FileUtility.create_fasta_file(fasta_path, [seq], ['temp'])
        blastn_cline = NcbiblastnCommandline(
            query=fasta_path,
            db=
            "/mounts/data/proj/asgari/dissertation/git_repos/16S_datasets/EZ/raw/eztaxon_qiime_full.fasta",
            evalue=0.001,
            outfmt=5,
            out=xml_path)
        blastn_cline()

        results = []
        reference_score = None
        reference_length = None
        # NCBIXML.parse reads lazily from the handle, so the records must be
        # consumed while the file is open; the context manager closes the
        # handle even if parsing or a dictionary lookup fails.
        with open(xml_path, 'r') as xml_handle:
            for blast_record in NCBIXML.parse(xml_handle):
                for alignment in blast_record.alignments:
                    for hsp in alignment.hsps:
                        if reference_score is None:
                            # The very first HSP defines the reference.
                            reference_score = hsp.score
                            reference_length = hsp.align_length
                        if (hsp.score >= reference_score and
                                hsp.align_length >= reference_length):
                            # Looked up only for qualifying HSPs, so an
                            # unknown hit id of a weaker HSP is ignored.
                            taxonomy = self.ez_taxa_dict[alignment.hit_id]
                            if 'Eukarya' not in taxonomy:
                                results.append((taxonomy, hsp.expect))

        res = self.lowest_certain_level(results) if results else None
        if res:
            return (seq, self.refine_ez_taxonomy(res) + idx[-1], pval)
        return (seq, 'ZZZNOVEL' + idx[-1], pval)