Esempio n. 1
0
    def GetExec(self, optList, frame):
        """Respond to the "embossn" type command.

        Stores ``frame`` on the instance, records the output file path and
        output type, and returns the EMBOSS needle command line (built from
        the current widget values) as a string. ``optList`` is accepted but
        not used in this method.
        """
        self.frame = frame
        self.outfile = r"C:\Users\francis\Documents\Monguis\BioGui\plugins\needle.txt"
        self.outtype = "fasta"

        # The two input sequences come from parameter boxes 1 and 3.
        needle = NeedleCommandline(
            r"C:/mEMBOSS/needle.exe",
            asequence=str(self.frame.paramBoxes[1].GetValue()),
            bsequence=str(self.frame.paramBoxes[3].GetValue()))
        needle.outfile = self.outfile
        needle.gapopen = self.param[7].GetValue()
        needle.gapextend = self.param[9].GetValue()
        needle.similarity = bool(self.param[10].GetValue())

        # Tell needle which alphabet to expect; any other value of ``abet``
        # leaves both switches untouched.
        alphabet = self.frame.abet
        if alphabet == "AA":
            needle.snucleotide = False
            needle.sprotein = True
        elif alphabet in ("DNA", "RNA"):
            needle.snucleotide = True
            needle.sprotein = False

        # Optional data file, only consulted when the frame has options on.
        if self.frame.options:
            datafile = self.boxList[3].GetValue()
            if datafile != '':
                needle.datafile = str(datafile)
        return str(needle)
    def out():
        """Run needle on the two selected inputs and display the report.

        Reads the two input names and the output name from ``e1``, ``e2`` and
        ``e3`` (presumably Tk entry widgets -- they only need a ``get()``
        method), runs EMBOSS needle into ``needle.txt``, copies that report
        (prefixed with two newlines) to the requested output file and shows
        it in a Tk text window with a vertical scrollbar. ``gapopen``,
        ``gapextend`` and ``win`` are taken from the enclosing scope.
        """
        filename1 = e1.get()
        filename2 = e2.get()
        outfile = e3.get()
        needle_cline = NeedleCommandline()
        needle_cline.asequence = filename1
        needle_cline.bsequence = filename2
        needle_cline.gapopen = int(gapopen)
        needle_cline.gapextend = int(gapextend)
        needle_cline.outfile = "needle.txt"
        print(needle_cline)
        print(needle_cline.outfile)
        stdout, stderr = needle_cline()
        print(stdout + stderr)
        # The parsed alignment itself is not used; the call is kept so that
        # an unreadable needle report still surfaces as an error here.
        AlignIO.read("needle.txt", "emboss")
        # Context manager so the report handle is closed (it used to leak).
        with open("needle.txt", "r") as report:
            view = "\n\n%s" % report.read()
        with open(outfile, "w") as f:
            f.write(view)

        root = Tk()
        scrollbar = Scrollbar(root)
        text = Text(root, height=50, width=500)
        scrollbar.pack(side=RIGHT, fill=Y)
        text.pack(side=LEFT, fill=Y)
        # Wire the (vertical) scrollbar to vertical scrolling only. Setting
        # ``command`` a second time to ``xview`` used to overwrite the first
        # binding, so dragging the bar scrolled the text horizontally.
        scrollbar.config(command=text.yview)
        text.config(yscrollcommand=scrollbar.set)
        text.insert(END, view, 'color')
        mainloop()

        win.destroy()
Esempio n. 3
0
    def GetExec(self, optList, frame):
        """Respond to the "embossn" type command.

        Remembers ``frame``, sets ``self.outfile`` / ``self.outtype`` and
        returns the needle command line, as a string, assembled from the
        widget values. ``optList`` is not used by this method.
        """
        self.frame = frame
        plugin_exe = r"C:/mEMBOSS/needle.exe"
        self.outfile = r"C:\Users\francis\Documents\Monguis\BioGui\plugins\needle.txt"
        self.outtype = "fasta"

        seq_a = str(self.frame.paramBoxes[1].GetValue())
        seq_b = str(self.frame.paramBoxes[3].GetValue())
        command = NeedleCommandline(plugin_exe, asequence=seq_a, bsequence=seq_b)
        command.outfile = self.outfile
        command.gapopen = self.param[7].GetValue()
        command.gapextend = self.param[9].GetValue()
        # Normalise whatever the widget returns to a strict True/False flag.
        command.similarity = True if self.param[10].GetValue() else False

        # Select the sequence type switches; unknown alphabets set neither.
        abet = self.frame.abet
        if abet == "AA":
            command.snucleotide, command.sprotein = False, True
        elif abet == "DNA" or abet == "RNA":
            command.snucleotide, command.sprotein = True, False

        if self.frame.options:
            chosen = self.boxList[3].GetValue()
            if chosen != '':
                command.datafile = str(chosen)
        return str(command)
Esempio n. 4
0
def each_seq_align(each_id=0, record_list=(), pair_aln_dir=''):
    """Align two records with EMBOSS needle and keep only their identity.

    The first two entries of ``record_list`` are written to temporary FASTA
    files inside ``pair_aln_dir``, needle is run on them, and the needle
    report ``<each_id>.txt`` is then replaced by a single line
    ``"<each_id>\\t<identity percent>\\n"`` (or left empty when no identity
    line was found in the report).

    :param each_id: identifier used to name the temporary and result files
    :param record_list: sequence holding at least two records accepted by
        ``SeqIO.write``
    :param pair_aln_dir: directory in which all files are created
    """
    pair_id = str(each_id)
    # prepare pairwise sequence alignment files
    tmp_a_seq = os.path.join(pair_aln_dir, '{0}_a.fasta'.format(pair_id))
    SeqIO.write(record_list[0], tmp_a_seq, 'fasta')
    tmp_b_seq = os.path.join(pair_aln_dir, '{0}_b.fasta'.format(pair_id))
    SeqIO.write(record_list[1], tmp_b_seq, 'fasta')
    result_file = os.path.join(pair_aln_dir, "{0}.txt".format(pair_id))

    needle_cline = NeedleCommandline()
    needle_cline.asequence = tmp_a_seq
    needle_cline.bsequence = tmp_b_seq
    needle_cline.gapopen = 10
    needle_cline.gapextend = 0.5
    needle_cline.outfile = result_file

    # The devnull handle is closed as soon as needle finishes (it used to
    # leak). The command is still passed to the shell as one string.
    with open(os.devnull, 'w') as devnull:
        try:
            subprocess.call(str(needle_cline),
                            shell=True,
                            stdout=devnull,
                            stderr=devnull)
        except OSError:
            sys.exit(1)
    os.remove(tmp_a_seq)
    os.remove(tmp_b_seq)

    in_pattern = re.compile(r'Identity.*\((\d+\.\d+)%\)')
    gene_alignment_result = ''
    with open(result_file, 'r') as f1:
        for a_line in f1:
            m = in_pattern.search(a_line.strip())
            if m is None:
                # Lines that mention "Identity" without a percentage used to
                # crash with AttributeError; they are now skipped.
                continue
            # A later matching line overrides an earlier one.
            gene_alignment_result = '{0}\t{1}\n'.format(pair_id, m.group(1))

    # Opening with 'w' truncates the needle report, so no explicit removal
    # is needed before writing the summary line.
    with open(result_file, 'w') as f2:
        f2.write(gene_alignment_result)
Esempio n. 5
0
def needleAlign(seq1, seq2, gapopen, gapextend):
    """Run EMBOSS needle on ``seq1``/``seq2`` and print its standard output.

    ``seq1`` and ``seq2`` are handed to needle as its ``asequence`` and
    ``bsequence`` options, together with the given gap penalties. The
    alignment report goes to the relative path ``needle.txt``; the function
    returns nothing.
    """
    aligner = NeedleCommandline(
        asequence=seq1,
        bsequence=seq2,
        gapopen=gapopen,
        gapextend=gapextend,
        outfile="needle.txt")

    # Only the captured standard output is shown; stderr is discarded.
    captured_out, _captured_err = aligner()
    print(captured_out)
Esempio n. 6
0
 def build_target_matrix(self):
     """Fill ``self.assignments`` with the top needle hits for each subject.

     If ``<outdir>/matrix`` already exists it is unpickled into
     ``self.assignments`` and nothing else is done. Otherwise every record
     of the subject FASTA file is aligned with needle against the target
     file, the (name, gap percentage, score) tuples parsed from the needle
     report are sorted by score (highest first), the first ``self.assign``
     of them are appended to ``self.assignments[subject.id]``, and the
     result is pickled to ``<outdir>/matrix``.
     """
     if self.mysubject_file.read().count(
             '>') > self.mytarget_file.read().count('>'):
         # Ensures the subject file is the one with fewer '>' characters
         self.mysubject_file, self.mytarget_file = (self.mytarget_file,
                                                    self.mysubject_file)
         self.subjectname, self.targetname = (self.targetname,
                                              self.subjectname)
     self.mysubject_file.seek(0)
     matrixfile = self.outdir + '/matrix'
     if os.path.exists(matrixfile):
         # NOTE(review): unpickling runs arbitrary code from the file; only
         # safe as long as ``outdir`` is trusted.
         # The handle is now closed explicitly instead of being leaked.
         with open(matrixfile) as cache:
             self.assignments = pickle.load(cache)
         return
     count = self.mysubject_file.read().count('>')
     # Captures, per alignment: name after "# 2:", gap percentage, score.
     results = re.compile(
         r'# 2: (\w+).+?# Gaps:\s+\d+/\d+ \((\d+\.\d+)%\).+?# Score: (\d+\.\d+)',
         re.DOTALL)
     needle = NeedleCommandline()
     needle.gapopen = self.gapopen
     needle.gapextend = self.gapextend
     # Report is requested on stdout so it can be parsed from the capture.
     needle.outfile = 'stdout'
     needle.bsequence = self.mytarget_file.name
     mytargets = SeqIO.parse(self.mytarget_file.name, 'fasta')
     mytargets = SeqIO.to_dict(mytargets)
     mysubjects = SeqIO.parse(self.mysubject_file.name, 'fasta')
     # Parenthesised form prints the same text under Python 2.
     print("Creating alignment matrix. Please wait...")
     pbar = ProgressBar(widgets=[Percentage(), Bar()], maxval=count).start()
     for i, subject in enumerate(mysubjects):
         # 'asis:' passes the sequence text itself rather than a file name.
         needle.asequence = 'asis:%s' % str(subject.seq)
         (stdout, stderr) = needle()
         hits = results.findall(stdout)
         hits.sort(key=lambda hit: float(hit[2]), reverse=True)
         self.assignments.setdefault(subject.id,
                                     []).extend(hits[0:self.assign])
         pbar.update(i + 1)
     #pbar.finish()
     # Disabled ggsearch36-based alternative, kept as an unused string.
     '''
     outfile = tempfile.NamedTemporaryFile(delete=False)
     mycmd = 'ggsearch36 -s BL62 -m 8 -w 80 -f -8 -g -2 -b=3 -3 -k 500 %s %s'%(self.mysubject_file.name,self.mytarget_file.name)
     handle = subprocess.Popen(mycmd,shell=True,stdout=subprocess.PIPE)
     (stdout,stderr) = handle.communicate()
     results = re.compile('#.+?(^[^#].+?)#',re.MULTILINE|re.DOTALL)
     res = results.findall(stdout+"#")
     rows = [i.split('\n') for i in res]
     myrows = []
     for row in rows:
         row.pop()
         row.sort(key=lambda x:float(x.split('\t')[-1]),reverse=True)
         myrows.append(row[0:self.assign])
     for i in myrows:
         [self.assignments.setdefault(sub,[]).append(tar) for sub,tar in [x.split('\t')[0:2] for x in i if bool(re.search('\t',x))]]
     '''
     # Close (and therefore flush) the cache file deterministically.
     with open(matrixfile, 'w') as cache:
         pickle.dump(self.assignments, cache)
     return
Esempio n. 7
0
if __name__ == "__main__":
    # Whitespace-separated record IDs to download from the nucleotide db.
    with open(os.path.join('data', 'rosalind_need.txt')) as dataset:
        ids = dataset.read().split()

    handle = Entrez.efetch(db='nucleotide', id=ids, rettype="fasta")
    try:
        records = list(SeqIO.parse(handle, 'fasta'))
    finally:
        # Release the network handle; it was previously left open.
        handle.close()

    # Store each record in a FASTA file named after its ID so that needle
    # can read the sequences from disk.
    for i, r in enumerate(records):
        with open(ids[i], 'w') as f:
            SeqIO.write(r, f, 'fasta')

    # Global alignment of the first two records, with end gaps penalised.
    needle_cline = NeedleCommandline()
    needle_cline.asequence = ids[0]
    needle_cline.bsequence = ids[1]
    needle_cline.outfile = "need.txt"
    needle_cline.gapopen = 11
    needle_cline.gapextend = 1
    needle_cline.endopen = 11
    needle_cline.endextend = 1
    needle_cline.endweight = True

    needle_cline()

    with open('need.txt') as f:
        for line in f:
            if 'Score:' in line:
                # strip() removes the newline; slicing off the last
                # character first would eat a digit of the score whenever
                # the line has no trailing newline.
                print(int(float(line.split(':')[-1].strip())))
Esempio n. 8
0
def each_needle_run(pair_gene_dir, tmp_gene_converted_dir,
                    pair_gene_alignment_dir, og_id, strain_dict):
    """
    Call the Needle program to do a pairwise sequence alignment.

    The first two records of ``<pair_gene_dir>/<og_id>.fasta`` are renamed
    via ``strain_dict``, written to temporary FASTA files, aligned with
    needle, and the identity percentage is read back from the needle report
    ``<pair_gene_alignment_dir>/<og_id>.txt`` (which is left on disk).

    :param pair_gene_dir: each homologous gene directory
    :param tmp_gene_converted_dir: used to put some temporary files, which
        are deleted after the alignment
    :param pair_gene_alignment_dir: each orthologous gene pair-wised alignment directory
    :param og_id: each orthologous gene id
    :param strain_dict: inherit from load_strains_label function with strain
        information; indexed by strain id, first element used as record id
    :return: ``"<og_id>\\t[<gene a>|<gene b>]\\t<identity>\\t<annotation>\\n"``,
        or an empty string when the report has no identity line
    """
    if not os.path.exists(pair_gene_dir):
        logger.error("There is no directory contains gene file, please check.")
        logger.error(last_exception())
        sys.exit(1)
    tmp_gene_fasta = os.path.join(pair_gene_dir, og_id + '.fasta')
    converted_records = []
    re_pattern = re.compile(r'fig\|(\d+\.\d+)\.peg\.(\d+)\s(.*)')
    in_pattern = re.compile(r'Identity.*\((\d+\.\d+)%\)')
    annotation = ''
    og_list = []
    for record in SeqIO.parse(tmp_gene_fasta, 'fasta'):
        # NOTE(review): a description that does not match re_pattern leaves
        # m as None and raises AttributeError on the next line.
        m = re_pattern.search(record.description)
        strain_id = m.group(1)
        gene_id = '{0}.peg.{1}'.format(strain_id, m.group(2))
        og_list.append(gene_id)
        # The annotation of the last record read is the one reported.
        annotation = m.group(3)
        record.id = strain_dict[strain_id][0]
        final_record = SeqRecord(record.seq, record.id, description='')
        converted_records.append(final_record)
    the_strain_fasta = os.path.join(tmp_gene_converted_dir, 'a.fasta')
    other_strain_fasta = os.path.join(tmp_gene_converted_dir, 'b.fasta')
    SeqIO.write(converted_records[0], the_strain_fasta, 'fasta')
    SeqIO.write(converted_records[1], other_strain_fasta, 'fasta')
    result_file = os.path.join(pair_gene_alignment_dir,
                               "{0}.txt".format(og_id))
    needle_cline = NeedleCommandline()
    needle_cline.asequence = the_strain_fasta
    needle_cline.bsequence = other_strain_fasta
    needle_cline.gapopen = 10
    needle_cline.gapextend = 0.5
    needle_cline.outfile = result_file
    # The devnull handle is closed once needle returns (it used to leak).
    with open(os.devnull, 'w') as devnull:
        try:
            subprocess.call(str(needle_cline),
                            shell=True,
                            stdout=devnull,
                            stderr=devnull)
        except OSError:
            logger.info(
                'Try to call Needle program failed, please check if Needle has been installed successfully.'
            )
            logger.error(last_exception())
            sys.exit(1)
    os.remove(the_strain_fasta)
    os.remove(other_strain_fasta)
    gene_alignment_result = ''
    with open(result_file, 'r') as f:
        for a_line in f:
            m = in_pattern.search(a_line.strip())
            if m is None:
                # Skip lines without an identity percentage instead of
                # failing with AttributeError on a None match.
                continue
            gene_alignment_result = '{0}\t[{1}|{2}]\t{3}\t{4}\n'.format(
                og_id, og_list[0], og_list[1], m.group(1), annotation)
    return gene_alignment_result
Esempio n. 9
0
Entrez.email = "*****@*****.**"

if __name__ == "__main__":
    # Whitespace-separated record IDs to download from the nucleotide db.
    with open('rosalind_need.txt') as dataset:
        ids = dataset.read().split()

    handle = Entrez.efetch(db='nucleotide', id=ids, rettype="fasta")
    try:
        records = list(SeqIO.parse(handle, 'fasta'))
    finally:
        # Release the network handle; it was previously left open.
        handle.close()

    # One FASTA file per record, named after its ID, as input for needle.
    for i, r in enumerate(records):
        with open(ids[i], 'w') as f:
            SeqIO.write(r, f, 'fasta')

    # Global alignment of the first two records, with end gaps penalised.
    needle_cline = NeedleCommandline()
    needle_cline.asequence = ids[0]
    needle_cline.bsequence = ids[1]
    needle_cline.outfile = "rosalind_need_output.txt"
    needle_cline.gapopen = 10
    needle_cline.gapextend = 1
    needle_cline.endopen = 10
    needle_cline.endextend = 1
    needle_cline.endweight = True
    needle_cline()

    with open('rosalind_need_output.txt') as f:
        for line in f:
            if 'Score:' in line:
                # strip() removes the newline; slicing off the last
                # character first would eat a digit of the score whenever
                # the line has no trailing newline.
                print(int(float(line.split(':')[-1].strip())))
Esempio n. 10
0
# Download the records listed in ``ids`` (defined before this point).
handle = Entrez.efetch(db='nucleotide', id=ids, rettype='fasta')
try:
    records = list(SeqIO.parse(handle, 'fasta'))
finally:
    # Release the network handle; it was previously left open.
    handle.close()

# One FASTA file per record, named after its ID, as input for needle.
for i, record in enumerate(records):
    with open(ids[i], 'w') as f:
        SeqIO.write(record, f, 'fasta')

# Following step can be done in command line
# using 'needle'
needle_cl = NeedleCommandline()
needle_cl.asequence = ids[0]
needle_cl.bsequence = ids[1]
needle_cl.outfile = 'need_output.txt'

needle_cl.gapopen = 10
needle_cl.gapextend = 1
needle_cl.endopen = 10
needle_cl.endextend = 1
needle_cl.endweight = True

needle_cl()

# Score: print the last whitespace-separated field of the first line that
# mentions "Score".
with open('need_output.txt', 'r') as f:
    content = f.readlines()

for line in content:
    if 'Score' in line:
        score = float(line.split()[-1])
        print(score)
        break