Ejemplo n.º 1
0
def align_strand(al_info):
    '''Run EMBOSS ``needle`` on the two sequence files named in ``al_info``.

    ``al_info`` is a mapping that must provide three keys:

    * ``'ref_file'`` -- passed to needle as ``asequence``
    * ``'in_file'``  -- passed to needle as ``bsequence``
    * ``'out_file'`` -- passed to needle as ``outfile``

    The command is built with gap open 10.0, gap extend 1.0 and the
    ``markx10`` alignment format, then ``-adesshow3 -auto`` is appended and
    the resulting string is executed through the shell.  The command line and
    the outcome are reported through the module-level ``logfun`` logger.
    Nothing is returned.
    '''
    from Bio.Emboss.Applications import NeedleCommandline
    import subprocess

    ref_file = al_info['ref_file']
    in_file = al_info['in_file']
    out_file = al_info['out_file']

    cline = NeedleCommandline(gapopen=10.0, gapextend=1.0)
    cline.asequence = ref_file
    cline.bsequence = in_file
    cline.outfile = out_file
    cline.aformat = 'markx10'
    cml = str(cline) + ' -adesshow3 -auto'
    logfun.info(cml)
    # NOTE(review): the log messages below still say "diri_sampler" although
    # the command being run is needle; the wording is kept unchanged in case
    # log consumers match on it.
    try:
        retcode = subprocess.call(cml, shell=True)
    except OSError as ee:
        # str() is required: concatenating the exception object itself to a
        # string raises TypeError and would hide the real failure.
        logfun.exception('Execution of diri_sampler failed:' + str(ee))
    else:
        if retcode < 0:
            # A negative return code is the negated number of the signal that
            # terminated the child, so report its absolute value.
            logfun.info('Child diri_sampler was terminated by signal %d' %
                        (-retcode))
        else:
            logfun.info('Child diri_sampler returned %d' % retcode)
Ejemplo n.º 2
0
    def out():
        """Align the two chosen files with needle, save and display the result.

        Reads the two input file names and the output file name from the
        entry widgets ``e1``, ``e2`` and ``e3``, runs EMBOSS needle with the
        enclosing scope's ``gapopen``/``gapextend`` values (converted to int)
        into the fixed file ``needle.txt``, copies that report (prefixed with
        two newlines) to the requested output file and shows it in a new
        scrollable Tk text window.  Once that window's main loop ends, the
        ``win`` window is destroyed.
        """
        filename1 = e1.get()
        filename2 = e2.get()
        outfile = e3.get()
        needle_cline = NeedleCommandline()
        needle_cline.asequence = filename1
        needle_cline.bsequence = filename2
        needle_cline.gapopen = int(gapopen)
        needle_cline.gapextend = int(gapextend)
        needle_cline.outfile = "needle.txt"
        print(needle_cline)
        print(needle_cline.outfile)
        stdout, stderr = needle_cline()
        print(stdout + stderr)
        # The parsed alignment itself is not used; the call is kept so that an
        # unparsable needle report still raises here, as it did before.
        AlignIO.read("needle.txt", "emboss")
        # Context manager so the report handle is closed (it used to leak and
        # shadowed the name ``file``).
        with open("needle.txt", "r") as report:
            view = "\n\n%s" % report.read()
        with open(outfile, "w") as f:
            f.write(view)

        root = Tk()
        S = Scrollbar(root)
        T = Text(root, height=50, width=500)
        S.pack(side=RIGHT, fill=Y)
        T.pack(side=LEFT, fill=Y)
        # One vertical scrollbar drives vertical scrolling only.  It was
        # previously re-bound to xview/xscrollcommand as well, which replaced
        # the y binding and left the bar scrolling the text sideways.
        S.config(command=T.yview)
        T.config(yscrollcommand=S.set)
        T.insert(END, view, 'color')
        mainloop()

        win.destroy()
Ejemplo n.º 3
0
def each_seq_align(each_id=0, record_list=list(), pair_aln_dir=''):
    """Align the first two records with needle and store their identity.

    ``record_list[0]`` and ``record_list[1]`` are written as temporary FASTA
    files inside ``pair_aln_dir``, aligned with EMBOSS needle (gap open 10,
    gap extend 0.5) and then deleted.  The needle report
    ``<pair_aln_dir>/<each_id>.txt`` is scanned for ``Identity`` lines and is
    finally overwritten with a single line ``"<each_id>\\t<identity>\\n"``
    taken from the last matching line, or with an empty file when no line
    matched.

    :param each_id: identifier used for the temporary and result file names
    :param record_list: sequence holding at least two records accepted by
        ``SeqIO.write``; the shared default list is never mutated here
    :param pair_aln_dir: directory for the temporary and result files
    :return: None.  Exits the process with status 1 if launching the needle
        command raises ``OSError``.
    """
    # prepare pairwise sequence alignment files
    tmp_a_seq = os.path.join(pair_aln_dir, '{0}_a.fasta'.format(str(each_id)))
    SeqIO.write(record_list[0], tmp_a_seq, 'fasta')
    tmp_b_seq = os.path.join(pair_aln_dir, '{0}_b.fasta'.format(str(each_id)))
    SeqIO.write(record_list[1], tmp_b_seq, 'fasta')
    result_file = os.path.join(pair_aln_dir, "{0}.txt".format(str(each_id)))
    needle_cline = NeedleCommandline()
    needle_cline.asequence = tmp_a_seq
    needle_cline.bsequence = tmp_b_seq
    needle_cline.gapopen = 10
    needle_cline.gapextend = 0.5
    needle_cline.outfile = result_file
    # The context manager closes the devnull handle on every path, including
    # the sys.exit() below (the handle used to be leaked).
    with open(os.devnull, 'w') as devnull:
        try:
            subprocess.call(str(needle_cline),
                            shell=True,
                            stdout=devnull,
                            stderr=devnull)
        except OSError:
            sys.exit(1)
    os.remove(tmp_a_seq)
    os.remove(tmp_b_seq)
    in_pattern = re.compile(r'Identity.*\((\d+\.\d+)%\)')
    gene_alignment_result = ''
    with open(result_file, 'r') as f1:
        # Iterate lazily instead of loading the whole report with readlines().
        for a_line in f1:
            if 'Identity' not in a_line:
                continue
            m = in_pattern.search(a_line.strip())
            if m is None:
                # Mentions "Identity" but carries no "(NN.N%)" value; skip it
                # rather than crash on m.group().
                continue
            gene_alignment_result = '{0}\t{1}\n'.format(
                str(each_id), str(m.group(1)))
    # Replace the verbose needle report with the one-line summary.
    os.remove(result_file)
    with open(result_file, 'w') as f2:
        f2.write(gene_alignment_result)
Ejemplo n.º 4
0
def needleAlign(seq1, seq2, gapopen, gapextend):
    """Run EMBOSS needle on ``seq1`` and ``seq2`` and print its stdout.

    ``seq1``/``seq2`` are passed as needle's ``asequence``/``bsequence`` and
    ``gapopen``/``gapextend`` as the gap penalties.  The alignment is written
    to the fixed file ``needle.txt``; only the captured standard output of the
    tool is printed.  Returns None.
    """
    aligner = NeedleCommandline(
        asequence=seq1,
        bsequence=seq2,
        gapopen=gapopen,
        gapextend=gapextend,
        outfile="needle.txt",
    )

    captured_out, _captured_err = aligner()
    print(captured_out)
Ejemplo n.º 5
0
def needle_alignment_emboss(s1, s2):
    """Globally align two sequences with EMBOSS needle and parse the result.

    Both sequences are handed to needle inline through the ``asis:`` prefix.
    needle runs with ``-auto``, ``-sprotein``, gap open 10 and gap extend 1,
    and writes its report to standard output, which is parsed with
    ``AlignIO.read(..., "emboss")``.

    :param s1: first sequence as a string
    :param s2: second sequence as a string
    :return: the single alignment parsed from needle's output
    """
    import subprocess
    from Bio.Emboss.Applications import NeedleCommandline
    from Bio import AlignIO
    cline = NeedleCommandline(auto=True, sprotein=True, stdout=True, gapopen=10, gapextend=1)
    cline.asequence = "asis:" + s1
    cline.bsequence = "asis:" + s2
    # NOTE(review): the command string is run through the shell, so s1/s2
    # must not come from untrusted input.
    process = subprocess.Popen(str(cline), shell=True, stdout=subprocess.PIPE,
            universal_newlines=True)
    try:
        return AlignIO.read(process.stdout, "emboss")
    finally:
        # Close the pipe and reap the child even when parsing fails, so that
        # repeated calls leak neither file descriptors nor zombie processes.
        process.stdout.close()
        process.wait()
Ejemplo n.º 6
0
def needle_alignment_emboss(s1, s2):
    """Run an EMBOSS needle global alignment of ``s1`` against ``s2``.

    The sequences are passed on the command line with the ``asis:`` prefix.
    needle is invoked with ``-auto``, ``-sprotein``, gap open 10 and gap
    extend 1, and prints its report to standard output, from which exactly
    one alignment is read via ``AlignIO.read(..., "emboss")``.

    :param s1: first sequence as a string
    :param s2: second sequence as a string
    :return: the alignment object produced by ``AlignIO.read``
    """
    import subprocess
    from Bio.Emboss.Applications import NeedleCommandline
    from Bio import AlignIO
    cline = NeedleCommandline(auto=True, sprotein=True, stdout=True, gapopen=10, gapextend=1)
    cline.asequence = "asis:" + s1
    cline.bsequence = "asis:" + s2
    # NOTE(review): executed via the shell; do not feed untrusted sequences.
    process = subprocess.Popen(str(cline), shell=True, stdout=subprocess.PIPE,
            universal_newlines=True)
    try:
        return AlignIO.read(process.stdout, "emboss")
    finally:
        # Always release the pipe and wait for needle to exit; otherwise each
        # call leaves an open descriptor and an unreaped child behind.
        process.stdout.close()
        process.wait()
Ejemplo n.º 7
0
 def build_target_matrix(self):
     """Fill ``self.assignments`` with the best needle hits per subject.

     The subject and target files (and their names) are swapped when the
     subject file contains more ``'>'`` characters than the target file, so
     the subject is the smaller of the two.  If ``<outdir>/matrix`` already
     exists it is unpickled into ``self.assignments`` and nothing else is
     done.  Otherwise every subject record is aligned with EMBOSS needle
     against the whole target file; the ``(name, gap percentage, score)``
     tuples extracted from needle's output are sorted by score, highest
     first, and the first ``self.assign`` of them are appended to
     ``self.assignments[subject.id]``.  The result is pickled to
     ``<outdir>/matrix`` for later runs.
     """
     if self.mysubject_file.read().count(
             '>') > self.mytarget_file.read().count('>'):
         # Ensures the subject file is the smaller one
         (self.mysubject_file, self.mytarget_file) = (self.mytarget_file,
                                                      self.mysubject_file)
         (self.subjectname, self.targetname) = (self.targetname,
                                                self.subjectname)
     self.mysubject_file.seek(0)
     matrixfile = self.outdir + '/matrix'
     if os.path.exists(matrixfile):
         # NOTE(review): unpickling executes arbitrary code; only safe when
         # the output directory is trusted.
         # Context manager so the cache file handle is closed (it used to be
         # left open).
         with open(matrixfile) as cached:
             self.assignments = pickle.load(cached)
         return
     count = self.mysubject_file.read().count('>')
     results = re.compile(
         r'# 2: (\w+).+?# Gaps:\s+\d+/\d+ \((\d+\.\d+)%\).+?# Score: (\d+\.\d+)',
         re.DOTALL)
     needle = NeedleCommandline()
     needle.gapopen = self.gapopen
     needle.gapextend = self.gapextend
     # 'stdout' makes needle print the report instead of writing a file, so
     # it can be parsed from the captured output below.
     needle.outfile = 'stdout'
     needle.bsequence = self.mytarget_file.name
     mytargets = SeqIO.parse(self.mytarget_file.name, 'fasta')
     mytargets = SeqIO.to_dict(mytargets)
     mysubjects = SeqIO.parse(self.mysubject_file.name, 'fasta')
     # Parenthesised single-argument form prints the same text under both
     # Python 2 and Python 3.
     print("Creating alignment matrix. Please wait...")
     pbar = ProgressBar(widgets=[Percentage(), Bar()], maxval=count).start()
     for i, subject in enumerate(mysubjects):
         needle.asequence = 'asis:%s' % str(subject.seq)
         (stdout, stderr) = needle()
         gaps = results.findall(stdout)
         # Highest score first; x[2] is the captured "# Score:" value.
         gaps.sort(key=lambda x: float(x[2]), reverse=True)
         self.assignments.setdefault(subject.id,
                                     []).extend(gaps[0:self.assign])
         pbar.update(i + 1)
     #pbar.finish()
     # Disabled alternative implementation based on ggsearch36, kept as an
     # inert string literal exactly as in the original.
     '''
     outfile = tempfile.NamedTemporaryFile(delete=False)
     mycmd = 'ggsearch36 -s BL62 -m 8 -w 80 -f -8 -g -2 -b=3 -3 -k 500 %s %s'%(self.mysubject_file.name,self.mytarget_file.name)
     handle = subprocess.Popen(mycmd,shell=True,stdout=subprocess.PIPE)
     (stdout,stderr) = handle.communicate()
     results = re.compile('#.+?(^[^#].+?)#',re.MULTILINE|re.DOTALL)
     res = results.findall(stdout+"#")
     rows = [i.split('\n') for i in res]
     myrows = []
     for row in rows:
         row.pop()
         row.sort(key=lambda x:float(x.split('\t')[-1]),reverse=True)
         myrows.append(row[0:self.assign])
     for i in myrows:
         [self.assignments.setdefault(sub,[]).append(tar) for sub,tar in [x.split('\t')[0:2] for x in i if bool(re.search('\t',x))]]
     '''
     # Close the cache file deterministically so the pickle is fully flushed
     # to disk before the method returns.
     with open(matrixfile, 'w') as cache_out:
         pickle.dump(self.assignments, cache_out)
     return
Ejemplo n.º 8
0
def needle_alignment(s1, s2):
    '''
DESCRIPTION

    Does a Needleman-Wunsch Alignment of sequence s1 and s2 (run through
    EMBOSS needle with -auto, -sprotein, gap open 10 and gap extend 1) and
    returns the alignment object parsed by AlignIO.read(..., "emboss").

    s1 and s2 are sequence strings; they are passed to needle inline with
    the "asis:" prefix.
    '''
    import subprocess
    from Bio.Emboss.Applications import NeedleCommandline
    from Bio import AlignIO
    cline = NeedleCommandline(auto=True, sprotein=True, stdout=True, gapopen=10, gapextend=1)
    cline.asequence = "asis:" + s1
    cline.bsequence = "asis:" + s2
    # universal_newlines=True gives the parser a text stream; without it the
    # pipe yields bytes on Python 3.
    # NOTE(review): executed via the shell; do not feed untrusted sequences.
    process = subprocess.Popen(str(cline), shell=True, stdout=subprocess.PIPE,
                               universal_newlines=True)
    try:
        return AlignIO.read(process.stdout, "emboss")
    finally:
        # Close the pipe and reap the child even when parsing fails, so that
        # repeated calls leak neither descriptors nor zombie processes.
        process.stdout.close()
        process.wait()
Ejemplo n.º 9
0
def needle_alignment(s1, s2):
    '''
DESCRIPTION

    Does a Needleman-Wunsch Alignment of sequence s1 and s2 by calling
    EMBOSS needle (-auto, -sprotein, gap open 10, gap extend 1) and returns
    the alignment object parsed by AlignIO.read(..., "emboss") from needle's
    standard output.

    s1 and s2 are sequence strings, handed to needle inline via "asis:".
    '''
    import subprocess
    from Bio.Emboss.Applications import NeedleCommandline
    from Bio import AlignIO
    cline = NeedleCommandline(auto=True,
                              sprotein=True,
                              stdout=True,
                              gapopen=10,
                              gapextend=1)
    cline.asequence = "asis:" + s1
    cline.bsequence = "asis:" + s2
    # Text-mode pipe: the parser needs str lines, and a plain pipe yields
    # bytes on Python 3.
    # NOTE(review): the command runs through the shell, so s1/s2 must be
    # trusted.
    process = subprocess.Popen(str(cline),
                               shell=True,
                               stdout=subprocess.PIPE,
                               universal_newlines=True)
    try:
        return AlignIO.read(process.stdout, "emboss")
    finally:
        # Always release the pipe and wait for needle to exit; otherwise each
        # call leaves an open descriptor and an unreaped child behind.
        process.stdout.close()
        process.wait()
Ejemplo n.º 10
0
# E-mail address that Bio.Entrez attaches to its requests.
Entrez.email = "*****@*****.**"


if __name__ == "__main__":
    # Read the whitespace-separated sequence identifiers from the dataset.
    with open(os.path.join('data', 'rosalind_need.txt')) as dataset:
        ids = dataset.read().split()

    # Fetch all identifiers from the 'nucleotide' database as FASTA and parse
    # the response into a list of records.
    handle = Entrez.efetch(db='nucleotide', id=ids, rettype="fasta")
    records = list(SeqIO.parse(handle, 'fasta'))

    # Write each record to a file named after the identifier at the same
    # position (assumes the records come back in request order -- TODO
    # confirm).
    for i, r in enumerate(records):
        with open(ids[i], 'w') as f:
            SeqIO.write(r, f, 'fasta')

    # Configure needle to align the first two downloaded files; end gaps are
    # penalised (endweight) with the same open/extend costs as internal gaps.
    needle_cline = NeedleCommandline()
    needle_cline.asequence = ids[0]
    needle_cline.bsequence = ids[1]
    needle_cline.outfile = "need.txt"
    needle_cline.gapopen = 11
    needle_cline.gapextend = 1
    needle_cline.endopen = 11
    needle_cline.endextend = 1
    needle_cline.endweight = True

    # Run needle; its report is written to need.txt.
    needle_cline()

    with open('need.txt') as f:
        output = f.readlines()

    # Look for the report line(s) containing 'Score:'.
    # NOTE(review): the body of the `if` below is missing from the visible
    # source, so what is done with the matching line cannot be seen here.
    for line in output:
        if 'Score:' in line:
Ejemplo n.º 11
0
def each_needle_run(pair_gene_dir, tmp_gene_converted_dir,
                    pair_gene_alignment_dir, og_id, strain_dict):
    """
    Call the Needle program to align the first two genes of one orthologous
    group and report their percentage identity.

    The records of ``<pair_gene_dir>/<og_id>.fasta`` are re-labelled with the
    strain label found in ``strain_dict``; the first two are written to
    temporary ``a.fasta`` / ``b.fasta`` files, aligned with needle (gap open
    10, gap extend 0.5) and the temporary files are removed again.  The
    needle report is left in ``<pair_gene_alignment_dir>/<og_id>.txt``.

    :param pair_gene_dir: each homologous gene directory
    :param tmp_gene_converted_dir: used to put some temporary files and will be deleted in the end
    :param pair_gene_alignment_dir: each orthologous gene pair-wised alignment directory
    :param og_id: each orthologous gene id
    :param strain_dict: inherit from load_strains_label function with strain information;
        indexed as ``strain_dict[strain_id][0]`` to obtain the record label
    :return: the line ``"<og_id>\\t[<gene a>|<gene b>]\\t<identity>\\t<annotation>\\n"``
        built from the last ``Identity`` line of the needle report, or an
        empty string when the report has no such line.  The annotation is the
        one parsed from the last record of the FASTA file.
    """
    if not os.path.exists(pair_gene_dir):
        logger.error("There is no directory contains gene file, please check.")
        logger.error(last_exception())
        sys.exit(1)
    tmp_gene_fasta = os.path.join(pair_gene_dir, og_id + '.fasta')
    converted_records = []
    re_pattern = re.compile(r'fig\|(\d+\.\d+)\.peg\.(\d+)\s(.*)')
    in_pattern = re.compile(r'Identity.*\((\d+\.\d+)%\)')
    annotation = ''
    og_list = []
    for record in SeqIO.parse(tmp_gene_fasta, 'fasta'):
        m = re.search(re_pattern, record.description)
        strain_id = m.group(1)
        gene_id = '{0}.peg.{1}'.format(strain_id, m.group(2))
        og_list.append(gene_id)
        annotation = m.group(3)
        record.id = strain_dict[strain_id][0]
        final_record = SeqRecord(record.seq, record.id, description='')
        converted_records.append(final_record)
    the_strain_fasta = os.path.join(tmp_gene_converted_dir, 'a.fasta')
    other_strain_fasta = os.path.join(tmp_gene_converted_dir, 'b.fasta')
    SeqIO.write(converted_records[0], the_strain_fasta, 'fasta')
    SeqIO.write(converted_records[1], other_strain_fasta, 'fasta')
    result_file = os.path.join(pair_gene_alignment_dir,
                               "{0}.txt".format(og_id))
    needle_cline = NeedleCommandline()
    needle_cline.asequence = the_strain_fasta
    needle_cline.bsequence = other_strain_fasta
    needle_cline.gapopen = 10
    needle_cline.gapextend = 0.5
    needle_cline.outfile = result_file
    # The context manager closes the devnull handle on every path, including
    # the sys.exit() below (the handle used to be leaked on each call).
    with open(os.devnull, 'w') as devnull:
        try:
            subprocess.call(str(needle_cline),
                            shell=True,
                            stdout=devnull,
                            stderr=devnull)
        except OSError:
            logger.info(
                'Try to call Needle program failed, please check if Needle has been installed successfully.'
            )
            logger.error(last_exception())
            sys.exit(1)
    os.remove(the_strain_fasta)
    os.remove(other_strain_fasta)
    gene_alignment_result = ''
    with open(result_file, 'r') as f:
        # Iterate lazily instead of loading the whole report with readlines().
        for a_line in f:
            if 'Identity' not in a_line:
                continue
            m = in_pattern.search(a_line.strip())
            if m is None:
                # Mentions "Identity" but carries no "(NN.N%)" value; skip it
                # rather than crash on m.group().
                continue
            identity = m.group(1)
            gene_alignment_result = '{0}\t[{1}|{2}]\t{3}\t{4}\n'.format(
                og_id, og_list[0], og_list[1], identity, annotation)
    return gene_alignment_result
Ejemplo n.º 12
0
# E-mail address that Bio.Entrez attaches to its requests.
Entrez.email = "*****@*****.**"

if __name__ == "__main__":
    # Whitespace-separated sequence identifiers to download.
    with open('rosalind_need.txt') as dataset:
        ids = dataset.read().split()

    handle = Entrez.efetch(db='nucleotide', id=ids, rettype="fasta")
    records = list(SeqIO.parse(handle, 'fasta'))

    # Store every fetched record in a FASTA file named after the identifier
    # at the same position.
    for index, record in enumerate(records):
        with open(ids[index], 'w') as fasta_out:
            SeqIO.write(record, fasta_out, 'fasta')

    # Global alignment of the first two files, with end gaps penalised like
    # internal gaps.
    needle_cline = NeedleCommandline(
        asequence=ids[0],
        bsequence=ids[1],
        outfile="rosalind_need_output.txt",
        gapopen=10,
        gapextend=1,
        endopen=10,
        endextend=1,
        endweight=True,
    )
    needle_cline()

    with open('rosalind_need_output.txt') as report:
        output = report.readlines()

    # Print the value after the last ':' of each 'Score:' line as an integer.
    for line in output:
        if 'Score:' not in line:
            continue
        score_text = line[:-1].split(':')[-1].strip()
        print(int(float(score_text)))
Ejemplo n.º 13
0
        print "continue"
        continue
    else:
        print "good length = %i" % seqlength
        print "break"
        break
# Keep the record selected by the loop above as the new reference and write
# it out as FASTA.
newrefrec = temprec
SeqIO.write(newrefrec, refoutfile, "fasta")


# Sum the sequence lengths of all records in protlist.
x=0
for item in protlist:
    x = x + len(item.seq)

# Mean sequence length.  Both operands are ints, so under Python 2 (which the
# print statements in this script require) this is integer division.
# NOTE(review): raises ZeroDivisionError when protlist is empty.
avg = x/len(protlist)
print avg
# Build the needle command: reference file against the protein file, gap open
# 10, gap extend 0.5, report written to alignment_out.
needle_cline = NeedleCommandline()
needle_cline.asequence=refoutfile
needle_cline.bsequence=prot_outfile
needle_cline.gapopen=10
needle_cline.gapextend=0.5
needle_cline.outfile=alignment_out
# Only the command line is printed; the execution and logging below are
# commented out, so needle is not actually run here.
print needle_cline
#stdout, stderr = needle_cline()

#logstring = stdout+stderr
#logout = open(logfile, "w")
#logout.write(logstring)
#logout.close()

Ejemplo n.º 14
0
# E-mail address that Bio.Entrez attaches to its requests.
Entrez.email = '*****@*****.**'

# Whitespace-separated sequence identifiers to download.
with open('rosalind_need.txt', 'r') as f:
    ids = f.read().split()

handle = Entrez.efetch(db='nucleotide', id=ids, rettype='fasta')
records = list(SeqIO.parse(handle, 'fasta'))

# Store every fetched record in a FASTA file named after the identifier at
# the same position.
for i, record in enumerate(records):
    with open(ids[i], 'w') as f:
        SeqIO.write(record, f, 'fasta')

# The alignment itself could equally be run by hand with the 'needle'
# command-line tool.  End gaps are penalised like internal gaps.
needle_cl = NeedleCommandline(
    asequence=ids[0],
    bsequence=ids[1],
    outfile='need_output.txt',
    gapopen=10,
    gapextend=1,
    endopen=10,
    endextend=1,
    endweight=True,
)

needle_cl()

# Score
with open('need_output.txt', 'r') as f:
    content = f.readlines()