def assign(self):
    """Classify the reads with mothur's ``classify.seqs`` command.

    A fresh temporary directory is created and stored, with a trailing
    slash, in ``self.tmp_dir``. ``self.fasta.path`` is symlinked into it
    as ``reads.fasta`` and that link is the file given to mothur.

    ``train_db_path`` and ``train_tax_path`` are looked up in the
    enclosing scope; they are not defined in this method.
    """
    # Set up the working directory #
    self.tmp_dir = tempfile.mkdtemp() + '/'
    reads_link = self.tmp_dir + 'reads.fasta'
    os.symlink(self.fasta.path, reads_link)
    # Launch the classifier #
    started = time.asctime()
    command = "#classify.seqs(fasta=%s, reference=%s, taxonomy=%s, cutoff=%i, processors=8, probs=F)"
    sh.mothur(command % (reads_link, train_db_path, train_tax_path, 80))
    # Pass the logfile name, base directory and start time on #
    process_log_file('mothur.classify.seqs.logfile', self.base_dir, started)
def align(self, ref_path):
    """Align the sequences in ``self.path`` against a template with mothur.

    Runs ``align.seqs`` (blast search, ``flip=false``, 8 processors), moves
    the files mothur wrote next to the input to ``self.aligned_path``,
    ``self.report_path`` and ``self.accnos_path``, then removes leftover
    log files from the current working directory.

    :param ref_path: Path of the template alignment passed to mothur.
    """
    # Run it #
    sh.mothur("#align.seqs(candidate=%s, template=%s, search=blast, flip=false, processors=8);" % (self.path, ref_path))
    # Move things #
    # Assumes self.path ends in a six-character suffix such as '.fasta' -- TODO confirm
    prefix = self.path[:-6]
    shutil.move(prefix + '.align', self.aligned_path)
    shutil.move(prefix + '.align.report', self.report_path)
    # The accnos file is not always produced by mothur, so only move it
    # when it is there instead of failing on an otherwise successful run.
    accnos = prefix + '.flip.accnos'
    if os.path.exists(accnos):
        shutil.move(accnos, self.accnos_path)
    # Clean up #
    if os.path.exists('formatdb.log'):
        os.remove('formatdb.log')
    # Only an empty error log is discarded; one with content is kept.
    if os.path.exists('error.log') and os.path.getsize('error.log') == 0:
        os.remove('error.log')
    for p in sh.glob('mothur.*.logfile'):
        os.remove(p)
def template_align(self, ref_path):
    """Align the sequences in ``self.path`` against a template with mothur.

    Runs ``align.seqs`` (blast search, ``flip=false``, 8 processors), moves
    the files mothur wrote next to the input to ``self.aligned_path``,
    ``self.report_path`` and ``self.accnos_path``, then removes leftover
    log files from the current working directory.

    :param ref_path: Path of the template alignment passed to mothur.
    """
    # Run it #
    sh.mothur("#align.seqs(candidate=%s, template=%s, search=blast, flip=false, processors=8);" % (self.path, ref_path))
    # Move things #
    # Assumes self.path ends in a six-character suffix such as '.fasta' -- TODO confirm
    prefix = self.path[:-6]
    shutil.move(prefix + '.align', self.aligned_path)
    shutil.move(prefix + '.align.report', self.report_path)
    # The accnos file is not always produced by mothur, so only move it
    # when it is there instead of failing on an otherwise successful run.
    accnos = prefix + '.flip.accnos'
    if os.path.exists(accnos):
        shutil.move(accnos, self.accnos_path)
    # Clean up #
    if os.path.exists('formatdb.log'):
        os.remove('formatdb.log')
    # Only an empty error log is discarded; one with content is kept.
    if os.path.exists('error.log') and os.path.getsize('error.log') == 0:
        os.remove('error.log')
    for p in sh.glob('mothur.*.logfile'):
        os.remove(p)
def align_mothur(self):
    """Step 1 with mothur: align ``self.tax.centers`` using ``align.seqs``.

    The template is the name ``reference`` taken from the enclosing scope
    (not defined in this method). The alignment and its report are moved
    to ``self.mothur_aligned`` and ``self.p.mothur_report``; the accnos
    file is moved to ``self.p.mothur_accnos`` only if mothur produced it.
    """
    # Launch the aligner #
    template = "#align.seqs(candidate=%s, template=%s, search=kmer, flip=false, processors=%s);"
    sh.mothur(template % (self.tax.centers, reference, 16))
    # Relocate the outputs #
    prefix = self.tax.centers.prefix_path
    shutil.move(prefix + '.align', self.mothur_aligned)
    shutil.move(prefix + '.align.report', self.p.mothur_report)
    accnos = prefix + '.flip.accnos'
    if os.path.exists(accnos):
        shutil.move(accnos, self.p.mothur_accnos)
    # Remove the mothur logfiles from the working directory #
    for logfile in glob.glob('mothur.*.logfile'):
        os.remove(logfile)
def template_align(self, ref_path):
    """We align the sequences in the fasta file with mothur and a template.

    Runs ``align.seqs`` (blast search, ``flip=false``, 8 processors), moves
    the files mothur wrote next to the input to ``self.p.aligned``,
    ``self.p.report`` and ``self.p.accnos``, then removes leftover log
    files from the current working directory.

    :param ref_path: Path of the template alignment passed to mothur.
    :returns: ``self.p.aligned``, the destination of the alignment.
    """
    # Run it #
    msg = "#align.seqs(candidate=%s, template=%s, search=blast," \
          "flip=false, processors=8);"
    sh.mothur(msg % (self.path, ref_path))
    # Move things #
    # Assumes self.path ends in a six-character suffix such as '.fasta' -- TODO confirm
    prefix = self.path[:-6]
    shutil.move(prefix + '.align', self.p.aligned)
    shutil.move(prefix + '.align.report', self.p.report)
    # The accnos file is not always produced by mothur, so only move it
    # when it is there instead of failing on an otherwise successful run.
    accnos = prefix + '.flip.accnos'
    if os.path.exists(accnos):
        shutil.move(accnos, self.p.accnos)
    # Clean up #
    if os.path.exists('formatdb.log'):
        os.remove('formatdb.log')
    # Only an empty error log is discarded; one with content is kept.
    if os.path.exists('error.log') and os.path.getsize('error.log') == 0:
        os.remove('error.log')
    for path in sh.glob('mothur.*.logfile'):
        os.remove(path)
    # Return #
    return self.p.aligned
def join(self):
    """Uses pandaseq 2.7 to join the forward and reverse reads together.
    See https://github.com/neufeld/pandaseq

    Three cases are distinguished by the reverse primer name found in
    ``self.info['primers']['reverse']['name']``:

    * "1132R" / "1000R": ``self.trim_and_concat.run()`` is used instead.
    * "806R": mothur's ``make.contigs`` is used instead.
    * anything else: ``pandaseq27`` is run through ``shell_call``.

    :raises Exception: if mothur's output contains "ERROR".
    """
    # Special case for new primers that don't join #
    rev_primer_name = self.info['primers']['reverse']['name']
    not_joining_primers = ("1132R", "1000R")
    if rev_primer_name in not_joining_primers:
        # Single-argument call form prints the same on Python 2 and 3
        print("No overlap special case")
        self.trim_and_concat.run()
        return
    # Special case for primers that highly overlap #
    high_overlap_primers = ("806R",)
    if rev_primer_name in high_overlap_primers:
        print("High overlap special case, using mothur")
        # Attribute spelling kept as-is; presumably it matches its definition
        result = sh.mothur("#make.contigs(ffastq=%s, rfastq=%s);" % (self.uncomrpessed_pair.fwd, self.uncomrpessed_pair.rev))
        # The captured output may be bytes on Python 3; decode it so the
        # substring test does not raise a TypeError.
        output = result.stdout
        if isinstance(output, bytes) and not isinstance(output, str):
            output = output.decode('utf-8', 'replace')
        if "ERROR" in output:
            raise Exception("Mothur didn't run correctly")
        # NOTE: nothing is moved afterwards; the files stay where mothur wrote them
        return
    # Default case #
    command = 'pandaseq27 -T 1 -f %s -r %s -u %s -F 1> %s 2> %s'
    command = command % (self.fwd, self.rev, self.unassembled.path, self.assembled.path, self.assembled.p.out)
    # Because it exits with status 1 https://github.com/neufeld/pandaseq/issues/40
    shell_call(command)
def check(self):
    """Run mothur's ``chimera.uchime`` on ``self.p.len_filtered_fasta``.

    The start time is recorded before mothur runs and is then passed,
    together with the logfile name and ``self.p.orig_dir``, to
    ``process_log_file`` (defined outside this method).
    """
    # Note when we started #
    started = time.asctime()
    # Run it #
    fasta = self.p.len_filtered_fasta
    sh.mothur("#chimera.uchime(fasta=%s, dereplicate=t, processors=8)" % fasta)
    # Deal with the logfile #
    process_log_file('mothur.chimera.uchime.logfile', self.p.orig_dir, started)