def run(self):
    """Pool the cluster's reads, run the CD-HIT script on them and name the centers.

    Steps, in order:
      1. Concatenate every `sample.renamed` file of `self.cluster` into
         `self.reads` with a shell `cat`.
      2. Delete the `self.p.clusters_dir` directory tree.
      3. Call the external script named by `cdhit_script` on the pooled reads.
      4. Rename the entries of `self.cdhit_centers` with an "OTU-" prefix,
         writing to `self.centers`.
    """
    # Combine reads but in fastq format this time #
    joined_inputs = " ".join(sample.renamed for sample in self.cluster)
    concat_command = "cat %s > %s" % (joined_inputs, self.reads)
    shell_output(concat_command)
    # Clean #
    # NOTE(review): the directory is removed and then handed to the script
    # as its "-o" argument; presumably the script recreates it -- confirm.
    shutil.rmtree(self.p.clusters_dir)
    # Run command #
    # NOTE(review): the temporary file object itself (not a `.path`) is
    # passed after "-p"; presumably it stringifies to its path -- confirm.
    clusterer = sh.Command(cdhit_script)
    clusterer(
        "-i", self.reads,
        "-o", self.p.clusters_dir,
        "-p", TmpFile.from_string("[ACTG]"),
    )
    # Create the centers file with good names #
    self.cdhit_centers.rename_with_num("OTU-", self.centers)
def split(self):
    """Split this SFF file into one piece per sample using Roche's `sfffile`.

    The text in `self.barcode_text` is written to a temporary file and given
    to `sfffile` together with `self.path`. The resulting `454Reads.*.sff`
    files (globbed relative to the current working directory) are checked
    against the names expected from `self.sample_links`, wrapped in
    `SamplePiece` objects stored in `self.pieces`, and each piece is renamed.

    The temporary barcode file is removed even when a step fails.

    Raises:
        AssertionError: if the set of produced files differs from the set
            expected from the sample names.
    """
    barcode_file = TmpFile.from_string(self.barcode_text)
    try:
        # Call Roche binary #
        sh.sfffile("-s", "barcodes_keyword", "-mcf", barcode_file.path, self.path)
        # Check result #
        # Glob only once: the same listing serves the check and the pieces.
        produced_paths = sh.glob('454Reads.*.sff')
        produced_files = set(produced_paths)
        expected_files = {
            '454Reads.%s.sff' % (sample.name.upper())
            for sample in self.sample_links.values()
        }
        # Raised explicitly (rather than via `assert`) so that the check
        # is still performed when Python runs with -O.
        if produced_files != expected_files:
            raise AssertionError(
                "sfffile output mismatch: missing %s, unexpected %s" % (
                    sorted(expected_files - produced_files),
                    sorted(produced_files - expected_files),
                )
            )
        # Make piece objects #
        self.pieces = [SamplePiece(p, self) for p in produced_paths]
        for piece in self.pieces:
            piece.rename()
    finally:
        # Cleanup #
        barcode_file.remove()