def validate_tophat_settings(desc, settings):
    """Check that *settings* can be turned into a tophat object.

    The settings are handed to ``tophat.tophat_from_settings``; the object it
    builds is discarded, only the success or failure of the call matters.

    Args:
        desc: Label for the settings being checked; it is quoted at the
            start of the error message.
        settings: Settings passed unchanged to
            ``tophat.tophat_from_settings``.

    Raises:
        SettingsError: If ``tophat.tophat_from_settings`` raises ``TypeError``
            or ``ValueError``. The message has the form ``'<desc>': <error>``.
    """
    try:
        tophat.tophat_from_settings(settings)
    except (TypeError, ValueError) as e:
        # Format the exception itself rather than the deprecated ``.message``
        # attribute, which is empty for exceptions built with several
        # arguments and does not exist on Python 3.
        raise SettingsError("'{}': {}".format(desc, e))
def run(settings_file, reads, outdir, temp_loc=None, extend=False):
    """Run the whole pipeline: preprocess, discard, map and postprocess.

    Progress messages are printed when the module-level ``verbose`` flag is
    set.

    Args:
        settings_file: Path handed to ``settings.loadf``.
        reads: Path of the input reads. A name ending in ``.gz`` is inflated
            into the scratch directory before preprocessing.
        outdir: Output directory. If it already exists it is deleted and
            recreated; when the module-level ``check_output`` flag is set the
            user is asked for confirmation first and the process exits with
            status 1 if they decline.
        temp_loc: Directory in which the scratch directory is created
            (``dir`` argument of ``tempfile.mkdtemp``).
        extend: Forwarded to ``postprocess.run``.
    """
    if os.path.exists(outdir):
        if check_output and not query_yes_no(
                "Output path \"{}\" already exists, overwrite?".format(outdir)):
            print("Cannot continue as output already exists")
            sys.exit(1)
        shutil.rmtree(outdir)
    os.mkdir(outdir)

    tempdir = tempfile.mkdtemp(prefix='waistcoat', dir=temp_loc)
    try:
        # Read and validate settings for waistcoat
        if verbose:
            print("Reading settings from \'{}\'".format(settings_file))
        my_settings = settings.loadf(settings_file)
        statistics.setUp(my_settings.barcodes.keys())

        # Run the preprocessing pipeline
        if verbose:
            print("\n========== Preprocessing ==========")
        remove_input = False
        if reads.endswith('.gz'):
            if verbose:
                print("Inflating...")
            # Copy in binary mode so the inflated bytes are written verbatim;
            # both handles are closed even if the copy fails part way.
            with gzip.GzipFile(reads, 'rb') as gzfile:
                (fd, inflated) = tempfile.mkstemp(
                    dir=tempdir, prefix='input.', suffix='.inflated')
                with os.fdopen(fd, 'wb') as out:
                    shutil.copyfileobj(gzfile, out)
            reads = inflated
            # The inflated copy is ours, so preprocessing may delete it.
            remove_input = True
        files = preprocess.run(reads, my_settings, tempdir, remove_input)

        # Discard the reads which map to any of the discard indexes
        if verbose:
            print("\n========== Discard ==========")
        for i, (index, dcs) in enumerate(my_settings.discard):
            new_files = {}
            count = {}
            if verbose:
                print("Removing reads which map to \'{}\' ({}/{})...".format(
                    index, i + 1, len(my_settings.discard)))
            for sample, f in files.items():
                if verbose:
                    print("\tScanning \'{}\'".format(sample))
                (new_files[sample], count[sample]) = tophat.discard_mapped(
                    f, index, tophat_settings=dcs)
            # The next index filters what survived this one.
            files = new_files
            statistics.addValues('discard_' + os.path.basename(index), count)

        # Map to genome
        (target, target_settings) = my_settings.target
        if verbose:
            print("\n========== Map to {} ==========".format(
                os.path.basename(target)))
        th = tophat.tophat_from_settings(target_settings)
        for i, (sample, f) in enumerate(files.items()):
            th.output_dir = os.path.join(outdir, sample)
            os.mkdir(th.output_dir)
            if verbose:
                print("Mapping {} ({}/{})...".format(sample, i + 1, len(files)))
            th.run(f, index_base=target)
            # The intermediate reads file is not needed once it is mapped.
            os.remove(f)

        # Postprocess the mapped output of every sample
        if verbose:
            print("\n========== Postprocess ==========")
        count = {}
        for i, sample in enumerate(files):
            if verbose:
                print("{} ({}/{})...".format(sample, i + 1, len(files)))
            out = os.path.join(outdir, '{}.bam'.format(sample))
            count[sample] = postprocess.run(
                outdir, sample, "{}.fa".format(target), extend=extend)
            statistics.collectFinalStats(sample, out)
        statistics.addValues('final_seqs', count)

        statistics.write(os.path.join(outdir, 'statistics'))
    finally:
        # Remove the scratch directory whether or not the pipeline succeeded,
        # so a failed run does not leave intermediate files behind.
        shutil.rmtree(tempdir)

    if verbose:
        print("\n__________ Pipeline Statistics __________")
        print(statistics.prettyString())