Example #1
0
def validate_tophat_settings(desc, settings):
    """Check that ``settings`` can be turned into a tophat configuration.

    Calls ``tophat.tophat_from_settings(settings)`` purely for its
    validation side effect; the object it builds is discarded.

    Args:
        desc: Label for the settings being checked. It is quoted at the
            start of the error message.
        settings: Value handed unchanged to ``tophat.tophat_from_settings``.

    Raises:
        SettingsError: If ``tophat.tophat_from_settings`` raises ``TypeError``
            or ``ValueError``. The message has the form
            ``'<desc>': <original error text>``.
    """
    try:
        tophat.tophat_from_settings(settings)
    except (TypeError, ValueError) as e:
        # str(e) instead of e.message: ``message`` is deprecated since
        # Python 2.6, is empty when the exception carries several arguments,
        # and no longer exists on Python 3.
        raise SettingsError("'{}': {}".format(desc, str(e)))
Example #2
0
def _inflate_gzip(reads, tempdir):
	"""Decompress the gzip file ``reads`` into a new file inside ``tempdir``.

	Returns the path of the inflated file (named ``input.*.inflated``).
	"""
	gzfile = gzip.GzipFile(reads, 'rb')
	try:
		(fd, inflated) = tempfile.mkstemp(dir=tempdir, prefix='input.',
				suffix='.inflated')
		# Binary mode: the decompressed bytes are copied through untouched.
		with os.fdopen(fd, 'wb') as out:
			shutil.copyfileobj(gzfile, out)
	finally:
		# Close the compressed input even if the copy fails part-way.
		gzfile.close()
	return inflated


def run(settings_file, reads, outdir, temp_loc=None, extend=False):
	"""Run the pipeline on ``reads``: preprocess, discard, map, postprocess.

	Stages, in order:

	1. Prepare ``outdir``. If the path already exists it is deleted and
	   recreated. When the module-level ``check_output`` flag is truthy the
	   user is first asked through ``query_yes_no``; declining prints a
	   message and exits the process with status 1.
	2. Create a scratch directory (prefix ``waistcoat``) under ``temp_loc``.
	3. Load the settings with ``settings.loadf`` and initialise
	   ``statistics`` with the barcode keys.
	4. Preprocess. If ``reads`` ends in ``.gz`` it is inflated into the
	   scratch directory first. ``preprocess.run`` returns a mapping that is
	   iterated below as ``sample -> file path``.
	5. Discard. For every ``(index, tophat settings)`` pair in
	   ``my_settings.discard`` each sample file goes through
	   ``tophat.discard_mapped``. The per-sample counts are recorded under
	   ``discard_<basename of index>``.
	6. Map. Each remaining file is mapped against ``my_settings.target``
	   into ``<outdir>/<sample>``, then the input file is deleted.
	7. Postprocess. ``postprocess.run`` is called per sample, final
	   statistics are collected and written to ``<outdir>/statistics``.

	The scratch directory is removed when the pipeline finishes, including
	when a stage raises. Progress messages are printed only when the
	module-level ``verbose`` flag is truthy.

	Args:
		settings_file: Path passed to ``settings.loadf``.
		reads: Path of the input reads; a ``.gz`` suffix triggers inflation.
		outdir: Output directory. Any existing content is destroyed.
		temp_loc: Parent directory for the scratch directory. ``None`` lets
			``tempfile.mkdtemp`` choose its default location.
		extend: Forwarded to ``postprocess.run`` as ``extend``.
	"""
	if os.path.exists(outdir):
		if (check_output and not
				query_yes_no("Output path \"{}\" already exists, overwrite?"
					.format(outdir))):
			print("Cannot continue as output already exists")
			sys.exit(1)

		# rmtree refuses plain files and symlinks, so remove those directly.
		if os.path.isdir(outdir) and not os.path.islink(outdir):
			shutil.rmtree(outdir)
		else:
			os.remove(outdir)

	os.mkdir(outdir)

	tempdir = tempfile.mkdtemp(prefix='waistcoat', dir=temp_loc)
	try:
		# Read and validate settings for waistcoat
		if verbose:
			print("Reading settings from \'{}\'".format(settings_file))
		my_settings = settings.loadf(settings_file)

		statistics.setUp(my_settings.barcodes.keys())

		# Run the preprocessing pipeline
		if verbose:
			print("\n========== Preprocessing ==========")
		# True only when ``reads`` is our own inflated temporary copy.
		# NOTE(review): presumably tells preprocess.run it may delete its
		# input -- confirm against preprocess.run.
		remove_input = False
		if reads.endswith('.gz'):
			if verbose:
				print("Inflating...")
			reads = _inflate_gzip(reads, tempdir)
			remove_input = True

		files = preprocess.run(reads, my_settings, tempdir, remove_input)

		# Discard those which map to discard
		if verbose:
			print("\n========== Discard ==========")
		for i, (index, dcs) in enumerate(my_settings.discard):
			new_files = {}
			count = {}
			if verbose:
				print("Removing reads which map to \'{}\' ({}/{})...".format(
						index, i + 1, len(my_settings.discard)))
			for sample, f in files.items():
				if verbose:
					print("\tScanning \'{}\'".format(sample))
				(new_files[sample], count[sample]) = (
						tophat.discard_mapped(f, index, tophat_settings=dcs))
			# The next discard index works on the output of this one.
			files = new_files
			statistics.addValues('discard_' + os.path.basename(index), count)

		# Map to genome
		(target, target_settings) = my_settings.target
		if verbose:
			print("\n========== Map to {} ==========".format(
					os.path.basename(target)))
		th = tophat.tophat_from_settings(target_settings)
		for i, (sample, f) in enumerate(files.items()):
			th.output_dir = os.path.join(outdir, sample)
			os.mkdir(th.output_dir)
			if verbose:
				print("Mapping {} ({}/{})...".format(sample, i + 1, len(files)))
			th.run(f, index_base=target)
			os.remove(f)

		if verbose:
			print("\n========== Postprocess ==========")
		count = {}
		# Only the sample names are needed: the files were deleted above.
		for i, sample in enumerate(files):
			if verbose:
				print("{} ({}/{})...".format(sample, i + 1, len(files)))

			out = os.path.join(outdir, '{}.bam'.format(sample))
			count[sample] = postprocess.run(outdir, sample,
					"{}.fa".format(target), extend=extend)
			statistics.collectFinalStats(sample, out)

		statistics.addValues('final_seqs', count)

		statistics.write(os.path.join(outdir, 'statistics'))
	finally:
		# Remove the scratch files whether or not the pipeline succeeded.
		shutil.rmtree(tempdir)

	if verbose:
		print("\n__________ Pipeline Statistics __________")
		print(statistics.prettyString())