예제 #1
0
def combination(log, conf, rs, c, data_repo, results_path, conditions):

	cid = c["id"]
	ids = c["source/ids"]
	files = c["files"]
	results_url = data_repo.url(results_path)

	try:
		# prepare temporary path and files
		tmp_path = mkdtemp(prefix = "cnv_combination_")
		data_file = os.path.join(tmp_path, "data.tdm")
		columns_file = os.path.join(tmp_path, "columns.gmt")
		tmp_file = os.path.join(tmp_path, "tmp.tdm")
		log.debug("Temporary directory: {}".format(tmp_path))

		# join files to combine in a single TDM file
		log.info("Joining files ...".format(files[0]))
		outpf = FileWriter(data_file)

		log.debug("\t{} ...".format(files[0]))
		repo, path = rs.from_url(files[0])
		local_path = repo.get_local(path)
		ref_hdr = tdm.unflatten(local_path, outpf, row_column = "id",
			column_and_attr_func = lambda name: unflatten_filtered_names(name, ids[0]))
		#outpf.flush()
		#ref_hdr = tdm.read_header_names(data_file)
		repo.close_local(path)

		for i in xrange(1, len(files)):
			log.debug("\t{} ...".format(files[i]))
			repo, path = rs.from_url(files[i])
			local_path = repo.get_local(path)
			hdr = tdm.unflatten(local_path, tmp_file, row_column = "id",
				column_and_attr_func = lambda name: unflatten_filtered_names(name, ids[i]))
			tdm.append(outpf, tmp_file, ref_hdr)
			repo.close_local(path)

		outpf.close()

		# prepare conditions columns file in GMT format

		outpf = FileWriter(columns_file)
		for cond in conditions:
			outpf.write(cond)
			outpf.write("\t\t")
			outpf.write("\t".join(["_".join((sid, cond)) for sid in ids]))
			outpf.write("\n")
		outpf.close()

		# run gitools-combination with data.tdm
		log.info("Running gitools combination ...")
		log.debug("\tData: {}".format(data_file))
		log.debug("\tColumns: {}".format(columns_file))

		gitools_combination_bin = os.path.join(conf["bin_paths.gitools"], "bin", "gitools-combination")

		cmd = " ".join([ gitools_combination_bin,
			"-N", cid, "-w", tmp_path,
			"-d", data_file,
			"-c", columns_file,
			"-pn", P_VALUE_FIELD,
			"-sn n",
			"-p 1", "-debug"])

		log.debug(cmd)

		retcode = subprocess.call(args = cmd, shell = True)

		sys.stdout.write("\n")
		sys.stdout.flush()

		if retcode != 0:
			raise Exception("Combination exit code = {}".format(retcode))

		# flatten results
		log.info("Flattening results into {} ...".format(results_url))

		try:
			results_local_path = data_repo.create_local(results_path)
			tdm.flatten(os.path.join(tmp_path, cid + "-results.tdm.gz"), results_local_path,
				None, ["N", "z-score", "p-value"])

			data_repo.put_local(results_local_path)
		except:
			data_repo.close_local(results_local_path)

	finally:
		shutil.rmtree(tmp_path)
예제 #2
0
def enrichment(log, conf, rs, data_repo, results_path, data_file, e, ec,
				filtered_columns, filtered_columns_new_names):

	eid = e["id"]

	key = (e["study_id"], e["platform_id"], e["icdo_topography"], e["icdo_morphology"])

	# determine the modules file
	mod_repo, mod_path = rs.from_url(ec["modules_file"])
	mod_local_path = mod_repo.get_local(mod_path)

	# oncodrive data file
	matrix_repo, matrix_path = rs.from_url(data_file)
	matrix_local_path = matrix_repo.get_local(matrix_path)

	e["data_file"] = data_file
	e["modules_file"] = ec["modules_file"]

	results_local_path = None

	tmp_path = mkdtemp(prefix = "enrichment_")
	log.debug("Temporary directory: {}".format(tmp_path))

	valid = True

	try:
		log.info("Filtering pvalue columns from {} ...".format(data_file))

		# filter columns for pvalues
		data_local_path = os.path.join(tmp_path, "data.tsv")

		rf = of = None
		try:
			rf = FileReader(matrix_local_path)
			of = FileWriter(data_local_path)
			row_count = tsv.filter_columns(rf, of,
					filtered_columns, filtered_columns_new_names)
		finally:
			if rf is not None:
				rf.close()
			if of is not None:
				of.close()

		if row_count == 0:
			log.warn("Oncodrive results are empty: {}".format(matrix_path))
			raise EmptyResults

		# apply background if necessary
		if "population.file" in ec:
			pop_url = ec["population.file"]
			pop_missing_value = ec.get("population.missing_value", "-")
			log.info("Applying background from {} with missing value {} ...".format(pop_url, pop_missing_value))
			data2_local_path = os.path.join(tmp_path, "data-filtered.tsv")
			pop_repo, pop_path = rs.from_url(pop_url)
			pop_local_path = pop_repo.get_local(pop_path)
			cmd = " ".join([
				conf["bin_paths.python"], conf["bin_paths.matrix_background"],
				"--verbose --missing-value", pop_missing_value,
				"-o", data2_local_path,
				data_local_path, pop_local_path ])

			log.debug(cmd)
			retcode = subprocess.call(args = cmd, shell = True)

			if retcode != 0:
				raise Exception("Applying population background for ({}) [{}] failed with code {}".format(", ".join(key), eid, retcode))

			pop_repo.close_local(pop_local_path)
			data_local_path = data2_local_path

		# enrichment results
		e["results_file"] = data_repo.url(results_path)
		results_local_path = data_repo.create_local(results_path)

		log.info("Running enrichment ...")
		log.debug("\tData file: {}".format(data_local_path))
		log.debug("\tModules file: {}".format(ec["modules_file"]))

		gitools_enrichment_bin = os.path.join(conf["bin_paths.gitools"], "bin", "gitools-enrichment")

		sb = [ gitools_enrichment_bin,
			"-N", eid, "-w", tmp_path, "-p 1",
			"-mf tcm", "-m", mod_local_path,
			"-df cdm", "-d", data_local_path,
			"-t", ec["test"] ]

		if "filter" in ec:
			sb += ["-b", ec["filter"]]

		if ec.get("only_mapped_items", False, dtype=bool):
			sb += ["-only-mapped-items"]

		#if "population" in ec:
		#	pop_repo, pop_path = rs.from_url(ec["population"])
		#	pop_local_path = pop_repo.get_local(pop_path)
		#	sb += ["-P", pop_local_path]

		cmd = " ".join(sb)

		log.debug(cmd)

		retcode = subprocess.call(args = cmd, shell = True)

		sys.stdout.write("\n")
		sys.stdout.flush()

		if retcode != 0:
			raise Exception("Enrichment for ({}) [{}] failed with code {}".format(", ".join(key), eid, retcode))

		# flatten results

		log.info("Flattening results into {} ...".format(e["results_file"]))

		try:
			gitools_results = os.path.join(tmp_path, eid + "-results.tdm.gz")
			rf = FileReader(gitools_results)
			of = FileWriter(results_local_path)
			tdm.flatten(rf, of,
				{ "column" : str, "row" : str, "N" : int, "observed" : int,
				"expected-mean" : float, "expected-stdev" : float, "probability" : float,
				"right-p-value" : float, "corrected-right-p-value" : float },

				["N", "observed", "expected-mean", "expected-stdev",
				"probability", "right-p-value", "corrected-right-p-value"])
		finally:
			if rf is not None:
				rf.close()
			if of is not None:
				of.close()

		# close local paths
		data_repo.put_local(results_local_path)

	except EmptyResults:
		valid = False

	except Exception as ex:
		log.exception(ex)

		if results_local_path is not None:
			data_repo.close_local(results_local_path)

		valid = False

	finally:
		shutil.rmtree(tmp_path)
		mod_repo.close_local(mod_local_path)
		data_repo.close_local(matrix_local_path)
		#if "population" in ec:
		#	pop_repo.close_local(pop_local_path)

	return valid