Example #1
	def write(self, name, values):
		value_to_str = self._value_to_str_func(self.dtype)

		FileWriter.write(self, name)
		for value in values:
			FileWriter.write(self, "\t")
			FileWriter.write(self, value_to_str(value))
		FileWriter.write(self, "\n")
Example #2
	def write_header(self, header = None):
		if isinstance(header, MatrixHeader):
			FileWriter.write(self, "\t".join(header.columns))
		elif isinstance(header, (list, tuple)):
			FileWriter.write(self, "\t".join(header))
		else:
			raise Exception("Unsupported header type: {}".format(str(type(header))))
		FileWriter.write(self, "\n")
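A minimal usage sketch for the two methods above, assuming a hypothetical MatrixWriter subclass of FileWriter that defines them; the class name, its constructor and the dtype argument are illustrative only:
# Hypothetical usage; only write_header() and write() come from the examples above.
writer = MatrixWriter("matrix.tdm", dtype=float)
writer.write_header(["col1", "col2", "col3"])   # -> col1<TAB>col2<TAB>col3
writer.write("row1", [0.1, 0.25, 1.0])          # -> row1<TAB>0.1<TAB>0.25<TAB>1.0
writer.close()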
Example #3
def merge(log, input, output, gitools_output):
    """
    Merge repeated rows by keeping the lowest p-value; when p-values are equal, keep the row with the greater n.
    """

    f = FileReader(input)
    hdr = f.readline().rstrip().split("\t")

    upreg = {}
    downreg = {}

    upreg_count = 0
    downreg_count = 0

    mid_index = 8  # fields[1:8] hold the upreg_* columns, fields[8:] the downreg_* columns

    for line in f:
        line = line.rstrip()
        if len(line) == 0:
            continue

        fields = line.split("\t")
        row_name = fields[0]

        upreg_count += merge_data(row_name, fields[1:mid_index], upreg)
        downreg_count += merge_data(row_name, fields[mid_index:], downreg)

    f.close()

    upreg_keys = upreg.keys()
    downreg_keys = downreg.keys()

    log.debug("Total rows: upreg = {}, downreg = {}".format(len(upreg_keys), len(downreg_keys)))
    log.debug("Merged rows: upreg = {}, downreg = {}".format(upreg_count, downreg_count))

    ofile = FileWriter(output)
    ofile.write("\t".join(hdr))
    ofile.write("\n")

    gfile = FileWriter(gitools_output)
    gfile.write("column\trow\t")
    gfile.write("\t".join([x[6:] for x in hdr if x.startswith("upreg_")]))
    gfile.write("\n")

    for row_name in upreg_keys:
        upreg_data = upreg[row_name]
        upreg_data_join = "\t".join(upreg_data)

        downreg_data = downreg[row_name]
        downreg_data_join = "\t".join(downreg_data)

        ofile.write(row_name)
        ofile.write("\t")
        ofile.write(upreg_data_join)
        ofile.write("\t")
        ofile.write(downreg_data_join)
        ofile.write("\n")

        gfile.write("upreg\t")
        gfile.write(row_name)
        gfile.write("\t")
        gfile.write(upreg_data_join)
        gfile.write("\n")
        gfile.write("downreg\t")
        gfile.write(row_name)
        gfile.write("\t")
        gfile.write(downreg_data_join)
        gfile.write("\n")

    ofile.close()
    gfile.close()

    return (upreg_count, downreg_count)
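merge_data() is not part of this example. A sketch of what it could look like, based only on the docstring rule (lowest p-value wins, greater n breaks ties) and on how it is called above; the positions of the p-value and n columns inside fields are assumptions:
def merge_data(row_name, fields, data):
    # Hypothetical helper; the column positions below are assumptions.
    if row_name not in data:
        data[row_name] = fields
        return 0

    prev = data[row_name]
    pvalue, n = float(fields[0]), int(fields[1])            # assumed layout
    prev_pvalue, prev_n = float(prev[0]), int(prev[1])

    # keep the lowest p-value; on ties, keep the row with the greater n
    if pvalue < prev_pvalue or (pvalue == prev_pvalue and n > prev_n):
        data[row_name] = fields

    return 1   # one repeated row was merged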
Example #4
def write_data_map(dmap, path):
	rf = FileWriter(path)
	hdr = ["id"]
	hdr.extend(["_".join(("gain", f.replace("-", "_").lower())) for f in FIELDS])
	hdr.extend(["_".join(("loss", f.replace("-", "_").lower())) for f in FIELDS])
	rf.write("\t".join(hdr) + "\n")
	for row, values in dmap.iteritems():
		rf.write(row)
		for v in values:
			rf.write("\t")
			rf.write(v)
		if len(values) == len(FIELDS):
			# only gain values present: pad the loss columns with "-"
			rf.write("\t")
			rf.write("\t".join(["-"] * len(FIELDS)))
		rf.write("\n")
	rf.close()
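A usage sketch for write_data_map; FIELDS, the gene names and the values below are assumptions for illustration (the real FIELDS constant is defined elsewhere in the module):
# Illustration only: FIELDS = ["n", "p-value"] is an assumption.
dmap = {
	"GENE1": ["12", "0.001", "7", "0.03"],   # gain and loss values present
	"GENE2": ["5", "0.2"],                   # only gain values; loss columns padded with "-"
}
write_data_map(dmap, "cnv_data.tdm")
# resulting file:
# id	gain_n	gain_p_value	loss_n	loss_p_value
# GENE1	12	0.001	7	0.03
# GENE2	5	0.2	-	-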
Example #5
def combination(log, conf, rs, c, data_repo, results_path, conditions):

	cid = c["id"]
	ids = c["source/ids"]
	files = c["files"]
	results_url = data_repo.url(results_path)

	try:
		# prepare temporary path and files
		tmp_path = mkdtemp(prefix = "cnv_combination_")
		data_file = os.path.join(tmp_path, "data.tdm")
		columns_file = os.path.join(tmp_path, "columns.gmt")
		tmp_file = os.path.join(tmp_path, "tmp.tdm")
		log.debug("Temporary directory: {}".format(tmp_path))

		# join files to combine in a single TDM file
		log.info("Joining files ...".format(files[0]))
		outpf = FileWriter(data_file)

		log.debug("\t{} ...".format(files[0]))
		repo, path = rs.from_url(files[0])
		local_path = repo.get_local(path)
		ref_hdr = tdm.unflatten(local_path, outpf, row_column = "id",
			column_and_attr_func = lambda name: unflatten_filtered_names(name, ids[0]))
		#outpf.flush()
		#ref_hdr = tdm.read_header_names(data_file)
		repo.close_local(path)

		for i in xrange(1, len(files)):
			log.debug("\t{} ...".format(files[i]))
			repo, path = rs.from_url(files[i])
			local_path = repo.get_local(path)
			hdr = tdm.unflatten(local_path, tmp_file, row_column = "id",
				column_and_attr_func = lambda name: unflatten_filtered_names(name, ids[i]))
			tdm.append(outpf, tmp_file, ref_hdr)
			repo.close_local(path)

		outpf.close()

		# prepare conditions columns file in GMT format
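		# Each GMT line written below is: set name <TAB> description (left empty) <TAB>
		# member columns, where the members are the "<source id>_<condition>" column names.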

		outpf = FileWriter(columns_file)
		for cond in conditions:
			outpf.write(cond)
			outpf.write("\t\t")
			outpf.write("\t".join(["_".join((sid, cond)) for sid in ids]))
			outpf.write("\n")
		outpf.close()

		# run gitools-combination with data.tdm
		log.info("Running gitools combination ...")
		log.debug("\tData: {}".format(data_file))
		log.debug("\tColumns: {}".format(columns_file))

		gitools_combination_bin = os.path.join(conf["bin_paths.gitools"], "bin", "gitools-combination")

		cmd = " ".join([ gitools_combination_bin,
			"-N", cid, "-w", tmp_path,
			"-d", data_file,
			"-c", columns_file,
			"-pn", P_VALUE_FIELD,
			"-sn n",
			"-p 1", "-debug"])

		log.debug(cmd)

		retcode = subprocess.call(args = cmd, shell = True)

		sys.stdout.write("\n")
		sys.stdout.flush()

		if retcode != 0:
			raise Exception("Combination exit code = {}".format(retcode))

		# flatten results
		log.info("Flattening results into {} ...".format(results_url))

		try:
			results_local_path = data_repo.create_local(results_path)
			tdm.flatten(os.path.join(tmp_path, cid + "-results.tdm.gz"), results_local_path,
				None, ["N", "z-score", "p-value"])

			data_repo.put_local(results_local_path)
		except:
			data_repo.close_local(results_local_path)
			raise

	finally:
		shutil.rmtree(tmp_path)
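As noted in the comment above, the gitools-combination call is built as one shell string. A minimal alternative sketch, assuming the same variables as in Example #5, that passes an argument list instead so no shell quoting is needed:
# Sketch only: the flags mirror the call in Example #5; nothing new is added.
cmd = [gitools_combination_bin,
	"-N", cid, "-w", tmp_path,
	"-d", data_file,
	"-c", columns_file,
	"-pn", P_VALUE_FIELD,
	"-sn", "n",
	"-p", "1", "-debug"]

retcode = subprocess.call(cmd)   # no shell=True needed with a list of arguments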