Python MatrixWriter.close Examples

Programming Language: Python

Namespace/Package Name: intogen.matrix

Class/Type: MatrixWriter

Method/Function: close

Examples at hotexamples.com: 3

Python MatrixWriter.close - 3 examples found. These are the top-rated real-world Python examples of intogen.matrix.MatrixWriter.close, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

close(3)

write(3)

write_header(3)

Frequently Used Methods

close (3)

write (3)

write_header (3)

Example #1

Show file

File: cnv_oncodrive_calc.py Project: chris-zen/phd-thesis

def mask_filtering(input_path, output_path, mask):
	"""Write a 0/1 copy of an integer matrix filtered by a bit mask.

	The header and the row names of the input matrix are copied unchanged.
	Every value becomes 1 when it shares at least one set bit with *mask*
	and 0 otherwise.

	:param input_path: source handed to ``MatrixReader``.
	:param output_path: destination handed to ``MatrixWriter``.
	:param mask: integer bit mask tested against every value.
	"""
	mr = MatrixReader(input_path, dtype=int)
	try:
		mw = MatrixWriter(output_path, dtype=int)
		try:
			mw.write_header(mr.read_header())
			for row in mr:
				values = [1 if (v & mask) != 0 else 0 for v in row.values]
				mw.write(row.name, values)
		finally:
			# Release the writer even when reading or writing fails
			mw.close()
	finally:
		# Release the reader even when the writer could not be created
		mr.close()

Example #2

Show file

File: mrna_log2r_calc.py Project: chris-zen/phd-thesis

def _collect_log2_ratios(mr, pool_data, rpath, log):
	"""Compute tumour-minus-pool differences for every row left in *mr*.

	Values are subtracted directly (``value - pool_value``), so they are
	presumably already log2-transformed -- TODO confirm against the code
	that produces the absolute intensity files.

	:param mr: reader already positioned after the header; iterated for rows
		exposing ``name`` and ``values``.
	:param pool_data: mapping from row name to the normal pool value.
	:param rpath: path of the file being read, used only in log messages.
	:param log: logger used to report a duplicated row.
	:return: ``(data, warn_count, duplicated)``: *data* maps row names to
		their ratio (infinite ratios are left out), *warn_count* counts the
		anomalies found and *duplicated* is True when reading stopped at a
		repeated row name.
	"""
	warn_count = {
		"id_not_in_pool" : 0,
		"value_is_nan" : 0,
		"pool_value_is_nan" : 0,
		"value_is_inf" : 0,
		"pool_value_is_inf" : 0}

	data = {}
	duplicated = False
	for row in mr:
		if row.name in data:
			log.error("Skipping tumour assay, duplicated row %s at file %s" % (row.name, rpath))
			duplicated = True
			break

		value = row.values[0]

		value_is_nan = numpy.isnan(value)

		if value_is_nan:
			warn_count["value_is_nan"] += 1
		elif numpy.isinf(value):
			warn_count["value_is_inf"] += 1

		if row.name not in pool_data:
			# Rows unknown to the pool fall through as NaN below
			pool_value = value = numpy.nan
			warn_count["id_not_in_pool"] += 1
		else:
			pool_value = pool_data[row.name]

		pool_value_is_nan = numpy.isnan(pool_value)
		if pool_value_is_nan:
			warn_count["pool_value_is_nan"] += 1
		elif numpy.isinf(pool_value):
			warn_count["pool_value_is_inf"] += 1

		if not value_is_nan and not pool_value_is_nan:
			log2r = value - pool_value
		else:
			log2r = numpy.nan

		if not numpy.isinf(log2r):
			data[row.name] = log2r

	return data, warn_count, duplicated


def run(task):
	"""Calculate log2 ratio assays for the tumour units read from the input port.

	For every assay yielded by ``iter_tumour_absi`` the matching normal pool
	(same study, platform and normal counterpart topography) is looked up,
	the per-row ratios are written to a ``.tsv.gz`` file in the "data"
	repository, the resulting entity is persisted and its id is written to
	the ``log2r_ids`` port. Assays that cannot be processed (missing file or
	pool, unexpected number of columns, duplicated rows) are logged and
	skipped.

	:param task: workflow task giving access to the configuration, the
		logger and the ``absi_tumour_unit_ids`` / ``log2r_ids`` ports.
	"""

	# Initialization

	task.check_conf(["entities", "repositories", "repositories.assay"])
	conf = task.conf

	log = task.logger()

	task.check_in_ports(["absi_tumour_unit_ids"])
	task.check_out_ports(["log2r_ids"])

	absi_tumour_unit_port = task.ports["absi_tumour_unit_ids"]
	log2r_port = task.ports["log2r_ids"]

	es = EntityServer(conf["entities"])
	em = es.manager()

	rs = RepositoryServer(conf["repositories"])
	data_repo = rs.repository("data")

	overwrite = conf.get("overwrite", False, dtype=bool)

	# All log2r data files live under the same repository folder
	data_file_path = types.MRNA_LOG2R.replace(".", "/")

	# Run

	# Index normal pools by study, platform, topography
	log.debug("Indexing normal pools by study, platform and topography ...")
	pools_index = em.group_ids(
		["study_id", "platform_id", "icdo_topography"],
		types.MRNA_NORMAL_POOL, unique = True)

	# Index log2r assays by absi_id
	log.debug("Indexing log2r assays by absi assay ...")
	log2r_index = em.group_ids(
		["absi_id"],
		types.MRNA_LOG2R, unique = True)

	absi_tumour_unit_ids = absi_tumour_unit_port.read_all()

	log.info("Processing %i mrna absi tumour units ..." % len(absi_tumour_unit_ids))

	# The last loaded pool is kept between iterations so that consecutive
	# assays sharing the same normal pool do not reload its data.
	# pool_key and pool_id always describe the pool currently loaded.
	pool = None
	pool_key = None
	pool_id = None
	pool_data = {}

	# For each abs intensity assay
	for absi in iter_tumour_absi(conf, em, absi_tumour_unit_ids, log):

		absi_id = absi["id"]

		rpath = os.path.join(absi["data_file/path"], absi["data_file/name"])

		icdo_topography = absi["icdo_topography"]
		normal_counterpart = absi.get("normal_counterpart", icdo_topography)
		if icdo_topography != normal_counterpart:
			keystr = "(%s, %s, %s --> %s)" % (absi["study_id"], absi["platform_id"], icdo_topography, normal_counterpart)
		else:
			keystr = "(%s, %s, %s)" % (absi["study_id"], absi["platform_id"], icdo_topography)

		# Reuse the id of a previously calculated log2r assay when there is one
		exists = (absi_id,) in log2r_index
		if exists:
			log2r_id = log2r_index[(absi_id,)][0]
		else:
			log2r_id = str(uuid.uuid4())

		data_file_name = log2r_id + ".tsv.gz"
		dst_path = os.path.join(data_file_path, data_file_name)

		if not overwrite and exists and data_repo.exists(dst_path):
			log.debug("Skipping calculation of log2r for tumour assay %s %s as it is already calculated" % (keystr, absi_id))
			log2r_port.write(log2r_id)
			continue

		log.info("Processing tumour assay %s %s from %s ..." % (keystr, absi_id, rpath))

		repo = rs.repository(absi["data_file/repo"])
		if not repo.exists(rpath):
			log.error("File not found: %s" % rpath)
			continue

		# Get normal counterpart data
		if pool is None \
			or absi["study_id"] != pool["study_id"] \
			or absi["platform_id"] != pool["platform_id"] \
			or normal_counterpart != pool["icdo_topography"]:

			wanted_key = (absi["study_id"], absi["platform_id"], normal_counterpart)
			if wanted_key not in pools_index:
				log.error("Normal pool not found for tumour assay (%s) %s {%s}" % (", ".join(wanted_key), absi_id, absi.get("source_path", "")))
				continue

			wanted_id = pools_index[wanted_key][0]
			pool = em.find(wanted_id, types.MRNA_NORMAL_POOL)
			if pool is None:
				log.error("Normal pool %s not found by the entity manager !" % wanted_id)
				continue

			pool_data = read_pool_data(conf, rs, pool, log)
			if pool_data is None:
				pool = None
				continue

			# Only now does the loaded pool actually change. A failed lookup
			# above must not alter what later log messages report.
			pool_key = wanted_key
			pool_id = wanted_id

		log.info("Using normal pool ({}) [{}]".format(", ".join(pool_key), pool_id))

		# Calculate log2 ratios
		mr = MatrixReader(repo.open_reader(rpath))
		try:
			header = mr.read_header()
			if len(header.columns) != 2:
				log.error("Unexpected number of columns: %i" % len(header.columns))
				continue

			data, warn_count, duplicated = _collect_log2_ratios(mr, pool_data, rpath, log)
		finally:
			# Runs on success, on "continue" and on errors alike
			mr.close()

		sb = ["{0}={1}".format(k, v) for k, v in warn_count.items() if v > 0]
		if len(sb) > 0:
			log.warn(", ".join(sb))

		if duplicated:
			# The error has been logged already; do not save a partial matrix
			continue

		# Save log2 ratios data and assay
		log2r = deepcopy(absi)

		log2r["id"] = log2r_id
		log2r["absi_id"] = absi_id
		log2r["normal_pool_id"] = pool["id"]

		log2r["data_file/repo"] = data_repo.name()
		log2r["data_file/path"] = data_file_path
		log2r["data_file/name"] = data_file_name

		msg = {True : "Overwritting", False : "Writting"}[exists]
		log.debug("%s log2 ratio data to %s ..." % (msg, dst_path))

		mw = MatrixWriter(data_repo.open_writer(dst_path))
		try:
			mw.write_header(["id", "value"])
			for name, value in sorted(data.items()):
				mw.write(name, [value])
		finally:
			mw.close()

		em.persist(log2r, types.MRNA_LOG2R)
		log2r_port.write(log2r_id)

	em.close()
	es.close()

	data_repo.close()
	rs.close()

Example #3

Show file

File: mrna_normal_pool.py Project: chris-zen/phd-thesis

def run(task):
	"""Build the pooled data file of every normal pool read from the input port.

	For each pool id the absolute intensity assays listed in
	``mrna_absi_ids`` are read, ``2 ** value`` of every row is fed to a
	``MeanPoolMethod`` and the log2 of each pooled row is written to a
	``.tsv.gz`` file in the "data" repository. The pool entity is then
	updated with the number of pooled assays and the data file location.
	Nothing is written for a pool when one of its assays has duplicated rows
	or when no assay could be pooled.

	:param task: workflow task giving access to the configuration, the
		logger and the ``normal_pool_ids`` port.
	:return: always 0.
	"""

	# Initialization

	task.check_conf(["entities", "repositories", "repositories.assay"])
	conf = task.conf

	log = task.logger()

	task.check_in_ports(["normal_pool_ids"])

	normal_pool_port = task.ports["normal_pool_ids"]

	es = EntityServer(conf["entities"])
	em = es.manager()

	rs = RepositoryServer(conf["repositories"])
	data_repo = rs.repository("data")

	overwrite = conf.get("overwrite", False, dtype=bool)

	# All pooled data files live under the same repository folder
	data_file_path = types.MRNA_NORMAL_POOL.replace(".", "/")

	# Run

	log.info("Processing %i mrna normal pools ..." % normal_pool_port.size())

	for pool_id in normal_pool_port:
		pool = em.find(pool_id, types.MRNA_NORMAL_POOL)
		if pool is None:
			log.error("%s not found: %s" % (types.MRNA_NORMAL_POOL, pool_id))
			continue

		mf = pool.missing_fields(["study_id", "platform_id", "icdo_topography", "size", "mrna_absi_ids"])
		if len(mf) > 0:
			log.error("Normal pool %s missing required fields: %s {%s}" % (pool_id, mf, pool.get("__doc_path", "")))
			continue

		key = (pool["study_id"], pool["platform_id"], pool["icdo_topography"])
		log.info("Normal pool (%s) [%s] with %i assays ..." % (", ".join(key), pool_id, pool["size"]))

		data_file_name = pool_id + ".tsv.gz"
		dst_rel_path = os.path.join(data_file_path, data_file_name)

		# A pool is up to date when its data file exists and every listed
		# assay was pooled the last time it was calculated
		if not overwrite and data_repo.exists(dst_rel_path) \
			and "mrna_absi_ids" in pool and "pooled_assays" in pool and \
					len(pool["mrna_absi_ids"]) == pool.get("pooled_assays", dtype=int):
			log.warn("Skipping normal pool %s that already has data" % pool_id)
			continue

		method = MeanPoolMethod()

		pooled_assays = 0
		duplicated_rows = False
		for absi in em.iter_all(types.MRNA_ABS_INTENSITY, eids = pool["mrna_absi_ids"]):
			mf = absi.missing_fields(["data_file/path", "data_file/name"])
			if len(mf) > 0:
				log.error("Normal assay %s missing required fields: %s {%s}" % (absi["id"], mf, absi.get("__doc_path", "")))
				continue

			data_file = absi["data_file"]
			rel_path = os.path.join(data_file["path"], data_file["name"])
			repo = rs.repository(data_file["repo"])
			if not repo.exists(rel_path):
				log.error("File not found: %s" % rel_path)
				continue

			log.debug("Processing normal assay %s for source assay %s at %s ..." % (absi["id"], absi["assay_id"], rel_path))

			mr = MatrixReader(repo.open_reader(rel_path))
			try:
				header = mr.read_header()
				if len(header.columns) != 2:
					log.error("Unexpected number of columns: %i" % len(header.columns))
					continue

				# Count the assay only once it is known to be usable, so that
				# "pooled_assays" reflects what really went into the pool
				pooled_assays += 1

				row_names = set()
				for row in mr:
					if row.name in row_names:
						log.error("Skipping normal assay, duplicated row %s at file %s" % (row.name, rel_path))
						duplicated_rows = True
						break
					row_names.add(row.name)

					value = numpy.exp2(row.values[0])
					method.process(row.name, value)
			finally:
				# Runs on success, on "continue" and on errors alike
				mr.close()

		if not duplicated_rows and pooled_assays > 0:
			exists = data_repo.exists(dst_rel_path)
			msg = {True : "Overwritting", False : "Writting"}[exists]
			log.debug("%s pooled data to %s ..." % (msg, dst_rel_path))

			mw = MatrixWriter(data_repo.open_writer(dst_rel_path))
			try:
				mw.write_header(["id", "value"])
				for row in method.pooled_rows():
					value = numpy.log2(row.values[0])
					mw.write(row.name, [value])
			finally:
				mw.close()

			pool["pooled_assays"] = pooled_assays
			pool["data_file/repo"] = "data"
			pool["data_file/path"] = data_file_path
			pool["data_file/name"] = data_file_name
			em.persist(pool, types.MRNA_NORMAL_POOL)

	em.close()
	es.close()

	data_repo.close()
	rs.close()

	return 0