Exemple #1
0
def finalize_project(project):
	"""
	Clean a finished project, save its definition and, when the results
	storage is enabled, upload the project files.

	With config.results.use_storage set, project.db is gzip-compressed into a
	temporary directory and uploaded together with the project folder
	(excluding sources, the raw database and, unless create_zip is set,
	results.zip). The local project folder is removed afterwards when
	config.results.purge_after_upload is set. The temporary directory is
	always removed.

	:param project: project definition; only its "id" is read directly here
	:raises Exception: if gzip reports an error while compressing the database
	"""
	log = task.logger

	config = GlobalConfig(task.conf)
	# NOTE(review): 'paths' is not used below; kept in case PathsConfig
	# validates the configuration on construction -- confirm and remove.
	paths = PathsConfig(config)

	project_id = project["id"]

	log.info("--- [{0}] --------------------------------------------".format(project_id))

	log.info("Cleaning project ...")

	projres = ProjectResults(project)

	projres.clean(config)

	log.info("Saving project configuration ...")

	projres.save_def()

	if not config.results.use_storage:
		return

	log.info("Compressing the database ...")

	db_path = os.path.join(projres.path, "project.db")
	temp_path = tempfile.mkdtemp(prefix="intogen-mutations-{}-".format(project_id))
	compressed_db_path = os.path.join(temp_path, "project.db.gz")

	try:
		# Run gzip without a shell so that paths containing spaces or shell
		# metacharacters cannot alter the command.
		cmd = ["gzip", "-c", db_path]

		log.debug("> {} >{}".format(" ".join(cmd), compressed_db_path))

		with open(compressed_db_path, "wb") as compressed_file:
			retcode = subprocess.call(cmd, stdout=compressed_file)

		# gzip exits with 0 on success and 2 on warnings; anything else
		# (1 = error, negative = killed by a signal) is a failure.
		if retcode not in (0, 2):
			raise Exception("Error compressing the project database")

		log.info("Uploading project files ...")

		exclude = ["sources/*", "project.db", "project.db.gz"]
		if not config.results.create_zip:
			exclude += ["results.zip"]

		object_prefix = "results/projects/{}".format(project_id)
		start_callback = lambda path: log.info("  {}".format(path))

		if os.path.exists(compressed_db_path):
			task.storage.upload(compressed_db_path, object_prefix=object_prefix, overwrite=True,
								start_callback=start_callback)

		task.storage.upload(projres.path, object_prefix=object_prefix, exclude=exclude, overwrite=True,
							start_callback=start_callback)

		if config.results.purge_after_upload:
			log.info("Purging project files ...")
			shutil.rmtree(projres.path)

	finally:
		if os.path.exists(temp_path):
			shutil.rmtree(temp_path)
Exemple #2
0
	def execute(self):
		"""
		Render the quality control report of a workspace.

		Every entry found under <results>/<workspace>/projects is loaded as a
		project (definition plus quality control data). One
		project_<id>.html page is written per project and finally an
		index.html, all under <results>/<workspace>/quality_control.
		"""
		join = os.path.join

		workspace_path = join(self.results_path, self.args.workspace)
		projects_path = join(workspace_path, "projects")
		qc_path = join(workspace_path, "quality_control")

		rel_qc_path = os.path.relpath(qc_path, self.runtime_path)
		self.log.info("Preparing output path at {} ...".format(rel_qc_path))

		self.create_output_path(qc_path)

		# Templates are looked up first in the package, then in the web folder
		template_loader = ChoiceLoader([
			PackageLoader(__name__, 'qc_templates'),
			FileSystemLoader(join(self.root_path, "web", "templates"))
		])

		env = Environment(loader=template_loader, autoescape=False)
		env.globals.update(version=VERSION, creation_date=str(datetime.now()))
		env.filters["tojson"] = tojson

		self.log.info("Looking for projects ...")

		projects = []
		quality_controls = []

		for entry in sorted(os.listdir(projects_path)):
			results = ProjectResults(path=join(projects_path, entry))
			definition = results.load_def()
			quality_data = results.load_quality_control()
			projects.append(definition)
			quality_controls.append(quality_data)

		self.log.info("Processing {} projects ...".format(len(projects)))

		for index, (project, quality) in enumerate(zip(projects, quality_controls)):
			self.log.info("Processing report for project {} ...".format(project["id"]))

			template = env.get_template("project.html")

			page_path = join(qc_path, "project_{}.html".format(project["id"]))
			page_context = dict(
				index=index,
				project=project,
				quality=quality,
				projects=projects)

			template.stream(page_context).dump(page_path)

		self.log.info("Generating index.html ...")

		index_template = env.get_template("index.html")
		index_template.stream(projects=projects).dump(join(qc_path, "index.html"))
Exemple #3
0
def variants(project):
	"""
	Count the variants held in the project database and store the counts as
	the "variants" quality control data of the project.

	:param project: project definition; "id" and "db" are read here
	"""
	log = task.logger

	log.info("--- [{0}] --------------------------------------------".format(project["id"]))

	log.info("Calculating number of variants processed in each step ...")

	proj_res = ProjectResults(project)

	projdb = ProjectDb(project["db"])
	try:
		counts = projdb.count_variants()

		proj_res.save_quality_control("variants", counts)
	finally:
		# Release the database even if counting or saving fails
		projdb.close()
Exemple #4
0
def __load_case_info(case, except_enabled=False, quality_enabled=False):
	"""
	Build a dictionary describing a case.

	The basic fields come from the case itself. When the case has properties,
	the analysis type, the availability of results.zip and of the website
	data, and optionally the quality control data are added. When the engine
	still knows the case, its live state, timing, progress and optionally its
	exceptions override or extend the stored values.

	:param case: case to describe
	:param except_enabled: include the engine case exceptions
	:param quality_enabled: load the project quality control data
	:return: dictionary with the case information
	"""
	wok = current_app.wok

	state_title = case.state.title if case.state is not None else None

	case_info = dict(
		id=case.id,
		name=case.name,
		state=state_title,
		created=case.created,
		started=case.started,
		finished=case.finished,
		removed=case.removed)

	properties = case.properties
	if properties is not None:
		results_path = get_results_path(get_project_conf())
		project_path = os.path.join(results_path, properties["path"])

		quality = None
		if quality_enabled:
			quality = ProjectResults(path=project_path).load_quality_control()

		# Empty quality control data is reported as None
		if quality is not None and len(quality) == 0:
			quality = None

		case_info.update(
			analysis_type=properties["analysis_type"],
			results_available=os.path.exists(os.path.join(project_path, "results.zip")),
			website_available=os.path.exists(
				os.path.join(project_path, "website", "results", "project.tsv")),
			quality=quality)

	engine_case = wok.engine.case(case.engine_name)
	if engine_case is None:
		return case_info

	exceptions = __engine_case_exceptions(engine_case) if except_enabled else None

	# Values reported by the engine take precedence over the stored ones
	case_info.update(
		state=engine_case.state.title,
		started=engine_case.started,
		finished=engine_case.finished,
		elapsed=engine_case.elapsed,
		progress=__engine_case_progress(engine_case),
		exceptions=exceptions)

	return case_info
Exemple #5
0
def oncodrivefm(project):
    """
    Compute the OncodriveFM quality indicators of a project and store them
    as its "oncodrivefm" quality control data.

    :param project: project definition; "id" and "conf" are read here
    """
    log, conf = task.logger, task.conf

    log.info("--- [{0}] --------------------------------------------".format(project["id"]))

    # Paths come from the global configuration alone, so that the project
    # configuration cannot override them; everything else may be overridden.
    paths = PathsConfig(GlobalConfig(conf))
    config = GlobalConfig(conf, project["conf"])

    ofm = OncodriveFm(config.oncodrivefm, paths, log)

    log.info("Calculating quality indicators for OncodriveFM ...")

    indicators = quality_control(log, config, project, ofm)

    ProjectResults(project).save_quality_control("oncodrivefm", indicators)
Exemple #6
0
def pack_datasets(project):
	"""
	Write the project results into <project path>/results.zip.

	The archive receives one entry each for variant genes, variant samples,
	consequences, genes, pathways, the per sample functional impact matrix
	(unless config.skip_oncodrivefm is set) and the project description.
	Nothing is done when config.results.create_zip is disabled.

	:param project: project definition; "id", "path", "db", "assembly" and
		"annotations" are read here
	"""
	log = task.logger

	config = GlobalConfig(task.conf)

	project_id = project["id"]

	log.info("--- [{0}] --------------------------------------------".format(project_id))

	if not config.results.create_zip:
		log.info("Creation of the results compressed file is deactivated. Skipped.")
		return

	dest_path = os.path.join(project["path"], "results.zip")

	sigdb = SigDb(config.sigdb_path)
	sigdb.open()

	projdb = None
	arc = None
	try:
		# Everything that touches the databases runs inside the try block so
		# that they are closed even when an early query fails.
		projdb = ProjectDb(project["db"])

		projres = ProjectResults(project)

		gene_sym = projdb.get_gene_symbols()

		total_samples = projdb.get_total_affected_samples()

		log.info("Compressing files ...")

		arc = Archive(dest_path, mode="w", fmt="zip")

		log.info("  Variant genes ...")

		with ArcFile(task, arc, project_id, "variant_genes", "w") as vf:
			write_line(vf, "PROJECT_ID", "CHR", "STRAND", "START", "ALLELE",
						"GENE_ID", "SYMBOL", "VAR_IMPACT", "VAR_IMPACT_DESC",
						"SAMPLE_FREQ", "SAMPLE_TOTAL", "SAMPLE_PROP",
						"CODING_REGION", "PROTEIN_CHANGES", "INTOGEN_DRIVER", "XREFS")

			for afg in projdb.affected_genes(join_variant=True, join_xrefs=True, join_rec=True):
				var = afg.var
				rec = afg.rec

				start, end, ref, alt = var_to_tab(var)

				# Variants known to the significance database get an extra xref
				xrefs = list(var.xrefs)
				if sigdb.exists_variant(var.chr, start):
					xrefs += ["I:1"]
				xrefs = ",".join(xrefs)

				intogen_driver = 1 if sigdb.exists_gene(afg.gene_id) else 0

				write_line(vf, project_id, var.chr, var.strand, start, "{0}/{1}".format(ref, alt),
							afg.gene_id, gene_sym.get(afg.gene_id),
							afg.impact, TransFIC.class_name(afg.impact),
							rec.sample_freq or 0, total_samples, rec.sample_prop or 0,
							afg.coding_region, afg.prot_changes, intogen_driver, xrefs)

		log.info("  Variant samples ...")

		with ArcFile(task, arc, project_id, "variant_samples", "w") as vf:
			write_line(vf, "PROJECT_ID", "CHR", "STRAND", "START", "ALLELE", "SAMPLES")

			for var in projdb.variants(join_samples=True):
				start, end, ref, alt = var_to_tab(var)
				write_line(vf, project_id, var.chr, var.strand, start, "{0}/{1}".format(ref, alt),
							",".join([s.name for s in var.samples]))

		log.info("  Consequences ...")

		with ArcFile(task, arc, project_id, "consequences", "w") as cf:
			write_line(cf, "PROJECT_ID", "CHR", "STRAND", "START", "ALLELE", "TRANSCRIPT_ID", "CT",
						"GENE_ID", "SYMBOL", "UNIPROT_ID", "PROTEIN_ID", "PROTEIN_POS", "AA_CHANGE",
						"SIFT_SCORE", "SIFT_TRANSFIC", "SIFT_TRANSFIC_CLASS",
						"PPH2_SCORE", "PPH2_TRANSFIC", "PPH2_TRANSFIC_CLASS",
						"MA_SCORE", "MA_TRANSFIC", "MA_TRANSFIC_CLASS",
						"IMPACT", "IMPACT_CLASS")

			for csq in projdb.consequences(join_variant=True):
				var = csq.var
				start, end, ref, alt = var_to_tab(var)
				allele = "{0}/{1}".format(ref, alt)

				# Protein and score columns stay empty unless the consequence
				# types match the corresponding term sets below.
				uniprot = protein = protein_pos = aa_change = None
				sift_score = sift_tfic = sift_tfic_class = None
				pph2_score = pph2_tfic = pph2_tfic_class = None
				ma_score = ma_tfic = ma_tfic_class = None

				if so.match(csq.ctypes, so.ONCODRIVEFM):
					uniprot, protein = csq.uniprot, csq.protein

				if so.match(csq.ctypes, so.NON_SYNONYMOUS):
					protein_pos, aa_change = csq.protein_pos, csq.aa_change
					sift_score, sift_tfic, sift_tfic_class = csq.sift_score, csq.sift_tfic, TransFIC.class_name(csq.sift_tfic_class)
					pph2_score, pph2_tfic, pph2_tfic_class = csq.pph2_score, csq.pph2_tfic, TransFIC.class_name(csq.pph2_tfic_class)
					ma_score, ma_tfic, ma_tfic_class = csq.ma_score, csq.ma_tfic, TransFIC.class_name(csq.ma_tfic_class)

				write_line(cf, project_id, var.chr, var.strand, start, allele, csq.transcript,
							",".join(csq.ctypes), csq.gene, gene_sym.get(csq.gene),
							uniprot, protein, protein_pos, aa_change,
							sift_score, sift_tfic, sift_tfic_class,
							pph2_score, pph2_tfic, pph2_tfic_class,
							ma_score, ma_tfic, ma_tfic_class,
							csq.impact, TransFIC.class_name(csq.impact))

		log.info("  Genes ...")

		with ArcFile(task, arc, project_id, "genes", "w") as gf:
			write_line(gf, "PROJECT_ID", "GENE_ID", "SYMBOL", "FM_PVALUE", "FM_QVALUE", "FM_EXC_CAUSE",
						"SAMPLE_FREQ", "SAMPLE_TOTAL", "SAMPLE_PROP",
						"CLUST_ZSCORE", "CLUST_PVALUE", "CLUST_QVALUE", "CLUST_EXC_CAUSE", "CLUST_COORDS",
						"INTOGEN_DRIVER", "XREFS")

			for gene in projdb.genes(join_xrefs=True, join_rec=True):
				# Only genes altered in at least one sample are exported
				if gene.rec.sample_freq is not None and gene.rec.sample_freq > 0:
					intogen_driver = 1 if sigdb.exists_gene(gene.id) else 0
					write_line(gf, project_id, gene.id, gene.symbol, gene.fm_pvalue, gene.fm_qvalue, gene.fm_exc_cause,
								gene.rec.sample_freq, total_samples, gene.rec.sample_prop or 0,
								gene.clust_zscore, gene.clust_pvalue, gene.clust_qvalue, gene.clust_exc_cause,
								gene.clust_coords, intogen_driver, ",".join(gene.xrefs))

		log.info("  Pathways ...")

		with ArcFile(task, arc, project_id, "pathways", "w") as pf:
			write_line(pf, "PROJECT_ID", "PATHWAY_ID", "GENE_COUNT", "FM_ZSCORE", "FM_PVALUE", "FM_QVALUE",
						"SAMPLE_FREQ", "SAMPLE_TOTAL", "SAMPLE_PROP", "GENE_FREQ", "GENE_TOTAL", "GENE_PROP")

			for pathway in projdb.pathways(join_rec=True):
				# Only pathways altered in at least one sample are exported
				if pathway.rec.sample_freq is not None and pathway.rec.sample_freq > 0:
					write_line(pf, project_id, pathway.id, pathway.gene_count, pathway.fm_zscore, pathway.fm_pvalue, pathway.fm_qvalue,
								pathway.rec.sample_freq or 0, total_samples, pathway.rec.sample_prop or 0,
								pathway.rec.gene_freq or 0, pathway.gene_count, pathway.rec.gene_prop or 0)

		if not config.skip_oncodrivefm:

			log.info("  Genes per sample functional impact ...")

			with ArcFile(task, arc, project_id, "fimpact.gitools.tdm", "w") as f:
				write_line(f, "SAMPLE", "GENE_ID",
							"SIFT_SCORE", "SIFT_TRANSFIC", "SIFT_TRANSFIC_CLASS",
							"PPH2_SCORE", "PPH2_TRANSFIC", "PPH2_TRANSFIC_CLASS",
							"MA_SCORE", "MA_TRANSFIC", "MA_TRANSFIC_CLASS")
				for fields in projdb.sample_gene_fimpacts():
					(gene, sample,
						sift_score, sift_tfic, sift_tfic_class,
						pph2_score, pph2_tfic, pph2_tfic_class,
						ma_score, ma_tfic, ma_tfic_class) = fields
					# The query yields (gene, sample, ...) but this file is
					# written sample first.
					write_line(f, sample, gene,
								sift_score, sift_tfic, TransFIC.class_name(sift_tfic_class),
								pph2_score, pph2_tfic, TransFIC.class_name(pph2_tfic_class),
								ma_score, ma_tfic, TransFIC.class_name(ma_tfic_class))

		log.info("Saving project configuration ...")

		with ArcFile(task, arc, project_id, "project", "w") as f:
			names = ["PROJECT_ID", "ASSEMBLY", "SAMPLES_TOTAL"]
			values = [project_id, project["assembly"], total_samples]
			names, values = projres.get_annotations_to_save(config.project.annotations, project["annotations"], names=names, values=values)
			tsv.write_line(f, *names)
			tsv.write_line(f, *values, null_value="-")
	finally:
		if arc is not None:
			arc.close()
		if projdb is not None:
			projdb.close()
		sigdb.close()
def oncodriveclust(project):
	"""
	Compute the OncodriveCLUST quality indicators of a project and store them
	as its "oncodriveclust" quality control data.

	Genes and samples are numbered in order of first appearance among the
	project consequences. The indicators describe how many of them remain
	after each step: source, synonymous, selected (consequence types matching
	so.ONCODRIVECLUST), filter (genes accepted by the OncodriveCLUST filter)
	and threshold (genes with at least oclust.samples_threshold samples),
	plus the number of threshold genes under several q-value cutoffs.

	:param project: project definition; "id", "conf" and "db" are read here
	"""
	log = task.logger
	conf = task.conf

	log.info("--- [{0}] --------------------------------------------".format(project["id"]))

	config = GlobalConfig(conf)
	paths = PathsConfig(config) # avoid that project conf override path configurations
	config = GlobalConfig(conf, project["conf"])

	oclust = OncodriveClust(config.oncodriveclust, paths, log)

	source_genes = {} # gene id -> gene index
	syn_genes = set()
	selected_genes = set()
	filter_genes = set()
	threshold_genes = set()

	source_samples = {} # sample name -> sample index
	selected_samples = set()
	filter_samples = set()
	threshold_samples = set()

	selected_gene_sample_count = {} # number of samples for each selected gene
	filter_gene_sample_count = {} # number of samples per each gene passing the filter

	log.info("Retrieving gene alterations ...")

	data = set() # distinct (gene id, sample index) pairs of selected consequences

	projdb = ProjectDb(project["db"])
	try:
		for csq in projdb.consequences(join_samples=True):
			is_selected = so.match(csq.ctypes, so.ONCODRIVECLUST)
			is_synonymous = so.match(csq.ctypes, so.SYNONYMOUS)

			# Look the index up on every iteration: assigning it only for
			# first-seen genes would reuse the index of a previous gene.
			gene_index = source_genes.setdefault(csq.gene, len(source_genes))

			if is_selected:
				selected_genes.add(gene_index)

			if is_synonymous:
				syn_genes.add(gene_index)

			for sample in csq.var.samples:
				# Same as above: never reuse the index of a previous sample
				sample_index = source_samples.setdefault(sample.name, len(source_samples))

				if is_selected:
					selected_samples.add(sample_index)
					data.add((csq.gene, sample_index))
	finally:
		projdb.close()

	log.info("Counting selected, filtered and threshold ...")

	# calculate selected and filter counts

	data2 = set() # (gene index, sample index) pairs passing the filter

	for gene, sample_index in data:
		gene_index = source_genes[gene]
		selected_gene_sample_count[gene_index] = selected_gene_sample_count.get(gene_index, 0) + 1

		if oclust.filter.valid(gene):
			data2.add((gene_index, sample_index))
			filter_genes.add(gene_index)
			filter_samples.add(sample_index)
			filter_gene_sample_count[gene_index] = filter_gene_sample_count.get(gene_index, 0) + 1

	# calculate threshold counts

	for gene_index, sample_index in data2:
		if selected_gene_sample_count[gene_index] >= oclust.samples_threshold:
			threshold_genes.add(gene_index)
			threshold_samples.add(sample_index)

	log.info("Counting significant genes ...")

	# significance of q-values: sig_count[j] is the number of threshold genes
	# whose q-value is not greater than sig_thresholds[j]

	sig_thresholds = [0.0, 0.001, 0.005] + [i / 100.0 for i in range(1, 11)] + [1.0]
	sig_count = [0] * len(sig_thresholds)

	projdb = ProjectDb(project["db"])
	try:
		for gene in projdb.genes():
			if gene.id in source_genes and source_genes[gene.id] in threshold_genes:
				# NOTE(review): this reads the OncodriveFM q-value although the
				# indicators are for OncodriveCLUST; confirm whether
				# gene.clust_qvalue was intended.
				i = 0
				while i < len(sig_thresholds) and gene.fm_qvalue > sig_thresholds[i]:
					i += 1

				for j in range(i, len(sig_count)):
					sig_count[j] += 1
	finally:
		projdb.close()

	source_genes_count = len(source_genes)
	syn_genes_count = len(syn_genes)
	selected_genes_count = len(selected_genes)
	filter_genes_count = len(filter_genes)
	threshold_genes_count = len(threshold_genes)

	source_samples_count = len(source_samples)
	selected_samples_count = len(selected_samples)
	filter_samples_count = len(filter_samples)
	threshold_samples_count = len(threshold_samples)

	sorted_filter_genes = sorted(filter_genes, reverse=True, key=lambda gi: filter_gene_sample_count[gi])

	source_samples_lost_count = max(0, source_samples_count - threshold_samples_count)

	qc_data = dict(
			source=dict(
				genes=sorted(source_genes.keys(), key=lambda k: source_genes[k]),
				genes_count=source_genes_count,
				genes_lost_count=max(0, source_genes_count - syn_genes_count - threshold_genes_count),
				samples=sorted(source_samples.keys(), key=lambda k: source_samples[k]),
				samples_count=source_samples_count,
				samples_lost_count=source_samples_lost_count),
			# A misplaced parenthesis used to put this value at the top level
			# instead of inside "source"; the top level key is kept so that
			# existing readers continue to work.
			samples_lost_count=source_samples_lost_count,
			synonymous=dict(
				genes=sorted(syn_genes),
				genes_count=syn_genes_count,
				ratio=(float(syn_genes_count) / selected_genes_count) if selected_genes_count > 0 else 0),
			selected=dict(
				genes=sorted(selected_genes),
				genes_count=selected_genes_count,
				genes_lost=sorted(set(source_genes.values()) - syn_genes - selected_genes),
				genes_lost_count=max(0, source_genes_count - syn_genes_count - selected_genes_count),
				samples=sorted(selected_samples),
				samples_count=selected_samples_count,
				samples_lost=sorted(set(source_samples.values()) - selected_samples),
				samples_lost_count=max(0, source_samples_count - selected_samples_count)),
			filter=dict(
				genes=sorted_filter_genes,
				genes_count=filter_genes_count,
				genes_lost=sorted(selected_genes - filter_genes),
				genes_lost_count=max(0, selected_genes_count - filter_genes_count),
				genes_sample_count=[filter_gene_sample_count[gene_index] for gene_index in sorted_filter_genes],
				samples=sorted(filter_samples),
				samples_count=filter_samples_count,
				samples_lost=sorted(selected_samples - filter_samples),
				samples_lost_count=max(0, selected_samples_count - filter_samples_count)),
			threshold=dict(
				genes=sorted(threshold_genes),
				genes_count=threshold_genes_count,
				genes_lost=sorted(filter_genes - threshold_genes),
				genes_lost_count=max(0, filter_genes_count - threshold_genes_count),
				samples=sorted(threshold_samples),
				samples_count=threshold_samples_count,
				samples_threshold=oclust.samples_threshold,
				samples_lost=sorted(filter_samples - threshold_samples),
				samples_lost_count=max(0, filter_samples_count - threshold_samples_count)),
			results=dict(
				# the leading 0.0 cutoff is not reported
				sig_thresholds=sig_thresholds[1:],
				sig_count=sig_count[1:])
			)

	project_results = ProjectResults(project)
	project_results.save_quality_control("oncodriveclust", qc_data)
def init_project(logger, config, paths, storage, project):
	"""
	Prepare a project for processing and save its definition.

	Optionally purges previous results, creates the project folders,
	normalizes the project definition (legacy top level keys are moved into
	"annotations" or "conf"), fills in "path", "temp_path", "db" and the
	default assembly, and checks that every referenced storage object exists.

	:param logger: logger used for progress messages
	:param config: global configuration; config.results.purge_on_start is read
	:param paths: paths configuration used to locate the project folders
	:param storage: storage used to purge and to check the referenced objects
	:param project: project definition element; it is modified in place
	:return: the project definition converted with to_native()
	:raises InternalError: if some referenced storage objects do not exist
	"""
	project_id = project["id"]

	results_path = paths.results_path()
	project_path = paths.project_path(project_id)
	project_temp_path = paths.project_temp_path(project_path)

	if config.results.purge_on_start:
		logger.info("  Purging previous results ...")
		if os.path.isdir(project_path):
			logger.info("    {} ...".format(os.path.relpath(project_path, results_path)))
			shutil.rmtree(project_path)

		# NOTE(review): this deletes every object listed under "results/",
		# not only the objects of this project, and re-prefixes the listed
		# names with "results/" -- confirm both against the storage API.
		for obj_name in storage.list_objects(prefix="results/"):
			logger.info("    {} ...".format(obj_name))
			storage.delete_object("results/{}".format(obj_name))

	ensure_path_exists(project_path)
	ensure_path_exists(project_temp_path)

	projdb_path = os.path.join(project_path, "project.db")

	if "annotations" in project:
		annotations = project["annotations"]
		if not Data.is_element(annotations):
			logger.warn("Overriding project annotations field with an empty dictionary")
			project["annotations"] = annotations = Data.element()
	else:
		project["annotations"] = annotations = Data.element()

	# for backward compatibility: unknown top level keys become annotations
	known_keys = ("id", "assembly", "files", "storage_objects", "annotations", "conf", "oncodriveclust", "oncodrivefm")
	# iterate over a snapshot of the keys because entries are deleted in the loop
	for key in list(project.keys()):
		if key not in known_keys:
			value = project[key]
			del project[key]
			annotations[key] = value

	project["conf"] = pconf = project.get("conf") or Data.element()
	if not Data.is_element(pconf):
		logger.warn("Overriding project conf field with an empty dictionary")
		project["conf"] = pconf = Data.element()

	# for backward compatibility: method settings move into the project conf
	for key in list(project.keys()):
		if key in ("oncodriveclust", "oncodrivefm"):
			value = project[key]
			del project[key]
			pconf[key] = value

	project["path"] = project_path
	project["temp_path"] = project_temp_path
	project["db"] = projdb_path

	if "assembly" not in project:
		project["assembly"] = DEFAULT_ASSEMBLY

	missing_objects = [obj_name for obj_name in project["storage_objects"]
						if not storage.exists_object(obj_name)]

	if len(missing_objects) > 0:
		raise InternalError("Project {0} references some missing objects:\n{1}".format(project_id, "\n".join(missing_objects)))

	project["files"] = [str(f) for f in project["files"]] #unicode is not json serializable
	project["storage_objects"] = [str(f) for f in project["storage_objects"]] #unicode is not json serializable

	project = project.to_native()

	# save project.conf
	projres = ProjectResults(project)
	projres.save_def()

	return project
Exemple #9
0
def create_datasets(project):
	"""
	Export the project results as tabular datasets and forward the project
	through the "projects_out" port.

	Datasets are written under the project results path: variant genes,
	variant samples, consequences, genes, pathways, the per sample functional
	impact of genes (unless config.skip_oncodrivefm is set) and the project
	description. Missing values are written as a backslash followed by N.

	:param project: project definition; "id", "path", "db", "assembly" and
		"annotations" are read here
	"""
	log = task.logger

	config = GlobalConfig(task.conf)
	paths = PathsConfig(config)

	project_id = project["id"]

	log.info("--- [{0}] --------------------------------------------".format(project_id))

	project_path = project["path"]

	datasets_path = paths.project_results_path(project_path)
	ensure_path_exists(datasets_path)

	# Backslash + N marks a missing value. The backslash is escaped because
	# "\N" on its own is a syntax error in Python 3 string literals, while
	# both spellings give the same two characters in Python 2.
	null_value = "\\N"

	sigdb = SigDb(config.sigdb_path)
	sigdb.open()

	projdb = None
	try:
		projdb = ProjectDb(project["db"])

		gene_sym = projdb.get_gene_symbols()

		total_samples = projdb.get_total_affected_samples()

		log.info("Exporting variant genes ...")

		with open_dataset(project_id, project_path, datasets_path, "variant_gene", "w", log) as vf:
			tsv.write_param(vf, "SAMPLE_TOTAL", total_samples)
			tsv.write_line(vf, "VAR_ID", "CHR", "STRAND", "START", "ALLELE",
							"GENE_ID", "IMPACT", "IMPACT_CLASS",
							"SAMPLE_FREQ", "SAMPLE_PROP",
							"CODING_REGION", "PROTEIN_CHANGES", "INTOGEN_DRIVER", "XREFS")

			with open_dataset(project_id, project_path, datasets_path, "variant-samples", "w", log) as sf:
				tsv.write_line(sf, "VAR_ID", "CHR", "STRAND", "START", "ALLELE", "SAMPLE")

				count = 0
				for afg in projdb.affected_genes(join_variant=True, join_samples=True, join_xrefs=True, join_rec=True):
					var = afg.var
					rec = afg.rec

					start, end, ref, alt = var_to_tab(var)

					allele = "{0}/{1}".format(ref, alt)

					# Variants known to the significance database get an extra xref
					xrefs = list(var.xrefs)
					if sigdb.exists_variant(var.chr, start):
						xrefs += ["I:1"]
					xrefs = ",".join(xrefs)

					intogen_driver = 1 if sigdb.exists_gene(afg.gene_id) else 0

					tsv.write_line(vf, var.id, var.chr, var.strand, start, allele,
									afg.gene_id, afg.impact, TransFIC.class_name(afg.impact),
									rec.sample_freq, rec.sample_prop,
									afg.coding_region, afg.prot_changes, intogen_driver, xrefs, null_value=null_value)

					for sample in var.samples:
						tsv.write_line(sf, var.id, var.chr, var.strand, start, allele, sample.name, null_value=null_value)

					count += 1

		log.info("  {0} variant genes".format(count))

		log.info("Exporting consequences ...")

		with open_dataset(project_id, project_path, datasets_path, "consequence", "w", log) as cf:
			tsv.write_line(cf, "VAR_ID", "CHR", "STRAND", "START", "ALLELE", "TRANSCRIPT_ID",
							"CT", "GENE_ID", "SYMBOL", "UNIPROT_ID", "PROTEIN_ID", "PROTEIN_POS", "AA_CHANGE",
							"SIFT_SCORE", "SIFT_TRANSFIC", "SIFT_TRANSFIC_CLASS",
							"PPH2_SCORE", "PPH2_TRANSFIC", "PPH2_TRANSFIC_CLASS",
							"MA_SCORE", "MA_TRANSFIC", "MA_TRANSFIC_CLASS",
							"IMPACT", "IMPACT_CLASS")

			count = 0
			for csq in projdb.consequences(join_variant=True):
				var = csq.var
				start, end, ref, alt = var_to_tab(var)

				allele = "{0}/{1}".format(ref, alt)

				# Protein and score columns stay empty unless the consequence
				# types match the corresponding term sets below.
				uniprot = protein = protein_pos = aa_change = None
				sift_score = sift_tfic = sift_tfic_class = None
				pph2_score = pph2_tfic = pph2_tfic_class = None
				ma_score = ma_tfic = ma_tfic_class = None

				if so.match(csq.ctypes, so.ONCODRIVEFM):
					uniprot, protein = csq.uniprot, csq.protein

				if so.match(csq.ctypes, so.NON_SYNONYMOUS):
					protein_pos, aa_change = csq.protein_pos, csq.aa_change
					sift_score, sift_tfic, sift_tfic_class = csq.sift_score, csq.sift_tfic, TransFIC.class_name(csq.sift_tfic_class)
					pph2_score, pph2_tfic, pph2_tfic_class = csq.pph2_score, csq.pph2_tfic, TransFIC.class_name(csq.pph2_tfic_class)
					ma_score, ma_tfic, ma_tfic_class = csq.ma_score, csq.ma_tfic, TransFIC.class_name(csq.ma_tfic_class)

				tsv.write_line(cf, var.id, var.chr, var.strand, start, allele, csq.transcript,
								",".join(csq.ctypes), csq.gene, gene_sym.get(csq.gene),
								uniprot, protein, protein_pos, aa_change,
								sift_score, sift_tfic, sift_tfic_class,
								pph2_score, pph2_tfic, pph2_tfic_class,
								ma_score, ma_tfic, ma_tfic_class,
								csq.impact, TransFIC.class_name(csq.impact), null_value=null_value)
				count += 1

		log.info("  {0} consequences".format(count))

		log.info("Exporting genes ...")

		with open_dataset(project_id, project_path, datasets_path, "gene", "w", log) as gf:
			tsv.write_param(gf, "SAMPLE_TOTAL", total_samples)
			tsv.write_line(gf, "GENE_ID", "FM_PVALUE", "FM_QVALUE", "FM_EXC_CAUSE",
							"CLUST_ZSCORE", "CLUST_PVALUE", "CLUST_QVALUE", "CLUST_EXC_CAUSE", "CLUST_COORDS",
							"SAMPLE_FREQ", "SAMPLE_PROP", "INTOGEN_DRIVER")

			for gene in projdb.genes(join_rec=True):
				rec = gene.rec

				# Only genes altered in at least one sample are exported
				if rec.sample_freq is None or rec.sample_freq == 0:
					continue

				intogen_driver = 1 if sigdb.exists_gene(gene.id) else 0

				tsv.write_line(gf, gene.id, gene.fm_pvalue, gene.fm_qvalue, gene.fm_exc_cause,
								gene.clust_zscore, gene.clust_pvalue, gene.clust_qvalue, gene.clust_exc_cause, gene.clust_coords,
								rec.sample_freq or 0, rec.sample_prop or 0,
								intogen_driver, null_value=null_value)

		log.info("Exporting pathways ...")

		with open_dataset(project_id, project_path, datasets_path, "pathway", "w", log) as pf:
			tsv.write_param(pf, "SAMPLE_TOTAL", total_samples)
			tsv.write_line(pf, "PATHWAY_ID", "GENE_COUNT", "FM_ZSCORE", "FM_PVALUE", "FM_QVALUE",
							"SAMPLE_FREQ", "SAMPLE_PROP", "GENE_FREQ", "GENE_TOTAL", "GENE_PROP")

			for pathway in projdb.pathways(join_rec=True):
				rec = pathway.rec

				# Only pathways altered in at least one sample are exported
				if rec.sample_freq is None or rec.sample_freq == 0:
					continue

				tsv.write_line(pf, pathway.id, pathway.gene_count, pathway.fm_zscore, pathway.fm_pvalue, pathway.fm_qvalue,
								rec.sample_freq or 0, rec.sample_prop or 0, rec.gene_freq or 0, pathway.gene_count, rec.gene_prop or 0, null_value=null_value)

		if not config.skip_oncodrivefm:

			log.info("Exporting genes per sample functional impact ...")

			with open_dataset(project_id, project_path, datasets_path, "gene_sample-fimpact", "w", log) as f:
				tsv.write_line(f, "GENE_ID", "SAMPLE",
								"SIFT_SCORE", "SIFT_TRANSFIC", "SIFT_TRANSFIC_CLASS",
								"PPH2_SCORE", "PPH2_TRANSFIC", "PPH2_TRANSFIC_CLASS",
								"MA_SCORE", "MA_TRANSFIC", "MA_TRANSFIC_CLASS")

				for fields in projdb.sample_gene_fimpacts():
					(gene, sample,
						sift_score, sift_tfic, sift_tfic_class,
						pph2_score, pph2_tfic, pph2_tfic_class,
						ma_score, ma_tfic, ma_tfic_class) = fields
					tsv.write_line(f, gene, sample,
									sift_score, sift_tfic, TransFIC.class_name(sift_tfic_class),
									pph2_score, pph2_tfic, TransFIC.class_name(pph2_tfic_class),
									ma_score, ma_tfic, TransFIC.class_name(ma_tfic_class), null_value=null_value)
	finally:
		# Release both databases even when an export step fails
		if projdb is not None:
			projdb.close()
		sigdb.close()

	log.info("Saving project configuration ...")

	projres = ProjectResults(project)

	with open_dataset(project_id, project_path, datasets_path, "project.tsv", "w", log) as f:
		names = ["ASSEMBLY", "SAMPLES_TOTAL"]
		values = [project["assembly"], total_samples]
		names, values = projres.get_annotations_to_save(config.project.annotations, project["annotations"], names=names, values=values)
		tsv.write_line(f, *names)
		tsv.write_line(f, *values, null_value=null_value)

	projects_port = task.ports("projects_out")
	projects_port.send(project)