def update_db(project):
    """Load one project's OncodriveCLUST output into its project database.

    The ``"oncodriveclust"`` entry is read and then removed from *project*
    (the mapping is mutated in place). If the results file it points to does
    not exist, a warning is logged and the function returns without touching
    the database and without forwarding the project.

    Otherwise the excluded-gene causes file found under the project's
    temporary path is applied first, then the OncodriveCLUST results, the
    changes are committed, and the project is sent through the
    ``"projects_out"`` port.

    Args:
        project: Mapping with the keys ``"id"``, ``"oncodriveclust"`` (itself
            a mapping with a ``"results"`` path), ``"db"`` and
            ``"temp_path"``.
    """
    log = task.logger
    projects_out_port = task.ports("projects_out")

    project_id = project["id"]
    log.info("--- [{0}] --------------------------------------------".format(project_id))

    # The OncodriveCLUST entry is consumed here and stripped from the project
    # before it is forwarded downstream.
    oclust = project["oncodriveclust"]
    del project["oncodriveclust"]

    if not os.path.exists(oclust["results"]):
        # NOTE(review): if task.logger is a standard logging.Logger, warn() is
        # a deprecated alias of warning() -- confirm before switching.
        log.warn("No results have been found. Skipping it.")
        return

    log.info("Updating the project database ...")

    projdb = ProjectDb(project["db"])
    try:
        exc_path = os.path.join(project["temp_path"], "oncodriveclust-excluded-cause.tsv")

        log.info(" Excluded gene causes ...")
        log.debug(" > {0}".format(exc_path))

        count = 0
        with tsv.open(exc_path, "r") as exf:
            for gene, cause in tsv.lines(exf, (str, str), header=True):
                projdb.update_gene(Gene(id=gene, clust_exc_cause=cause))
                count += 1

        log.debug(" {0} genes excluded".format(count))

        log.info(" OncodriveCLUST results ...")

        with tsv.open(oclust["results"], "r") as f:
            types = (str, str, float, float, float)
            columns = ("GENE", "CLUST_COORDS", "ZSCORE", "PVALUE", "QVALUE")
            rows = tsv.lines(f, types, columns=columns, header=True, null_value="NA")
            for gene, coords, zscore, pvalue, qvalue in rows:
                # Genes that do have results get their exclusion cause set to
                # ProjectDb.NO_GENE_EXC.
                projdb.update_gene(
                    Gene(
                        id=gene,
                        clust_coords=coords,
                        clust_zscore=zscore,
                        clust_pvalue=pvalue,
                        clust_qvalue=qvalue,
                        clust_exc_cause=ProjectDb.NO_GENE_EXC,
                    )
                )

        projdb.commit()
    finally:
        # Release the database handle even when reading or updating fails;
        # nothing is committed in that case.
        projdb.close()

    projects_out_port.send(project)
def end():
    """Write the OncodriveFM results of every project into its database.

    Iterates over ``task.context``, which maps a project id to a list of
    project mappings. Each project in a list is applied to the database named
    by its own ``"db"`` entry; the sample/gene functional impact table is
    reloaded only for the first project of each list. After a list has been
    processed, its first project is sent through the ``"projects_out"`` port.

    The ``"oncodrivefm"`` entry is removed from every processed project (the
    mappings are mutated in place).
    """
    log = task.logger
    projects_out_port = task.ports("projects_out")

    log.info("Updating the projects database ...")

    for project_id, projects in task.context.items():
        log.info("[{0}]".format(project_id))

        for index, project in enumerate(projects):
            projdb = ProjectDb(project["db"])
            try:
                if index == 0:
                    _reload_sample_gene_fimpact(log, projdb, project)

                # The OncodriveFM entry is consumed here and stripped from
                # the project before it is forwarded downstream.
                ofm = project["oncodrivefm"]
                del project["oncodrivefm"]

                _update_fm_excluded_genes(log, projdb, project)

                for feature, results_path in ofm:
                    _update_fm_feature(log, projdb, feature, results_path)

                projdb.commit()
            finally:
                # Release the database handle even when reading or updating
                # fails; nothing is committed in that case.
                projdb.close()

        projects_out_port.send(projects[0])


def _reload_sample_gene_fimpact(log, projdb, project):
    """Replace the stored sample/gene functional impact rows with the project's file."""
    log.info(" Functional impact ...")

    projdb.delete_sample_gene_fimpact()

    with tsv.open(project["sample_gene_fi_data"], "r") as f:
        types = (int, str, float, float, int, float, float, int, float, float, int)
        for fields in tsv.lines(f, types, header=True, null_value="-"):
            projdb.add_sample_gene_fimpact(*fields)


def _update_fm_excluded_genes(log, projdb, project):
    """Record the exclusion cause of every gene listed in the project's excluded-cause file."""
    exc_path = os.path.join(project["temp_path"], "oncodrivefm-excluded-cause.tsv")

    log.info(" Excluded gene causes ...")
    log.debug(" > {0}".format(exc_path))

    count = 0
    with tsv.open(exc_path, "r") as exf:
        for gene, cause in tsv.lines(exf, (str, str), header=True):
            projdb.update_gene(Gene(id=gene, fm_exc_cause=cause))
            count += 1

    log.debug(" {0} genes excluded".format(count))


def _update_fm_feature(log, projdb, feature, results_path):
    """Apply one results file to the database; only "genes" and "pathways" are loaded."""
    log.info(" {0} ...".format(feature))
    log.debug(" > {0}".format(results_path))

    if feature == "genes":
        with tsv.open(results_path, "r") as f:
            count = 0
            for gene, pvalue, qvalue in tsv.lines(f, (str, float, float), header=True):
                # Genes that do have results get their exclusion cause set to
                # ProjectDb.NO_GENE_EXC.
                projdb.update_gene(
                    Gene(id=gene, fm_pvalue=pvalue, fm_qvalue=qvalue, fm_exc_cause=ProjectDb.NO_GENE_EXC)
                )
                count += 1
            log.info(" {0} genes".format(count))
    elif feature == "pathways":
        with tsv.open(results_path, "r") as f:
            count = 0
            for pathway, zscore, pvalue, qvalue in tsv.lines(f, (str, float, float, float), header=True):
                projdb.update_pathway(
                    Pathway(id=pathway, fm_zscore=zscore, fm_pvalue=pvalue, fm_qvalue=qvalue)
                )
                count += 1
            log.info(" {0} pathways".format(count))