def __init__(self):
    """Collect the variants files and the project id from the command line.

    Exits the process with -1 when a file is missing, is not a regular
    file, or no project identifier was given.
    """
    RunCommand.__init__(self, args_usage="<variants-file> [<variants-file> ...]", epilog=PROJECT_EPILOG)

    # Resolve every argument to an absolute path and validate it.
    self.variants_files = []
    for path in self.args.files:
        if not os.path.isabs(path):
            path = os.path.join(os.getcwd(), path)
        if not os.path.exists(path):
            self.log.error("Variants file not found: {}".format(path))
            exit(-1)
        if not os.path.isfile(path):
            self.log.error("A file is expected: {}".format(path))
            exit(-1)
        self.variants_files.append(path)

    # The project identifier is mandatory.
    if self.args.project_id is None:
        self.log.error("Project identifier not specified.")
        exit(-1)
    self.project_id = normalize_id(self.args.project_id)
def datasets(projects_set):
    """Append a summary row for a classifier group to the combination TSV.

    Counts the affected samples over all projects of the group and writes
    (name, short name, long name, sample total, project ids) to
    ``<combination_path>/<prefix>.tsv``, creating the header first if the
    file does not exist yet.
    """
    log = task.logger

    config = GlobalConfig(task.conf)
    paths = PathsConfig(config)

    classifier, projects = projects_set

    # Group metadata; the lookups also validate the classifier dict.
    classifier_id = classifier["id"]
    group_values = classifier["group_values"]
    short_values = classifier["group_short_values"]
    long_values = classifier["group_long_values"]
    group_name = classifier["group_name"]
    group_short_name = classifier["group_short_name"]
    group_long_name = classifier["group_long_name"]

    group_file_prefix = normalize_id(classifier_id)

    log.info("--- [{0} ({1}) ({2}) ({3})] {4}".format(
        classifier["name"], group_long_name, group_short_name, group_name, "-" * 30))

    log.info("Reading number of samples per project ...")

    project_ids = []
    total_samples = 0
    for proj in projects:
        project_ids.append(proj["id"])
        log.info(" Project {0}".format(proj["id"]))
        projdb = ProjectDb(proj["db"])
        num_samples = projdb.get_total_affected_samples()
        total_samples += num_samples
        log.debug(" {0} samples".format(num_samples))
        projdb.close()

    log.debug(" {0} samples in total".format(total_samples))

    log.info("Updating ...")

    path = os.path.join(paths.combination_path(), "{0}.tsv".format(group_file_prefix))

    # First writer creates the file with its header line.
    if not os.path.exists(path):
        with open(path, "w") as f:
            tsv.write_line(f, "NAME", "SHORT_NAME", "LONG_NAME", "SAMPLES_TOTAL", "PROJECT_IDS")

    with open(path, "a") as f:
        tsv.write_line(f, group_name, group_short_name, group_long_name,
                       total_samples, ",".join(project_ids))
def __init__(self, args_usage="", epilog="", logger_name=None):
    """Initialize the command: install signal handlers and build the
    engine/case configuration from the parsed command-line arguments."""
    Command.__init__(self, args_usage, epilog, logger_name)

    # Route Ctrl-C and SIGTERM through the shared interrupt handler.
    for sig in (signal.SIGINT, signal.SIGTERM):
        signal.signal(sig, keyboardinterrupt_handler)

    # Engine-level configuration: includes default files and required keys.
    self.engine_conf_args = ConfArgs(
        self.log, self.conf_path,
        self.args.engine_conf_files, self.args.engine_conf_data,
        self.DEFAULT_CONF_FILES, self.DEFAULT_REQUIRED_CONF)
    self.engine_conf_builder = self.engine_conf_args.conf_builder

    # Case-level configuration: only what the user supplied.
    self.case_conf_args = ConfArgs(
        self.log, self.conf_path,
        self.args.case_conf_files, self.args.case_conf_data)
    self.case_conf_builder = self.case_conf_args.conf_builder

    self.workspace = self.args.workspace

    # Case name is normalized when present.
    self.case_name = self.args.case_name
    if self.case_name is not None:
        self.case_name = normalize_id(self.case_name)

    # Defaults to 0 when not given on the command line.
    self.max_cores = 0 if self.args.max_cores is None else self.args.max_cores
def execute(self):
    """Scan the given paths for data projects, select them with the
    include/exclude patterns and launch the mutations workflow.

    Exits with -1 on a missing scan path or a duplicated project id.
    """
    # Resolve scan paths from arguments into absolute, existing paths.
    scan_paths = []
    for scan_path in self.args.paths:
        if not os.path.isabs(scan_path):
            scan_path = os.path.join(os.getcwd(), scan_path)
        if not os.path.exists(scan_path):
            self.log.error("Path not found: {}".format(scan_path))
            exit(-1)
        scan_paths.append(scan_path)

    def gather_patterns(literals, regexes, from_files):
        # Build one regex list from literal ids (escaped and anchored),
        # raw regexes, and pattern files (one id per line, '#' comments).
        patterns = []
        if literals is not None:
            patterns += ["^{0}$".format(re.escape(lit)) for lit in literals]
        if regexes is not None:
            patterns += list(regexes)
        if from_files is not None:
            for list_path in from_files:
                with open(list_path, "r") as pf:
                    for line in pf:
                        line = line.strip()
                        if line.startswith("#") or len(line) == 0:
                            continue
                        patterns.append("^{0}$".format(re.escape(line)))
        return patterns

    includes = gather_patterns(self.args.include, self.args.include_regex, self.args.include_from)
    if len(includes) == 0:
        includes = ["^.*$"]  # no include filter given: match everything

    excludes = gather_patterns(self.args.exclude, self.args.exclude_regex, self.args.exclude_from)

    # Compile the regular expressions once, outside the scan loop.
    includes = [re.compile(inc) for inc in includes]
    excludes = [re.compile(exc) for exc in excludes]

    # Scan paths looking for project definitions.
    self.projects = []
    project_ids = set()
    file_object = {}
    self.log.info("Looking for data projects ...")
    for scan_path in scan_paths:
        for path, project in list_projects(self.log, scan_path):
            if "id" not in project:
                self.log.warn("Discarding project missing 'id': {0}".format(path))
                continue
            if "files" not in project:
                self.log.warn("Discarding project missing 'files': {0}".format(path))
                continue

            project["id"] = normalize_id(project["id"])
            project_id = project["id"]
            if "name" in project:
                project_name = ": " + project["name"]
            else:
                project_name = ""

            if match_id(project_id, includes) and not match_id(project_id, excludes):
                if project_id in project_ids:
                    self.log.error("Duplicated project id at {0}".format(path))
                    exit(-1)
                # BUGFIX: the id was never recorded, so the duplicate check
                # above could never trigger.
                project_ids.add(project_id)
                self.log.info(" {0}{1} (included)".format(project_id, project_name))
                project = init_project_files(project, os.path.dirname(path), file_object)
                self.projects += [project]
            else:
                self.log.info(" {0}{1} (excluded)".format(project_id, project_name))

    # Create the wok engine and the workflow instance
    self.case_conf_builder.add_value("projects", self.projects)

    self._wok_run(MUTATIONS_FLOW_NAME,
                  container="{}-{}".format(self.user_id, self.workspace))
def run(type):
    """Render the analysis form (GET) or validate it and start a new
    analysis case (POST).

    :param type: COHORT_ANALYSIS or SINGLE_TUMOR_ANALYSIS; anything else
                 is rejected with HTTP 400.
    """
    if type not in [COHORT_ANALYSIS, SINGLE_TUMOR_ANALYSIS]:
        abort(400)

    if request.method == "GET":
        # Pre-fill the form with the default thresholds.
        form = dict(
            ofm_genes_threshold=ONCODRIVEFM_GENES_THRESHOLD,
            ofm_pathways_threshold=ONCODRIVEFM_PATHWAYS_THRESHOLD,
            oclust_genes_threshold=ONCODRIVECLUST_MUTATIONS_THRESHOLD)
        return render_template("analysis.html", type=type, form=form)

    # Enforce the per-user limit on simultaneous analyses.
    if current_app.wok.cases_count(current_user) >= current_app.config.get("LIMIT_NUM_CASES", 100):
        flash("""There is a limit on the number of simultaneous analysis that can be managed. You must remove finished analysis before running new ones.""", "error")
        return redirect(url_for("cases.index"))

    mutations_file = request.files['mutations_file']
    file_name = os.path.basename(mutations_file.filename)

    # Default the project name to the mutations file name (sans extension),
    # then normalize and de-duplicate it.
    project_id = request.form['project_name']
    if len(project_id) == 0:
        project_id = os.path.splitext(file_name)[0]
    project_id = unique_project_id(normalize_id(project_id))

    cb = ConfigBuilder()
    cb.add_value("user_id", current_user.nick)
    cb.add_value("workspace", DEFAULT_WORKSPACE)
    cb.add_value("project.id", project_id)

    results_path, project_path, project_temp_path = get_paths(project_id)

    if not current_user.is_anonymous():
        cb.add_value("website.user_id", current_user.nick)

    if type == SINGLE_TUMOR_ANALYSIS:
        # Single-tumor analyses only report variants; driver analyses are skipped.
        cb.add_value("variants_only", True)
        cb.add_value("skip_oncodrivefm", True)
        cb.add_value("skip_oncodriveclust", True)

    # Optional thresholds: missing/invalid values silently keep the defaults;
    # only cohort analyses log the bad input.
    try:
        threshold = request.form["ofm_genes_threshold"]
        if re.match(r"^[1-9]\d*%?$", threshold):
            cb.add_value(ONCODRIVEFM_GENES_THRESHOLD_KEY, threshold)
    except Exception:
        if type == COHORT_ANALYSIS:
            current_app.logger.warn("[{}] Wrong form input: {}={}".format(
                current_user.nick, "ofm_genes_threshold", request.form.get("ofm_genes_threshold")))

    try:
        threshold = request.form["ofm_pathways_threshold"]
        if re.match(r"^[1-9]\d*%?$", threshold):
            cb.add_value(ONCODRIVEFM_PATHWAYS_THRESHOLD_KEY, threshold)
    except Exception:
        if type == COHORT_ANALYSIS:
            # was "reuqest.form.get(...)": a NameError raised from inside
            # this handler whenever the input was missing or invalid.
            current_app.logger.warn("[{}] Wrong form input: {}={}".format(
                current_user.nick, "ofm_pathways_threshold", request.form.get("ofm_pathways_threshold")))

    try:
        threshold = int(request.form["oclust_genes_threshold"])
        if threshold >= 1:
            cb.add_value(ONCODRIVECLUST_GENES_THRESHOLD_KEY, threshold)
    except Exception:
        if type == COHORT_ANALYSIS:
            current_app.logger.warn("[{}] Wrong form input: {}={}".format(
                current_user.nick, "oclust_genes_threshold", request.form.get("oclust_genes_threshold")))

    genes_filter_enabled = request.form.get('genes_filter_enabled') == "1"
    cb.add_value(ONCODRIVEFM_FILTER_ENABLED_KEY, genes_filter_enabled)
    cb.add_value(ONCODRIVECLUST_FILTER_ENABLED_KEY, genes_filter_enabled)

    if genes_filter_enabled:
        # Save the uploaded genes filter; an empty upload keeps the defaults.
        try:
            genes_filter_file = request.files['genes_filter_file']
            genes_filter_file_path = os.path.join(project_temp_path, "genes-filter.txt")
            genes_filter_file.save(genes_filter_file_path)
            if os.path.getsize(genes_filter_file_path) != 0:
                cb.add_value(ONCODRIVEFM_GENES_FILTER_KEY, genes_filter_file_path)
                cb.add_value(ONCODRIVECLUST_GENES_FILTER_KEY, genes_filter_file_path)
        except Exception:
            current_app.logger.exception("Error retrieving genes filter from form")

    assembly = request.form.get("assembly", DEFAULT_ASSEMBLY).lower()

    project = dict(
        id=project_id,
        assembly=assembly,
        files=[file_name])

    projects = [init_project_files(project, check_paths=False)]

    cb.add_value("projects", projects)

    properties = dict(
        analysis_type=type,
        path=os.path.relpath(project_path, results_path))

    current_app.logger.info("[{}] Starting analysis {} ...".format(current_user.nick, project_id))

    # Create the case without starting it: the inputs must be uploaded first.
    case = current_app.wok.create_case(current_user, project_id, cb, PROJECT_NAME,
                                       MUTATIONS_FLOW_NAME, properties=properties, start=False)

    engine_case = current_app.wok.engine.case(case.engine_name)

    #TODO use a background thread
    upload_files(current_app.logger, case.engine_name, engine_case.storages, projects,
                 streams=[mutations_file.stream])

    current_app.logger.info("[{}] Analysis {} started on case {}...".format(
        current_user.nick, project_id, case.engine_name))

    engine_case.start()

    return redirect(url_for("cases.index", highlight=case.id))
def combination_recurrences(projects_set):
    """Combine per-project recurrence frequencies for one classifier group.

    Accumulates variant-gene, gene and pathway sample frequencies from every
    project database into a temporary SQLite database, converts them to
    proportions over the total sample count, and dumps the merged results as
    gzipped TSV files under the "recurrences" combination path.
    """
    log = task.logger

    config = GlobalConfig(task.conf)
    paths = PathsConfig(config)

    # A projects set pairs the classifier (group metadata) with its projects.
    classifier, projects = projects_set

    classifier_id = classifier["id"]
    group_values = classifier["group_values"]
    short_values = classifier["group_short_values"]
    long_values = classifier["group_long_values"]
    group_name = classifier["group_name"]
    group_short_name = classifier["group_short_name"]
    group_long_name = classifier["group_long_name"]

    # Output files are named after the classifier and, when the group has
    # values, the short group name.
    if len(group_values) == 0:
        group_file_prefix = classifier_id
    else:
        group_file_prefix = "{0}-{1}".format(classifier_id, group_short_name)
    group_file_prefix = normalize_id(group_file_prefix)

    log.info(
        "--- [{0} ({1}) ({2}) ({3})] {4}".format(
            classifier["name"], group_long_name, group_short_name, group_name, "-" * 30
        )
    )

    log.info("Creating database ...")

    # Temporary working database; removed again at the end of this task.
    db_path = make_temp_file(task, suffix="-{0}.db".format(group_file_prefix))
    log.debug(" > {0}".format(db_path))

    conn = sqlite3.connect(db_path)
    # Row factory gives name-based access to the result columns below.
    conn.row_factory = sqlite3.Row

    create_db(conn)

    log.info("Combining recurrences ...")

    c = conn.cursor()
    sample_total = 0
    project_ids = []
    for project in projects:
        project_ids += [project["id"]]
        log.info(" Project {0}:".format(project["id"]))
        projdb = ProjectDb(project["db"])
        project_sample_total = projdb.get_total_affected_samples()
        sample_total += project_sample_total
        log.info(" Total samples = {0}".format(project_sample_total))
        log.info(" Variant genes ...")
        count = 0
        for afg in projdb.affected_genes(join_variant=True, join_xrefs=True, join_rec=True):
            var = afg.var
            rec = afg.rec
            # A sample frequency is required to accumulate recurrences.
            if rec.sample_freq is None:
                log.warn("Discarding variant gene without sample frequency: {0}".format(repr(afg)))
                continue
            start, end, ref, alt = var_to_tab(var)
            # Insert the variant once; on a uniqueness conflict look up the
            # id it was already stored under.
            try:
                c.execute(
                    "INSERT INTO variants (chr, strand, start, ref, alt, xrefs) VALUES (?,?,?,?,?,?)",
                    (var.chr, var.strand, start, ref, alt, ",".join(var.xrefs)),
                )
                var_id = c.lastrowid
            except sqlite3.IntegrityError:
                c.execute(
                    "SELECT var_id FROM variants WHERE chr=? AND strand=? AND start=? AND ref=? AND alt=?",
                    (var.chr, var.strand, start, ref, alt),
                )
                r = c.fetchone()
                var_id = r[0]
            # Insert the (variant, gene) pair, or accumulate its sample
            # frequency when the pair already exists.
            try:
                c.execute(
                    "INSERT INTO variant_genes (var_id, gene_id, impact, coding_region, prot_changes, sample_freq) VALUES (?,?,?,?,?,?)",
                    (var_id, afg.gene_id, afg.impact, afg.coding_region, afg.prot_changes, rec.sample_freq),
                )
            except sqlite3.IntegrityError:
                c.execute(
                    """UPDATE variant_genes SET sample_freq=sample_freq + ? WHERE var_id=? AND gene_id=?""",
                    (rec.sample_freq, var_id, afg.gene_id),
                )
            count += 1
        log.info(" {0} variant genes".format(count))
        log.info(" Genes ...")
        count = 0
        for gene in projdb.genes(join_xrefs=True, join_rec=True):
            rec = gene.rec
            if rec.sample_freq is None:
                continue
            # Manual upsert of the per-gene sample frequency.
            c.execute("SELECT COUNT(*) FROM genes WHERE gene_id=?", (gene.id,))
            r = c.fetchone()
            if r[0] == 0:
                c.execute("INSERT INTO genes (gene_id, sample_freq) VALUES (?,?)", (gene.id, rec.sample_freq))
            else:
                c.execute("UPDATE genes SET sample_freq=sample_freq + ? WHERE gene_id=?", (rec.sample_freq, gene.id))
            count += 1
        log.info(" {0} genes".format(count))
        log.info(" Pathways ...")
        count = 0
        for pathway in projdb.pathways(join_rec=True):
            rec = pathway.rec
            if rec.sample_freq is None:
                continue
            # Manual upsert of the per-pathway sample frequency.
            c.execute("SELECT COUNT(*) FROM pathways WHERE pathway_id=?", (pathway.id,))
            r = c.fetchone()
            if r[0] == 0:
                c.execute("INSERT INTO pathways (pathway_id, sample_freq) VALUES (?,?)", (pathway.id, rec.sample_freq))
            else:
                c.execute(
                    "UPDATE pathways SET sample_freq=sample_freq + ? WHERE pathway_id=?", (rec.sample_freq, pathway.id)
                )
            count += 1
        log.info(" {0} pathways".format(count))
        projdb.close()

    log.info("Calculating proportions with {0} samples in total among projects ...".format(sample_total))

    if sample_total > 0:
        # Turn accumulated frequencies into proportions of all samples.
        c.execute("UPDATE variant_genes SET sample_prop=CAST(sample_freq AS REAL)/{0}.0".format(sample_total))
        c.execute("UPDATE genes SET sample_prop=CAST(sample_freq AS REAL)/{0}.0".format(sample_total))
        c.execute("UPDATE pathways SET sample_prop=CAST(sample_freq AS REAL)/{0}.0".format(sample_total))

    c.close()
    conn.commit()

    log.info("Saving results ...")

    c = conn.cursor()

    base_path = paths.combination_path("recurrences")

    log.info(" Variant genes ...")

    with tsv.open(os.path.join(base_path, "variant_gene-{0}.tsv.gz".format(group_file_prefix)), "w") as f:
        tsv.write_param(f, "classifier", classifier["id"])
        tsv.write_param(f, "group_id", group_name)
        tsv.write_param(f, "group_short_name", group_short_name)
        tsv.write_param(f, "group_long_name", group_long_name)
        tsv.write_param(f, "projects", ",".join(project_ids))
        tsv.write_param(f, "SAMPLE_TOTAL", sample_total)
        tsv.write_line(
            f,
            "CHR",
            "STRAND",
            "START",
            "ALLELE",
            "GENE_ID",
            "IMPACT",
            "IMPACT_CLASS",
            "SAMPLE_FREQ",
            "SAMPLE_PROP",
            "PROT_CHANGES",
            "XREFS",
        )
        # "chr*1" coerces chr to a number in SQLite so numeric chromosomes
        # sort numerically; the plain "chr" key then orders non-numeric
        # ones (e.g. X, Y) among themselves.
        for r in c.execute(
            "SELECT * FROM variant_genes JOIN variants USING (var_id) ORDER BY chr*1, chr, strand, start, gene_id"
        ):
            strand, ref, alt = r["strand"], r["ref"], r["alt"]
            allele = "{0}/{1}".format(ref, alt)
            tsv.write_line(
                f,
                r["chr"],
                strand,
                r["start"],
                allele,
                r["gene_id"],
                r["impact"],
                TransFIC.class_name(r["impact"]),
                r["sample_freq"],
                r["sample_prop"],
                r["prot_changes"],
                r["xrefs"],
                null_value="-",
            )

    log.info(" Genes ...")

    with tsv.open(os.path.join(base_path, "gene-{0}.tsv.gz".format(group_file_prefix)), "w") as f:
        tsv.write_param(f, "classifier", classifier["id"])
        tsv.write_param(f, "group_id", group_name)
        tsv.write_param(f, "group_short_name", group_short_name)
        tsv.write_param(f, "group_long_name", group_long_name)
        tsv.write_param(f, "projects", ",".join(project_ids))
        tsv.write_param(f, "SAMPLE_TOTAL", sample_total)
        tsv.write_line(f, "GENE_ID", "SAMPLE_FREQ", "SAMPLE_PROP")
        for r in c.execute("SELECT * FROM genes ORDER BY gene_id"):
            tsv.write_line(f, r["gene_id"], r["sample_freq"], r["sample_prop"], null_value="-")

    log.info(" Pathways ...")

    with tsv.open(os.path.join(base_path, "pathway-{0}.tsv.gz".format(group_file_prefix)), "w") as f:
        tsv.write_param(f, "classifier", classifier["id"])
        tsv.write_param(f, "group_id", group_name)
        tsv.write_param(f, "group_short_name", group_short_name)
        tsv.write_param(f, "group_long_name", group_long_name)
        tsv.write_param(f, "projects", ",".join(project_ids))
        tsv.write_param(f, "SAMPLE_TOTAL", sample_total)
        tsv.write_line(f, "PATHWAY_ID", "SAMPLE_FREQ", "SAMPLE_PROP")
        for r in c.execute("SELECT * FROM pathways ORDER BY pathway_id"):
            tsv.write_line(f, r["pathway_id"], r["sample_freq"], r["sample_prop"], null_value="-")

    conn.close()

    remove_temp(task, db_path)
def run(type):
    """Render the analysis form (GET) or validate it and start a new
    analysis case (POST).

    Older variant of the analysis handler: saves the mutations file to the
    project temp path itself and derives the flow from project_analysis().
    `type` must be COHORT_ANALYSIS or SINGLE_TUMOR_ANALYSIS.
    """
    if type not in [COHORT_ANALYSIS, SINGLE_TUMOR_ANALYSIS]:
        abort(400)

    if request.method == "GET":
        # Pre-fill the form with the default thresholds.
        form = dict(
            ofm_genes_threshold=ONCODRIVEFM_GENES_THRESHOLD,
            ofm_pathways_threshold=ONCODRIVEFM_PATHWAYS_THRESHOLD,
            oclust_genes_threshold=ONCODRIVECLUST_MUTATIONS_THRESHOLD)
        return render_template("analysis.html", type=type, form=form)

    mutations_file = request.files['mutations_file']
    file_name = os.path.basename(mutations_file.filename)

    # Default the project name to the mutations file name (sans extension).
    project_id = request.form['project_name']
    if len(project_id) == 0:
        project_id = os.path.splitext(file_name)[0]
    project_id = normalize_id(project_id)

    # Make the id unique by appending -1, -2, ... while a case exists.
    i = 0
    base_id = project_id
    while current_app.wok.exists_case(current_user, project_id):
        i += 1
        project_id = "{}-{}".format(base_id, i)

    '''
    if current_app.wok.exists_case(current_user, project_id):
        flash("An analysis with this name already exists. Please give it a different name or remove the previous one before starting again.", "error")
        return render_template("analysis.html", type=type, form=request.form)
    '''

    '''
    if g.demo and current_user.max_analysis != -1 and proj_manager.get_projects_count(g.conn, g.user_id) >= current_user.max_analysis:
        flash("""The online version is for demo only and there is a limit for the number of simultaneous analysis a user can manage. You must remove finished analysis before running new ones. Please download the pipeline and install in your system to avoid these limitations.""", "error")
        return redirect(url_for("download"))

    if not current_user.validated:
        flash("""You can not run an analysis with your data until you are completely registered.
Please check your email and follow the instructions to validate this account.""", "error")
        flash("Meanwhile you can play with the included examples.")
        return redirect(url_for("examples"))
    '''

    cb = ConfigBuilder(current_app.wok.conf_builder)
    cb.add_value("workspace", "default")
    cb.add_value("project.id", project_id)
    #case_name = "-".join([current_user.nick, project_id])
    #cb.add_value("wok.instance.name", case_name)

    # Results and temp paths are made per-user before re-reading the conf.
    conf = cb.get_conf()
    results_path = os.path.join(conf["results_path"], current_user.nick)
    cb.add_value("results_path", results_path)
    temp_path = os.path.join(conf["temp_path"], current_user.nick)
    cb.add_value("temp_path", temp_path)
    conf = cb.get_conf()

    project_path = get_project_path(conf, project_id)
    if not os.path.exists(project_path):
        os.makedirs(project_path)

    project_temp_path = get_temp_path(conf, project_id)
    if not os.path.exists(project_temp_path):
        os.makedirs(project_temp_path)

    if not current_user.is_anonymous():
        cb.add_value("website.user_id", current_user.nick)

    # FIXME ?
    # NOTE(review): the bare comparison below is a no-op; together with the
    # FIXME it looks like it was meant to gate this branch on the analysis
    # type (as the newer handler does) — confirm before changing.
    type == SINGLE_TUMOR_ANALYSIS
    if request.form.get("variants_only") == "1":
        cb.add_value("variants_only", True)
        cb.add_value("skip_oncodrivefm", True)
        cb.add_value("skip_oncodriveclust", True)

    # Optional thresholds: missing/invalid values keep the defaults; only
    # cohort analyses log the problem.
    try:
        threshold = request.form["ofm_genes_threshold"]
        if re.match(r"^[1-9]\d*%?$", threshold):
            cb.add_value(ONCODRIVEFM_GENES_THRESHOLD_KEY, threshold)
    except:
        if type == COHORT_ANALYSIS:
            current_app.logger.warn("Undefined form input: {}".format("ofm_genes_threshold"))

    try:
        threshold = request.form["ofm_pathways_threshold"]
        if re.match(r"^[1-9]\d*%?$", threshold):
            cb.add_value(ONCODRIVEFM_PATHWAYS_THRESHOLD_KEY, threshold)
    except:
        if type == COHORT_ANALYSIS:
            current_app.logger.warn("Undefined form input: {}".format("ofm_pathways_threshold"))

    try:
        threshold = int(request.form["oclust_genes_threshold"])
        if threshold >= 1:
            cb.add_value(ONCODRIVECLUST_GENES_THRESHOLD_KEY, threshold)
    except:
        if type == COHORT_ANALYSIS:
            current_app.logger.warn("Undefined form input: {}".format("oclust_genes_threshold"))

    genes_filter_enabled = request.form.get('genes_filter_enabled') == "1"
    cb.add_value(ONCODRIVEFM_FILTER_ENABLED_KEY, genes_filter_enabled)
    cb.add_value(ONCODRIVECLUST_FILTER_ENABLED_KEY, genes_filter_enabled)

    if genes_filter_enabled:
        # Save the uploaded genes filter; an empty upload keeps the defaults.
        try:
            genes_filter_file = request.files['genes_filter_file']
            genes_filter_file_path = os.path.join(project_temp_path, "genes-filter.txt")
            genes_filter_file.save(genes_filter_file_path)
            if os.path.getsize(genes_filter_file_path) != 0:
                cb.add_value(ONCODRIVEFM_GENES_FILTER_KEY, genes_filter_file_path)
                cb.add_value(ONCODRIVECLUST_GENES_FILTER_KEY, genes_filter_file_path)
        except:
            current_app.logger.exception("Error retrieving genes filter from form")

    # Persist the uploaded mutations file; bail out to the form on failure.
    mutations_path = os.path.join(project_temp_path, file_name)
    try:
        mutations_file.save(mutations_path)
    except:
        current_app.logger.exception("Error while saving mutations file {} into {}".format(mutations_file.filename, mutations_path))
        flash("""There were some problem with the input file for mutations.
Please check that a file has been loaded before submitting a new analysis. This error has been already submitted to the application administrators who will take care of it as soon as possible.""")
        return render_template("analysis.html", type=type, form=request.form)

    assembly = request.form.get("assembly", "hg19").lower()

    cb, flow_uri = project_analysis(mutations_path, assembly=assembly, conf_builder=cb)

    properties = dict(
        analysis_type=type,
        path=project_path,
        temp_path=project_temp_path,
        data_file=mutations_path)

    current_app.logger.info("[{}] Starting analysis {} ...".format(
        current_user.nick, project_id))

    case = current_app.wok.create_case(current_user, project_id, cb, flow_uri,
                                       properties=properties, start=True)

    current_app.logger.info("[{}] Analysis {} started on case {}...".format(
        current_user.nick, project_id, case.engine_name))

    return redirect(url_for("cases.index", highlight=case.id))
def combination_oncodrivefm(projects_set):
    """Combine OncodriveFM results of a classifier group across projects.

    Exports per-project gene p-values and pathway z-scores to temporary TSV
    files, then runs the external ``oncodrivefm-combine`` tool (once for
    genes, once for pathways) to write the combined output under the
    "oncodrivefm" combination path. Raises Exception when the tool fails.
    """
    log = task.logger

    config = GlobalConfig(task.conf)
    paths = PathsConfig(config)

    # A projects set pairs the classifier (group metadata) with its projects.
    classifier, projects = projects_set

    classifier_id = classifier["id"]
    group_values = classifier["group_values"]
    short_values = classifier["group_short_values"]
    long_values = classifier["group_long_values"]
    group_name = classifier["group_name"]
    group_short_name = classifier["group_short_name"]
    group_long_name = classifier["group_long_name"]

    # Output files are named after the classifier and, when the group has
    # values, the short group name.
    if len(group_values) == 0:
        group_file_prefix = classifier_id
    else:
        group_file_prefix = "{0}-{1}".format(classifier_id, group_short_name)
    group_file_prefix = normalize_id(group_file_prefix)

    log.info("--- [{0} ({1}) ({2}) ({3})] {4}".format(
        classifier["name"], group_long_name, group_short_name, group_name, "-" * 30))

    log.info("Exporting project data ...")

    # Temporary export directory; removed again at the end of this task.
    base_path = make_temp_dir(task, suffix=".{0}".format(group_file_prefix))

    log.debug("> {0}".format(base_path))

    project_ids = []
    gene_files = []
    pathway_files = []
    for project in projects:
        project_id = project["id"]
        project_ids += [project_id]

        log.info(" Project {0}:".format(project["id"]))

        projdb = ProjectDb(project["db"])

        log.info(" Genes ...")

        count = 0
        file_path = os.path.join(base_path, "{0}-genes.tsv".format(project_id))
        gene_files += [file_path]
        with open(file_path, "w") as f:
            tsv.write_param(f, "classifier", classifier_id)
            tsv.write_param(f, "group_id", group_name)
            tsv.write_param(f, "slice", project_id)
            tsv.write_line(f, "GENE_ID", "PVALUE")
            # Only genes with a computed FM p-value are exported.
            for gene in projdb.genes():
                if gene.fm_pvalue is not None:
                    tsv.write_line(f, gene.id, gene.fm_pvalue, null_value="-")
                    count += 1

        log.info(" {0} genes".format(count))

        log.info(" Pathways ...")

        count = 0
        file_path = os.path.join(base_path, "{0}-pathways.tsv".format(project_id))
        pathway_files += [file_path]
        with open(file_path, "w") as f:
            tsv.write_param(f, "classifier", classifier_id)
            tsv.write_param(f, "group_id", group_name)
            tsv.write_param(f, "slice", project_id)
            tsv.write_line(f, "PATHWAY_ID", "ZSCORE")
            # Only pathways with a computed FM z-score are exported.
            for pathway in projdb.pathways():
                if pathway.fm_zscore is not None:
                    tsv.write_line(f, pathway.id, pathway.fm_zscore, null_value="-")
                    count += 1

        log.info(" {0} pathways".format(count))

        projdb.close()

    log.info("Combining ...")

    combination_path = paths.combination_path("oncodrivefm")

    log.info(" Genes ...")

    # NOTE(review): the command runs through the shell; arguments are
    # single-quoted, so paths or ids containing a single quote would break
    # the quoting. Inputs appear to be internally generated — confirm.
    cmd = " ".join([
        "oncodrivefm-combine",
        "-m median-empirical",
        "-o '{0}'".format(combination_path),
        "-n 'gene-{0}'".format(group_file_prefix),
        "-D 'classifier={0}'".format(classifier_id),
        "-D 'group_id={0}'".format(group_name),
        "-D 'group_short_name={0}'".format(group_short_name),
        "-D 'group_long_name={0}'".format(group_long_name),
        "--output-format tsv.gz"
    ] + ["'{0}'".format(name) for name in gene_files])

    log.debug(cmd)

    ret_code = subprocess.call(cmd, shell=True)
    if ret_code != 0:
        #log.error("OncodriveFM error while combining gene pvalues:\n{0}".format(cmd))
        #return -1
        raise Exception("OncodriveFM error while combining gene pvalues:\n{0}".format(cmd))

    log.info(" Pathways ...")

    cmd = " ".join([
        "oncodrivefm-combine",
        "-m median-zscore",
        "-o '{0}'".format(combination_path),
        "-n 'pathway-{0}'".format(group_file_prefix),
        "-D 'classifier={0}'".format(classifier_id),
        "-D 'group_id={0}'".format(group_name),
        "-D 'group_short_name={0}'".format(group_short_name),
        "-D 'group_long_name={0}'".format(group_long_name),
        "--output-format tsv.gz"
    ] + ["'{0}'".format(name) for name in pathway_files])

    log.debug(cmd)

    ret_code = subprocess.call(cmd, shell=True)
    if ret_code != 0:
        #log.error("OncodriveFM error while combining pathway zscores:\n{0}".format(cmd))
        #return -1
        raise Exception("OncodriveFM error while combining pathway zscores:\n{0}".format(cmd))

    remove_temp(task, base_path)
def __init__(self, args_usage="", epilog="", flow_file=None, conf_files=None, conf_keys=None, logger_name=None):
    """Initialize the run command: signal handling, configuration file
    resolution, and execution options taken from the parsed arguments."""
    Command.__init__(self, args_usage, epilog, logger_name)

    # Route Ctrl-C and SIGTERM through the shared interrupt handler.
    for sig in (signal.SIGINT, signal.SIGTERM):
        signal.signal(sig, keyboardinterrupt_handler)

    self.flow_file = flow_file

    # Defaults first, then whatever the caller adds.
    self.conf_files = self.DEFAULT_CONF_FILES + (conf_files if conf_files is not None else [])
    self.conf_keys = self.DEFAULT_REQUIRED_CONF + (conf_keys if conf_keys is not None else [])

    self.workspace = self.args.workspace

    # Instance name is normalized when present.
    self.instance_name = self.args.instance_name
    if self.instance_name is not None:
        self.instance_name = normalize_id(self.instance_name)

    req_conf_files = self.conf_files

    # Required configuration files are resolved against the conf path.
    self.required_conf_files = [os.path.join(self.conf_path, cf) for cf in req_conf_files]

    # User configuration files are made absolute relative to the cwd.
    if self.args.conf_files is None:
        self.user_conf_files = []
    else:
        cwd = os.getcwd()
        self.user_conf_files = [
            cf if os.path.isabs(cf) else os.path.join(cwd, cf)
            for cf in self.args.conf_files]

    self.user_conf_data = self.args.conf_data if self.args.conf_data is not None else []

    # Defaults to 0 when not given on the command line.
    self.max_cores = 0 if self.args.max_cores is None else self.args.max_cores

    # Extra configuration data: starts empty, appended to elsewhere.
    self.extra_conf_data = []