def get_pca_job(conf):
    logger.debug(f"Generate job for PCA:\n"
                 f" uid - {conf['uid']}\n"
                 f" expression - {conf['expression']}\n")
    connect_db = HookConnect()
    setting_data = connect_db.get_settings_data()
    job = {
        "expression_file": [],
        "legend_name": [],
        "output_prefix": conf["uid"] + "_",
        "output_folder": os.path.join(setting_data["anl_data"], conf["uid"]),
        "uid": conf["uid"]
    }
    for uid in conf["expression"].split():
        for genelist_data in connect_db.fetchall(
                f"SELECT tableName, name FROM genelist WHERE leaf=1 AND (parent_id like '{uid}' OR id like '{uid}')"
        ):
            exp_data = get_exp_data(genelist_data["tableName"])
            job["expression_file"].append(
                fill_template(
                    '{{"class": "File", "location": "{outputs[rpkm_isoforms][location]}", "format": "http://edamontology.org/format_3752"}}',
                    exp_data))
            job["legend_name"].append(genelist_data["name"])
    return job
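
# A minimal sketch of what fill_template() is assumed to do with the JSON
# templates above (the real helper may differ): str.format() resolves fields
# like {outputs[rpkm_isoforms][location]} from the record, doubled {{ }}
# survive as literal JSON braces, and the result is parsed into a dict.
def _fill_template_sketch(template, data):
    from json import loads
    return loads(template.format(**data))
# Example:
#   _fill_template_sketch(
#       '{{"class": "File", "location": "{outputs[rpkm_isoforms][location]}"}}',
#       {"outputs": {"rpkm_isoforms": {"location": "/anl_data/u1/isoforms.tab"}}})
#   -> {'class': 'File', 'location': '/anl_data/u1/isoforms.tab'}
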
def get_deseq_job(conf):
    logger.debug(f"Collecting data for genelists:\n"
                 f" name - {conf['name']}\n"
                 f" project_uid - {conf['project_uid']}\n"
                 f" uid - {conf['uid']}\n"
                 f" untreated - {conf['condition'][0]}\n"
                 f" treated - {conf['condition'][1]}\n"
                 f" groupby - {conf['groupby']}\n")
    connect_db = HookConnect()
    setting_data = connect_db.get_settings_data()
    job = {
        "untreated_files": [],
        "treated_files": [],
        "output_filename": conf["uid"] + "_deseq.tsv",
        "threads": int(setting_data["threads"]),
        "output_folder": os.path.join(setting_data["anl_data"], conf["uid"]),
        "uid": conf["uid"]
    }
    # groupby selects the expression grouping: 1 - isoforms, 2 - genes, 3 - common TSS
    file_templates = {
        1: '{{"class": "File", "location": "{outputs[rpkm_isoforms][location]}", "format": "http://edamontology.org/format_3752"}}',
        2: '{{"class": "File", "location": "{outputs[rpkm_genes][location]}", "format": "http://edamontology.org/format_3475"}}',
        3: '{{"class": "File", "location": "{outputs[rpkm_common_tss][location]}", "format": "http://edamontology.org/format_3475"}}'
    }
    current_file_template = file_templates[conf["groupby"]]
    # conf["condition"] is expected to be [untreated_uid, treated_uid]
    for idx, uid in enumerate(conf["condition"]):
        logger.debug(f"Get experiment IDs for {uid}")
        sql_query = f"SELECT tableName FROM genelist WHERE leaf=1 AND (parent_id like '{uid}' OR id like '{uid}')"
        for record in connect_db.fetchall(sql_query):
            exp_data = get_exp_data(record["tableName"])
            target = job["untreated_files"] if idx == 0 else job["treated_files"]
            target.append(fill_template(current_file_template, exp_data))
    return job
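
# Hypothetical usage sketch for get_deseq_job(); the field values below are
# illustrative, not taken from a real database:
#
#   job = get_deseq_job({
#       "name": "treated vs untreated",
#       "project_uid": "project-0001",
#       "uid": "deseq-0001",
#       "condition": ["untreated-uid", "treated-uid"],  # [untreated, treated]
#       "groupby": 1  # 1 - isoforms, 2 - genes, 3 - common TSS
#   })
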
def get_genelist_file(uid):
    genelist_data = get_genelist_data(uid)
    genelist_file_template = '{{"class": "File", "location": "{outputs[genelist_file][location]}", "format": "http://edamontology.org/format_3475"}}'
    try:
        genelist_file = fill_template(genelist_file_template, genelist_data)
    except KeyError:
        # The genelist file isn't registered in outputs yet: export it from the
        # experiments table and record its location back into the genelist params.
        logger.debug(f"Failed to find genelist file for: {uid}")
        connect_db = HookConnect()
        filename = os.path.join(connect_db.get_settings_data()["anl_data"], uid, uid + "_genelist.tsv")
        data = connect_db.fetchall(
            f"""SELECT * FROM experiments.`{genelist_data["tableName"]}`""")
        data_str = ""
        for idx, record in enumerate(data):
            # Write the header from the first record's keys, then the values
            # of every record, including the first one.
            if idx == 0:
                data_str += "\t".join(str(item) for item in record.keys()) + "\n"
            data_str += "\t".join(str(item) for item in record.values()) + "\n"
        export_to_file(data_str, filename)
        logger.debug(f"Export genelist file to: {filename}")
        genelist_data["outputs"].update({
            "genelist_file": {
                "class": "File",
                "location": filename,
                "format": "http://edamontology.org/format_3475"
            }
        })
        connect_db.execute(
            f"""UPDATE genelist SET params='{dumps(genelist_data["outputs"])}' WHERE id='{uid}'""")
        logger.debug(
            f"""Update params for {uid}\n{dumps(genelist_data["outputs"], indent=4)}""")
        genelist_file = fill_template(genelist_file_template, genelist_data)
    return genelist_file
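
# A minimal sketch of the export_to_file() helper used above, under the
# assumption that it creates the target directory and writes the text as-is
# (the real helper may differ):
def _export_to_file_sketch(data_str, filename):
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    with open(filename, "w") as output_stream:
        output_stream.write(data_str)
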
def get_heatmap_job(conf):
    logger.debug(f"Collecting data for genelist:\n"
                 f" name - {conf['name']}\n"
                 f" uid - {conf['uid']}\n"
                 f" data_uid - {conf['data_uid']}\n"
                 f" intervals_uid - {conf['intervals_uid']}\n")
    connect_db = HookConnect()
    setting_data = connect_db.get_settings_data()
    exp_data = get_exp_data(get_genelist_data(conf["data_uid"])["tableName"])
    job = {
        "bam_file": fill_template(
            '{{"class": "File", "location": "{outputs[bambai_pair][location]}", "format": "http://edamontology.org/format_2572"}}',
            exp_data),
        "genelist_file": get_genelist_file(conf["intervals_uid"]),
        "fragment_size": exp_data["fragment_size"],
        "json_filename": "-".join([conf["data_uid"], conf["intervals_uid"]]),
        "plot_name": conf["name"],
        "data_uid": conf["data_uid"],
        "data_name": get_genelist_data(conf["data_uid"])["name"],
        "intervals_uid": conf["intervals_uid"],
        "intervals_name": get_genelist_data(conf["intervals_uid"])["name"],
        "threads": int(setting_data["threads"]),
        "output_folder": os.path.join(setting_data["anl_data"], conf["uid"]),
        "uid": conf["uid"]
    }
    return job
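
# Hypothetical usage sketch for get_heatmap_job(); values are illustrative:
#
#   job = get_heatmap_job({
#       "name": "Tag density around TSS",
#       "uid": "heatmap-0001",
#       "data_uid": "experiment-uid",     # source of the BAM file
#       "intervals_uid": "genelist-uid"   # source of the intervals file
#   })
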
def gen_outputs(connect_db):
    setting_data = connect_db.get_settings_data()
    sql_query = """SELECT l.uid as uid, l.params as outputs, e.etype as exp_type, e.id as exp_id
                   FROM labdata l
                   INNER JOIN (experimenttype e) ON (e.id=l.experimenttype_id)
                   WHERE (l.deleted=0) AND (l.libstatus=12)
                         AND COALESCE(l.egroup_id,'')<>'' AND COALESCE(l.name4browser,'')<>''"""
    logger.debug(f"Run SQL query:\n{sql_query}")
    for db_record in connect_db.fetchall(sql_query):
        logger.info(f"LOAD: {db_record['uid']} - {db_record['exp_type']}")
        get_to_update_stage = False
        get_to_upload_stage = False
        db_record.update(setting_data)
        db_record.update({"prefix": SCRIPTS_DIR})
        db_record.update({
            "outputs": loads(db_record["outputs"]) if db_record["outputs"] and db_record["outputs"] != "null" else {}
        })
        for item_str in TEMPLATES.get(db_record["exp_id"], []):
            try:
                logger.debug("CHECK: if experiment's outputs require correction")
                item_parsed = fill_template(item_str, db_record)
                list(validate_locations(item_parsed["outputs"]))  # TODO Use normal way to execute generator
                validate_outputs(db_record["outputs"], item_parsed["outputs"])
            except KeyError as ex:
                logger.info(f"SKIP: couldn't find required experiment's output {ex}")
            except OSError as ex:
                get_to_update_stage = True
                logger.debug(f"GENERATE: missing file or corresponding data in DB: {ex}")
                try:
                    commands = " ".join(item_parsed["commands"])
                    logger.debug(f"RUN: {commands}")
                    run_command(commands)
                    add_details_to_outputs(item_parsed["outputs"])
                    db_record["outputs"].update(item_parsed["outputs"])
                    get_to_upload_stage = True
                except subprocess.CalledProcessError as ex:
                    logger.error(f"FAIL: got error while running the command {ex}")
                except OSError as ex:
                    logger.error(f"FAIL: couldn't locate generated files {ex}")
        if get_to_upload_stage:
            connect_db.execute(
                f"""UPDATE labdata SET params='{dumps(db_record["outputs"])}' WHERE uid='{db_record["uid"]}'""")
            logger.debug(f"UPDATE: new experiment's outputs\n{dumps(db_record['outputs'], indent=4)}")
            logger.info("SUCCESS: experiment's outputs have been corrected")
        elif get_to_update_stage:
            logger.info("FAIL: experiment's outputs have not been corrected")
        else:
            logger.info("SUCCESS: experiment's outputs don't require correction or cannot be corrected")
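
# A hypothetical sketch of validate_locations(), assumed to be a generator
# that checks every "location" on disk and raises OSError for a missing file,
# which is why gen_outputs() drains it with list(...):
def _validate_locations_sketch(outputs):
    for name, item in outputs.items():
        if isinstance(item, dict) and "location" in item:
            if not os.path.exists(item["location"]):
                raise OSError(f"Missing {name} file: {item['location']}")
            yield item["location"]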