Code example #1
def get_pca_job(conf):
    logger.debug(f"""Generate job for PCA:\n"""
                 f"""  uid -        {conf["uid"]}\n"""
                 f"""  expression - {conf["expression"]}\n""")
    connect_db = HookConnect()
    setting_data = connect_db.get_settings_data()
    job = {
        "expression_file": [],
        "legend_name": [],
        "output_prefix": conf["uid"] + "_",
        "output_folder": os.path.join(setting_data["anl_data"], conf["uid"]),
        "uid": conf["uid"]
    }
    for idx, uid in enumerate(conf["expression"].split()):
        for genelist_data in connect_db.fetchall(
                f"SELECT tableName, name FROM genelist WHERE leaf=1 AND (parent_id like '{uid}' OR id like '{uid}')"
        ):
            exp_data = get_exp_data(genelist_data["tableName"])
            job["expression_file"].append(
                fill_template(
                    """{{"class": "File",
                                                             "location": "{outputs[rpkm_isoforms][location]}",
                                                             "format": "http://edamontology.org/format_3752"}}""",
                    exp_data))
            job["legend_name"].append(genelist_data["name"])
    return job
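
All of the examples on this page rely on a project-level fill_template helper whose body is not shown here. Judging only from how it is called, a minimal sketch might look like the following (hypothetical: it assumes the templates are JSON fragments with str.format placeholders, which would also explain the doubled literal braces):

from json import loads

def fill_template(template, data):
    # Substitute fields from the record dict into the template, then
    # parse the result as JSON. {{ and }} in the templates above are
    # str.format escapes for the literal JSON braces.
    return loads(template.format(**data))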
Code example #2
def get_deseq_job(conf):
    logger.debug(f"Collecting data for genelists:\n"
                 f"  name -           {conf['name']}\n"
                 f"  project_uid -    {conf['project_uid']}\n"
                 f"  uid -            {conf['uid']}\n"
                 f"  untreated -      {conf['condition'][0]}\n"
                 f"  treated -        {conf['condition'][1]}\n"
                 f"  groupby -        {conf['groupby']}\n")

    connect_db = HookConnect()
    setting_data = connect_db.get_settings_data()
    job = {
        "untreated_files": [],
        "treated_files": [],
        "output_filename": conf["uid"] + "_deseq.tsv",
        "threads": int(setting_data["threads"]),
        "output_folder": os.path.join(setting_data["anl_data"], conf["uid"]),
        "uid": conf["uid"]
    }

    # groupby selects which grouping of the expression data to use:
    # 1 - rpkm_isoforms, 2 - rpkm_genes, 3 - rpkm_common_tss
    file_templates = {
        1: '{{"class": "File", "location": "{outputs[rpkm_isoforms][location]}", "format": "http://edamontology.org/format_3752"}}',
        2: '{{"class": "File", "location": "{outputs[rpkm_genes][location]}", "format": "http://edamontology.org/format_3475"}}',
        3: '{{"class": "File", "location": "{outputs[rpkm_common_tss][location]}", "format": "http://edamontology.org/format_3475"}}'
    }
    current_file_template = file_templates[conf["groupby"]]

    for idx, uid in enumerate(conf['condition']):
        logger.debug(f"Get experiment IDs for {uid}")
        sql_query = f"SELECT tableName FROM genelist WHERE leaf=1 AND (parent_id like '{uid}' OR id like '{uid}')"
        for record in connect_db.fetchall(sql_query):
            exp_data = get_exp_data(record["tableName"])
            # conf['condition'][0] is untreated, conf['condition'][1] is treated
            target = "untreated_files" if idx == 0 else "treated_files"
            job[target].append(fill_template(current_file_template, exp_data))
    return job
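
A hypothetical call to get_deseq_job could look like this; every identifier below is a placeholder. condition lists the untreated and treated genelist UIDs in that order, and groupby picks one of the three file templates above:

conf = {
    "name": "DESeq: treated vs untreated",   # placeholder values only
    "project_uid": "project-0001",
    "uid": "deseq-0001",
    "condition": ["untreated-uid", "treated-uid"],
    "groupby": 1
}
job = get_deseq_job(conf)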
Code example #3
def get_genelist_file(uid):
    genelist_data = get_genelist_data(uid)
    genelist_file_template = '{{"class": "File", "location": "{outputs[genelist_file][location]}", "format": "http://edamontology.org/format_3475"}}'
    try:
        genelist_file = fill_template(genelist_file_template, genelist_data)
    except KeyError:
        logger.debug(f"Failed to find genelist file for: {uid}")
        connect_db = HookConnect()
        filename = os.path.join(connect_db.get_settings_data()["anl_data"],
                                uid, uid + "_genelist.tsv")

        data = connect_db.fetchall(
            f"""SELECT * FROM experiments.`{genelist_data["tableName"]}`""")
        data_str = ""
        for idx, record in enumerate(data):
            if idx == 0:
                data_str += "\t".join([str(item)
                                       for item in record.keys()]) + "\n"
            else:
                data_str += "\t".join([str(item)
                                       for item in record.values()]) + "\n"

        export_to_file(data_str, filename)
        logger.debug(f"Export genelist file to: {filename}")
        genelist_data["outputs"].update({
            "genelist_file": {
                "class": "File",
                "location": filename,
                "format": "http://edamontology.org/format_3475"
            }
        })
        connect_db.execute(
            f"""UPDATE genelist SET params='{dumps(genelist_data["outputs"])}' WHERE id='{uid}'"""
        )
        logger.debug(
            f"""Update params for {uid}\n{dumps(genelist_data["outputs"], indent=4)}"""
        )
        genelist_file = fill_template(genelist_file_template, genelist_data)
    return genelist_file
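
export_to_file is another project helper whose body is not included on this page. A minimal sketch consistent with how it is used above (hypothetical) would simply write the collected TSV text to disk:

import os

def export_to_file(data_str, filename):
    # Create the target folder if needed, then write the TSV text.
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    with open(filename, "w") as output_stream:
        output_stream.write(data_str)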
Code example #4
def get_heatmap_job(conf):
    logger.debug(f"Collecting data for genelist:\n"
                 f"  name          - {conf['name']}\n"
                 f"  uid           - {conf['uid']}\n"
                 f"  data_uid      - {conf['data_uid']}\n"
                 f"  intervals_uid - {conf['intervals_uid']}\n")
    connect_db = HookConnect()
    setting_data = connect_db.get_settings_data()
    exp_data = get_exp_data(get_genelist_data(conf['data_uid'])["tableName"])
    job = {
        "bam_file":
        fill_template(
            '{{"class": "File", "location": "{outputs[bambai_pair][location]}", "format": "http://edamontology.org/format_2572"}}',
            exp_data),
        "genelist_file":
        get_genelist_file(conf['intervals_uid']),
        "fragment_size":
        exp_data["fragment_size"],
        "json_filename":
        "-".join([conf['data_uid'], conf['intervals_uid']]),
        "plot_name":
        conf['name'],
        "data_uid":
        conf['data_uid'],
        "data_name":
        get_genelist_data(conf['data_uid'])["name"],
        "intervals_uid":
        conf['intervals_uid'],
        "intervals_name":
        get_genelist_data(conf['intervals_uid'])["name"],
        "threads":
        int(setting_data["threads"]),
        "output_folder":
        os.path.join(setting_data["anl_data"], conf["uid"]),
        "uid":
        conf["uid"]
    }
    return job
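
A hypothetical call to get_heatmap_job; as with the other examples, all UIDs are placeholders. data_uid points at the experiment supplying the BAM file, while intervals_uid points at the genelist supplying the intervals:

conf = {
    "name": "Tag density heatmap",    # placeholder values only
    "uid": "heatmap-0001",
    "data_uid": "experiment-uid",
    "intervals_uid": "genelist-uid"
}
job = get_heatmap_job(conf)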
Code example #5
def gen_outputs(connect_db):
    setting_data = connect_db.get_settings_data()
    sql_query = """SELECT
                         l.uid                    as uid,
                         l.params                 as outputs,
                         e.etype                  as exp_type,
                         e.id                     as exp_id
                   FROM  labdata l
                   INNER JOIN (experimenttype e) ON (e.id=l.experimenttype_id)
                   WHERE (l.deleted=0)                 AND
                         (l.libstatus=12)              AND
                         COALESCE(l.egroup_id,'')<>''  AND
                         COALESCE(l.name4browser,'')<>''"""
    logger.debug(f"Run SQL query:\n{sql_query}")
    for db_record in connect_db.fetchall(sql_query):
        logger.info(f"LOAD: {db_record['uid']} - {db_record['exp_type']}")
        get_to_update_stage = False
        get_to_upload_stage = False
        db_record.update(setting_data)
        db_record.update({"prefix": SCRIPTS_DIR})
        raw_outputs = db_record["outputs"]
        db_record["outputs"] = (loads(raw_outputs)
                                if raw_outputs and raw_outputs != "null"
                                else {})

        for item_str in TEMPLATES.get(db_record["exp_id"], []):
            try:
                logger.debug(
                    "CHECK: if experiment's outputs require correction")
                item_parsed = fill_template(item_str, db_record)
                list(validate_locations(item_parsed["outputs"])
                     )  # TODO Use normal way to execute generator
                validate_outputs(db_record["outputs"], item_parsed["outputs"])
            except KeyError as ex:
                logger.info(
                    f"SKIP: couldn't find the experiment's required output {ex}")
            except OSError as ex:
                get_to_update_stage = True
                logger.debug(
                    f"GENERATE: missing file or corresponding data in DB: {ex}"
                )
                try:
                    commands = " ".join(item_parsed["commands"])
                    logger.debug(f"RUN: {commands}")
                    run_command(commands)
                    add_details_to_outputs(item_parsed["outputs"])
                    db_record["outputs"].update(item_parsed["outputs"])
                    get_to_upload_stage = True
                except subprocess.CalledProcessError as ex:
                    logger.error(
                        f"FAIL: got an error while running the command: {ex}")
                except OSError as ex:
                    logger.error(f"FAIL: couldn't locate generated files {ex}")

        if get_to_upload_stage:
            connect_db.execute(
                f"""UPDATE labdata SET params='{dumps(db_record["outputs"])}' WHERE uid='{db_record["uid"]}'"""
            )
            logger.debug(
                f"UPDATE: new experiment's outputs\n{dumps(db_record['outputs'], indent=4)}"
            )
            logger.info(f"SUCCESS: experiment's outputs have been corrected")
        elif get_to_update_stage:
            logger.info(f"FAIL: experiment's outputs have not been corrected")
        else:
            logger.info(
                "SUCCESS: experiment's outputs either require no correction or cannot be corrected"
            )
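
run_command is not defined on this page either. Since gen_outputs catches subprocess.CalledProcessError, a minimal sketch (hypothetical) would run the joined command line through the shell and raise on a non-zero exit code:

import subprocess

def run_command(commands):
    # Run the command line through the shell; check=True raises
    # subprocess.CalledProcessError on a non-zero exit code.
    subprocess.run(commands, shell=True, check=True)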