Ejemplo n.º 1
0
def test_db_any_singleton_query(sqlite_db):
    """Check an ``any_of`` query holding a single (field, value) criterion.

    The query is issued twice against the same connection and both results
    are compared with the expected participant ids.
    """
    cohort_criteria = {'any_of': [('6070', "1")], 'all_of': [], 'none_of': []}
    exp_ids = {
        1041796, 1037058, 1024938, 1016017, 1038882, 1030520, 1003670, 1027017
    }

    obs = db.query_sqlite_db(con=sqlite_db, cohort_criteria=cohort_criteria)
    assert exp_ids == set(obs['eid'].tolist())

    # Check that any_of works over fields with multiple columns; the repeated
    # query must be asserted on, otherwise it verifies nothing.
    obs = db.query_sqlite_db(con=sqlite_db, cohort_criteria=cohort_criteria)
    assert exp_ids == set(obs['eid'].tolist())
Ejemplo n.º 2
0
def test_db_any_pair_same_field_query(sqlite_db):
    """Combine any_of, all_of and none_of criteria in a single query.

    The eids returned by the query must equal the expected set.
    """
    any_terms = [('41270', "Block H40-H42"), ('6119', "3"), ('6148', '4')]
    all_terms = [('6070', "1")]
    none_terms = [('read_2', "XE0of")]
    criteria = {
        'any_of': any_terms,
        'all_of': all_terms,
        'none_of': none_terms,
    }

    result = db.query_sqlite_db(con=sqlite_db, cohort_criteria=criteria)

    expected = {1041796, 1037058, 1030520, 1003670}
    assert expected == set(result['eid'].tolist())
Ejemplo n.º 3
0
def test_db_all_gp_clinical_read2and3(sqlite_db):
    """Query any_of over one read_2 and one read_3 code and compare eids."""
    read_terms = [('read_2', "XE0of"), ('read_3', 'XE0Gu')]
    criteria = {'all_of': [], 'any_of': read_terms, 'none_of': []}

    result = db.query_sqlite_db(con=sqlite_db, cohort_criteria=criteria)

    expected = {1016017, 1037918}
    assert expected == set(result['eid'].tolist())
Ejemplo n.º 4
0
def test_db_all_gp_clinical_read_2_multiple(sqlite_db):
    """Query all_of with the read_2 code "XE0of" and expect a single eid."""
    criteria = {'all_of': [('read_2', "XE0of")], 'any_of': [], 'none_of': []}

    result = db.query_sqlite_db(con=sqlite_db, cohort_criteria=criteria)

    expected = {1016017}
    assert expected == set(result['eid'].tolist())
Ejemplo n.º 5
0
def test_db_all_gp_clinical_read2(sqlite_db):
    """Query all_of with the read_2 code "4662." and expect a single eid."""
    criteria = {'all_of': [('read_2', "4662.")], 'any_of': [], 'none_of': []}

    result = db.query_sqlite_db(con=sqlite_db, cohort_criteria=criteria)

    expected = {1041796}
    assert expected == set(result['eid'].tolist())
Ejemplo n.º 6
0
def test_db_missing_fields2(sqlite_db):
    """Querying an unknown field must raise ValueError.

    The error message is expected to match the pattern ``DoesNotExist .*``.
    """
    cohort_criteria = {
        'all_of': [('DoesNotExist', "X")],
        'any_of': [],
        'none_of': []
    }
    with pytest.raises(ValueError, match=r"DoesNotExist .*"):
        # The call is expected to raise, so its return value is not bound.
        db.query_sqlite_db(con=sqlite_db, cohort_criteria=cohort_criteria)
Ejemplo n.º 7
0
def test_db_fields_with_spaces(sqlite_db):
    """Query all_of with a value that contains a space ("Block H40-H42")."""
    spaced_term = ('41270', "Block H40-H42")
    criteria = {'all_of': [spaced_term], 'any_of': [], 'none_of': []}

    result = db.query_sqlite_db(con=sqlite_db, cohort_criteria=criteria)

    expected = {1033149, 1041796}
    assert expected == set(result['eid'].tolist())
Ejemplo n.º 8
0
def test_db_none_returns_empty(sqlite_db):
    """Query all_of on field 6148 while excluding two values of field 6070.

    NOTE: despite the test name, the expected result is not empty; two eids
    are expected to match.
    """
    excluded = [("6070", "2"), ("6070", "1")]
    criteria = {'all_of': [('6148', "4")], 'any_of': [], 'none_of': excluded}

    result = db.query_sqlite_db(con=sqlite_db, cohort_criteria=criteria)

    expected = {1033149, 1033388}
    assert expected == set(result['eid'].tolist())
Ejemplo n.º 9
0
def test_db_all__none_multiple_columns(sqlite_db):
    """Query all_of on field 6148 while excluding one value of field 6070."""
    criteria = {
        'all_of': [('6148', "4")],
        'any_of': [],
        'none_of': [("6070", "2")],
    }

    result = db.query_sqlite_db(con=sqlite_db, cohort_criteria=criteria)

    expected = {1033149, 1016017, 1033388, 1030520, 1003670}
    assert expected == set(result['eid'].tolist())
Ejemplo n.º 10
0
def test_db_any_pair_diff_field_query(sqlite_db):
    """Query any_of with criteria on two different fields (6070 and 6119)."""
    either_terms = [('6070', "1"), ('6119', "1")]
    criteria = {'any_of': either_terms, 'all_of': [], 'none_of': []}

    result = db.query_sqlite_db(con=sqlite_db, cohort_criteria=criteria)

    expected = {
        1041796, 1037058, 1024938, 1016017,
        1038882, 1030520, 1003670, 1027017,
    }
    assert expected == set(result['eid'].tolist())
Ejemplo n.º 11
0
def test_db_missing_fields(sqlite_db):
    """Query all_of for the value "nan" on field 6070 and compare eids."""
    criteria = {'all_of': [('6070', "nan")], 'any_of': [], 'none_of': []}

    result = db.query_sqlite_db(con=sqlite_db, cohort_criteria=criteria)

    expected = {
        1003670, 1016017, 1024938,
        1027017, 1030520, 1037058,
        1037918, 1038882, 1041796,
    }
    assert expected == set(result['eid'].tolist())
Ejemplo n.º 12
0
def test_db_all_query_basic(sqlite_db):
    """Query all_of with one criterion on field 6070 and compare eids.

    The observed eids and the expected set are printed before the
    comparison.
    """
    criteria = {'all_of': [('6070', "1")], 'any_of': [], 'none_of': []}
    result = db.query_sqlite_db(con=sqlite_db, cohort_criteria=criteria)
    expected = set([
        1041796, 1037058, 1024938, 1016017,
        1038882, 1030520, 1003670, 1027017,
    ])

    # Extract the eid column once and reuse it for printing and comparing.
    observed_list = result['eid'].tolist()
    observed_set = set(observed_list)

    print("df['eid']:{}".format(observed_list))
    print("set(obs): {}".format(observed_set))
    print(expected)

    assert expected == observed_set
Ejemplo n.º 13
0
def submit_cohort_query(n: int, defined_terms: dict, all_terms: list,
                        any_terms: list, none_terms: list, config: dict,
                        kw_search_terms: list):
    """Run cohort search and build the statistics report for the result.

    The selected terms are turned into encoded and decoded criteria, both
    criteria dictionaries (and the keyword search terms, if any) are written
    below ``config['cohort_path']``, the database at ``config['db_path']`` is
    queried with the encoded criteria, and a statistics report is created
    for the matching ids.

    Keyword arguments:
    ------------------
    n: int
        indicates number of clicks of cohort search button
    defined_terms: dict
        phenotypes
    all_terms: list
        phenotypes for all participants
    any_terms: list
        phenotypes for any participants
    none_terms: list
        phenotypes for none of the participants
    config: dict
        path configuration; the keys 'cohort_path', 'db_path',
        'showcase_path' and 'codings_path' are read
    kw_search_terms: list
        search terms

    Returns:
    --------
    ids: list
        contains IDs returned from search
    timestamp: float
        timestamp taken before the criteria are built and the query is run
    stats_report_dict:
        report created by ``stats.create_report`` from the translation
        dataframe returned by ``stats.compute_stats_db``

    Raises:
    -------
    PreventUpdate
        if the callback was not triggered or ``n`` is None

    """
    # Nothing to do unless the callback was really triggered by a click.
    if not dash.callback_context.triggered or n is None:
        raise PreventUpdate

    timestamp = datetime.now().timestamp()

    logic_dictionary = {
        'all_of': all_terms,
        'any_of': any_terms,
        'none_of': none_terms
    }
    cohort_dictionaries = {
        "encoded": {
            "all_of": [],
            "any_of": [],
            "none_of": []
        },
        "decoded": {
            "all_of": [],
            "any_of": [],
            "none_of": []
        }
    }

    # Only logic groups with selected terms are converted; the others keep
    # their empty default lists.
    for logic, selected_terms in logic_dictionary.items():
        if selected_terms:
            terms_encoded, terms_decoded = _create_conditional_logic_list(
                selected_terms, defined_terms)
            cohort_dictionaries["encoded"][logic] = terms_encoded
            cohort_dictionaries["decoded"][logic] = terms_decoded

    outpath = config['cohort_path']
    if kw_search_terms:
        term_outfile = os.path.join(outpath, 'search_terms.txt')
        utils.write_txt_file(term_outfile, kw_search_terms)
        if os.path.exists(term_outfile):
            print(f"successfully saved search terms to file {term_outfile}")
        else:
            print(f"could not save search terms to file {term_outfile}")

    for k, v in cohort_dictionaries.items():
        name = "cohort_dictionary_" + k + ".txt"
        cohort_out = os.path.join(outpath, name)
        utils.write_dictionary(v, cohort_out)
        if os.path.exists(cohort_out):
            print(f"successfully saved {k} cohort dictionary to {cohort_out}")
        else:
            print(f"could not save {k} cohort dictionary to {cohort_out}")

    print('\ncreate_queries query_sqlite_db {}'.format(print_time()))

    db_filename = config['db_path']
    showcase_filename = config['showcase_path']
    coding_filename = config['codings_path']

    res = db.query_sqlite_db(db_filename=db_filename,
                             cohort_criteria=cohort_dictionaries['encoded'])

    ids = res['eid'].tolist()
    print(f"length of ids {len(ids)}")

    # Only the translation dataframe is needed to build the report.
    _, translation_df = stats.compute_stats_db(
        db_filename, ids, showcase_filename, coding_filename)
    stats_report_dict = stats.create_report(translation_df)

    return ids, timestamp, stats_report_dict
Ejemplo n.º 14
0
def test_db_empty_query(sqlite_db):
    """Query with no criteria at all and expect no eids in the result."""
    criteria = {'any_of': [], 'all_of': [], 'none_of': []}

    result = db.query_sqlite_db(con=sqlite_db, cohort_criteria=criteria)

    expected = set()
    assert expected == set(result['eid'].tolist())
Ejemplo n.º 15
0
def test_db_none_basic(sqlite_db):
    """Query none_of with one criterion on field 6070 and compare eids."""
    criteria = {'none_of': [('6070', "1")], 'all_of': [], 'any_of': []}

    result = db.query_sqlite_db(con=sqlite_db, cohort_criteria=criteria)

    expected = {1037918, 1033149, 1033388, 1031625, 1031595, 1008947}
    assert expected == set(result['eid'].tolist())