def insert(table_name, schema):
    """Store the JSON request payload as new rows in ``schema.table_name``.

    A single JSON object is wrapped in a list so the rest of the handler
    only deals with one shape. Returns a human-readable status string.
    """
    rows = request.get_json()
    if isinstance(rows, dict):
        rows = [rows]
    add_timestamp(rows)
    table = get_table(schema, table_name)
    table.insert_many(rows)
    return '%s successfully stored in %s.%s' % (len(rows), schema, table_name)
def find_stadiums_within_seat_range(min_seats, max_seats, url):
    """Print every team whose stadium capacity lies in [min_seats, max_seats]."""
    html_table = get_table(url)
    capacity_by_team = convert_to_dictionary(
        get_football_teams(html_table), get_stadium_capacity(html_table))
    for team, seats in capacity_by_team.items():
        if min_seats <= seats <= max_seats:
            print("{0:>20}: {1:10}".format(team, str(seats)))
def calculate_losses_per_team_per_season():
    """Get losses per season per team and persist the aggregate table."""
    df = get_table("t_original_regular_season_compact_results")
    # One row per game; counting rows grouped by the losing side gives losses.
    losses = df.groupby(["season", "l_team_id"]).size().reset_index()
    losses.columns = ["season", "team_id", "losses"]
    write_table(losses, "losses_per_team_per_season")
def calculate_wins_per_team_per_season():
    """Get wins per season per team and persist the aggregate table."""
    df = get_table("t_original_regular_season_compact_results")
    # One row per game; counting rows grouped by the winning side gives wins.
    wins = df.groupby(["season", "w_team_id"]).size().reset_index()
    wins.columns = ["season", "team_id", "wins"]
    write_table(wins, "wins_per_team_per_season")
def find_teams_with_bounds(latitude, longitude, url):
    """Print each team whose coordinates exceed the latitude OR the longitude."""
    table = get_table(url)
    team_coords = convert_to_dictionary(
        get_football_teams(table), get_coordinates(table))
    for team, coords in team_coords.items():
        # NOTE(review): `or` keeps a team exceeding either bound; confirm this
        # is intended rather than an "inside the bounding box" (`and`) check.
        if coords[0] >= latitude or coords[1] >= longitude:
            print("{team:>20}: {latitude:>10}°N, {longitude:>10}°W".format(
                team=team,
                latitude=str(coords[0]),
                longitude=str(coords[1])))
def calculate_seed_rank_per_team_per_season():
    """Split raw tourney seed strings into a numeric rank and a region letter."""
    df = get_table("t_original_ncaa_tourney_seeds")
    # A seed looks like "W01a": region letter, two-digit rank, and an optional
    # a/b play-in suffix (which might be of interest later on, so it is simply
    # dropped along with the original column).
    df["seed_rank"] = df["seed"].apply(lambda s: int(s[1:3]))
    df["seed_region"] = df["seed"].apply(lambda s: s[0])
    df.drop("seed", axis=1, inplace=True)
    write_table(df, "seed_rank_region_per_team_per_season")
def register():
    """Register a login (creating its schema and a random password), or
    return the already-stored credentials for an existing login."""
    login = request.args.get('login')
    if not login:
        return '/register?login=YOUR_LOGIN'
    auth_table = get_table('tech', 'auth')
    records = check_auth(login=login)
    if records:
        account = records[0]
    else:
        password = ''.join(
            random.choice(string.ascii_letters) for _ in range(10))
        account = {'login': login, 'password': password}
        create_schema(login)
        auth_table.insert(account)
    return jsonify(account)
def ahorros_para_lograr_meta():
    """Compute the regular deposit required to hit a savings goal and return
    the full projected schedule as JSON."""
    data = parse_data(request.get_json())
    ini_dep = data.get('ini_dep')
    fin_bal = data.get('fin_bal')
    freq = data.get('freq')
    num_of_years = data.get('num_of_years')
    rate = data.get('rate')
    dep_when = data.get('dep_when')
    time_scale, rows_per_page = data.get('time_scale')
    periods, periods_m, periods_a = get_periods(freq, num_of_years)
    # Future value of the initial deposit alone, then the payment that closes
    # the gap between that and the requested final balance.
    period_rate = rate / (100 * freq)
    n_periods = freq * num_of_years
    fv = fut_val(period_rate, n_periods, ini_dep)
    reg_dep = -1 * payment(period_rate, n_periods, 0, fin_bal + fv, dep_when)
    deposits, reg_deps, extra_deps = get_deposits(
        ini_dep, reg_dep, 0, 0, 0, periods)
    interests, agg_interests, agg_deposits, balances = get_balances(
        periods, deposits, ini_dep, rate, freq, dep_when)
    payload = {
        'reg_dep': reg_dep,
        'time_scale': time_scale,
        'total_dep': sum(deposits),
        'total_int': sum(interests),
        'fin_bal': balances[-1],
        'periods': periods,
        'agg_deposits': agg_deposits,
        'agg_interests': agg_interests,
        'balances': balances,
        'table': get_table(periods, deposits, interests, balances),
        'table_m': get_table_m(periods_m, deposits, interests, balances, freq),
        'table_a': get_table_a(periods_a, deposits, interests, balances, freq),
    }
    return jsonify(payload), 200, HEADERS
def get_isa_bosu(driver, meet_seq, rcp_no, cursor):
    """Scrape the outside-director compensation tables from the report
    currently loaded in ``driver`` and store each via ``isa_bosu_db``.

    Any failure is logged and swallowed so one bad report does not stop the
    crawl.
    """
    try:
        # Click the "outside director compensation" entry in the left table
        # of contents; its position shifts by one when the li[3] heading does
        # not contain the expected text.
        if '사외이사' in driver.find_element_by_xpath(
                '//*[@id="ext-gen10"]/div/li[3]/div/a').text:
            driver.find_element_by_xpath(
                '//*[@id="ext-gen10"]/div/li[3]/ul/li[2]/div/a').click()
        else:
            driver.find_element_by_xpath(
                '//*[@id="ext-gen10"]/div/li[4]/ul/li[2]/div/a').click()
        time.sleep(1)  # give the document time to load
        driver.switch_to.frame(
            driver.find_element_by_tag_name("iframe"))  # enter content iframe
        # Parse every table in the loaded document.
        html = driver.page_source
        soup = BeautifulSoup(html, 'html.parser')
        tables = soup.find_all('table')
        # Indices of the director-compensation tables among `tables`.
        isa_bosu_num = set_isa_bosu_table(tables)
        isa_bosu_cnt = 0
        if len(isa_bosu_num) > 0:
            # Monetary unit: read from the first table unless the target table
            # is itself first, in which case won is assumed.
            if isa_bosu_num[0] > 0:
                bosu_unit = get_unit(tables[0].text)
            else:
                bosu_unit = '원'
            for bosu in isa_bosu_num:
                num_arr = [1, 2, 3, 4]  # numeric columns to normalize
                isa_bosu_tb = get_bosu_edit_per(
                    get_table(tables[bosu]), num_arr)
                if check_empty_table(isa_bosu_tb) == 0:
                    continue  # skip tables with no usable rows
                isa_bosu_db(meet_seq, rcp_no, isa_bosu_tb, bosu_unit, cursor)
                isa_bosu_cnt = isa_bosu_cnt + 1
        else:
            isa_bosu_cnt = 0
        info_logger.info('[3] directors pay success.')
        info_logger.info(
            '[3] directors pay table count[{0}]'.format(isa_bosu_cnt))
    except Exception as e:
        error_logger.error('[3] directors pay fail. [{0}] : {1}'.format(
            rcp_no, e))
def calculadora_de_ahorros():
    """Project a savings plan (regular plus extra deposits) and return the
    full schedule as JSON."""
    data = parse_data(request.get_json())
    ini_dep = data.get('ini_dep')
    reg_dep = data.get('reg_dep')
    freq = data.get('freq')
    num_of_years = data.get('num_of_years')
    rate = data.get('rate')
    extra_dep = data.get('extra_dep')
    extra_dep_start = data.get('extra_dep_start')
    extra_dep_f = data.get('extra_dep_f')
    dep_when = data.get('dep_when')
    time_scale, rows_per_page = data.get('time_scale')
    periods, periods_m, periods_a = get_periods(freq, num_of_years)
    deposits, reg_deps, extra_deps = get_deposits(
        ini_dep, reg_dep, extra_dep, extra_dep_start, extra_dep_f, periods)
    interests, agg_interests, agg_deposits, balances = get_balances(
        periods, deposits, ini_dep, rate, freq, dep_when)
    payload = {
        'time_scale': time_scale,
        'total_dep': sum(deposits),
        'total_int': sum(interests),
        'fin_bal': balances[-1],
        'periods': periods,
        'agg_deposits': agg_deposits,
        'agg_interests': agg_interests,
        'balances': balances,
        'table': get_table(periods, deposits, interests, balances),
        'table_m': get_table_m(periods_m, deposits, interests, balances, freq),
        'table_a': get_table_a(periods_a, deposits, interests, balances, freq),
    }
    return jsonify(payload), 200, HEADERS
def get_transaction_total(driver, meet_seq, rcp_no, cursor):
    """Scrape the "transactions above a certain size with the largest
    shareholder" tables from the report currently loaded in ``driver`` and
    store each via ``trans_total_db``.

    Any failure is logged and swallowed so one bad report does not stop the
    crawl.
    """
    try:
        # Click the section entry in the left table of contents; its position
        # shifts by one when the li[4] heading does not contain the expected
        # text.
        if '최대주주' in driver.find_element_by_xpath(
                '//*[@id="ext-gen10"]/div/li[4]/div/a').text:
            driver.find_element_by_xpath(
                '//*[@id="ext-gen10"]/div/li[4]/ul/li[2]/div/a').click()
        else:
            driver.find_element_by_xpath(
                '//*[@id="ext-gen10"]/div/li[5]/ul/li[2]/div/a').click()
        time.sleep(1)  # give the document time to load
        driver.switch_to.frame(
            driver.find_element_by_tag_name("iframe"))  # enter content iframe
        # Parse every table in the loaded document.
        html = driver.page_source
        soup = BeautifulSoup(html, 'html.parser')
        tables = soup.find_all('table')
        # Indices of the transaction-total tables among `tables`.
        trans_total_num = set_transaction_table(tables)
        trans_total_cnt = 0
        if len(trans_total_num) > 0:
            # Monetary unit: read from the first table unless the target table
            # is itself first, in which case won is assumed.
            if trans_total_num[0] > 0:
                trans_unit = get_unit(tables[0].text)
            else:
                trans_unit = '원'
            for trans in trans_total_num:
                num_arr = [3, 4]  # numeric columns to normalize
                trans_tb = get_bosu_edit_per(
                    get_table(tables[trans]), num_arr)
                if check_empty_table(trans_tb) == 0:
                    continue  # skip tables with no usable rows
                trans_total_db(meet_seq, rcp_no, trans_unit, trans_tb, cursor)
                trans_total_cnt = trans_total_cnt + 1
        else:
            trans_total_cnt = 0
        info_logger.info('[5] total transaction success.')
        info_logger.info(
            '[5] total transaction table count[{0}]'.format(trans_total_cnt))
    except Exception as e:
        error_logger.error('[5] total transaction fail. [{0}] : {1}'.format(
            rcp_no, e))
def calculate_wins_per_team_per_season_by_ot():
    """Get regular season wins split (binary) by OT"""
    df = get_table("t_original_regular_season_compact_results")

    def _count_wins(mask, label):
        # Count wins per (season, winner) for the masked games and name the
        # size column after the OT split.
        counts = df[mask].groupby(["season", "w_team_id"]).size().reset_index()
        counts.rename(columns={"w_team_id": "team_id", 0: label}, inplace=True)
        return counts

    wins_no_ot = _count_wins(df["num_ot"] == 0, "wins_no_ot")
    wins_ot = _count_wins(df["num_ot"] > 0, "wins_ot")
    # Outer(!) join so teams that never — or only — won via OT are kept.
    merged = pd.merge(wins_no_ot, wins_ot,
                      on=["season", "team_id"], how="outer")
    # Missing counts from the outer join become zero and go back to int.
    merged.fillna(0, inplace=True)
    merged["wins_ot"] = merged["wins_ot"].astype(int)
    merged["wins_no_ot"] = merged["wins_no_ot"].astype(int)
    write_table(merged, "wins_per_team_per_season_by_ot")
def calculate_mean_score_per_team_per_season():
    """Get the average score per team per season and persist it.

    Fixes: the original bound the results table to a local named ``pd``,
    shadowing the pandas module alias used by the sibling aggregations, and
    used ``DataFrame.append``, which was removed in pandas 2.0; the frames
    are now combined with ``pd.concat``.
    """
    df = get_table("t_original_regular_season_compact_results")
    # cover case team == winner
    df_scores_winner = df[["season", "w_team_id", "w_score"]].copy()
    df_scores_winner.columns = ["season", "team_id", "score"]
    # cover case team == loser
    df_scores_looser = df[["season", "l_team_id", "l_score"]].copy()
    df_scores_looser.columns = ["season", "team_id", "score"]
    # combine winner & loser frames
    df_scores_teams = pd.concat([df_scores_winner, df_scores_looser])
    df_mean = df_scores_teams.groupby(
        ["season", "team_id"])["score"].mean().reset_index()
    df_mean.columns = ["season", "team_id", "score_avg"]
    write_table(df_mean, "mean_score_per_team_per_season")
def calculate_mean_stats_per_team_per_season():
    """Average the detailed box-score stats per (season, team) across the
    winner and loser sides of every tourney game, then persist the result.

    Fixes: ``str.lstrip("w_")`` strips any run of the CHARACTERS 'w' and '_'
    (it is a character set, not a prefix), which silently mangles any column
    starting with those letters (e.g. a hypothetical "wins" -> "ins"); the
    side marker is now removed as an exact prefix. ``DataFrame.append``
    (removed in pandas 2.0) is replaced with ``pd.concat``.
    """
    df_detailed_results = get_table("t_original_ncaa_tourney_detailed_results")

    def _drop_side_prefix(name, side):
        # Strip exactly one leading side marker ('w' or 'l') plus an optional
        # underscore; leave non-matching names (e.g. 'season') untouched.
        if name.startswith(side):
            name = name[len(side):]
            if name.startswith("_"):
                name = name[1:]
        return name

    winner_cols = [
        'season', 'w_team_id', 'w_score', 'wfgm', 'wfga', 'wfgm3', 'wfga3',
        'wftm', 'wfta', 'wor', 'wdr', 'w_ast', 'wto', 'w_stl', 'w_blk', 'wpf'
    ]
    loser_cols = [
        'season', 'l_team_id', 'l_score', 'lfgm', 'lfga', 'lfgm3', 'lfga3',
        'lftm', 'lfta', 'lor', 'ldr', 'l_ast', 'lto', 'l_stl', 'l_blk', 'lpf'
    ]
    df_results_winner = df_detailed_results[winner_cols].copy()
    df_results_loser = df_detailed_results[loser_cols].copy()
    df_results_winner.columns = [
        _drop_side_prefix(c, "w") for c in df_results_winner.columns
    ]
    df_results_loser.columns = [
        _drop_side_prefix(c, "l") for c in df_results_loser.columns
    ]
    df_mean_stats_per_team_per_season = (
        pd.concat([df_results_winner, df_results_loser])
        .groupby(["season", "team_id"]).mean().reset_index())
    write_table(df_mean_stats_per_team_per_season,
                "mean_stats_per_team_per_season")
def omop_drug_exposure(
    drug_exposure_dir,
    prefix="drug_exposure",
    pattern="",
    pattern_re="",
    extension=".csv",
    use_dask=False,
    debug=False,
):
    """Load and return the OMOP DRUG_EXPOSURE table from drug_exposure_dir."""
    # NOTE(review): pattern_re is accepted but never forwarded to get_table —
    # confirm whether get_table needs it.
    print("OMOP DRUG_EXPOSURE", flush=True)
    return get_table(
        drug_exposure_dir,
        prefix=prefix,
        pattern=pattern,
        extension=extension,
        use_dask=use_dask,
        debug=debug,
    )
def calculate_ncaa_losses_per_team_by_ot():
    """Split each team's historic NCAA tourney losses into OT / non-OT counts
    and persist the combined table."""
    df_ncaa = get_table("t_original_ncaa_tourney_compact_results")

    def _count_losses(mask, label):
        # Count losses per losing team for the masked games and name the size
        # column after the OT split.
        counts = df_ncaa[mask].groupby("l_team_id").size().reset_index()
        counts.rename(columns={"l_team_id": "team_id", 0: label}, inplace=True)
        return counts

    losses_no_ot = _count_losses(df_ncaa["num_ot"] == 0, "losses_no_ot")
    losses_ot = _count_losses(df_ncaa["num_ot"] > 0, "losses_ot")
    # Outer join keeps teams that only appear in one of the two splits.
    merged = pd.merge(losses_no_ot, losses_ot, on=["team_id"], how="outer")
    # Missing counts from the outer join become zero and go back to int.
    merged.fillna(0, inplace=True)
    merged["losses_ot"] = merged["losses_ot"].astype(int)
    merged["losses_no_ot"] = merged["losses_no_ot"].astype(int)
    write_table(merged, "ncaa_losses_per_team_by_ot")
def omop_concept(
    concept_dir,
    prefix="concept",
    pattern="",
    pattern_re="",
    extension=".csv",
    use_dask=False,
    debug=False,
):
    """Load and return the OMOP CONCEPT table from concept_dir."""
    # NOTE(review): pattern, pattern_re and extension are accepted but never
    # forwarded to get_table (unlike omop_drug_exposure) — confirm intent.
    print("OMOP CONCEPT", flush=True)
    concept = get_table(concept_dir,
                        prefix=prefix,
                        use_dask=use_dask,
                        debug=debug)
    # FIXME
    # set index to int concept_id
    # concept.set_index('concept_id')
    # Sort by index
    # concept.sort_index(inplace=True)
    return concept
def find_teams_in_division(division, url):
    """Parse the table fetched from ``url`` for the given division."""
    table = get_table(url)
    parse_table(table, division)
def delete(table_name, schema):
    """Delete rows matching the query-string filters; report the outcome."""
    filters = request.args.to_dict()
    table = get_table(schema, table_name)
    if table.delete(**filters):
        return 'Rows successfully deleted in %s.%s by rule %s' % (
            schema, table_name, filters)
    return 'No rows to delete in %s.%s' % (schema, table_name)
def select(table_name, schema):
    """Return all rows matching the query-string filters as JSON."""
    table = get_table(schema, table_name)
    rows = list(table.find(**request.args.to_dict()))
    return jsonify(rows), 200
def update(table_name, schema):
    """Update rows identified by the comma-separated ``keys`` query parameter
    with the JSON request payload; report how many rows changed."""
    payload = request.get_json()
    key_columns = request.args.get('keys').split(',')
    updated = get_table(schema, table_name).update(payload, key_columns)
    return '%s rows successfully updated in %s.%s' % (
        updated, schema, table_name)
@app.route('/health-check')
def health_check():
    """Liveness probe: always reports OK."""
    return jsonify({"status": "OK", "message": "I'm ok."})


@app.route('/register')
def register():
    """Register a login (creating its schema and a random password), or
    return the already-stored credentials for an existing login."""
    login = request.args.get('login')
    if not login:
        return '/register?login=YOUR_LOGIN'
    auth_table = get_table('tech', 'auth')
    existed = check_auth(login=login)
    if not existed:
        existed = {'login': login,
                   'password': ''.join(random.choice(string.ascii_letters)
                                       for _ in range(10))}
        create_schema(login)
        auth_table.insert(existed)
    else:
        existed = existed[0]
    return jsonify(existed)


if __name__ == '__main__':
    # Fix: the CREATE SCHEMA demo statement used to execute at import time as
    # a module-level side effect; it now runs only when executed as a script.
    # create schema from execute, not dataset.
    PsqlDB.connect_to_report_db().execute_sql('CREATE SCHEMA test2')
    print(get_table('test2', 'other_test').insert_many([
        {'name': 'odin', 'date': datetime.today()},
        {'name': 'dva', 'date': datetime.today()},
    ]))
def calculate_ncaa_losses_per_team():
    """Get all NCAA tourney losses per team.

    (Docstring corrected: it previously said "wins" although the aggregation
    counts rows grouped by the losing side, ``l_team_id``.)
    """
    df_ncaa = get_table("t_original_ncaa_tourney_compact_results")
    # One row per game; counting rows per losing team gives its loss total.
    df_ncaa_losses_per_team = df_ncaa.groupby("l_team_id").size().reset_index()
    df_ncaa_losses_per_team.columns = ["team_id", "losses"]
    write_table(df_ncaa_losses_per_team, "ncaa_losses_per_team")
def get_hando(driver, jm_code, gijun_yy):
    """Scrape the director-compensation cap ("hando") table from the report
    currently open in ``driver`` and return its rows, each prefixed with
    (gijun_yy, jm_code). Approved amounts are normalized to thousands of won.
    """
    # Click the "officers and employees" section in the left navigation; its
    # index in the table of contents varies between reports (li[10]..li[13]).
    if ('임원 및 직원' in driver.find_element_by_xpath(
            '//*[@id="ext-gen10"]/div/li[10]/div/a').text
            and '관한 사항' in driver.find_element_by_xpath(
                '//*[@id="ext-gen10"]/div/li[10]/div/a').text):
        driver.find_element_by_xpath(
            '//*[@id="ext-gen10"]/div/li[10]/ul/li[2]/div/a').click()
    elif ('임원 및 직원' in driver.find_element_by_xpath(
            '//*[@id="ext-gen10"]/div/li[11]/div/a').text
            and '관한 사항' in driver.find_element_by_xpath(
                '//*[@id="ext-gen10"]/div/li[11]/div/a').text):
        driver.find_element_by_xpath(
            '//*[@id="ext-gen10"]/div/li[11]/ul/li[2]/div/a').click()
    elif ('임원 및 직원' in driver.find_element_by_xpath(
            '//*[@id="ext-gen10"]/div/li[12]/div/a').text
            and '관한 사항' in driver.find_element_by_xpath(
                '//*[@id="ext-gen10"]/div/li[12]/div/a').text):
        driver.find_element_by_xpath(
            '//*[@id="ext-gen10"]/div/li[12]/ul/li[2]/div/a').click()
    elif ('임원 및 직원' in driver.find_element_by_xpath(
            '//*[@id="ext-gen10"]/div/li[13]/div/a').text
            and '관한 사항' in driver.find_element_by_xpath(
                '//*[@id="ext-gen10"]/div/li[13]/div/a').text):
        driver.find_element_by_xpath(
            '//*[@id="ext-gen10"]/div/li[13]/ul/li[2]/div/a').click()
    # NOTE(review): left_navi is collected but never used below — confirm.
    left_navi = driver.find_elements_by_xpath('//*[@id="ext-gen10"]/div/li')
    driver.switch_to.frame(
        driver.find_element_by_tag_name("iframe"))  # enter content iframe
    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')
    # NOTE(review): `ps` and the jigup_* patterns are unused in this function
    # — presumably shared with a sibling scraper; confirm before removing.
    ps = soup.find_all('p')
    tables = soup.find_all('table')
    # Header patterns locating the cap table (division / headcount /
    # shareholder-approved amount); [ \s]* tolerates spacing inside headers.
    hando_gb = re.compile("구[ \s]*분")
    hando_cnt = re.compile("인[ \s]*원[ \s]*수|인[ \s]*원")
    hando_amt = re.compile("주주총회[ \s]*승인[ \s]*금액|주주[ \s]*총회[ \s]*승인[ \s]*금액")
    jigup_gb = re.compile("구[ \s]*분")
    jigup_cnt = re.compile("인[ \s]*원[ \s]*수|인[ \s]*원")
    jigup_tot = re.compile("보수[ \s]*총액|보[ \s]*수[ \s]*총[ \s]*액")
    jigup_per = re.compile(
        "1인당[ \s]*평균보수액|1[ \s]*인당[ \s]*평균보수액|1[ \s]*인당[ \s]*평균[ \s]*보수액")
    pttn_del = re.compile("합[ \S]*계|[ \S]*계")  # subtotal/total row marker
    pttn_num = re.compile("[0-9]")  # digit extractor for amount cells
    # Find the index of the last table whose text matches all three headers.
    cnt = 0
    hando_num = 0
    for table in tables:
        if hando_gb.search(table.text) and hando_cnt.search(table.text) and hando_amt.search(table.text):
            hando_num = cnt
        cnt = cnt + 1
    hando_arr = []
    hando_unit = 1
    # NOTE(review): a matching table at index 0 is ignored by this strict
    # > 0 check — confirm that is intentional.
    if hando_num > 0:
        tmp_tb = get_table(tables[hando_num])
        d_tmp_tb = []
        # Drop subtotal/total rows (any of the first four cells matching 계).
        for d in range(0, len(tmp_tb)):
            if (not pttn_del.search(tmp_tb[d][0])
                    and not pttn_del.search(tmp_tb[d][1])
                    and not pttn_del.search(tmp_tb[d][2])
                    and not pttn_del.search(tmp_tb[d][3])):
                d_tmp_tb.append(tmp_tb[d])
        hando_arr.extend(d_tmp_tb)
        # NOTE(review): elsewhere in this module get_unit receives `.text`;
        # here it receives the Tag itself — confirm get_unit accepts both.
        hando_unit = get_unit(tables[hando_num - 1])
    for i in range(0, len(hando_arr)):
        # Keep only the digits of the approved-amount cell, then convert to
        # thousands of won based on the detected unit string.
        tmp_amt = "".join(pttn_num.findall(hando_arr[i][2]))
        if not tmp_amt:
            tmp_amt = "0"
        if "백만" in hando_unit:  # million won -> thousands
            hando_arr[i][2] = int(tmp_amt) * 1000
        elif "천" in hando_unit:  # already thousands of won
            hando_arr[i][2] = int(tmp_amt)
        elif "억" in hando_unit:  # hundred-million won -> thousands
            hando_arr[i][2] = int(tmp_amt) * 100000
        elif hando_unit == "원":  # won -> thousands (rounded)
            hando_arr[i][2] = round(int(tmp_amt) / 1000)
        # Prefix each row with the reporting year and the stock code.
        hando_arr[i].insert(0, jm_code)
        hando_arr[i].insert(0, gijun_yy)
    return hando_arr
def table(self):
    """Return the first table parsed from this object's source."""
    return get_table(self.source)[0]
import csv
import sys
import os
from selenium import webdriver
from utils import pause, load_config, get_table, login, open_url

# Script entry: load credentials and the target URL from configuration.yml,
# authenticate through a fresh Chrome session, then fetch the page table.
CONFIG = load_config("configuration.yml")
email_address = CONFIG["CREDENTIALS"]["USERNAME"]
password = CONFIG["CREDENTIALS"]["PASSWORD"]
url = CONFIG["URL"]

driver = webdriver.Chrome()
open_url(driver, url)
login(driver, email_address, password)
# Re-open the target URL after authenticating, then scrape it.
open_url(driver, url)
get_table(driver)
def generate_patient_db(
    demographics_path,
    meddra_extractions_dir,
    drug_exposure_dir,
    concept_dir,
    output_dir,
    debug,
    use_dask,
):
    """Build a PatientDB from demographics, MedDRA NLP extractions and OMOP
    tables; attach events, visits and demographics; dump it to output_dir.

    Exits the process (status 0) if no events are found.
    """
    # Create patient DB to store data
    patients = PatientDB(name="all")
    # Get demographics dataframe
    demographics = get_df(demographics_path, use_dask=use_dask, debug=debug)
    ### NLP TABLES ###
    # Get meddra extractions dataframe
    meddra_extractions_pattern = "*_*"
    meddra_extractions_pattern_re = ".*_.*"
    meddra_extractions = get_table(
        meddra_extractions_dir,
        prefix="all_POS_batch",
        pattern=meddra_extractions_pattern,
        pattern_re=meddra_extractions_pattern_re,
        extension=".parquet",
        use_dask=use_dask,
        debug=debug,
    )
    meddra_extractions_columns = sorted(meddra_extractions.columns.tolist())
    print(f"meddra extractions column names:\n\t{meddra_extractions_columns}",
          flush=True)
    ### OMOP TABLES ###
    # OMOP DRUG_EXPOSURE table
    drug_exposure_pattern = "0000000000*"
    drug_exposure_pattern_re = "0000000000.*"
    drug_exposure = omop_drug_exposure(
        drug_exposure_dir,
        prefix="drug_exposure",
        pattern=drug_exposure_pattern,
        pattern_re=drug_exposure_pattern_re,
        extension=".csv",
        use_dask=use_dask,
        debug=debug,
    )
    drug_exposure_columns = sorted(drug_exposure.columns.tolist())
    print(f"drug exposure column names:\n\t{drug_exposure_columns}",
          flush=True)
    # OMOP CONCEPT table
    concept = omop_concept(concept_dir, use_dask=use_dask, debug=debug)
    concept_columns = sorted(concept.columns.tolist())
    print(f"concept column names:\n\t{concept_columns}", flush=True)
    # import pdb;pdb.set_trace()
    patient_ids = get_all_patient_ids(demographics, meddra_extractions,
                                      drug_exposure, use_dask=use_dask)
    # NOTE(review): use_dask is forced to False here while every other call
    # forwards the caller's flag — confirm this is deliberate.
    get_events(patients, concept, meddra_extractions, drug_exposure,
               use_dask=False)
    if not patients.data["events"]:
        print("Empty events dict! Exiting...", flush=True)
        sys.exit(0)
    print(f"Found {patients.num_events()} events", flush=True)
    print("Filter out patient IDs that don't have any events", flush=True)
    patient_ids = patients.select_non_empty_patients(patient_ids)
    print("Generate patients from IDs", flush=True)
    patients.generate_patients_from_ids(patient_ids)
    # import pdb
    # pdb.set_trace()
    # print('Get all patient visit dates...')
    # patient_visit_dates = \
    #     get_all_patient_visit_dates(patients, meddra_extractions)
    # unique_dates = get_dates(meddra_extractions, args.use_dask)
    # unique_date_strs = [date_obj_to_str(d) for d in unique_dates]
    # patient_visit_dates = \
    #     create_patient_visit_dates(patient_ids, unique_date_strs)
    # print('Creating patient visits...')
    # create_patient_visits(patients, patient_visit_dates)
    # print('Attach visits to patients')
    # patients.attach_visits_to_patients(patient_ids)
    # import pdb
    # pdb.set_trace()
    # FIXME
    print("Attach events to visits...", flush=True)
    patients.attach_events_to_visits()
    # import pdb
    # pdb.set_trace()
    print("Attach demographic information to patients", flush=True)
    patients.add_demographic_info(demographics, use_dask)
    # import pdb
    # pdb.set_trace()
    print("Dump patients to a file", flush=True)
    patients.dump(output_dir, "patients", "jsonl", unique=True)