def insert(table_name, schema):
    """Store the JSON request payload as new rows in ``schema.table_name``.

    A single JSON object is wrapped in a list so the rest of the handler
    only deals with one shape. Returns a human-readable status string.
    """
    rows = request.get_json()
    if isinstance(rows, dict):
        rows = [rows]
    add_timestamp(rows)
    table = get_table(schema, table_name)
    table.insert_many(rows)
    return '%s successfully stored in %s.%s' % (len(rows), schema, table_name)
def find_stadiums_within_seat_range(min_seats, max_seats, url):
    """Print every team whose stadium capacity lies in [min_seats, max_seats]."""
    html_table = get_table(url)
    capacity_by_team = convert_to_dictionary(
        get_football_teams(html_table), get_stadium_capacity(html_table))
    for team, seats in capacity_by_team.items():
        if min_seats <= seats <= max_seats:
            print("{0:>20}: {1:10}".format(team, str(seats)))
def calculate_losses_per_team_per_season():
    """Get losses per season per team and persist the aggregate table."""
    df = get_table("t_original_regular_season_compact_results")
    # One row per game; counting rows grouped by the losing side gives losses.
    losses = df.groupby(["season", "l_team_id"]).size().reset_index()
    losses.columns = ["season", "team_id", "losses"]
    write_table(losses, "losses_per_team_per_season")
def calculate_wins_per_team_per_season():
    """Get wins per season per team and persist the aggregate table."""
    df = get_table("t_original_regular_season_compact_results")
    # One row per game; counting rows grouped by the winning side gives wins.
    wins = df.groupby(["season", "w_team_id"]).size().reset_index()
    wins.columns = ["season", "team_id", "wins"]
    write_table(wins, "wins_per_team_per_season")
def find_teams_with_bounds(latitude, longitude, url):
    """Print each team whose coordinates exceed the latitude OR the longitude."""
    table = get_table(url)
    team_coords = convert_to_dictionary(
        get_football_teams(table), get_coordinates(table))
    for team, coords in team_coords.items():
        # NOTE(review): `or` keeps a team exceeding either bound; confirm this
        # is intended rather than an "inside the bounding box" (`and`) check.
        if coords[0] >= latitude or coords[1] >= longitude:
            print("{team:>20}: {latitude:>10}°N, {longitude:>10}°W".format(
                team=team,
                latitude=str(coords[0]),
                longitude=str(coords[1])))
def calculate_seed_rank_per_team_per_season():
    """Split raw tourney seed strings into a numeric rank and a region letter."""
    df = get_table("t_original_ncaa_tourney_seeds")
    # A seed looks like "W01a": region letter, two-digit rank, and an optional
    # a/b play-in suffix (which might be of interest later on, so it is simply
    # dropped along with the original column).
    df["seed_rank"] = df["seed"].apply(lambda s: int(s[1:3]))
    df["seed_region"] = df["seed"].apply(lambda s: s[0])
    df.drop("seed", axis=1, inplace=True)
    write_table(df, "seed_rank_region_per_team_per_season")
def register():
    """Register a login (creating its schema and a random password), or
    return the already-stored credentials for an existing login."""
    login = request.args.get('login')
    if not login:
        return '/register?login=YOUR_LOGIN'
    auth_table = get_table('tech', 'auth')
    records = check_auth(login=login)
    if records:
        account = records[0]
    else:
        password = ''.join(
            random.choice(string.ascii_letters) for _ in range(10))
        account = {'login': login, 'password': password}
        create_schema(login)
        auth_table.insert(account)
    return jsonify(account)
def ahorros_para_lograr_meta():
    """Compute the regular deposit required to hit a savings goal and return
    the full projected schedule as JSON."""
    data = parse_data(request.get_json())
    ini_dep = data.get('ini_dep')
    fin_bal = data.get('fin_bal')
    freq = data.get('freq')
    num_of_years = data.get('num_of_years')
    rate = data.get('rate')
    dep_when = data.get('dep_when')
    time_scale, rows_per_page = data.get('time_scale')
    periods, periods_m, periods_a = get_periods(freq, num_of_years)
    # Future value of the initial deposit alone, then the payment that closes
    # the gap between that and the requested final balance.
    period_rate = rate / (100 * freq)
    n_periods = freq * num_of_years
    fv = fut_val(period_rate, n_periods, ini_dep)
    reg_dep = -1 * payment(period_rate, n_periods, 0, fin_bal + fv, dep_when)
    deposits, reg_deps, extra_deps = get_deposits(
        ini_dep, reg_dep, 0, 0, 0, periods)
    interests, agg_interests, agg_deposits, balances = get_balances(
        periods, deposits, ini_dep, rate, freq, dep_when)
    payload = {
        'reg_dep': reg_dep,
        'time_scale': time_scale,
        'total_dep': sum(deposits),
        'total_int': sum(interests),
        'fin_bal': balances[-1],
        'periods': periods,
        'agg_deposits': agg_deposits,
        'agg_interests': agg_interests,
        'balances': balances,
        'table': get_table(periods, deposits, interests, balances),
        'table_m': get_table_m(periods_m, deposits, interests, balances, freq),
        'table_a': get_table_a(periods_a, deposits, interests, balances, freq),
    }
    return jsonify(payload), 200, HEADERS
def get_isa_bosu(driver, meet_seq, rcp_no, cursor):
    """Scrape the outside-director compensation tables from the report
    currently loaded in ``driver`` and store each via ``isa_bosu_db``.

    Any failure is logged and swallowed so one bad report does not stop the
    crawl.
    """
    try:
        # Click the "outside director compensation" entry in the left table
        # of contents; its position shifts by one when the li[3] heading does
        # not contain the expected text.
        if '사외이사' in driver.find_element_by_xpath(
                '//*[@id="ext-gen10"]/div/li[3]/div/a').text:
            driver.find_element_by_xpath(
                '//*[@id="ext-gen10"]/div/li[3]/ul/li[2]/div/a').click()
        else:
            driver.find_element_by_xpath(
                '//*[@id="ext-gen10"]/div/li[4]/ul/li[2]/div/a').click()
        time.sleep(1)  # give the document time to load
        driver.switch_to.frame(
            driver.find_element_by_tag_name("iframe"))  # enter content iframe
        # Parse every table in the loaded document.
        html = driver.page_source
        soup = BeautifulSoup(html, 'html.parser')
        tables = soup.find_all('table')
        # Indices of the director-compensation tables among `tables`.
        isa_bosu_num = set_isa_bosu_table(tables)
        isa_bosu_cnt = 0
        if len(isa_bosu_num) > 0:
            # Monetary unit: read from the first table unless the target table
            # is itself first, in which case won is assumed.
            if isa_bosu_num[0] > 0:
                bosu_unit = get_unit(tables[0].text)
            else:
                bosu_unit = '원'
            for bosu in isa_bosu_num:
                num_arr = [1, 2, 3, 4]  # numeric columns to normalize
                isa_bosu_tb = get_bosu_edit_per(
                    get_table(tables[bosu]), num_arr)
                if check_empty_table(isa_bosu_tb) == 0:
                    continue  # skip tables with no usable rows
                isa_bosu_db(meet_seq, rcp_no, isa_bosu_tb, bosu_unit, cursor)
                isa_bosu_cnt = isa_bosu_cnt + 1
        else:
            isa_bosu_cnt = 0
        info_logger.info('[3] directors pay success.')
        info_logger.info(
            '[3] directors pay table count[{0}]'.format(isa_bosu_cnt))
    except Exception as e:
        error_logger.error('[3] directors pay fail. [{0}] : {1}'.format(
            rcp_no, e))
def calculadora_de_ahorros():
    """Project a savings plan (regular plus extra deposits) and return the
    full schedule as JSON."""
    data = parse_data(request.get_json())
    ini_dep = data.get('ini_dep')
    reg_dep = data.get('reg_dep')
    freq = data.get('freq')
    num_of_years = data.get('num_of_years')
    rate = data.get('rate')
    extra_dep = data.get('extra_dep')
    extra_dep_start = data.get('extra_dep_start')
    extra_dep_f = data.get('extra_dep_f')
    dep_when = data.get('dep_when')
    time_scale, rows_per_page = data.get('time_scale')
    periods, periods_m, periods_a = get_periods(freq, num_of_years)
    deposits, reg_deps, extra_deps = get_deposits(
        ini_dep, reg_dep, extra_dep, extra_dep_start, extra_dep_f, periods)
    interests, agg_interests, agg_deposits, balances = get_balances(
        periods, deposits, ini_dep, rate, freq, dep_when)
    payload = {
        'time_scale': time_scale,
        'total_dep': sum(deposits),
        'total_int': sum(interests),
        'fin_bal': balances[-1],
        'periods': periods,
        'agg_deposits': agg_deposits,
        'agg_interests': agg_interests,
        'balances': balances,
        'table': get_table(periods, deposits, interests, balances),
        'table_m': get_table_m(periods_m, deposits, interests, balances, freq),
        'table_a': get_table_a(periods_a, deposits, interests, balances, freq),
    }
    return jsonify(payload), 200, HEADERS
def get_transaction_total(driver, meet_seq, rcp_no, cursor):
    """Scrape the "transactions above a certain size with the largest
    shareholder" tables from the report currently loaded in ``driver`` and
    store each via ``trans_total_db``.

    Any failure is logged and swallowed so one bad report does not stop the
    crawl.
    """
    try:
        # Click the section entry in the left table of contents; its position
        # shifts by one when the li[4] heading does not contain the expected
        # text.
        if '최대주주' in driver.find_element_by_xpath(
                '//*[@id="ext-gen10"]/div/li[4]/div/a').text:
            driver.find_element_by_xpath(
                '//*[@id="ext-gen10"]/div/li[4]/ul/li[2]/div/a').click()
        else:
            driver.find_element_by_xpath(
                '//*[@id="ext-gen10"]/div/li[5]/ul/li[2]/div/a').click()
        time.sleep(1)  # give the document time to load
        driver.switch_to.frame(
            driver.find_element_by_tag_name("iframe"))  # enter content iframe
        # Parse every table in the loaded document.
        html = driver.page_source
        soup = BeautifulSoup(html, 'html.parser')
        tables = soup.find_all('table')
        # Indices of the transaction-total tables among `tables`.
        trans_total_num = set_transaction_table(tables)
        trans_total_cnt = 0
        if len(trans_total_num) > 0:
            # Monetary unit: read from the first table unless the target table
            # is itself first, in which case won is assumed.
            if trans_total_num[0] > 0:
                trans_unit = get_unit(tables[0].text)
            else:
                trans_unit = '원'
            for trans in trans_total_num:
                num_arr = [3, 4]  # numeric columns to normalize
                trans_tb = get_bosu_edit_per(
                    get_table(tables[trans]), num_arr)
                if check_empty_table(trans_tb) == 0:
                    continue  # skip tables with no usable rows
                trans_total_db(meet_seq, rcp_no, trans_unit, trans_tb, cursor)
                trans_total_cnt = trans_total_cnt + 1
        else:
            trans_total_cnt = 0
        info_logger.info('[5] total transaction success.')
        info_logger.info(
            '[5] total transaction table count[{0}]'.format(trans_total_cnt))
    except Exception as e:
        error_logger.error('[5] total transaction fail. [{0}] : {1}'.format(
            rcp_no, e))
def calculate_wins_per_team_per_season_by_ot():
    """Get regular season wins split (binary) by OT"""
    df = get_table("t_original_regular_season_compact_results")

    def _count_wins(mask, label):
        # Count wins per (season, winner) for the masked games and name the
        # size column after the OT split.
        counts = df[mask].groupby(["season", "w_team_id"]).size().reset_index()
        counts.rename(columns={"w_team_id": "team_id", 0: label}, inplace=True)
        return counts

    wins_no_ot = _count_wins(df["num_ot"] == 0, "wins_no_ot")
    wins_ot = _count_wins(df["num_ot"] > 0, "wins_ot")
    # Outer(!) join so teams that never — or only — won via OT are kept.
    merged = pd.merge(wins_no_ot, wins_ot,
                      on=["season", "team_id"], how="outer")
    # Missing counts from the outer join become zero and go back to int.
    merged.fillna(0, inplace=True)
    merged["wins_ot"] = merged["wins_ot"].astype(int)
    merged["wins_no_ot"] = merged["wins_no_ot"].astype(int)
    write_table(merged, "wins_per_team_per_season_by_ot")
def calculate_mean_score_per_team_per_season():
    """Get the average score per team per season and persist it.

    Fixes: the original bound the results table to a local named ``pd``,
    shadowing the pandas module alias used by the sibling aggregations, and
    used ``DataFrame.append``, which was removed in pandas 2.0; the frames
    are now combined with ``pd.concat``.
    """
    df = get_table("t_original_regular_season_compact_results")
    # cover case team == winner
    df_scores_winner = df[["season", "w_team_id", "w_score"]].copy()
    df_scores_winner.columns = ["season", "team_id", "score"]
    # cover case team == loser
    df_scores_looser = df[["season", "l_team_id", "l_score"]].copy()
    df_scores_looser.columns = ["season", "team_id", "score"]
    # combine winner & loser frames
    df_scores_teams = pd.concat([df_scores_winner, df_scores_looser])
    df_mean = df_scores_teams.groupby(
        ["season", "team_id"])["score"].mean().reset_index()
    df_mean.columns = ["season", "team_id", "score_avg"]
    write_table(df_mean, "mean_score_per_team_per_season")
def calculate_mean_stats_per_team_per_season():
    """Average the detailed box-score stats per (season, team) across the
    winner and loser sides of every tourney game, then persist the result.

    Fixes: ``str.lstrip("w_")`` strips any run of the CHARACTERS 'w' and '_'
    (it is a character set, not a prefix), which silently mangles any column
    starting with those letters (e.g. a hypothetical "wins" -> "ins"); the
    side marker is now removed as an exact prefix. ``DataFrame.append``
    (removed in pandas 2.0) is replaced with ``pd.concat``.
    """
    df_detailed_results = get_table("t_original_ncaa_tourney_detailed_results")

    def _drop_side_prefix(name, side):
        # Strip exactly one leading side marker ('w' or 'l') plus an optional
        # underscore; leave non-matching names (e.g. 'season') untouched.
        if name.startswith(side):
            name = name[len(side):]
            if name.startswith("_"):
                name = name[1:]
        return name

    winner_cols = [
        'season', 'w_team_id', 'w_score', 'wfgm', 'wfga', 'wfgm3', 'wfga3',
        'wftm', 'wfta', 'wor', 'wdr', 'w_ast', 'wto', 'w_stl', 'w_blk', 'wpf'
    ]
    loser_cols = [
        'season', 'l_team_id', 'l_score', 'lfgm', 'lfga', 'lfgm3', 'lfga3',
        'lftm', 'lfta', 'lor', 'ldr', 'l_ast', 'lto', 'l_stl', 'l_blk', 'lpf'
    ]
    df_results_winner = df_detailed_results[winner_cols].copy()
    df_results_loser = df_detailed_results[loser_cols].copy()
    df_results_winner.columns = [
        _drop_side_prefix(c, "w") for c in df_results_winner.columns
    ]
    df_results_loser.columns = [
        _drop_side_prefix(c, "l") for c in df_results_loser.columns
    ]
    df_mean_stats_per_team_per_season = (
        pd.concat([df_results_winner, df_results_loser])
        .groupby(["season", "team_id"]).mean().reset_index())
    write_table(df_mean_stats_per_team_per_season,
                "mean_stats_per_team_per_season")
def omop_drug_exposure(
    drug_exposure_dir,
    prefix="drug_exposure",
    pattern="",
    pattern_re="",
    extension=".csv",
    use_dask=False,
    debug=False,
):
    """Load and return the OMOP DRUG_EXPOSURE table from drug_exposure_dir."""
    # NOTE(review): pattern_re is accepted but never forwarded to get_table —
    # confirm whether get_table needs it.
    print("OMOP DRUG_EXPOSURE", flush=True)
    return get_table(
        drug_exposure_dir,
        prefix=prefix,
        pattern=pattern,
        extension=extension,
        use_dask=use_dask,
        debug=debug,
    )
def calculate_ncaa_losses_per_team_by_ot():
    """Split each team's historic NCAA tourney losses into OT / non-OT counts
    and persist the combined table."""
    df_ncaa = get_table("t_original_ncaa_tourney_compact_results")

    def _count_losses(mask, label):
        # Count losses per losing team for the masked games and name the size
        # column after the OT split.
        counts = df_ncaa[mask].groupby("l_team_id").size().reset_index()
        counts.rename(columns={"l_team_id": "team_id", 0: label}, inplace=True)
        return counts

    losses_no_ot = _count_losses(df_ncaa["num_ot"] == 0, "losses_no_ot")
    losses_ot = _count_losses(df_ncaa["num_ot"] > 0, "losses_ot")
    # Outer join keeps teams that only appear in one of the two splits.
    merged = pd.merge(losses_no_ot, losses_ot, on=["team_id"], how="outer")
    # Missing counts from the outer join become zero and go back to int.
    merged.fillna(0, inplace=True)
    merged["losses_ot"] = merged["losses_ot"].astype(int)
    merged["losses_no_ot"] = merged["losses_no_ot"].astype(int)
    write_table(merged, "ncaa_losses_per_team_by_ot")
def omop_concept(
    concept_dir,
    prefix="concept",
    pattern="",
    pattern_re="",
    extension=".csv",
    use_dask=False,
    debug=False,
):
    """Load and return the OMOP CONCEPT table from concept_dir."""
    # NOTE(review): pattern, pattern_re and extension are accepted but never
    # forwarded to get_table (unlike omop_drug_exposure) — confirm intent.
    print("OMOP CONCEPT", flush=True)
    concept = get_table(concept_dir,
                        prefix=prefix,
                        use_dask=use_dask,
                        debug=debug)
    # FIXME
    # set index to int concept_id
    # concept.set_index('concept_id')
    # Sort by index
    # concept.sort_index(inplace=True)
    return concept
def find_teams_in_division(division, url):
    """Parse the table fetched from ``url`` for the given division."""
    table = get_table(url)
    parse_table(table, division)
def delete(table_name, schema):
    """Delete rows matching the query-string filters; report the outcome."""
    filters = request.args.to_dict()
    table = get_table(schema, table_name)
    if table.delete(**filters):
        return 'Rows successfully deleted in %s.%s by rule %s' % (
            schema, table_name, filters)
    return 'No rows to delete in %s.%s' % (schema, table_name)
def select(table_name, schema):
    """Return all rows matching the query-string filters as JSON."""
    table = get_table(schema, table_name)
    rows = list(table.find(**request.args.to_dict()))
    return jsonify(rows), 200
def update(table_name, schema):
    """Update rows identified by the comma-separated ``keys`` query parameter
    with the JSON request payload; report how many rows changed."""
    payload = request.get_json()
    key_columns = request.args.get('keys').split(',')
    updated = get_table(schema, table_name).update(payload, key_columns)
    return '%s rows successfully updated in %s.%s' % (
        updated, schema, table_name)
@app.route('/health-check')
def health_check():
    """Liveness probe: always reports OK."""
    return jsonify({"status": "OK", "message": "I'm ok."})


@app.route('/register')
def register():
    """Register a login (creating its schema and a random password), or
    return the already-stored credentials for an existing login."""
    login = request.args.get('login')
    if not login:
        return '/register?login=YOUR_LOGIN'
    auth_table = get_table('tech', 'auth')
    existed = check_auth(login=login)
    if not existed:
        existed = {'login': login,
                   'password': ''.join(random.choice(string.ascii_letters)
                                       for _ in range(10))}
        create_schema(login)
        auth_table.insert(existed)
    else:
        existed = existed[0]
    return jsonify(existed)


if __name__ == '__main__':
    # Fix: the CREATE SCHEMA demo statement used to execute at import time as
    # a module-level side effect; it now runs only when executed as a script.
    # create schema from execute, not dataset.
    PsqlDB.connect_to_report_db().execute_sql('CREATE SCHEMA test2')
    print(get_table('test2', 'other_test').insert_many([
        {'name': 'odin', 'date': datetime.today()},
        {'name': 'dva', 'date': datetime.today()},
    ]))
def calculate_ncaa_losses_per_team():
    """Get all NCAA tourney losses per team.

    (Docstring corrected: it previously said "wins" although the aggregation
    counts rows grouped by the losing side, ``l_team_id``.)
    """
    df_ncaa = get_table("t_original_ncaa_tourney_compact_results")
    # One row per game; counting rows per losing team gives its loss total.
    df_ncaa_losses_per_team = df_ncaa.groupby("l_team_id").size().reset_index()
    df_ncaa_losses_per_team.columns = ["team_id", "losses"]
    write_table(df_ncaa_losses_per_team, "ncaa_losses_per_team")
def get_hando(driver, jm_code, gijun_yy):
    """Scrape the director-compensation cap ("hando") table from the report
    currently open in ``driver`` and return its rows, each prefixed with
    (gijun_yy, jm_code). Approved amounts are normalized to thousands of won.
    """
    # Click the "officers and employees" section in the left navigation; its
    # index in the table of contents varies between reports (li[10]..li[13]).
    if ('임원 및 직원' in driver.find_element_by_xpath(
            '//*[@id="ext-gen10"]/div/li[10]/div/a').text
            and '관한 사항' in driver.find_element_by_xpath(
                '//*[@id="ext-gen10"]/div/li[10]/div/a').text):
        driver.find_element_by_xpath(
            '//*[@id="ext-gen10"]/div/li[10]/ul/li[2]/div/a').click()
    elif ('임원 및 직원' in driver.find_element_by_xpath(
            '//*[@id="ext-gen10"]/div/li[11]/div/a').text
            and '관한 사항' in driver.find_element_by_xpath(
                '//*[@id="ext-gen10"]/div/li[11]/div/a').text):
        driver.find_element_by_xpath(
            '//*[@id="ext-gen10"]/div/li[11]/ul/li[2]/div/a').click()
    elif ('임원 및 직원' in driver.find_element_by_xpath(
            '//*[@id="ext-gen10"]/div/li[12]/div/a').text
            and '관한 사항' in driver.find_element_by_xpath(
                '//*[@id="ext-gen10"]/div/li[12]/div/a').text):
        driver.find_element_by_xpath(
            '//*[@id="ext-gen10"]/div/li[12]/ul/li[2]/div/a').click()
    elif ('임원 및 직원' in driver.find_element_by_xpath(
            '//*[@id="ext-gen10"]/div/li[13]/div/a').text
            and '관한 사항' in driver.find_element_by_xpath(
                '//*[@id="ext-gen10"]/div/li[13]/div/a').text):
        driver.find_element_by_xpath(
            '//*[@id="ext-gen10"]/div/li[13]/ul/li[2]/div/a').click()
    # NOTE(review): left_navi is collected but never used below — confirm.
    left_navi = driver.find_elements_by_xpath('//*[@id="ext-gen10"]/div/li')
    driver.switch_to.frame(
        driver.find_element_by_tag_name("iframe"))  # enter content iframe
    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')
    # NOTE(review): `ps` and the jigup_* patterns are unused in this function
    # — presumably shared with a sibling scraper; confirm before removing.
    ps = soup.find_all('p')
    tables = soup.find_all('table')
    # Header patterns locating the cap table (division / headcount /
    # shareholder-approved amount); [ \s]* tolerates spacing inside headers.
    hando_gb = re.compile("구[ \s]*분")
    hando_cnt = re.compile("인[ \s]*원[ \s]*수|인[ \s]*원")
    hando_amt = re.compile("주주총회[ \s]*승인[ \s]*금액|주주[ \s]*총회[ \s]*승인[ \s]*금액")
    jigup_gb = re.compile("구[ \s]*분")
    jigup_cnt = re.compile("인[ \s]*원[ \s]*수|인[ \s]*원")
    jigup_tot = re.compile("보수[ \s]*총액|보[ \s]*수[ \s]*총[ \s]*액")
    jigup_per = re.compile(
        "1인당[ \s]*평균보수액|1[ \s]*인당[ \s]*평균보수액|1[ \s]*인당[ \s]*평균[ \s]*보수액")
    pttn_del = re.compile("합[ \S]*계|[ \S]*계")  # subtotal/total row marker
    pttn_num = re.compile("[0-9]")  # digit extractor for amount cells
    # Find the index of the last table whose text matches all three headers.
    cnt = 0
    hando_num = 0
    for table in tables:
        if hando_gb.search(table.text) and hando_cnt.search(table.text) and hando_amt.search(table.text):
            hando_num = cnt
        cnt = cnt + 1
    hando_arr = []
    hando_unit = 1
    # NOTE(review): a matching table at index 0 is ignored by this strict
    # > 0 check — confirm that is intentional.
    if hando_num > 0:
        tmp_tb = get_table(tables[hando_num])
        d_tmp_tb = []
        # Drop subtotal/total rows (any of the first four cells matching 계).
        for d in range(0, len(tmp_tb)):
            if (not pttn_del.search(tmp_tb[d][0])
                    and not pttn_del.search(tmp_tb[d][1])
                    and not pttn_del.search(tmp_tb[d][2])
                    and not pttn_del.search(tmp_tb[d][3])):
                d_tmp_tb.append(tmp_tb[d])
        hando_arr.extend(d_tmp_tb)
        # NOTE(review): elsewhere in this module get_unit receives `.text`;
        # here it receives the Tag itself — confirm get_unit accepts both.
        hando_unit = get_unit(tables[hando_num - 1])
    for i in range(0, len(hando_arr)):
        # Keep only the digits of the approved-amount cell, then convert to
        # thousands of won based on the detected unit string.
        tmp_amt = "".join(pttn_num.findall(hando_arr[i][2]))
        if not tmp_amt:
            tmp_amt = "0"
        if "백만" in hando_unit:  # million won -> thousands
            hando_arr[i][2] = int(tmp_amt) * 1000
        elif "천" in hando_unit:  # already thousands of won
            hando_arr[i][2] = int(tmp_amt)
        elif "억" in hando_unit:  # hundred-million won -> thousands
            hando_arr[i][2] = int(tmp_amt) * 100000
        elif hando_unit == "원":  # won -> thousands (rounded)
            hando_arr[i][2] = round(int(tmp_amt) / 1000)
        # Prefix each row with the reporting year and the stock code.
        hando_arr[i].insert(0, jm_code)
        hando_arr[i].insert(0, gijun_yy)
    return hando_arr
def table(self):
    """Return the first table parsed from this object's source."""
    return get_table(self.source)[0]
import csv
import sys
import os
from selenium import webdriver
from utils import pause, load_config, get_table, login, open_url

# Script entry: load credentials and the target URL from configuration.yml,
# authenticate through a fresh Chrome session, then fetch the page table.
CONFIG = load_config("configuration.yml")
email_address = CONFIG["CREDENTIALS"]["USERNAME"]
password = CONFIG["CREDENTIALS"]["PASSWORD"]
url = CONFIG["URL"]

driver = webdriver.Chrome()
open_url(driver, url)
login(driver, email_address, password)
# Re-open the target URL after authenticating, then scrape it.
open_url(driver, url)
get_table(driver)
def generate_patient_db(
    demographics_path,
    meddra_extractions_dir,
    drug_exposure_dir,
    concept_dir,
    output_dir,
    debug,
    use_dask,
):
    """Build a PatientDB from demographics, MedDRA NLP extractions and OMOP
    tables; attach events, visits and demographics; dump it to output_dir.

    Exits the process (status 0) if no events are found.
    """
    # Create patient DB to store data
    patients = PatientDB(name="all")
    # Get demographics dataframe
    demographics = get_df(demographics_path, use_dask=use_dask, debug=debug)
    ### NLP TABLES ###
    # Get meddra extractions dataframe
    meddra_extractions_pattern = "*_*"
    meddra_extractions_pattern_re = ".*_.*"
    meddra_extractions = get_table(
        meddra_extractions_dir,
        prefix="all_POS_batch",
        pattern=meddra_extractions_pattern,
        pattern_re=meddra_extractions_pattern_re,
        extension=".parquet",
        use_dask=use_dask,
        debug=debug,
    )
    meddra_extractions_columns = sorted(meddra_extractions.columns.tolist())
    print(f"meddra extractions column names:\n\t{meddra_extractions_columns}",
          flush=True)
    ### OMOP TABLES ###
    # OMOP DRUG_EXPOSURE table
    drug_exposure_pattern = "0000000000*"
    drug_exposure_pattern_re = "0000000000.*"
    drug_exposure = omop_drug_exposure(
        drug_exposure_dir,
        prefix="drug_exposure",
        pattern=drug_exposure_pattern,
        pattern_re=drug_exposure_pattern_re,
        extension=".csv",
        use_dask=use_dask,
        debug=debug,
    )
    drug_exposure_columns = sorted(drug_exposure.columns.tolist())
    print(f"drug exposure column names:\n\t{drug_exposure_columns}",
          flush=True)
    # OMOP CONCEPT table
    concept = omop_concept(concept_dir, use_dask=use_dask, debug=debug)
    concept_columns = sorted(concept.columns.tolist())
    print(f"concept column names:\n\t{concept_columns}", flush=True)
    # import pdb;pdb.set_trace()
    patient_ids = get_all_patient_ids(demographics, meddra_extractions,
                                      drug_exposure, use_dask=use_dask)
    # NOTE(review): use_dask is forced to False here while every other call
    # forwards the caller's flag — confirm this is deliberate.
    get_events(patients, concept, meddra_extractions, drug_exposure,
               use_dask=False)
    if not patients.data["events"]:
        print("Empty events dict! Exiting...", flush=True)
        sys.exit(0)
    print(f"Found {patients.num_events()} events", flush=True)
    print("Filter out patient IDs that don't have any events", flush=True)
    patient_ids = patients.select_non_empty_patients(patient_ids)
    print("Generate patients from IDs", flush=True)
    patients.generate_patients_from_ids(patient_ids)
    # import pdb
    # pdb.set_trace()
    # print('Get all patient visit dates...')
    # patient_visit_dates = \
    #     get_all_patient_visit_dates(patients, meddra_extractions)
    # unique_dates = get_dates(meddra_extractions, args.use_dask)
    # unique_date_strs = [date_obj_to_str(d) for d in unique_dates]
    # patient_visit_dates = \
    #     create_patient_visit_dates(patient_ids, unique_date_strs)
    # print('Creating patient visits...')
    # create_patient_visits(patients, patient_visit_dates)
    # print('Attach visits to patients')
    # patients.attach_visits_to_patients(patient_ids)
    # import pdb
    # pdb.set_trace()
    # FIXME
    print("Attach events to visits...", flush=True)
    patients.attach_events_to_visits()
    # import pdb
    # pdb.set_trace()
    print("Attach demographic information to patients", flush=True)
    patients.add_demographic_info(demographics, use_dask)
    # import pdb
    # pdb.set_trace()
    print("Dump patients to a file", flush=True)
    patients.dump(output_dir, "patients", "jsonl", unique=True)