def update_schema(schema, wrds_id): table_list = get_wrds_tables(schema, wrds_id) regex_list = [ "wrds_company_networks", "company_profile_sr_mgrs", "wrds_dir_profile_emp", "dir_profile_emp", "wrds_dir_profile_all", "wrds_org_composition" ] regex = "(" + "|".join(regex_list) + ")" for table in table_list: # These tables are way too big and are redundant if re.search("individual_networks", table): continue # Table doesn't work and seems not to be maintained. if table == "wrds_company_dir_names": continue fix_missing = re.search(regex, table) is not None wrds_update(table_name=table, schema="boardex", wrds_id=wrds_id, fix_missing=fix_missing)
def update_equities(year): updated = wrds_update("pr_equities_" + str(year), "rpna", rename="group=group_") if updated: engine.execute("CREATE INDEX ON rpna.pr_equities_" + str(year) + " (rp_entity_id, rpna_date_utc)") return updated
def update_schema(schema, wrds_id): table_list = get_wrds_tables(schema, wrds_id) regex_list = [ "wrds_company_networks", "company_profile_sr_mgrs", "wrds_dir_profile_emp", "dir_profile_emp", "wrds_dir_profile_all", "wrds_org_composition" ] regex = "(" + "|".join(regex_list) + ")" for table in table_list: if re.search("individual_networks", table): continue fix_missing = re.search(regex, table) is not None wrds_update(table_name=table, schema=schema, wrds_id=wrds_id, fix_missing=fix_missing)
#!/usr/bin/env python3 from wrds2pg import wrds_update, run_file_sql from wrds2pg import make_engine, wrds_id engine = make_engine() wrds_update("act_epsus", "ibes") wrds_update("actpsumu_epsus", "ibes") wrds_update("actu_epsus", "ibes") updated = wrds_update("detu_epsus", "ibes") if updated: engine.execute("SET maintenance_work_mem='1999MB'") engine.execute("CREATE INDEX ON ibes.detu_epsus (ticker, revdats)") updated = wrds_update("det_xepsus", "ibes") if updated: engine.execute("CREATE INDEX ON ibes.det_xepsus (ticker, revdats)") wrds_update("curr", "ibes") wrds_update("det_epsus", "ibes") wrds_update("detu_epsint", "ibes") wrds_update("id", "ibes") wrds_update("idsum", "ibes") wrds_update("surpsum", "ibes") updated = wrds_update("surpsumu", "ibes") if updated: engine.execute("SET maintenance_work_mem='1999MB'") engine.execute("CREATE INDEX ON ibes.surpsumu (ticker, statpers)") wrds_update("statsum_epsus", "ibes")
#!/usr/bin/env python3 from wrds2pg import make_engine, wrds_update updated = wrds_update("wrds_keydev", "ciq", fix_missing=True) if updated: engine = make_engine() engine.execute("CREATE INDEX ON ciq.wrds_keydev (keydeveventtypeid)") engine.execute("CREATE INDEX ON ciq.wrds_keydev (companyid)") engine.dispose() updated = wrds_update("ciqkeydev", "ciq", fix_cr=True) if updated: engine = make_engine() engine.execute("CREATE INDEX ON ciq.ciqkeydev (keydevid)") engine.dispose() wrds_update("ciqkeydeveventtype", "ciq", fix_cr=True) wrds_update("ciqkeydevstatus", "ciq") wrds_update("ciqkeydevobjectroletype", "ciq", fix_cr=True) updated = wrds_update("ciqkeydevtoobjecttoeventtype", "ciq") if updated: engine = make_engine() engine.execute("CREATE INDEX ON ciq.ciqkeydevtoobjecttoeventtype (keydevid)") engine.dispose()
#!/usr/bin/env python3 from wrds2pg import wrds_update, make_engine # engine.execute("CREATE ROLE risk") # engine.execute("GRANT USAGE ON SCHEMA risk TO risk") update = wrds_update("va_proposals", "risk") update = wrds_update("vavoteresults", "risk") if update: sql = """ ALTER TABLE risk.vavoteresults ALTER COLUMN companyid TYPE integer; ALTER TABLE risk.vavoteresults ALTER COLUMN meetingid TYPE integer; ALTER TABLE risk.vavoteresults ALTER COLUMN itemonagendaid TYPE integer; ALTER TABLE risk.vavoteresults ALTER COLUMN seqnumber TYPE integer; UPDATE risk.vavoteresults SET voterequirement = 0.6667 WHERE voterequirement=66.67; UPDATE risk.vavoteresults SET base = 'F+A' WHERE base='F A'; UPDATE risk.vavoteresults SET base = 'F+A+AB' WHERE base IN ('F A AB', 'F+A+B'); UPDATE risk.vavoteresults SET (votedfor, votedagainst, votedabstain)=(2224433656, 93561790, 34814753) WHERE itemonagendaid=6019529; UPDATE risk.vavoteresults SET (votedfor, votedagainst, votedabstain, ticker)= (10540862,1329889,790539,'KIDE') WHERE itemonagendaid=6039421;
#!/usr/bin/env python3 from wrds2pg import wrds_update wrds_update("wciklink_gvkey", "wrdssec") wrds_update("wciklink_names", "wrdssec") wrds_update("wciklink_cusip", "wrdssec", drop="tmatch")
#!/usr/bin/env python3 from wrds2pg import wrds_update, wrds_id from sqlalchemy import create_engine def get_wrds_tables(schema, wrds_id): from sqlalchemy import MetaData wrds_url = "postgresql://%[email protected]:9737/wrds" wrds_engine = create_engine(wrds_url % wrds_id, connect_args = {'sslmode':'require'}) metadata = MetaData(wrds_engine, schema=schema) metadata.reflect(schema=schema) table_list = [key.name for key in metadata.tables.values()] wrds_engine.dispose() return table_list ff_tables = get_wrds_tables("ff_all", wrds_id) for table in ff_tables: wrds_update(table, "ff")
#!/usr/bin/env python3 from wrds2pg import wrds_update updated = wrds_update("issuer", "cusipm") updated = wrds_update("issue", "cusipm")
#!/usr/bin/env python3 from wrds2pg import wrds_update # updated = wrds_update("firm_ratio", "wrdsapps") updated = wrds_update("bondret", "wrdsapps", col_types= {'t_volume':'text', 't_dvolume':'text', 't_spread':'text', 'yield':'text', 'ret_eom':'text', 'ret_l5m':'text', 'ret_ldm':'text'}) wrds_update("ibcrsphist", "wrdsapps")
#!/usr/bin/env python3 from wrds2pg import wrds_update wrds_update("voteanalysis_npx", "risk") wrds_update("proposals", "risk")
#!/usr/bin/env python3 from wrds2pg import wrds_update wrds_update("globalvoteresults", "risk") wrds_update("vavoteresults", "risk") wrds_update("issrec", "risk") wrds_update("rmdirectors", "risk") wrds_update("gset", "risk") wrds_update("votes", "risk") wrds_update("voteanalysis_npx", "risk") wrds_update("directors", "risk", drop="votecref")
#!/usr/bin/env python3 from wrds2pg import wrds_update # updated = wrds_update("firm_ratio", "wrdsapps") updated = wrds_update("bondret", "wrdsapps")
#!/usr/bin/env python3 from wrds2pg import wrds_update updated = wrds_update("amend", "tfn") updated = wrds_update("avgreturns", "tfn") updated = wrds_update("company", "tfn", fix_cr=True) updated = wrds_update("form144", "tfn") updated = wrds_update("header", "tfn") updated = wrds_update("idfhist", "tfn") updated = wrds_update("idfnames", "tfn") updated = wrds_update("rule10b5", "tfn") updated = wrds_update("table1", "tfn") updated = wrds_update("table2", "tfn") updated = wrds_update("s12type1", "tfn") updated = wrds_update("s12type2", "tfn") updated = wrds_update("s34", "tfn") updated = wrds_update("s34type1", "tfn") updated = wrds_update("s34type2", "tfn")
""" if col_lst: modify_lst = [mod_col(col, schema, table, engine) for col in col_lst] else: print("No columns supplied!") return None if modify_lst: print("Columns changed to boolean:", modify_lst) return modify_lst # Transfer agents updated = wrds_update("feed41_transfer_agents", "audit") # Non-reliance restatements updated = wrds_update("auditnonreli", "audit", drop="prior: match: closest: disc_text:") if updated: col_to_bool(engine, "audit", "auditnonreli", col_lst=[ "res_accounting", "res_fraud", "res_cler_err", "res_adverse", "res_improves", "res_other", "res_sec_invest" ])
#!/usr/bin/env python3 from wrds2pg import wrds_update import subprocess import os updated = wrds_update("history", "kld", fix_missing=True) if updated: print("Creating 'long' data table.") print(os.getenv("PGHOST")) subprocess.call (["Rscript", "--vanilla", "kld/create_history_long.R"]) updated = wrds_update("kldnames", "kld", fix_missing=True)
else: print("No columns supplied!") return None if modify_lst: print("Columns changed to boolean:", modify_lst) return modify_lst # Auditors updated = wrds_update("auditorsinfo", "audit", col_types={ "auditor_key": "integer", "pcaob_reg_num": "integer", "pcaob_app_num": "integer", "aud_loc_key": "integer", "eventdate_aud_fkey": "integer", "auditor_pcaob_reg": "boolean" }) # Auditor Changes updated = wrds_update("auditchange", "audit", col_types={ "auditor_change_key": "integer", "dismiss_key": "integer", "engaged_auditor_key": "integer", "dismissed_gc": "boolean", "dismissed_disagree": "boolean", "auditor_resigned": "boolean",
#!/usr/bin/env python3 from sqlalchemy import create_engine from wrds2pg import wrds_update, get_process, wrds_process_to_pg, \ run_file_sql, make_engine, wrds_id, set_table_comment from time import gmtime, strftime dsf = wrds_update("dsf", "crsp", fix_missing=True) engine = make_engine() # Update Treasury yield table crsp.tfz_ft # From wrds: # The error is correct, the table "tfz_ft," does not exist. Behind the scenes this web # query form is joining two tables on the fly. The tables this query is joining are # "crsp.tfz_idx" and either "crsp.tfz_dly_ft" or "crsp.tfz_mth_ft" (depending on if # you want daily or monthly data) by the variable "kytreasnox." # Here are some links to the information about these tables: # https://wrds-web.wharton.upenn.edu/wrds/tools/variable.cfm?library_id=137&file_id=77140 # https://wrds-web.wharton.upenn.edu/wrds/tools/variable.cfm?library_id=137&file_id=77137 # https://wrds-web.wharton.upenn.edu/wrds/tools/variable.cfm?library_id=137&file_id=77147 tfz_idx = wrds_update("tfz_idx", "crsp", fix_missing=True) tfz_dly_ft = wrds_update("tfz_dly_ft", "crsp", fix_missing=True, drop="td:") if True: # tfz_idx or tfz_dly_ft: sql = """ DROP TABLE IF EXISTS crsp.tfz_ft; CREATE TABLE crsp.tfz_ft AS (SELECT kytreasnox, tidxfam, ttermtype, ttermlbl, caldt, rdtreasno, rdcrspid
#!/usr/bin/env python3 from wrds2pg import wrds_update, make_engine # engine.execute("CREATE ROLE risk") # engine.execute("GRANT USAGE ON SCHEMA risk TO risk") update = wrds_update("va_proposals", "risk") update = wrds_update("vavoteresults", "risk") if update: sql = """ ALTER TABLE risk.vavoteresults ALTER COLUMN companyid TYPE integer; ALTER TABLE risk.vavoteresults ALTER COLUMN meetingid TYPE integer; ALTER TABLE risk.vavoteresults ALTER COLUMN itemonagendaid TYPE integer; ALTER TABLE risk.vavoteresults ALTER COLUMN seqnumber TYPE integer; UPDATE risk.vavoteresults SET voterequirement = 0.6667 WHERE voterequirement=66.67; UPDATE risk.vavoteresults SET base = 'F+A' WHERE base='F A'; UPDATE risk.vavoteresults SET base = 'F+A+AB' WHERE base IN ('F A AB', 'F+A+B'); UPDATE risk.vavoteresults SET (votedfor, votedagainst, votedabstain)=(2224433656, 93561790, 34814753) WHERE itemonagendaid=6019529; UPDATE risk.vavoteresults SET (votedfor, votedagainst, votedabstain, ticker)= (10540862,1329889,790539,'KIDE') WHERE itemonagendaid=6039421;
#!/usr/bin/env python3 from wrds2pg import wrds_update, make_engine engine = make_engine() updated = wrds_update("wrds_gvkey", "ciq", fix_missing=True) if updated: engine.execute("CREATE INDEX ON ciq.wrds_gvkey (companyid)") updated = wrds_update("wrds_cusip", "ciq", fix_missing=True) if updated: engine.execute("CREATE INDEX ON ciq.wrds_cusip (companyid)") updated = wrds_update("wrds_cik", "ciq", fix_missing=True) if updated: engine.execute("CREATE INDEX ON ciq.wrds_cik (companyid)") updated = wrds_update("ciqfininstance", "ciq") if updated: engine.execute("CREATE INDEX ON ciq.ciqfininstance (financialperiodid)") engine.execute("ANALYZE ciq.ciqfininstance") updated = wrds_update("ciqfinperiod", "ciq") if updated: engine.execute("CREATE INDEX ON ciq.ciqfinperiod (financialperiodid)") engine.execute("CREATE INDEX ON ciq.ciqfinperiod (companyid)") engine.execute("ANALYZE ciq.ciqfinperiod") updated = wrds_update("ciqgvkeyiid", "ciq") if updated: engine.execute("CREATE INDEX ON ciq.ciqgvkeyiid (relatedcompanyid)")
#!/usr/bin/env python3 from wrds2pg import wrds_update updated = wrds_update("mflink1", "mfl") updated = wrds_update("mflink2", "mfl")
#!/usr/bin/env python3 from wrds2pg import wrds_update, wrds_id from sqlalchemy import create_engine def get_wrds_tables(schema, wrds_id): from sqlalchemy import MetaData wrds_engine = create_engine( "postgresql://%[email protected]:9737/wrds" % wrds_id, connect_args={'sslmode': 'require'}) metadata = MetaData(wrds_engine, schema=schema) metadata.reflect(schema=schema) table_list = [key.name for key in metadata.tables.values()] wrds_engine.dispose() return table_list dealscan_tables = get_wrds_tables("tr_dealscan", wrds_id) for table in dealscan_tables: wrds_update(table, "dealscan")
#!/usr/bin/env python3 from sqlalchemy import MetaData from wrds2pg import wrds_update, run_file_sql, make_engine engine = make_engine() avail_years = range(2000, 2020) updated = wrds_update("rp_entity_mapping", "rpna") def update_equities(year): updated = wrds_update("pr_equities_" + str(year), "rpna", rename="group=group_") if updated: engine.execute("CREATE INDEX ON rpna.pr_equities_" + str(year) + " (rp_entity_id, rpna_date_utc)") return updated updated = [update_equities(year) for year in avail_years] def select(cols): sql = "SELECT " + ", ".join(cols) return sql def get_sql(year, cols): return (select(cols) + "\nFROM rpna.pr_equities_%s\n" % year)
#!/usr/bin/env python3 from time import gmtime, strftime from wrds2pg import wrds_update, make_engine, run_file_sql engine = make_engine() updated = wrds_update("g_exrt_dly", "comp") updated = wrds_update("g_funda", "comp", fix_missing=True) if updated: engine.execute("CREATE INDEX ON comp.g_funda (gvkey)") updated = wrds_update("g_secd", "comp") if updated: engine.execute("CREATE INDEX ON comp.g_secd (gvkey)") updated = wrds_update("g_security", "comp") updated = wrds_update("g_company", "comp") updated = wrds_update("sec_history", "comp") updated = wrds_update("idxcst_his", "comp") updated = wrds_update("anncomp", "comp") if updated: engine.execute("CREATE INDEX ON comp.anncomp (gvkey)") updated = wrds_update("adsprate", "comp") if updated: engine.execute("CREATE INDEX ON comp.adsprate (gvkey, datadate)") updated = wrds_update("co_hgic", "comp")
engine = make_engine() # Update Treasury yield table crsp.tfz_ft # From WRDS: # The error is correct, the table "tfz_ft," does not exist. Behind the scenes this web # query form is joining two tables on the fly. The tables this query is joining are # "crsp.tfz_idx" and either "crsp.tfz_dly_ft" or "crsp.tfz_mth_ft" (depending on if # you want daily or monthly data) by the variable "kytreasnox." # Here are some links to the information about these tables: # https://wrds-web.wharton.upenn.edu/wrds/tools/variable.cfm?library_id=137&file_id=77140 # https://wrds-web.wharton.upenn.edu/wrds/tools/variable.cfm?library_id=137&file_id=77137 # https://wrds-web.wharton.upenn.edu/wrds/tools/variable.cfm?library_id=137&file_id=77147 tfz_idx = wrds_update("tfz_idx", "crsp", col_types = {'kytreasnox':'integer', 'ttermtype':'integer', 'rdtreasno':'integer'}) tfz_dly_ft = wrds_update("tfz_dly_ft", "crsp", col_types = {'tdyearstm':'float8', 'tdduratn':'float8', 'tdytm':'float8', 'tdbid':'float8', 'tdask':'float8', 'tdnomprc':'float8', 'tdaccint':'float8', 'tdretadj':'float8'}) if tfz_idx or tfz_dly_ft: sql = """ DROP TABLE IF EXISTS crsp.tfz_ft; CREATE TABLE crsp.tfz_ft AS (SELECT kytreasnox, tidxfam, ttermtype, ttermlbl, caldt, rdtreasno, rdcrspid FROM crsp.tfz_idx INNER JOIN crsp.tfz_dly_ft