def load_in_batches(dataframe, tableName, totalBatches):
    """Append ``dataframe`` to a Teradata table in ``totalBatches`` slices.

    A context is opened with ``create_context``, the rows are cut into
    ``totalBatches`` contiguous, near-equal slices, and each slice is
    appended to ``tableName`` via ``copy_to_sql(..., if_exists='append')``.
    The context is removed on every exit path.

    Args:
        dataframe: Frame to upload; must support ``len(dataframe.index)``
            and positional slicing ``dataframe[i:j]`` (presumably a pandas
            DataFrame -- confirm against callers).
        tableName: Name of the target table.
        totalBatches: Number of slices to split the rows into.
    """

    def split_equal(value, parts):
        """Return ``parts`` increasing end offsets; the last equals ``value``."""
        value = float(value)
        return [int(i * value / parts) for i in range(1, parts + 1)]

    try:
        create_context(host='td........org',
                       user=credentials.short_name_usr_ntid,
                       password=credentials.user_ntid_pswrd,
                       database='user_dw',
                       logmech="LDAP")

        total_rows = len(dataframe.index)
        print('total_rows', total_rows)

        start = 0
        for stop in split_equal(total_rows, totalBatches):
            print('Inserting Dataframe',
                  'df[' + str(start) + ':' + str(stop) + ']')
            rows = dataframe[start:stop]
            copy_to_sql(rows, table_name=tableName, if_exists='append')
            # Slices are end-exclusive, so the next batch starts exactly at
            # ``stop``; starting at ``stop + 1`` would drop one row per batch.
            start = stop
    finally:
        remove_context()  # if connection not closed the script will hang
def score(data_conf, model_conf, **kwargs):
    """Score the rows of ``data_conf["table"]`` and store the predictions.

    Loads the model from ``artifacts/input/model.joblib``, opens a Teradata
    context from the ``AOA_CONN_*`` environment variables, pulls the input
    table into pandas, predicts on the ``model.feature_names`` columns and
    writes a ``pred`` / ``PatientId`` table to ``data_conf["predictions"]``,
    replacing any existing table.

    Args:
        data_conf: Mapping read for ``"table"``, ``"predictions"`` and an
            optional ``"schema"`` entry.
        model_conf: Not used by this function.
        **kwargs: Not used by this function.
    """
    estimator = joblib.load("artifacts/input/model.joblib")

    conn_host = os.environ["AOA_CONN_HOST"]
    conn_user = os.environ["AOA_CONN_USERNAME"]
    conn_password = os.environ["AOA_CONN_PASSWORD"]

    # A missing or empty "schema" entry means no explicit database is passed.
    if "schema" in data_conf and data_conf["schema"] != "":
        target_db = data_conf["schema"]
    else:
        target_db = None

    create_context(host=conn_host,
                   username=conn_user,
                   password=conn_password,
                   database=target_db)

    # convert to pandas to use locally
    features_pdf = DataFrame(data_conf["table"]).to_pandas()

    print("Scoring")
    raw_predictions = estimator.predict(features_pdf[estimator.feature_names])
    print("Finished Scoring")

    # create result dataframe and store in Teradata
    result = pd.DataFrame(raw_predictions, columns=["pred"])
    result["PatientId"] = features_pdf["PatientId"].values
    copy_to_sql(df=result,
                table_name=data_conf["predictions"],
                index=False,
                if_exists="replace")
#print("Apple Mobility Finished! " + timestampStr)

#############################################################
# 2) Covid Cases
#############################################################
# Bind ``datetime`` once, to the class. The section previously rebound the
# name class -> module -> class; the binding left for later code is unchanged.
from datetime import datetime

# Load the NYT county-level case file and stamp each row with the load time.
url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'
df = pd.read_csv(url)
df['current_dttm'] = datetime.today()
df = df.rename(columns={'date': 'date_key'})

# Replace the staging table with the freshly downloaded data.
copy_to_sql(df = df, table_name = "STG_covid19_stats", schema_name=params.SchemaName , primary_index = ['date_key'], if_exists = 'replace')

# Report completion time in US/Pacific. An aware "now" in UTC replaces the
# deprecated naive ``utcnow()`` + ``localize`` pair.
dateTimeObj = datetime.now(pytz.utc).astimezone(pytz.timezone('US/Pacific'))
timestampStr = dateTimeObj.strftime("%d-%b-%Y (%H:%M:%S.%f)")
print("Covid Cases Finished! " + timestampStr)

#############################################################
# 3) Covid Projections
#############################################################
from urllib.request import urlopen
import requests
from pathlib import Path

url = 'https://ihmecovid19storage.blob.core.windows.net/latest/ihme-covid19.zip'
username=params.MyUser, password=params.Password, temp_database_name=params.SchemaName, logmech=params.LogMech) ############################################################# # Manual Loads ############################################################# pda = pd.read_sql('DATABASE ' + params.SchemaName, con) # DIM_GEO_LOCATION_T url = 'https://raw.githubusercontent.com/golestm/RTN/master/data/DIM_GEO_LOCATION_T.txt' df = pd.read_csv(url, sep="|", doublequote=True, encoding='latin-1') copy_to_sql(df=df, table_name="TEMP_DIM_GEO_LOCATION_T", schema_name=params.SchemaName, if_exists='replace') pd.read_sql('DATABASE ' + params.SchemaName, con) pd.read_sql('DELETE FROM DIM_GEO_LOCATION_T;', con) pd.read_sql( 'INSERT INTO DIM_GEO_LOCATION_T SELECT * FROM TEMP_DIM_GEO_LOCATION_T;', con) print("DIM_GEO_LOCATION_T Finished!") # DIM_ZIP_COUNTY_MSA_MAP_RAW url = 'https://raw.githubusercontent.com/golestm/RTN/master/data/DIM_ZIP_COUNTY_MSA_MAP_RAW.txt' df = pd.read_csv(url, sep="|", doublequote=True, encoding='latin-1', dtype={