def storage(config: Dict[str, Any]) -> SchedulingStorage:
    s = initialize_storage(config)
    yield s
    # clear out the table from all data
    # dropping seems to cause issues
    s.driver.engine.execute(
        sa_text("DELETE FROM schedule;").execution_options(autocommit=True))
def exec_cmd(self, command, params=None, autocommit=True):
    """Executes a raw SQL command with optional bound parameters."""
    bindparams = []
    if params:
        for key, value in params.items():
            bindparams.append(bindparam(key, value=value))
    cmd = sa_text(command).execution_options(autocommit=autocommit)
    return self.engine.execute(cmd.bindparams(*bindparams))
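A minimal usage sketch for exec_cmd (the call site and values are illustrative, not from the original): because values travel through bindparam() rather than string interpolation, this method is safe for user-supplied input, unlike the truncate() and exec_sproc() helpers below.

# Hypothetical call against the same wrapper class; ':year' is a bound
# parameter, not an interpolated string.
sql = PostgreSQL()
result = sql.exec_cmd(
    "DELETE FROM schoolmint_ApplicationData_raw WHERE school_year = :year",
    params={"year": "2020-21"},
)
print(result.rowcount)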
def truncate(self, tablename):
    """Truncates a given table. Faster than a delete and reseeds identity values.

    .. note::
        **Security Warning**: This command leverages interpolated strings and
        as such is vulnerable to SQL-injection. Do not use in conjunction with
        arbitrary user input. Instead, use .delete()

    :param tablename: Name of the table to truncate
    :type tablename: string
    """
    sql_str = f"TRUNCATE TABLE {self.schema}.{tablename}"
    command = sa_text(sql_str).execution_options(autocommit=True)
    self.engine.execute(command)
def exec_sproc(self, stored_procedure, autocommit=False):
    """Executes a stored procedure.

    .. note::
        **Security Warning**: This command leverages interpolated strings and
        as such is vulnerable to SQL-injection. Do not use in conjunction with
        arbitrary user input.

    :param stored_procedure: The name of the stored procedure to be executed.
    :type stored_procedure: string
    :param autocommit: Determines how to handle transactions (default=False)
    :type autocommit: boolean
    :return: Stored procedure results
    :rtype: `SQLAlchemy.ResultProxy <https://docs.sqlalchemy.org/en/13/core/connections.html#sqlalchemy.engine.ResultProxy>`_
    """
    sql_str = f"EXEC {self.schema}.{stored_procedure}"
    command = sa_text(sql_str).execution_options(autocommit=autocommit)
    return self.engine.execute(command)
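A hedged usage sketch: the ResultProxy returned by exec_sproc can be iterated directly (the procedure name is borrowed from the migration example below; whether it actually returns rows is an assumption).

# Hypothetical: run a sproc with autocommit and drain any rows it returns.
rows = sql.exec_sproc("sproc_schoolmint_Create_FactDailyStatus", autocommit=True)
for row in rows:
    print(row)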
def truncate(self, schema, table_name, exec_sql):
    """Truncates a table on SQL Server.

    INPUT:
        schema: The database schema
        table_name: The table name
        exec_sql: Toggle for printing/executing the statement
                  (1 = print, 0 = execute)
    OUTPUT:
        None
    """
    sql = "TRUNCATE TABLE {}.{}".format(schema, table_name)
    if exec_sql == 1:
        print(sql)
    elif exec_sql == 0:
        try:
            conn = self.engine.connect()
            conn.execute(sa_text(sql).execution_options(autocommit=True))
            conn.close()
            print('{}.{} was truncated'.format(schema, table_name))
        except Exception as e:
            print('Failed to truncate table')
            raise
import pandas as pd
import os
import glob
from sqlalchemy import create_engine
from sqlalchemy.sql import text as sa_text

uid = 'sa'
pwd = 'Passw0rd'
server = 'localhost'
db = 'fixtures_v2'
connstr = ("mssql://%s:%s@%s/%s?driver=FreeTDS&port=1433&"
           "odbc_options='TDS_Version=8.0'" % (uid, pwd, server, db))
engine = create_engine(connstr).connect()

files = glob.glob('bbc/bbc*.csv')
for file in files:
    print('processing...', file)
    engine.execute(
        sa_text('''EXEC dbo.PrepStaging''').execution_options(autocommit=True))
    df = pd.read_csv(file)
    df.to_sql(name='bbc_fixtures', schema='staging', con=engine,
              if_exists='append', index=False)
    engine.execute(
        sa_text('''EXEC dbo.LoadBBCResults''').execution_options(autocommit=True))
    # move file to archive folder
    os.rename(file, os.path.join('archive', os.path.basename(file)))
engine.close()
def migrate_postgres():
    try:
        sql = PostgreSQL()
        # Tables
        sql.exec_cmd_from_file(
            "sql/postgresql/tables/schoolmint_ApplicationData_changehistory.sql")
        sql.exec_cmd_from_file(
            "sql/postgresql/tables/schoolmint_ApplicationData_raw_backup.sql")
        sql.exec_cmd_from_file(
            "sql/postgresql/tables/schoolmint_ApplicationData_raw.sql")
        sql.exec_cmd_from_file(
            "sql/postgresql/tables/schoolmint_ApplicationDataIndex_raw_backup.sql")
        sql.exec_cmd_from_file(
            "sql/postgresql/tables/schoolmint_ApplicationDataIndex_raw.sql")
        sql.exec_cmd_from_file(
            "sql/postgresql/tables/schoolmint_ApplicationStatuses.sql")
        sql.exec_cmd_from_file(
            "sql/postgresql/tables/schoolmint_FactDailyStatus.sql")
        sql.exec_cmd_from_file(
            "sql/postgresql/tables/schoolmint_lk_Enrollment.sql")
        sql.exec_cmd_from_file(
            "sql/postgresql/tables/schoolmint_ProgressMonitoring.sql")
        sql.exec_cmd_from_file(
            "sql/postgresql/tables/schoolmint_SchoolCodes.sql")
        # Load lookup tables
        enrollments = pd.read_csv("sql/data/lk_enrollment.csv")
        sql.insert_into("schoolmint_lk_Enrollment", enrollments)
        application_statuses = pd.read_csv("sql/data/application_statuses.csv")
        sql.insert_into(
            "schoolmint_ApplicationStatuses",
            application_statuses,
            dtype={"Application": Boolean, "Registration": Boolean},
        )
        # Views
        sql.exec_cmd_from_file(
            "sql/postgresql/views/vw_schoolmint_AppStatusList.sql")
        sql.exec_cmd_from_file(
            "sql/postgresql/views/vw_schoolmint_FactDailyStatus.sql")
        sql.exec_cmd_from_file(
            "sql/postgresql/views/vw_schoolmint_FactDailyStatus_InterimTargets.sql")
        sql.exec_cmd_from_file(
            "sql/postgresql/views/vw_schoolmint_Index_Demographics.sql")
        sql_str = sql._read_sql_file(
            "sql/postgresql/views/vw_schoolmint_ProgressMonitoring.sql")
        command = sa_text(sql_str).execution_options(autocommit=True)
        sql.engine.execute(command)
        sql.exec_cmd_from_file(
            "sql/postgresql/views/vw_schoolmint_FactProgressMonitoring.sql")
        # Stored Procedures
        sql.exec_cmd_from_file(
            "sql/postgresql/sprocs/sproc_schoolmint_Create_ChangeTracking_Entries.sql")
        sql.exec_cmd_from_file(
            "sql/postgresql/sprocs/sproc_schoolmint_Create_FactDailyStatus.sql")
        sql.exec_cmd_from_file(
            "sql/postgresql/sprocs/sproc_schoolmint_Index_PostProcess.sql")
        sql.exec_cmd_from_file(
            "sql/postgresql/sprocs/sproc_schoolmint_Index_PrepareTables.sql")
        sql.exec_cmd_from_file(
            "sql/postgresql/sprocs/sproc_schoolmint_Raw_PostProcess.sql")
        sql.exec_cmd_from_file(
            "sql/postgresql/sprocs/sproc_schoolmint_Raw_PrepareTables.sql")
    except ProgrammingError as e:
        if "Cannot open database" in str(e):
            print("ERROR: First create your database and schema manually")
    except Exception as e:
        print(e)
        print(traceback.format_exc())
def main(mytimer: func.TimerRequest) -> None:
    utc_timestamp = datetime.datetime.utcnow().replace(
        tzinfo=datetime.timezone.utc).isoformat()
    if mytimer.past_due:
        logging.info('The timer is past due!')

    today = datetime.datetime.now().date()
    date = today
    try:
        df = pd.read_excel(
            "https://www.ecdc.europa.eu/sites/default/files/documents/COVID-19-geographic-disbtribution-worldwide-%s.xlsx"
            % date.strftime("%Y-%m-%d"))
        logging.info(f"date: {today}")
    except Exception as e:
        logging.info(e)
        logging.info(f"No data for date {today}, yet.")
        return

    # ECDC renamed its columns on 2020-03-27; normalize the new names first
    # so the shared rename below applies to both formats.
    change_column_names_date = datetime.date(2020, 3, 27)
    if date >= change_column_names_date:
        df = df.rename(columns={
            'dateRep': 'DateRep',
            'countriesAndTerritories': shared.helpers.country_col,
            'cases': 'infections'
        })
        df['DateRep'] = pd.to_datetime(df['DateRep']).dt.date
    df = df.rename(columns={
        'Countries and territories': shared.helpers.country_col,
        'DateRep': 'date',
        'Deaths': 'deaths',
        'Cases': 'infections'
    })
    df = df[['date', 'infections', 'deaths', shared.helpers.country_col]]
    df = df.sort_values(by=['date', shared.helpers.country_col])
    # Note: the result of this expression is discarded.
    df[df[shared.helpers.country_col] == 'Germany'].groupby(
        by=[shared.helpers.country_col]).cumsum()
    df_cumsum = df.groupby(by=[shared.helpers.country_col]).cumsum()
    df_result = df[['date', shared.helpers.country_col]].join(df_cumsum)

    username = os.environ.get('keyvault_db_username')
    password = os.environ.get('keyvault_db_password')
    params = urllib.parse.quote_plus(
        'Driver={ODBC Driver 17 for SQL Server};Server=tcp:covid19dbserver.database.windows.net,1433;Database=covid19db;Uid=' + username +
        '@covid19dbserver;Pwd=' + password +
        ';Encrypt=yes;TrustServerCertificate=no;Connection Timeout=30;')
    conn_str = 'mssql+pyodbc:///?odbc_connect={}'.format(params)
    engine = create_engine(conn_str, echo=False)

    assert df_result.duplicated().sum() == 0
    table_name = "ECDC"
    table_name_updates = f"{table_name}_updates"
    try:
        # Probe whether the staging table exists, then empty it.
        df_temp = pd.read_sql(
            "select Top(1) * from dbo.%s" % table_name_updates, engine)
        engine.execute(
            sa_text('''TRUNCATE TABLE %s''' % table_name_updates)
            .execution_options(autocommit=True))
    except Exception as e:
        print(e)

    country_col = 'Country/Region'
    dtype_dict = {}
    for col in [country_col]:
        df_result[col] = df_result[col].str.slice(start=0, stop=99)
        dtype_dict[col] = sqlalchemy.types.NVARCHAR(length=100)
    df_result = df_result[['Country/Region', 'infections', 'deaths', 'date']]
    df_result.infections = df_result.infections.fillna(0)
    df_result.deaths = df_result.deaths.fillna(0)
    df_result.to_sql(table_name_updates, engine, if_exists='append',
                     schema='dbo', index=False, chunksize=100,
                     method='multi', dtype=dtype_dict)

    merge_statement = f'''
    MERGE INTO dbo.{table_name} AS Target
    USING (SELECT [Country/Region], infections, deaths, date
           FROM dbo.{table_name_updates}) AS Source
    ON Target.[Country/Region] = Source.[Country/Region]
       AND Target.date = Source.date
    WHEN MATCHED THEN
        UPDATE SET Target.infections = Source.infections,
                   Target.deaths = Source.deaths
    WHEN NOT MATCHED BY TARGET THEN
        INSERT ([Country/Region], infections, deaths, date)
        VALUES (Source.[Country/Region], Source.infections,
                Source.deaths, Source.date);
    '''
    engine.execute(sa_text(merge_statement).execution_options(autocommit=True))
    logging.info('Python timer trigger function ran at %s', utc_timestamp)
def download_insert_hopkins(date):
    df = pd.read_csv(base_url + date.strftime("%m-%d-%Y") + ".csv")
    df = df.rename(columns={'Country_Region': 'Country/Region',
                            'Province_State': 'Province/State',
                            'Last_Update': 'Last Update'})
    df['date'] = pd.to_datetime(df['Last Update']).dt.date
    # Older daily files lack these columns; add them as missing values.
    possibly_missing = ["Admin2", "FIPS", 'Lat', 'Long_']
    for col in possibly_missing:
        if col not in df.columns:
            df[col] = pd.NA
    df_result = df[['Country/Region', 'Province/State', "Admin2", "FIPS",
                    'Lat', 'Long_', 'Confirmed', 'Deaths', 'Recovered',
                    'date']].rename(columns={'Admin2': 'District',
                                             'Long_': 'Long',
                                             'Confirmed': 'infections',
                                             'Deaths': 'deaths',
                                             'Recovered': 'recovered'})
    df_result = shared.helpers.cleanup_df(df_result, key_cols=key_cols)

    username = os.environ.get('keyvault_db_username')
    password = os.environ.get('keyvault_db_password')
    params = urllib.parse.quote_plus(
        'Driver={ODBC Driver 17 for SQL Server};Server=tcp:covid19dbserver.database.windows.net,1433;Database=covid19db;Uid=' + username +
        '@covid19dbserver;Pwd=' + password +
        ';Encrypt=yes;TrustServerCertificate=no;Connection Timeout=30;')
    conn_str = 'mssql+pyodbc:///?odbc_connect={}'.format(params)
    engine = create_engine(conn_str, echo=False)

    assert df_result.duplicated().sum() == 0
    assert df_result.duplicated(subset=key_cols).sum() == 0
    table_name = "Hopkins"
    table_name_updates = f"{table_name}_updates"
    try:
        _ = pd.read_sql(
            f"select Top(1) * from dbo.{table_name_updates}", engine)
        engine.execute(
            sa_text(f'TRUNCATE TABLE {table_name_updates}').execution_options(
                autocommit=True))
    except Exception as e:
        print(e)

    dtype_dict = {}
    for col in shared.helpers.string_cols:
        if col in df_result.columns and df_result[col].notnull().sum() > 0:
            df_result.loc[df_result[col].notnull(), col] = \
                df_result.loc[df_result[col].notnull(), col].str.slice(
                    start=0, stop=99)
            dtype_dict[col] = sqlalchemy.types.NVARCHAR(length=100)
    df_result.to_sql(table_name_updates, engine, if_exists='append',
                     schema='dbo', index=False, chunksize=100,
                     method='multi', dtype=dtype_dict)

    merge_statement = f'''
    MERGE INTO dbo.{table_name} AS Target
    USING (SELECT [Country/Region], [Province/State], District, infections,
                  deaths, recovered, FIPS, Lat, Long, date
           FROM dbo.{table_name_updates}) AS Source
    ON Target.[Country/Region] = Source.[Country/Region]
       AND COALESCE(Target.[Province/State], '') = COALESCE(Source.[Province/State], '')
       AND COALESCE(Target.[District], '') = COALESCE(Source.[District], '')
       AND Target.date = Source.date
    WHEN MATCHED THEN
        UPDATE SET Target.infections = Source.infections,
                   Target.deaths = Source.deaths,
                   Target.recovered = Source.recovered,
                   Target.FIPS = Source.FIPS,
                   Target.Lat = Source.Lat,
                   Target.Long = Source.Long
    WHEN NOT MATCHED BY TARGET THEN
        INSERT ([Country/Region], [Province/State], District, FIPS, Lat, Long,
                infections, deaths, recovered, date)
        VALUES (Source.[Country/Region], Source.[Province/State],
                Source.District, Source.FIPS, Source.Lat, Source.Long,
                Source.infections, Source.deaths, Source.recovered,
                Source.date);
    '''
    engine.execute(sa_text(merge_statement).execution_options(autocommit=True))
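The timer functions above and below all repeat the same recipe: truncate a *_updates staging table, bulk-load it with DataFrame.to_sql, then MERGE into the target. A stripped-down, generalized sketch of that pattern (function and parameter names are hypothetical; like the originals, the dynamic SQL assumes trusted table and column names):

import pandas as pd
from sqlalchemy.sql import text as sa_text


def staged_upsert(df: pd.DataFrame, engine, table_name: str, key_cols: list):
    staging = f"{table_name}_updates"
    # 1) Empty the staging table; ignore the error if it does not exist yet.
    try:
        engine.execute(sa_text(f"TRUNCATE TABLE {staging}")
                       .execution_options(autocommit=True))
    except Exception:
        pass
    # 2) Bulk-load the staging table.
    df.to_sql(staging, engine, if_exists='append', schema='dbo',
              index=False, chunksize=100, method='multi')
    # 3) Merge staging into the target on the key columns.
    on = " AND ".join(f"Target.[{c}] = Source.[{c}]" for c in key_cols)
    val_cols = [c for c in df.columns if c not in key_cols]
    sets = ", ".join(f"Target.[{c}] = Source.[{c}]" for c in val_cols)
    cols = ", ".join(f"[{c}]" for c in df.columns)
    vals = ", ".join(f"Source.[{c}]" for c in df.columns)
    merge = f"""
    MERGE INTO dbo.{table_name} AS Target
    USING (SELECT {cols} FROM dbo.{staging}) AS Source ON {on}
    WHEN MATCHED THEN UPDATE SET {sets}
    WHEN NOT MATCHED BY TARGET THEN INSERT ({cols}) VALUES ({vals});
    """
    engine.execute(sa_text(merge).execution_options(autocommit=True))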
# bcp staging.footballdata_fixtures in ./football-data/football_extract.csv -S localhost -U sa -P Passw0rd -d fixtures_v2 -c -t ','
# sqlcmd -S localhost -U sa -P Passw0rd -d fixtures_v2 -Q "exec dbo.LoadFDFixtures"
import pandas as pd
import os
import glob
from sqlalchemy import create_engine
from sqlalchemy.sql import text as sa_text

uid = 'sa'
pwd = 'Passw0rd'
server = 'localhost'
db = 'fixtures_v2'
connstr = ("mssql://%s:%s@%s/%s?driver=FreeTDS&port=1433&"
           "odbc_options='TDS_Version=8.0'" % (uid, pwd, server, db))
engine = create_engine(connstr).connect()

engine.execute(
    sa_text('''EXEC dbo.PrepStaging''').execution_options(autocommit=True))
df = pd.read_csv('football-data/football_extract.csv')
df.to_sql(name='footballdata_fixtures', schema='staging', con=engine,
          if_exists='append', index=False)
engine.execute(
    sa_text('''EXEC dbo.LoadFDFixtures''').execution_options(autocommit=True))
engine.close()
DF.to_sql("TestData", con=engine, if_exists="append", index=False)

## If the target database enforces a schema
from sqlalchemy.sql import text as sa_text

## method 1
## First create the table in the database with the desired schema
engine = create_engine(
    "mssql+pyodbc://192.168.0.45/DWTVSale?driver=SQL+Server+Native+Client+11.0?trusted_connection=yes",
    echo=False)
connect = engine.connect()
## Truncate Table
engine.execute(
    sa_text('''TRUNCATE TABLE test''').execution_options(autocommit=True))
connect.close()
## use append
DF.to_sql("test", con=engine, if_exists="append", index=False)

## method 2
## use replace
DF.to_sql("test", con=engine, if_exists="replace", index=False)
connect = engine.connect()
## Change SQL Table Schema
engine.execute(
    sa_text('''ALTER TABLE test ALTER COLUMN [E] nvarchar(50)''')
    .execution_options(autocommit=True))
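As an alternative to the post-hoc ALTER COLUMN in method 2, to_sql accepts a dtype mapping (the same technique several of the functions below use), so the replaced table can be created with the right column type up front; a sketch assuming the same DF and engine:

import sqlalchemy

# Hypothetical variant of method 2: declare the column type at creation time
# instead of ALTERing it afterwards.
DF.to_sql("test", con=engine, if_exists="replace", index=False,
          dtype={"E": sqlalchemy.types.NVARCHAR(length=50)})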
def main(mytimer: func.TimerRequest) -> None:
    utc_timestamp = datetime.datetime.utcnow().replace(
        tzinfo=datetime.timezone.utc).isoformat()
    if mytimer.past_due:
        logging.info('The timer is past due!')

    url = "https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Projekte_RKI/Nowcasting_Zahlen.xlsx?__blob=publicationFile"
    req = requests.get(url, stream=True)
    df_result = pd.read_excel(io.BytesIO(req.content), sheet_name='Nowcast_R')
    df_result.columns = ["date", "infections_wo_smoothing",
                         "infections_wo_smoothing_lb_95",
                         "infections_wo_smoothing_ub_95", "infections",
                         "infections_lb_95", "infections_ub_95",
                         "R", "R_lb_95", "R_ub_95",
                         "R_7d", "R_7d_lb_95", "R_7d_ub_95"]
    df_result['date'] = pd.to_datetime(df_result['date'])

    username = os.environ.get('keyvault_db_username')
    password = os.environ.get('keyvault_db_password')
    params = urllib.parse.quote_plus(
        'Driver={ODBC Driver 17 for SQL Server};Server=tcp:covid19dbserver.database.windows.net,1433;Database=covid19db;Uid=' + username +
        '@covid19dbserver;Pwd=' + password +
        ';Encrypt=yes;TrustServerCertificate=no;Connection Timeout=30;')
    conn_str = 'mssql+pyodbc:///?odbc_connect={}'.format(params)
    engine = create_engine(conn_str, echo=False)

    assert df_result.duplicated().sum() == 0
    table_name = "RKINowcast"
    table_name_updates = f"{table_name}_updates"
    try:
        _ = pd.read_sql(
            f"select Top(1) * from dbo.{table_name_updates}", engine)
        engine.execute(
            sa_text(f'TRUNCATE TABLE {table_name_updates}').execution_options(
                autocommit=True))
    except Exception as e:
        print(e)

    df_result.to_sql(table_name_updates, engine, if_exists='append',
                     schema='dbo', index=False, chunksize=100, method='multi')

    merge_statement = f'''
    MERGE INTO dbo.{table_name} AS Target
    USING (SELECT date, infections_wo_smoothing,
                  infections_wo_smoothing_lb_95, infections_wo_smoothing_ub_95,
                  infections, infections_lb_95, infections_ub_95,
                  R, R_lb_95, R_ub_95, R_7d, R_7d_lb_95, R_7d_ub_95
           FROM dbo.{table_name_updates}) AS Source
    ON Target.date = Source.date
    WHEN MATCHED THEN
        UPDATE SET Target.infections_wo_smoothing = Source.infections_wo_smoothing,
                   Target.infections_wo_smoothing_lb_95 = Source.infections_wo_smoothing_lb_95,
                   Target.infections_wo_smoothing_ub_95 = Source.infections_wo_smoothing_ub_95,
                   Target.infections = Source.infections,
                   Target.infections_lb_95 = Source.infections_lb_95,
                   Target.infections_ub_95 = Source.infections_ub_95,
                   Target.R = Source.R,
                   Target.R_lb_95 = Source.R_lb_95,
                   Target.R_ub_95 = Source.R_ub_95,
                   Target.R_7d = Source.R_7d,
                   Target.R_7d_lb_95 = Source.R_7d_lb_95,
                   Target.R_7d_ub_95 = Source.R_7d_ub_95
    WHEN NOT MATCHED BY TARGET THEN
        INSERT (date, infections_wo_smoothing, infections_wo_smoothing_lb_95,
                infections_wo_smoothing_ub_95, infections, infections_lb_95,
                infections_ub_95, R, R_lb_95, R_ub_95,
                R_7d, R_7d_lb_95, R_7d_ub_95)
        VALUES (Source.date, Source.infections_wo_smoothing,
                Source.infections_wo_smoothing_lb_95,
                Source.infections_wo_smoothing_ub_95, Source.infections,
                Source.infections_lb_95, Source.infections_ub_95,
                Source.R, Source.R_lb_95, Source.R_ub_95,
                Source.R_7d, Source.R_7d_lb_95, Source.R_7d_ub_95);
    '''
    engine.execute(sa_text(merge_statement).execution_options(autocommit=True))
    logging.info('Python timer trigger function ran at %s', utc_timestamp)
    df['oi_change'] = df['oi_change'].astype(int)
    print(time.time() - start_time)
    return df
    # df.to_csv('Options Table.csv', index=False)
    # refresh_table(df, "Options")


while True:
    try:
        company_data = get_company_data()
        tbl_name = "Company"
        sql_query = """
        -- Delete all records
        DELETE FROM [""" + tbl_name + """]
        DBCC CHECKIDENT ([""" + tbl_name + """], RESEED, 0)
        """
        engine.execute(sa_text(sql_query).execution_options(autocommit=True))
        refresh_table(company_data, tbl_name)
    except Exception as e:
        print(e)
        print("Could not get Company Data")
    try:
        futures_data = get_futures_data()
        tbl_name = "Futures"
        sql_query = """
        DELETE FROM [""" + tbl_name + """]
        DBCC CHECKIDENT ([""" + tbl_name + """], RESEED, 0)
        """
        engine.execute(sa_text(sql_query).execution_options(autocommit=True))
        refresh_table(futures_data, tbl_name)
    except Exception as e:
        print(e)
async def truncate_users(connection: sa.engine.Connection):
    """Purges all users from the database."""
    connection.execute(sa_text('TRUNCATE TABLE "user"'))
    print("Users table truncated.")
def main(mytimer: func.TimerRequest) -> None:
    utc_timestamp = datetime.datetime.utcnow().replace(
        tzinfo=datetime.timezone.utc).isoformat()
    if mytimer.past_due:
        logging.info('The timer is past due!')

    url = "https://opendata.arcgis.com/datasets/917fc37a709542548cc3be077a786c17_0.geojson"
    response = urlopen(url)
    if response.info().get('Content-Encoding') == 'gzip':
        data = gzip.decompress(response.read())
    else:
        data = response.read()
    jsonObj = json.loads(data)
    df_result = pd.DataFrame(jsonObj['features'])
    df_result = pd.json_normalize(df_result.properties)
    rename_dict = {
        'county': 'county',
        'GEN': 'countyName',
        'BEZ': 'type',
        'BL': 'federalState',
        'EWZ': 'population',
        'cases': 'infections',
        'deaths': 'deaths',
        'SHAPE_Area': 'shapearea',
        'SHAPE_Length': 'shapelength'
    }
    df_result = df_result.rename(columns=rename_dict)
    df_result = df_result[rename_dict.values()].copy()
    df_result['federalState'] = df_result['federalState'].apply(
        shared.helpers.translate_county)
    df_result['date'] = datetime.date.today()

    username = os.environ.get('keyvault_db_username')
    password = os.environ.get('keyvault_db_password')
    params = urllib.parse.quote_plus(
        'Driver={ODBC Driver 17 for SQL Server};Server=tcp:covid19dbserver.database.windows.net,1433;Database=covid19db;Uid=' + username +
        '@covid19dbserver;Pwd=' + password +
        ';Encrypt=yes;TrustServerCertificate=no;Connection Timeout=30;')
    conn_str = 'mssql+pyodbc:///?odbc_connect={}'.format(params)
    engine = create_engine(conn_str, echo=False)

    assert df_result.duplicated().sum() == 0
    table_name = "RKICounties"
    table_name_updates = f"{table_name}_updates"
    try:
        _ = pd.read_sql(f"select Top(1) * from dbo.{table_name_updates}",
                        engine)
        engine.execute(
            sa_text(f'TRUNCATE TABLE {table_name_updates}').execution_options(
                autocommit=True))
    except Exception as e:
        print(e)

    dtype_dict = {}
    for col in shared.helpers.string_cols:
        if col in df_result.columns and df_result[col].notnull().sum() > 0:
            df_result.loc[df_result[col].notnull(), col] = \
                df_result.loc[df_result[col].notnull(), col].str.slice(
                    start=0, stop=99)
            dtype_dict[col] = sqlalchemy.types.NVARCHAR(length=100)
    df_result.to_sql(table_name_updates, engine, if_exists='append',
                     schema='dbo', index=False, chunksize=100,
                     method='multi', dtype=dtype_dict)

    merge_statement = f'''
    MERGE INTO dbo.{table_name} AS Target
    USING (SELECT county, countyname, type, federalstate, population,
                  infections, deaths, shapearea, shapelength, date
           FROM dbo.{table_name_updates}) AS Source
    ON Target.federalstate = Source.federalstate
       AND Target.county = Source.county
       AND Target.date = Source.date
    WHEN MATCHED THEN
        UPDATE SET Target.countyname = Source.countyname,
                   Target.type = Source.type,
                   Target.population = Source.population,
                   Target.infections = Source.infections,
                   Target.deaths = Source.deaths,
                   Target.shapearea = Source.shapearea,
                   Target.shapelength = Source.shapelength
    WHEN NOT MATCHED BY TARGET THEN
        INSERT (county, countyname, type, federalstate, population,
                infections, deaths, shapearea, shapelength, date)
        VALUES (Source.county, Source.countyname, Source.type,
                Source.federalstate, Source.population, Source.infections,
                Source.deaths, Source.shapearea, Source.shapelength,
                Source.date);
    '''
    engine.execute(sa_text(merge_statement).execution_options(autocommit=True))
    logging.info('Python timer trigger function ran at %s', utc_timestamp)
def exec_sproc(self, stored_procedure):
    sql_str = f"EXEC {self.schema}.{stored_procedure}"
    command = sa_text(sql_str).execution_options(autocommit=True)
    return self.engine.execute(command)
def truncate_pts_atividades(db: Session):
    """Truncates the main tables. Useful for resetting the DB to run tests."""
    db.execute(sa_text("TRUNCATE TABLE plano_trabalho CASCADE"))
    db.commit()
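For context on the CASCADE above: in PostgreSQL a plain TRUNCATE fails when other tables hold foreign keys into plano_trabalho, and CASCADE empties those referencing tables as well. An explicit multi-table equivalent ('atividade' is a hypothetical child table name, not from the original):

def truncate_pts_atividades_explicit(db: Session):
    # Hypothetical: list parent and child tables instead of cascading.
    db.execute(sa_text("TRUNCATE TABLE plano_trabalho, atividade"))
    db.commit()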
def validate_aip(event, context):
    """Triggered by a change to a Cloud Storage bucket.

    Args:
        event (dict): Event payload.
        context (google.cloud.functions.Context): Metadata for the event.
    """
    # Setting bucket and file name.
    bucket = event['bucket']
    file_path = event['name']

    # Code block to read file from GCS and load data in json format.
    client = storage.Client()
    bucket = client.get_bucket(bucket)
    blob = bucket.get_blob(file_path)
    contents = blob.download_as_string()
    contents = contents.decode("utf-8")
    data = json.loads(contents)

    # Reading required text field from data.
    output = data['responses'][0]['fullTextAnnotation']['text']
    output = output[:-1]

    # Convert output data from the json file to the required format for
    # loading into the invoice table.
    keys = [
        'Company_Name:', 'Client_Name:', 'Client_Address:', 'SOW_Number:',
        'Project_ID:', 'Invoice_Number:', 'Invoice_Date:', 'Billing_Period:',
        'Bank_Account_Number:', 'Bank_Name:', 'Balance_Due:',
        'Developer Rate Hours Subtotal'
    ]
    other_key = "Developer Rate Hours Subtotal"
    output_list = output.split('\n')
    output_dict = {}
    other_value_list = []
    for op in output_list:
        present = False
        for key in keys:
            if key in op:
                dict_key = key.replace(':', '')
                output_dict[dict_key] = op.replace(key, '')
                present = True
        if not present:
            other_value_list.append(op)
    output_dict[other_key] = other_value_list
    df_invoice = pd.DataFrame.from_dict(output_dict)
    df_invoice[['Developer', 'Rate', 'Hours', 'Subtotal']] = \
        df_invoice['Developer Rate Hours Subtotal'].str.split(expand=True)
    df_invoice = df_invoice.drop(columns=['Developer Rate Hours Subtotal'])

    # Establishing connection with Cloud SQL.
    db_con = sqlalchemy.create_engine(
        'mysql+pymysql://root:aip-auto$@/aip?unix_socket=/cloudsql/vsmart-iiot-223813:us-central1:aip-auto')

    # Loading data into Cloud SQL, invoice table.
    db_con.execute(sa_text('truncate table invoice'))
    df_invoice.to_sql('invoice', db_con, if_exists='append', index=False)

    # Reading data from timesheet table.
    df_timesheet = pd.read_sql('SELECT * FROM timesheet', con=db_con)
    joined_df = pd.merge(df_invoice, df_timesheet, on=[
        'Company_Name', 'SOW_Number', 'Project_ID', 'Invoice_Number',
        'Invoice_Date', 'Developer'
    ])

    # Matching data of both tables.
    matched = []
    for index, row in joined_df.iterrows():
        if (row['Rate_x'] == row['Rate_y']
                and row['Hours_x'] == row['Hours_y']
                and row['Bank_Account_Number_x'] == row['Bank_Account_Number_y']):
            matched.append(True)
        else:
            matched.append(False)

    # Pushing successful message into Pub/Sub.
    if False not in matched:
        data = "Invoice Matched"
        publisher = pubsub_v1.PublisherClient()
        topic_path = publisher.topic_path('vsmart-iiot-223813', 'aip')
        bdata = data.encode("utf-8")
        future = publisher.publish(topic_path, data=bdata)
def truncate_sell_amt_table_op():
    wspider_temp_engine.execute(
        sa_text('''TRUNCATE TABLE wspider_temp.MWS_COLT_ITEM_SELL_AMT_DEV''')
        .execution_options(autocommit=True))
def main(args):
    params = config()
    if args["target"] is not None:
        target = args["target"].lower()
    else:
        target = args['tabname'].lower()
    col_file = 'cols/' + args['tabname'] + '.cols'
    col_list = file_read(col_file)
    df = pd.read_csv('data/' + args['tabname'] + '.csv', header=None,
                     names=col_list)
    print(df)

    #regexp = re.compile(r'vc__[a-z_0-9]*')
    #for c in col_list:
    #    if regexp.search(c):
    #        df.drop(c, axis=1, inplace=True)

    regexp = re.compile(r'sys*')
    for c in col_list:
        if regexp.search(c):
            df.drop(c, axis=1, inplace=True)
    for c in col_list:
        if c == "tag_hist":
            df.drop(c, axis=1, inplace=True)
    df.rename(columns={'vc__date_tag_hist': 'date_hist'}, inplace=True)

    #df["id_host"].apply(lambda x: x.lower())
    #df['id_host'] = df['id_host'].str.lower()
    #df['id_host'] = df['id_host'].str.replace(r'cti3700', '')
    #df['id_host'] = df['id_host'].map(lambda x: x.rstrip('cti3700'))

    # psycopg2.errors.UniqueViolation: duplicate key value violates unique
    # constraint "host_vg_pkey"
    # DETAIL: Key (id_host, vg_name, date_hist)=(sr05209, rootvg, 1970-01-01 00:00:01) already exists.
    # CONTEXT: COPY host_vg, line 81

    # iterating the columns
    for col in df.columns:
        print(col)

    try:
        url = ('postgresql://' + params['user'] + ':' + params['password'] +
               '@' + params['host'] + '/' + params['database'])
        engine = create_engine(url, connect_args={
            'options': '-csearch_path={}'.format(params['schema'])
        })
        if args["truncate"]:
            engine.execute(
                sa_text('''TRUNCATE TABLE ''' + target + ''' ''')
                .execution_options(autocommit=True))
        df.to_sql(target, engine, method=psql_insert_copy,
                  if_exists='append', index=False)
    except SQLAlchemyError as e:
        error = str(e.__dict__['orig'])
        print(error)
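psql_insert_copy above is not defined in this snippet; it is presumably the COPY-based insertion callable from the pandas to_sql documentation. A condensed version for reference:

import csv
from io import StringIO


def psql_insert_copy(table, conn, keys, data_iter):
    # Stream the rows through PostgreSQL COPY for fast bulk loading
    # (adapted from the pandas "insertion method" docs).
    with conn.connection.cursor() as cur:
        buf = StringIO()
        csv.writer(buf).writerows(data_iter)
        buf.seek(0)
        columns = ', '.join('"{}"'.format(k) for k in keys)
        table_name = ('{}.{}'.format(table.schema, table.name)
                      if table.schema else table.name)
        cur.copy_expert(
            'COPY {} ({}) FROM STDIN WITH CSV'.format(table_name, columns),
            buf)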
def main(mytimer: func.TimerRequest) -> None:
    utc_timestamp = datetime.datetime.utcnow().replace(
        tzinfo=datetime.timezone.utc).isoformat()
    if mytimer.past_due:
        logging.info('The timer is past due!')

    confirmed = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
    deaths = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"
    recovered = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv"
    inputs = {
        'deaths': deaths,
        'infections': confirmed,
        'recovered': recovered
    }

    df_result = None
    country_col = 'Country/Region'
    province_col = 'Province/State'
    # Melt each wide time-series file into long format, then merge the three
    # qualifiers (infections, deaths, recovered) on the key columns.
    for qualifier, url in inputs.items():
        df = pd.read_csv(url)
        melt_cols = list(df.columns)
        melt_cols.remove(country_col)
        melt_cols.remove(province_col)
        melt_cols.remove('Lat')
        melt_cols.remove('Long')
        df_melt = pd.melt(df,
                          id_vars=[country_col, province_col, 'Lat', 'Long'],
                          value_vars=melt_cols)
        df_melt = df_melt.rename(columns={'value': qualifier,
                                          'variable': 'date'})
        df_melt['date'] = pd.to_datetime(df_melt['date']).dt.date
        df_melt[qualifier] = df_melt[qualifier].fillna(0)
        if df_result is None:
            df_result = df_melt
        else:
            df_melt = df_melt.drop(['Lat', 'Long'], axis=1)
            df_result = pd.merge(left=df_result, right=df_melt,
                                 on=key_cols, how='outer')
    df_result = shared.helpers.cleanup_df(df_result, key_cols=key_cols)

    username = os.environ.get('keyvault_db_username')
    password = os.environ.get('keyvault_db_password')
    params = urllib.parse.quote_plus(
        'Driver={ODBC Driver 17 for SQL Server};Server=tcp:covid19dbserver.database.windows.net,1433;Database=covid19db;Uid=' + username +
        '@covid19dbserver;Pwd=' + password +
        ';Encrypt=yes;TrustServerCertificate=no;Connection Timeout=30;')
    conn_str = 'mssql+pyodbc:///?odbc_connect={}'.format(params)
    engine = create_engine(conn_str, echo=False)

    assert df_result.duplicated().sum() == 0
    table_name = "HopkinsTS"
    table_name_updates = f"{table_name}_updates"
    try:
        pd.read_sql("select Top(1) * from dbo.%s" % table_name_updates,
                    engine)
        engine.execute(
            sa_text('''TRUNCATE TABLE %s''' % table_name_updates)
            .execution_options(autocommit=True))
    except:
        pass

    for col in [country_col, province_col]:
        df_result[col] = df_result[col].str.slice(start=0, stop=99)
    df_result = df_result[['Country/Region', 'Province/State', 'Lat', 'Long',
                           'infections', 'deaths', 'recovered', 'date']]
    df_result.to_sql(table_name_updates, engine, if_exists='append',
                     schema='dbo', index=False, chunksize=100, method='multi',
                     dtype={
                         country_col: sqlalchemy.types.NVARCHAR(length=100),
                         province_col: sqlalchemy.types.NVARCHAR(length=100)
                     })

    merge_statement = f'''
    MERGE INTO dbo.{table_name} AS Target
    USING (SELECT [Country/Region], [Province/State], Lat, Long,
                  infections, deaths, recovered, date
           FROM dbo.{table_name_updates}) AS Source
    ON Target.[Country/Region] = Source.[Country/Region]
       AND COALESCE(Target.[Province/State], '') = COALESCE(Source.[Province/State], '')
       AND Target.date = Source.date
    WHEN MATCHED THEN
        UPDATE SET Target.infections = Source.infections,
                   Target.deaths = Source.deaths,
                   Target.recovered = Source.recovered
    WHEN NOT MATCHED BY TARGET THEN
        INSERT ([Country/Region], [Province/State], Lat, Long,
                infections, deaths, recovered, date)
        VALUES (Source.[Country/Region], Source.[Province/State],
                Source.[Lat], Source.[Long], Source.infections,
                Source.deaths, Source.recovered, Source.date);
    '''
    engine.execute(sa_text(merge_statement).execution_options(autocommit=True))
    logging.info('Python timer trigger function ran at %s', utc_timestamp)
for valor in resp_query_temporal_select:
    aux_temporal = todo.insert().values(
        temperatura=valor.temperatura,
        humedad=valor.humedad,
        canal1=valor.canal1,
        canal2=valor.canal2,
        canal3=valor.canal3,
        canal4=valor.canal4,
        tempGabinete=datos_actuales.tempGabinete,
        hora=valor.hora,
        fecha=valor.fecha)
    # Insert every row of the ECM's temporary table into the CCM's 'todo' table.
    connection2.execute(aux_temporal)

# Delete all values from the temporary table after saving them to the CCM.
connection.execute(
    sa_text('''TRUNCATE TABLE temporal''').execution_options(autocommit=True))
connection2.execute(aux)  # 'aux' is presumably defined earlier in the original file

# If there is no connection to the CCM, insert the values into the ECM's
# temporary table.
if (resultado.checkbox == "con CCM"
        and resultado_query_estado_conexion_select.CCM == 'sin conexion'):
    aux = temporal.insert().values(
        temperatura=datos_actuales.temperatura,
        humedad=datos_actuales.humedad,
        canal1=datos_actuales.canal1,
        canal2=datos_actuales.canal2,
        canal3=datos_actuales.canal3,
        canal4=datos_actuales.canal4,
        tempGabinete=datos_actuales.tempGabinete,
        hora=time.strftime("%H:%M:%S"),
        fecha=time.strftime("%Y-%m-%d"))
    connection.execute(aux)