def migration_update(sqlserver_db, aurora_db, table_name, data, where_clause):
    # Normalize the column names
    lower_data = _lower_data(data)

    # See if there is a primary writer specific to this table
    primary = ah_config.get(
        'database.primaryWriter.%s' % table_name,
        ah_config.get('database.primaryWriter', 'sqlserver'))
    if primary == 'sqlserver':
        primary_db_name = sqlserver_db
        secondary_db_name = aurora_db
    else:
        primary_db_name = aurora_db
        secondary_db_name = sqlserver_db

    primary_table = _get_table(primary_db_name, table_name)
    secondary_table = _get_table(secondary_db_name, table_name)

    with ah_db.open_db_connection(primary_db_name) as primary_connection:
        # First, the primary
        column_data = _match_data_to_column_names(primary_table, lower_data)
        update = primary_table.update().where(
            text(where_clause)).values(column_data)
        primary_connection.execute(update)

    if _do_secondary_write():
        # Now, the secondary
        with ah_db.open_db_connection(
                secondary_db_name) as secondary_connection:
            column_data = _match_data_to_column_names(
                secondary_table, lower_data)
            update = secondary_table.update().where(
                text(where_clause)).values(column_data)
            secondary_connection.execute(update)

def migration_delete(sqlserver_db, aurora_db, table_name, where_clause):
    # See if there is a primary writer specific to this table
    primary = ah_config.get(
        'database.primaryWriter.%s' % table_name,
        ah_config.get('database.primaryWriter', 'sqlserver'))
    if primary == 'sqlserver':
        primary_db_name = sqlserver_db
        secondary_db_name = aurora_db
    else:
        primary_db_name = aurora_db
        secondary_db_name = sqlserver_db

    primary_table = _get_table(primary_db_name, table_name)
    secondary_table = _get_table(secondary_db_name, table_name)

    with ah_db.open_db_connection(primary_db_name) as primary_connection:
        with ah_db.open_db_connection(
                secondary_db_name) as secondary_connection:
            # First, the primary
            delete = primary_table.delete().where(text(where_clause))
            primary_connection.execute(delete)

            # Now, the secondary
            delete = secondary_table.delete().where(text(where_clause))
            secondary_connection.execute(delete)

def migration_select_df(sqlserver_db, aurora_db, table_name, sqlserver_query,
                        aurora_query=None):
    log = ah_config.getLogger('ah.dbmigration')

    # See if there is a primary reader specific to this table
    primary = ah_config.get(
        'database.primaryReader.%s' % table_name,
        ah_config.get('database.primaryReader', 'sqlserver'))
    if primary == 'sqlserver':
        log.info(
            'Migration read table {0} from sqlserver. Primary is {1}'.format(
                table_name, primary))
        with ah_db.open_db_connection(sqlserver_db) as connection:
            return pd.read_sql(sqlserver_query, con=connection)
    elif primary == 'aurora':
        log.info('Migration read table {0} from aurora. Primary is {1}'.format(
            table_name, primary))
        if aurora_query is None:
            aurora_query = sqlserver_query
        with ah_db.open_db_connection(aurora_db) as connection:
            return pd.read_sql(aurora_query, con=connection)
    else:
        log.error(
            'Invalid primary DB for table {0}. Primary is {1}'.format(
                table_name, primary))

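# Illustrative sketch (not part of the original module): how the migration_*
# helpers above might be called during a dual-write rollout. The database
# names, table, and where clause below are hypothetical examples.
#
#   migration_update('datascience_sqlserver', 'datascience_aurora',
#                    'ExperimentGroup',
#                    {'GroupId': 2},
#                    'ExperimentId = 7 AND UserId = 100305')
#
#   df = migration_select_df('datascience_sqlserver', 'datascience_aurora',
#                            'ExperimentGroup',
#                            'SELECT * FROM DataScience.ExperimentGroup')
#
# Which side is written or read first is controlled by the
# 'database.primaryWriter[.<table>]' / 'database.primaryReader[.<table>]'
# config keys, falling back to 'sqlserver'.
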
def getUserGroupForUXExp(self, eids, uid):
    # print('getUserGroupForUXExp ' + str(time()))
    if len(eids) == 0:
        return []
    eidsStr = "(" + ",".join([str(x) for x in eids]) + ")"
    sql = '''
        SELECT eg.ExperimentId, eg.GroupId, egd.GroupDescription
        FROM DataScience.ExperimentGroup eg
        JOIN DataScience.ExperimentGroupDescriptions egd
            ON eg.ExperimentId = egd.ExperimentId AND eg.GroupId = egd.GroupId
        WHERE eg.userid=%d and eg.experimentid IN %s
    ''' % (uid, eidsStr)
    with ah_db.open_db_connection('datascience') as connection:
        result = connection.execute(sql)
        rows = result.fetchall()
        var = self.getExperimentVariablesForUxExp(eids)
        for row in rows:
            row.update(
                {'Variables': var[row['ExperimentId']][row['GroupId']]})
        return rows

def sql_update(db_name, table_name, data, where_clause):
    lower_data = _lower_data(data)
    table = _get_table(db_name, table_name)
    column_data = _match_data_to_column_names(table, lower_data)
    with ah_db.open_db_connection(db_name) as conn:
        update = table.update().where(text(where_clause)).values(column_data)
        conn.execute(update)

def sql_insert(db_name, table_name, data):
    lower_data = _lower_data(data)
    table = _get_table(db_name, table_name)
    column_data = _match_data_to_column_names(table, lower_data)
    with ah_db.open_db_connection(db_name) as conn:
        ins = table.insert().values(column_data)
        conn.execute(ins)

def mandatoryAssignUserGroup(self, eid, uid, groupid):
    sql = '''
        INSERT INTO DataScience.ExperimentGroup
            (ExperimentId, UserId, GroupId, CreatedOn)
        VALUES ({0}, {1}, {2}, CURRENT_TIMESTAMP)
    '''.format(eid, uid, groupid)
    with ah_db.open_db_connection('datascience') as connection:
        connection.execute(sql)

def mandatoryReassignUserGroup(self, eid, uid, groupid):
    sql = '''
        UPDATE DataScience.ExperimentGroup
        SET GroupId = {0}
        WHERE experimentid = {1} AND UserId = {2}
    '''.format(groupid, eid, uid)
    with ah_db.open_db_connection('datascience') as connection:
        connection.execute(sql)

def getDataFrame(self):
    with ah_db.open_db_connection('miscellaneous') as conn_misc:
        sql = '''
            SELECT t.UserId, t.Date, t.TotalHours, t.CreatedOn,
                   t.TimeSheetProviderId, tp.Name
            FROM TimeSheets AS t
            JOIN TimeSheetProviders AS tp
                ON t.TimeSheetProviderId = tp.TimeSheetProviderId
            WHERE UserId={}
        '''.format(self.UserId)
        df_out = getSQLdata(sql, conn_misc)
    return df_out

def getAllCurrentUXExperiment(self):
    sql = """
        SELECT ExperimentId
        FROM DataScience.Experiment
        WHERE UXExperiment = 1 AND CURDATE() BETWEEN StartTime AND EndTime
    """
    # print('getAllCurrentUXExperiment ' + str(time()))
    with ah_db.open_db_connection('datascience') as connection:
        result = connection.execute(sql)
        rows = result.fetchall()
        return [x['ExperimentId'] for x in rows]

def getExperimentDateRange(self, eid):
    sql = """
        SELECT COUNT(*) AS cntExp
        FROM DataScience.Experiment
        WHERE experimentid={} AND CURDATE() BETWEEN StartTime AND EndTime
    """.format(eid)
    # print('getExperimentDateRange ' + str(time()))
    with ah_db.open_db_connection('datascience') as connection:
        result = connection.execute(sql)
        row = result.fetchone()
        return row['cntExp']

def getGroupSplit(self, eid):
    sql = '''
        SELECT split
        FROM DataScience.ExperimentGroupDescriptions
        WHERE experimentid={0}
        ORDER BY groupid
    '''.format(eid)
    # group id has to be continuous and starting from 1
    with ah_db.open_db_connection('datascience') as connection:
        result = connection.execute(sql)
        rows = result.fetchall()
        return [row['split'] for row in rows]

def getDataFrame(self):
    with ah_db.open_db_connection('miscellaneous') as conn_misc:
        sql = '''
            SELECT IsHeadLine, NoteCategoryId, Date, UserId
            FROM Notes
            WHERE UserId = {}
            ORDER BY Date ASC'''.format(self.UserId)
        df_out = getSQLdata(sql, conn_misc)
    if df_out.shape[0] == 0:
        df_out = pd.DataFrame(columns=['IsHeadLine', 'NoteCategoryId', 'Date'])
    return df_out

def getGroupUsers(self, eid, gid):
    # print('getGroupUsers ' + str(time()))
    sql = '''
        SELECT UserId
        FROM DataScience.ExperimentGroup
        WHERE ExperimentId = {0} AND GroupId = {1}
    '''.format(eid, gid)
    with ah_db.open_db_connection('datascience') as connection:
        result = connection.execute(sql)
        rows = result.fetchall()
        uids = [x[0] for x in rows]
        return uids

def assignUserGroup(self, eid, uid):
    # print('assignUserGroup ' + str(time()))
    splits = self.getGroupSplit(eid)
    g = int(np.random.choice(range(1, len(splits) + 1), p=splits))
    sql = '''
        INSERT INTO DataScience.ExperimentGroup
            (ExperimentId, UserId, GroupId, CreatedOn)
        VALUES ({0}, {1}, {2}, CURRENT_TIMESTAMP)
    '''.format(eid, uid, g)
    with ah_db.open_db_connection('datascience') as connection:
        connection.execute(sql)
    return g

def getExpGroupDescriptions(self, eid):
    # print('getExpGroupDescriptions ' + str(time()))
    sql = '''
        SELECT GroupId, GroupDescription
        FROM DataScience.ExperimentGroupDescriptions
        WHERE experimentid={0}
        ORDER BY groupid
    '''.format(eid)
    with ah_db.open_db_connection('datascience') as connection:
        result = connection.execute(sql)
        rows = result.fetchall()
        # group id has to be continuous and starting from 1
        return {x['GroupId']: x['GroupDescription'] for x in rows}

def getNestedExperiment(self, eid):
    sql = """
        SELECT NestedExperimentId, GroupIdInNestedExperiment,
               DefaultGroupIdInNestedExperiment
        FROM DataScience.Experiment
        WHERE experimentid={}
    """.format(eid)
    # print('getNestedExperiment ' + str(time()))
    with ah_db.open_db_connection('datascience') as connection:
        result = connection.execute(sql)
        row = result.fetchone()
        return row['NestedExperimentId'], row['GroupIdInNestedExperiment'], \
            row['DefaultGroupIdInNestedExperiment']

def getNewUserId(self):
    # return [6355,100305,45563]
    # print('getNewUserId ' + str(time()))
    sql = '''
        SELECT DISTINCT UserId
        FROM miscellaneous.Users
        WHERE CreatedOn >= '2017-02-15'
    '''
    try:
        with ah_db.open_db_connection('miscellaneous') as connection:
            result = connection.execute(sql)
            r = result.fetchall()
    except Exception:
        print(traceback.format_exc())
        raise
    return [l[0] for l in r]

def pullCreateUserGroup(self, uid, eid):
    sql = '''
        SELECT GroupId
        FROM DataScience.ExperimentGroup
        WHERE userid={0} and experimentid={1}
    '''.format(uid, eid)
    # print('pullCreateUserGroup ' + str(time()))
    with ah_db.open_db_connection('datascience') as connection:
        result = connection.execute(sql)
        row = result.fetchone()
        # print(row)
    if row is not None:
        return row['GroupId']
    else:
        return self.assignUserGroup(eid, uid)

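# Illustrative sketch (not part of the original module): a typical call flow
# for putting a user into an experiment group. The experiment and user ids
# are hypothetical.
#
#   if self.isExperimentValid(eid=12) == 1:
#       group = self.pullCreateUserGroup(uid=6355, eid=12)
#       # Returns the existing GroupId if the user was already assigned,
#       # otherwise assignUserGroup() draws one at random using the splits
#       # stored in DataScience.ExperimentGroupDescriptions.
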
def getDataFrame(self):
    with ah_db.open_db_connection('moneyMovement') as conn_mm:
        sql = '''
            SELECT ft.CreatedOn, ft.Amount, ft.PostingDate, c.CreditReasonId
            FROM FundsTransfers ft
            JOIN CustomerServiceCredits c
                ON ft.FundsTransferId = c.TransactionId
            WHERE UserId = {}
            ORDER BY CreatedOn ASC'''.format(self.UserId)
        df_out = getSQLdata(sql, conn_mm)
    if df_out.shape[0] == 0:
        df_out = pd.DataFrame(
            columns=['CreatedOn', 'Amount', 'PostingDate', 'CreditReasonId'])
    return df_out

def getDataFrame(self):
    with ah_db.open_db_connection('bankConnection') as conn_bf:
        sql = '''
            SELECT UserBankId, AuthenticationIssueTypeId,
                   AuthenticationIssueStatusId, CreatedOn, ResolvedOn
            FROM AuthenticationIssues
            WHERE UserId = {}
            ORDER BY CreatedOn ASC'''.format(self.UserId)
        df_out = getSQLdata(sql, conn_bf)
    if df_out.shape[0] == 0:
        df_out = pd.DataFrame(
            columns=['UserBankId', 'AuthenticationIssueTypeId',
                     'AuthenticationIssueStatusId', 'CreatedOn', 'ResolvedOn'])
    return df_out

def getDataFrame(self):
    with ah_db.open_db_connection('miscellaneous') as conn_misc:
        sql = '''
            SELECT UserID, pt.PictureTypeId, pt.Typename, LastUpdatedOn,
                   ps.StatusName
            FROM UserTimeSheetPictures AS utp
            JOIN PictureStatuses AS ps
                ON utp.PictureStatusId = ps.PictureStatusId
            JOIN PictureTypes AS pt
                ON utp.PictureTypeId = pt.PictureTypeId
            WHERE StatusName = 'processed' AND UserId={}
        '''.format(self.UserId)
        df_out = getSQLdata(sql, conn_misc)
    return df_out

def getGroupSplit(self, eid):
    # print('getGroupSplit ' + str(time()))
    sql = '''
        SELECT split
        FROM DataScience.ExperimentGroupDescriptions
        WHERE experimentid={0}
        ORDER BY groupid
    '''.format(eid)
    # group id has to be continuous and starting from 1
    with ah_db.open_db_connection('datascience') as connection:
        result = connection.execute(sql)
        rows = result.fetchall()
    if len(rows) == 0:
        raise NameError('No Group Split for experiment: {}'.format(eid))
    return [row['split'] for row in rows]

def _do_secondary_write():
    global last_secondary_write_check, do_secondary_write

    now = int(time.time())
    if now < last_secondary_write_check:
        return do_secondary_write
    last_secondary_write_check = now + 30

    with ah_db.open_db_connection('sqlserver') as connection:
        sql = ("select ParallelWritingOn from ops.AuroraMigrationConfig "
               "where DomainName = 'DataScience'")
        row = connection.execute(sql).fetchone()
        if row:
            do_secondary_write = row[0]
        else:
            do_secondary_write = False
    logging.getLogger('ah_db.misc').info("Secondary write check %s",
                                         do_secondary_write)
    return do_secondary_write

def isExperimentValid(self, eid):
    # print('isExperimentValid ' + str(time()))
    sql = '''
        SELECT (CASE WHEN CURDATE() BETWEEN StartTime AND EndTime
                THEN 1 ELSE 0 END) AS valid
        FROM DataScience.Experiment
        WHERE ExperimentId = {0}
    '''.format(eid)
    with ah_db.open_db_connection('datascience') as connection:
        result = connection.execute(sql)
        row = result.fetchone()
        # print(row)
        if row is None:
            raise NameError('request of experiment group '
                            'assignment failure, '
                            'experiment does not exist'
                            ', experimentid = %s.' % eid)
        return row['valid']

def getExperimentVariables(self, eid):
    # print('getExperimentVariables ' + str(time()))
    sql = '''
        SELECT GroupId, VariableName, VariableValue
        FROM DataScience.UXExperimentVariables
        WHERE experimentid={0}
    '''.format(eid)
    # group id has to be continuous and starting from 1
    with ah_db.open_db_connection('datascience') as connection:
        result = connection.execute(sql)
        rows = result.fetchall()
        GroupIds = set([x['GroupId'] for x in rows])
        return {
            y: {
                x['VariableName']: x['VariableValue']
                for x in rows if x['GroupId'] == y
            }
            for y in GroupIds
        }

def getAllUserId(self):
    # return [6355,100305,45563]
    # print('getAllUserId ' + str(time()))
    sql = '''
        SELECT DISTINCT f.userid
        FROM moneymovement.transactions t
        JOIN moneymovement.transactionstofundstransfers ttft
            ON t.TransactionID = ttft.TransactionID
        JOIN moneymovement.FundsTransfers f
            ON ttft.FundsTransferId = f.FundsTransferId
        WHERE t.TransactionTypeID=3 AND t.StatusID IN (2,3)
            AND f.PostingDate>'2016-01-01'
        ORDER BY f.userid
    '''
    try:
        with ah_db.open_db_connection('moneyMovement') as connection:
            result = connection.execute(sql)
            r = result.fetchall()
    except Exception:
        print(traceback.format_exc())
        raise
    return [l[0] for l in r]

def checkzip(user1, user2):
    sql1 = '''
        SELECT DISTINCT Zipcode
        FROM DataScience.UserPIP
        WHERE userid = {0} AND Zipcode IS NOT NULL AND Zipcode != ''
    '''.format(user1['userid'])
    sql2 = '''
        SELECT DISTINCT Zipcode
        FROM DataScience.UserPIP
        WHERE userid = {0} AND Zipcode IS NOT NULL AND Zipcode != ''
    '''.format(user2['userid'])
    try:
        with ah_db.open_db_connection('datascience') as conn:
            zipcodes1 = conn.execute(sql1).fetchall()
            zipcodes2 = conn.execute(sql2).fetchall()
        ziplist1 = [zips['Zipcode'] for zips in zipcodes1]
        ziplist2 = [zips['Zipcode'] for zips in zipcodes2]
        overlap = set(ziplist1).intersection(set(ziplist2))
        if len(ziplist1) == 0 or len(ziplist2) == 0:
            return 1
        elif len(overlap) > 0:
            return 1
        else:
            return 0
    except Exception:
        return 1

def sql_select_df(db_name, table_name, query):
    with ah_db.open_db_connection(db_name) as conn:
        return pd.read_sql(query, con=conn)

def sql_delete(db_name, table_name, where_clause):
    table = _get_table(db_name, table_name)
    with ah_db.open_db_connection(db_name) as conn:
        delete = table.delete().where(text(where_clause))
        conn.execute(delete)
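
# Illustrative sketch (not part of the original module): the single-database
# sql_* helpers above mirror the migration_* variants but target one database
# only. The database, table, and column names below are hypothetical.
#
#   sql_insert('datascience', 'ExperimentGroup',
#              {'ExperimentId': 7, 'UserId': 100305, 'GroupId': 1})
#   sql_update('datascience', 'ExperimentGroup',
#              {'GroupId': 2}, 'ExperimentId = 7 AND UserId = 100305')
#   df = sql_select_df('datascience', 'ExperimentGroup',
#                      'SELECT * FROM DataScience.ExperimentGroup')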