def kickStuckOperations(self, limit=20, kickDelay=2):
    """finds operations that have not been updated for more than a given
    time but are still assigned and resets the assignment

    :param int limit: number of operations to treat
    :param int kickDelay: age of the lastUpdate in hours

    :returns: S_OK/S_ERROR with number of kicked operations
    """
    session = self.dbSession(expire_on_commit=False)
    try:
        # Candidates: operations whose lastUpdate is older than kickDelay
        # hours but which still carry an assignment.
        staleBefore = func.date_sub(utc_timestamp(), text("INTERVAL %d HOUR" % kickDelay))
        stuckQuery = (
            session.query(FTS3Operation.operationID)
            .filter(FTS3Operation.lastUpdate < staleBefore)
            .filter(~FTS3Operation.assignment.is_(None))
            .limit(limit)
        )
        opIDs = [row[0] for row in stuckQuery]

        kicked = 0
        if opIDs:
            # The lastUpdate condition is repeated in the UPDATE itself so
            # that an operation refreshed between the SELECT and the UPDATE
            # is left alone.
            result = session.execute(
                update(FTS3Operation)
                .where(FTS3Operation.operationID.in_(opIDs))
                .where(FTS3Operation.lastUpdate < func.date_sub(utc_timestamp(), text("INTERVAL %d HOUR" % kickDelay)))
                .values({"assignment": None})
                .execution_options(synchronize_session=False)  # see comment about synchronize_session
            )
            kicked = result.rowcount

        session.commit()
        session.expunge_all()
        return S_OK(kicked)

    except SQLAlchemyError as e:
        session.rollback()
        return S_ERROR("kickStuckOperations: unexpected exception : %s" % e)
    finally:
        session.close()
def kickStuckJobs(self, limit=20, kickDelay=2):
    """finds jobs that have not been updated for more than a given time
    but are still assigned and resets the assignment

    :param int limit: number of jobs to treat
    :param int kickDelay: age of the lastUpdate in hours

    :returns: S_OK/S_ERROR with number of kicked jobs
    """
    session = self.dbSession(expire_on_commit=False)
    try:
        # Candidates: jobs whose lastUpdate is older than kickDelay hours
        # but which still carry an assignment.
        staleBefore = func.date_sub(func.utc_timestamp(), text('INTERVAL %d HOUR' % kickDelay))
        stuckQuery = (
            session.query(FTS3Job.jobID)
            .filter(FTS3Job.lastUpdate < staleBefore)
            .filter(~FTS3Job.assignment.is_(None))
            .limit(limit)
        )
        jobIDs = [row[0] for row in stuckQuery]

        kicked = 0
        if jobIDs:
            # The lastUpdate condition is repeated in the UPDATE itself so
            # that a job refreshed between the SELECT and the UPDATE is
            # left alone.
            result = session.execute(
                update(FTS3Job)
                .where(FTS3Job.jobID.in_(jobIDs))
                .where(FTS3Job.lastUpdate < func.date_sub(func.utc_timestamp(), text('INTERVAL %d HOUR' % kickDelay)))
                .values({'assignment': None})
            )
            kicked = result.rowcount

        session.commit()
        session.expunge_all()
        return S_OK(kicked)

    except SQLAlchemyError as e:
        session.rollback()
        return S_ERROR("kickStuckJobs: unexpected exception : %s" % e)
    finally:
        session.close()
def kickStuckJobs(self, limit=20, kickDelay=2):
    """finds jobs that have not been updated for more than a given time
    but are still assigned and resets the assignment

    :param int limit: number of jobs to treat
    :param int kickDelay: age of the lastUpdate in hours

    :returns: S_OK/S_ERROR with number of kicked jobs
    """
    session = self.dbSession(expire_on_commit=False)
    try:
        # Select stale-but-still-assigned jobs; both conditions go into a
        # single filter() call (implicit AND).
        stuckJobQuery = session.query(FTS3Job.jobID).filter(
            FTS3Job.lastUpdate
            < func.date_sub(func.utc_timestamp(), text('INTERVAL %d HOUR' % kickDelay)),
            ~FTS3Job.assignment.is_(None),
        ).limit(limit)
        stuckIDs = [jobID for (jobID,) in stuckJobQuery]

        nKicked = 0
        if stuckIDs:
            # Re-check the age in the UPDATE so a job refreshed in the
            # meantime keeps its assignment.
            nKicked = session.execute(
                update(FTS3Job)
                .where(FTS3Job.jobID.in_(stuckIDs))
                .where(FTS3Job.lastUpdate < func.date_sub(func.utc_timestamp(), text('INTERVAL %d HOUR' % kickDelay)))
                .values({'assignment': None})
            ).rowcount

        session.commit()
        session.expunge_all()
        return S_OK(nKicked)

    except SQLAlchemyError as e:
        session.rollback()
        return S_ERROR("kickStuckJobs: unexpected exception : %s" % e)
    finally:
        session.close()
def add_outcomes(
    feature_query,
    start_time: datetime,
    positive_event_lookahead: int = 1,
):
    """Left-join outcome labels onto a per-user/per-date feature subquery.

    Builds, from the global ``events`` table, the set of labeled events that
    fall in the lookahead window after *start_time*, deduplicates multiple
    outcomes per (date, user) — churn wins over any positive event — and
    outer-joins the resulting (outcome, date) pair onto *feature_query*.

    :param feature_query: subquery with at least 'user_id' and 'date' columns
        (assumed from the join conditions below — TODO confirm against caller)
    :param datetime start_time: start of the labeling window (exclusive)
    :param int positive_event_lookahead: window length in days after
        start_time in which events are considered
    :returns: a subquery named 'feature_query_w_outcome' adding 'outcome'
        and 'outcome_date' columns to feature_query
    """
    # The events table holds all the events, not just conversion ones:
    # keep only those whose type is a known label, inside the
    # (start_time, start_time + lookahead] date window.
    relevant_events = bq_session.query(
        events.c['time'].cast(DATE).label('date'),
        events.c['type'].label('outcome'),
        events.c['user_id'].label('user_id')).filter(
            events.c['type'].in_(list(LABELS.keys())),
            cast(events.c['time'], DATE) > cast(start_time, DATE),
            cast(events.c['time'], DATE) <= cast(
                start_time + timedelta(days=positive_event_lookahead),
                DATE)).subquery()
    # TODO: Remove deduplication, once the event table doesn't contain any
    # duplicate outcomes for the same (date, user) pair.
    relevant_events_deduplicated = bq_session.query(
        relevant_events.c['date'],
        relevant_events.c['user_id'],
        # This case when provides logic for dealing with multiple outcomes
        # during the same time period; an example is user_id 195379 during
        # the 4/2020 where the user renews, but then cancels and gets a
        # refund (the current pipeline provides both labels)
        case(
            [
                # If there is at least one churn event, we identify the
                # user as churned (membership test over the aggregated
                # array of outcomes for the group)
                (literal(negative_label()).in_(
                    func.unnest(func.array_agg(
                        relevant_events.c['outcome']))),
                 negative_label())
            ],
            # In case of any number of any positive only events we consider
            # the event as a renewal
            else_=positive_labels()).label('outcome')).group_by(
                relevant_events.c['date'].label('date'),
                relevant_events.c['user_id'].label('user_id')).subquery()
    # Attach outcome/outcome_date: a feature row matches an outcome row when
    # the user ids agree and the feature date falls within
    # [outcome_date - lookahead days, outcome_date].
    feature_query_w_outcome = bq_session.query(
        feature_query,
        relevant_events_deduplicated.c['outcome'].label('outcome'),
        relevant_events_deduplicated.c['date'].label('outcome_date')
    ).outerjoin(
        relevant_events_deduplicated,
        and_(
            feature_query.c['user_id'] ==
            relevant_events_deduplicated.c['user_id'],
            feature_query.c['date'] >= func.date_sub(
                relevant_events_deduplicated.c['date'],
                text(f'interval {positive_event_lookahead} day')),
            feature_query.c['date'] <=
            relevant_events_deduplicated.c['date'])
    ).subquery('feature_query_w_outcome')

    return feature_query_w_outcome
def add_outcomes(self, feature_query):
    """Left-join outcome labels and a shared-login count onto feature_query.

    Pulls labeled events from ``self.events``, joins the outcome for the day
    after each feature date onto *feature_query* (defaulting to the negative
    label when no positive event matches), and also attaches a 30-day
    shared-account-login count per browser.

    :param feature_query: subquery with at least 'browser_id' and 'date'
        columns (assumed from the join conditions below — TODO confirm)
    :returns: a subquery extending feature_query with 'outcome',
        'outcome_date' and 'shared_account_logins_past_30_days' columns
    """
    # The events table holds all the events, not just conversion ones:
    # keep only rows whose type is a known label.
    relevant_events = self.bq_session.query(
        self.events.c['time'].cast(DATE).label('date'),
        self.events.c['type'].label('outcome'),
        self.events.c['browser_id'].label('browser_id')).filter(
            self.events.c['type'].in_(list(LABELS.keys()))).subquery()
    # This assumes we're always aggregating for a given day; if we do
    # multiple days at once this needs to use a rolling window function.
    # Counts 'shared_account_login' events per browser over the 30 days up
    # to self.aggregation_time.
    num_shared_account_logins_past_30_days = self.bq_session.query(
        func.count(self.events.c['type']).label(
            'shared_account_logins_past_30_days'),
        self.events.c['browser_id']).filter(
            and_(
                self.events.c['type'] == 'shared_account_login',
                self.events.c['time'].cast(DATE) <= cast(
                    self.aggregation_time, DATE),
                self.events.c['time'].cast(DATE) >= cast(
                    self.aggregation_time - timedelta(days=30),
                    DATE))).group_by(
                        self.events.c['browser_id']).subquery()
    # Outcome date is the event date for positive outcomes, otherwise the
    # day after aggregation_time; outcome defaults to the negative label.
    # The event join window is (date, date + 1 day] relative to the feature
    # date (strict '<' on the upper bound).
    feature_query_w_outcome = self.bq_session.query(
        feature_query,
        case([(relevant_events.c['outcome'].in_(
            self.positive_labels()), relevant_events.c['date'])],
             else_=(self.aggregation_time +
                    timedelta(days=1)).date()).label('outcome_date'),
        case([(relevant_events.c['outcome'].in_(
            self.positive_labels()), relevant_events.c['outcome'])],
             else_=self.negative_label()).label('outcome'),
        num_shared_account_logins_past_30_days.
        c['shared_account_logins_past_30_days']).outerjoin(
            relevant_events,
            and_(
                feature_query.c['browser_id'] ==
                relevant_events.c['browser_id'],
                feature_query.c['date'] >= func.date_sub(
                    relevant_events.c['date'],
                    text(f'interval {1} day')),
                feature_query.c['date'] <
                relevant_events.c['date'])).outerjoin(
                    num_shared_account_logins_past_30_days,
                    num_shared_account_logins_past_30_days.c['browser_id']
                    == feature_query.c['browser_id']).subquery()
    return feature_query_w_outcome
def deleteFinalOperations(self, limit=20, deleteDelay=180):
    """deletes operation in final state that are older than given time

    :param int limit: number of operations to treat
    :param int deleteDelay: age of the lastUpdate in days

    :returns: S_OK/S_ERROR with number of deleted operations
    """
    session = self.dbSession(expire_on_commit=False)
    try:
        # Candidates: operations already in a final state whose lastUpdate
        # is older than deleteDelay days.
        ageLimit = func.date_sub(func.utc_timestamp(), text('INTERVAL %d DAY' % deleteDelay))
        candidateQuery = (
            session.query(FTS3Operation.operationID)
            .filter(FTS3Operation.lastUpdate < ageLimit)
            .filter(FTS3Operation.status.in_(FTS3Operation.FINAL_STATES))
            .limit(limit)
        )
        opIDs = [row[0] for row in candidateQuery]

        deletedCount = 0
        if opIDs:
            result = session.execute(
                delete(FTS3Operation).where(FTS3Operation.operationID.in_(opIDs))
            )
            deletedCount = result.rowcount

        session.commit()
        session.expunge_all()
        return S_OK(deletedCount)

    except SQLAlchemyError as e:
        session.rollback()
        return S_ERROR("deleteFinalOperations: unexpected exception : %s" % e)
    finally:
        session.close()
def deleteFinalOperations(self, limit=20, deleteDelay=180):
    """deletes operation in final state that are older than given time

    :param int limit: number of operations to treat
    :param int deleteDelay: age of the lastUpdate in days

    :returns: S_OK/S_ERROR with number of deleted operations
    """
    session = self.dbSession(expire_on_commit=False)
    try:
        # Final-state operations older than deleteDelay days; both
        # conditions go into one filter() call (implicit AND).
        idQuery = session.query(FTS3Operation.operationID).filter(
            FTS3Operation.lastUpdate
            < func.date_sub(func.utc_timestamp(), text('INTERVAL %d DAY' % deleteDelay)),
            FTS3Operation.status.in_(FTS3Operation.FINAL_STATES),
        ).limit(limit)
        staleIDs = [operationID for (operationID,) in idQuery]

        nDeleted = 0
        if staleIDs:
            nDeleted = session.execute(
                delete(FTS3Operation).where(FTS3Operation.operationID.in_(staleIDs))
            ).rowcount

        session.commit()
        session.expunge_all()
        return S_OK(nDeleted)

    except SQLAlchemyError as e:
        session.rollback()
        return S_ERROR("deleteFinalOperations: unexpected exception : %s" % e)
    finally:
        session.close()
) """ sql_data = [ (d.emp_no, d.birth_date, d.first_name, d.last_name, d.gender, d.hire_date, d.title, d.from_date, d.to_date, d.salary, d.last_salary) for d in session.execute(sql) ] '''使用 sqlalchemy 方式进行查询''' '''方法一:使用 if else 三目运算符''' s1 = aliased(Salary) s2 = aliased(Salary) alchemy_data = session.query(Employee.emp_no, Employee.birth_date, Employee.first_name, Employee.last_name, Employee.gender, Employee.hire_date, Title.title, s1.from_date, s1.to_date, s1.salary, (0 if not session.query(s2.salary).filter(s2.emp_no==Employee.emp_no, func.date_sub(text("date('1997-12-01'), interval 1 year")). between(s2.from_date, s2.to_date)) else session.query(s2.salary). filter(s2.emp_no==Employee.emp_no, func.date_sub(text("date('1997-12-01'), interval 1 year")). between(s2.from_date, s2.to_date))).label("last_salary")).\ filter(Employee.emp_no==s1.emp_no , Title.emp_no==s1.emp_no, or_(Employee.emp_no==10004, Employee.emp_no==10001, Employee.emp_no==10006, Employee.emp_no==10003), func.date('1997-12-01').between(s1.from_date, s1.to_date), func.date('1997-12-01').between(Title.from_date, Title.to_date)).all() '''===============================以下是两种错误方法================================================''' '''方法二:使用 IFNULL 函数,这是一种错误的方法,由于使用 aliased ,在 from 之后将会出现另一条 IFNULL 语句, 数据本身会提示错误——“Every derived table must have its own alias” s1 = aliased(Salary) s2 = aliased(Salary)