def assign_buckets(self, client_id, bucket_cnt):  # tested
    """Hand up to ``bucket_cnt`` unassigned buckets to a client.

    The Cypher query picks buckets of this service that have no outgoing
    ``<SERVICE>_CLIENT`` relationship, links them to the service client
    reached from ``ClientForService {id: client_id}``, bumps the client's
    ``buckets_assigned`` counter, refreshes its ``last_access_time`` and
    stamps ``assigned_datetime`` on each bucket.

    Args:
        client_id: ``id`` property of the ``ClientForService`` node.
        bucket_cnt: Maximum number of buckets to assign (query ``LIMIT``).

    Returns:
        List of ``uuid`` values of the buckets returned by the query.
    """
    print("Assigning {} buckets".format(bucket_cnt))

    now = datetime.utcnow()
    state = {
        'assigned_datetime': now,
        'bucket_cnt': bucket_cnt,
        'client_id': client_id,
        'client_stats': {"last_access_time": now, "buckets_assigned": 1},
    }

    # NOTE(review): ordering is by the random key first, so bucket.priority
    # only breaks ties between equal random values - confirm this is intended.
    query = """
        MATCH(bucket:__UndefinedServiceTag__Bucket)
        WHERE NOT (bucket)-[:__UNDEFINEDSERVICETAG__CLIENT]->()
        WITH bucket, rand() as r ORDER BY r, bucket.priority ASC LIMIT $state.bucket_cnt
        MATCH(:ClientForService {id:$state.client_id})-[:__UNDEFINEDSERVICETAG__CLIENT]->(client:__UndefinedServiceTag__Client)
        MATCH(client)-[:STATS]->(stat:__UndefinedServiceTag__ClientStats)
            SET stat.buckets_assigned = stat.buckets_assigned + $state.client_stats.buckets_assigned,
                stat.last_access_time = $state.client_stats.last_access_time
        MERGE(bucket)-[:__UNDEFINEDSERVICETAG__CLIENT]->(client)
        WITH bucket
            SET bucket.assigned_datetime = datetime($state.assigned_datetime)
        return bucket
    """
    # Upper-case placeholder feeds relationship types, mixed-case feeds labels.
    substitutions = (
        ("__UNDEFINEDSERVICETAG__", self.service_db_name.upper()),
        ("__UndefinedServiceTag__", self.service_db_name),
    )
    for placeholder, tag in substitutions:
        query = query.replace(placeholder, tag)

    rows = execute_query_with_result(query, state=state)
    buckets = [row['bucket']['uuid'] for row in rows]
    print("Got {} buckets".format(len(buckets)))
    return buckets
def detect_n_mark_deadbuckets(self, threshold_hours_elapsed):  # tested
    """Mark long-assigned buckets as dead and return their UUIDs.

    Every bucket of this service that is linked to a client and whose
    ``assigned_datetime`` is older than ``threshold_hours_elapsed`` hours
    (relative to the current UTC time) gets ``dead_datetime`` set. The
    owning client's ``buckets_dead`` counter is incremented and its
    ``last_access_time`` refreshed.

    Args:
        threshold_hours_elapsed: Age in hours after which an assigned
            bucket is considered dead.

    Returns:
        List of ``uuid`` values of the buckets that were marked.
    """
    print("Marking buckets as dead if last access is more than {} hours".format(
        threshold_hours_elapsed))

    now = datetime.utcnow()
    cutoff = now - timedelta(hours=threshold_hours_elapsed)
    state = {
        "dead_datetime": now,
        "assigned_datetime_threshold": cutoff,
        'client_stats': {"last_access_time": now},
    }

    # NOTE(review): the WHERE clause does not exclude buckets that already
    # carry dead_datetime - confirm that re-marking them is acceptable.
    query = """
        MATCH(b:__UndefinedServiceTag__Bucket)-[:__UNDEFINEDSERVICETAG__CLIENT]->(c:__UndefinedServiceTag__Client)-[:STATS]->(stat:__UndefinedServiceTag__ClientStats)
            WHERE datetime(b.assigned_datetime) < datetime($state.assigned_datetime_threshold)
            SET b.dead_datetime = datetime($state.dead_datetime),
                stat.buckets_dead = stat.buckets_dead + 1,
                stat.last_access_time = $state.client_stats.last_access_time
        return b.uuid
    """
    substitutions = (
        ("__UNDEFINEDSERVICETAG__", self.service_db_name.upper()),
        ("__UndefinedServiceTag__", self.service_db_name),
    )
    for placeholder, tag in substitutions:
        query = query.replace(placeholder, tag)

    rows = execute_query_with_result(query, state=state)
    buckets = [row['b.uuid'] for row in rows]
    print("Got {} buckets with UUIDs as {}".format(len(buckets), buckets))
    return buckets
def get_nonprocessed_list(self, max_item_counts):  # tested
    """Return screen names of users that still need a DM check.

    A user qualifies when it has no incoming ``DM_YES``/``DM_NO``/
    ``DM_UNKNOWN`` relationship and is not already placed in a
    ``DMCheckBucket``.

    Args:
        max_item_counts: Maximum number of users to return (query ``LIMIT``).

    Returns:
        List of ``screen_name`` values.
    """
    print("Finding max {} users from DB who is not processed".format(max_item_counts))

    query = """
        match(u:User)
        where NOT ()-[:DM_YES|DM_NO|DM_UNKNOWN]->(u) AND NOT (u)-[:INDMCHECKBUCKET]->(:DMCheckBucket)
        return u.screen_name as screen_name LIMIT $state.limit
    """
    rows = execute_query_with_result(query, state={'limit': max_item_counts})

    users = []
    for row in rows:
        users.append(row['screen_name'])
    print("Got {} users".format(len(users)))
    return users
def is_dead_bucket(self, bucket_id):
    """Report whether the bucket with the given UUID has been marked dead.

    A bucket counts as dead when its node carries a ``dead_datetime``
    property.

    Args:
        bucket_id: ``uuid`` property of the bucket node to look up.

    Returns:
        True if a matching bucket with ``dead_datetime`` exists, else False
        (including when no bucket has that UUID).
    """
    print("Checking if {} bucket is dead".format(bucket_id))
    #TODO: Try to generalize it
    state = {"uuid": bucket_id}
    # The parameter key must match the map above: the query previously read
    # $state.bucket_id, which does not exist in the map, so the pattern could
    # never match and the method always answered False.
    query = """
        MATCH(b:__UndefinedServiceTag__Bucket {uuid:$state.uuid})
            WHERE EXISTS(b.dead_datetime)
        return b.uuid
    """
    query = query.replace("__UndefinedServiceTag__", self.service_db_name)
    response_json = execute_query_with_result(query, state=state)
    return bool(response_json)
def __migrate_user(self, old_linkname, new_linkname, limit=10000):
    """Move up to ``limit`` user links from the DM user to the DM-check client.

    For at most ``limit`` relationships ``(dmuser)-[:old_linkname]->(user)``
    the old relationship is deleted and replaced by
    ``(client)-[:new_linkname]->(user)``, where ``client`` is the
    ``DMCheckClient`` node with screen name ``"dpkmr"``.

    Fixes relative to the previous query:
      * ``LIMIT`` is applied before ``DELETE``. A LIMIT placed after a write
        clause does not stop that write, so every old link was deleted even
        though only ``limit`` rows were carried forward.
      * The new link targets the migrated user (``u2``) instead of the DM
        user (``u``), so the per-user information survives the migration.
      * The client node is merged on its own before the relationship, so a
        missing relationship cannot cause a duplicate client node to be
        created by a whole-pattern MERGE.

    Args:
        old_linkname: Relationship type to remove (substituted into the query).
        new_linkname: Relationship type to create (substituted into the query).
        limit: Maximum number of users to migrate in this call.

    Returns:
        Number of users migrated by this call.
    """
    print("Migrating {} users {} link to {} with client".format(limit, old_linkname, new_linkname))
    state = {'dmuser_name': self.dmuser_screen_name, "limit": limit}
    # Relationship types cannot be passed as Cypher parameters, hence the
    # textual placeholders; callers must pass trusted link names only.
    query = """
        match(u:User {screen_name:$state.dmuser_name})-[r1:__OLD_LINK___]->(u2:User)
        WITH r1, u2 LIMIT $state.limit
        DELETE r1
        merge(c:DMCheckClient {screen_name:"dpkmr"})
        merge(c)-[:__NEW_LINK__]->(u2)
        return count(u2) as usercount
    """
    query = query.replace("__OLD_LINK___", old_linkname)
    query = query.replace("__NEW_LINK__", new_linkname)
    response_json = execute_query_with_result(query, state=state)
    count = response_json[0]['usercount']
    print("Migrated {} users".format(count))
    return count
def __get_migrate_users(self, old_linkname, new_linkname, limit):  # tested
    """List users still linked to the DM user through ``old_linkname``.

    Args:
        old_linkname: Relationship type to look for (substituted into the
            query text).
        new_linkname: Only used in the log message; see the note below.
        limit: Maximum number of users to return (query ``LIMIT``).

    Returns:
        List of ``{'screen_name': ...}`` dicts, one per matched user.
    """
    print("Getting {} users {} link to {} with client".format(
        limit, old_linkname, new_linkname))

    params = {'dmuser_name': self.dmuser_screen_name, "limit": limit}
    template = """
        match(dmuser:User {screen_name:$state.dmuser_name})-[r1:__OLD_LINK___]->(user:User)
        return user.screen_name as screen_name LIMIT $state.limit
    """
    # The template contains no __NEW_LINK__ placeholder, so the second
    # replacement leaves the text unchanged for ordinary link names.
    query = template.replace("__OLD_LINK___", old_linkname).replace(
        "__NEW_LINK__", new_linkname)

    rows = execute_query_with_result(query, state=params)
    users = []
    for row in rows:
        users.append({'screen_name': row['screen_name']})
    print("Got {} users for migration".format(len(users)))
    return users
def get_all_dead_buckets(self, threshold_mins_elapsed):  # tested
    """Return UUIDs of buckets that have been dead for a while.

    A bucket is selected when its ``dead_datetime`` is earlier than the
    current UTC time minus ``threshold_mins_elapsed`` minutes.

    Args:
        threshold_mins_elapsed: Minimum number of minutes a bucket must
            have been dead to be returned.

    Returns:
        List of ``uuid`` values of the matching buckets.
    """
    print("Getting list of dead buckets for more than {} minutes".format(
        threshold_mins_elapsed))

    cutoff = datetime.utcnow() - timedelta(minutes=threshold_mins_elapsed)
    query = """
        MATCH(b:__UndefinedServiceTag__Bucket)
            WHERE datetime(b.dead_datetime) < datetime($state.dead_datetime_threshold)
        return b.uuid
    """.replace("__UndefinedServiceTag__", self.service_db_name)

    rows = execute_query_with_result(
        query, state={"dead_datetime_threshold": cutoff})
    buckets = [row['b.uuid'] for row in rows]
    print("Got {} buckets".format(len(buckets)))
    return buckets
def __get_nonprocessed_userlist_with_tweet_post_with_followers_limit(
        self, max_item_counts, check_user_followers_count_limit):  # tested
    """Return users with tweets that still need a follower check.

    A user qualifies when it posts at least one ``Tweet``, its
    ``followers`` value does not exceed the given limit, it has no incoming
    ``CHECKEDUSERFOLLOWER`` relationship and it is not already placed in a
    ``UserFollowerCheckBucket``.

    Args:
        max_item_counts: Maximum number of users to return (query ``LIMIT``).
        check_user_followers_count_limit: Upper bound (inclusive) on the
            user's ``followers`` property.

    Returns:
        List of distinct ``screen_name`` values.
    """
    print("Finding max {} users from DB who is not processed".format(
        max_item_counts))

    params = {
        'limit': max_item_counts,
        'check_user_followers_count_limit': check_user_followers_count_limit,
    }
    query = """
        match(u:User)-[:POSTS]->(t:Tweet)
        match(u)
        where u.followers <= $state.check_user_followers_count_limit AND NOT ()-[:CHECKEDUSERFOLLOWER]->(u) AND NOT (u)-[:INUSERFOLLOWERCHECKBUCKET]->(:UserFollowerCheckBucket)
        return distinct(u.screen_name) as screen_name LIMIT $state.limit
    """
    rows = execute_query_with_result(query, state=params)

    users = []
    for row in rows:
        users.append(row['screen_name'])
    print("Got {} users".format(len(users)))
    return users
def get_all_entities_for_bucket(self, bucket_id):  # tested
    """Return the users placed in a bucket and stamp the bucket as edited.

    The query matches users linked to the bucket through the service's
    ``IN<SERVICE>BUCKET`` relationship and sets the bucket's
    ``edit_datetime`` to the current UTC time.

    Args:
        bucket_id: ``uuid`` property of the bucket node.

    Returns:
        List of ``{'screen_name': ..., 'id': ...}`` dicts, one per user.
    """
    print("Getting users for {} bucket".format(bucket_id))
    #TODO: Check if it is fair assumption that entity is nothing but user
    params = {'edit_datetime': datetime.utcnow(), 'uuid': bucket_id}

    query = """
        MATCH(u:User)-[:IN__UNDEFINEDSERVICETAG__BUCKET]->(b:__UndefinedServiceTag__Bucket {uuid:$state.uuid})
            SET b.edit_datetime = datetime($state.edit_datetime)
        return u.screen_name, u.id
    """
    substitutions = (
        ("__UNDEFINEDSERVICETAG__", self.service_db_name.upper()),
        ("__UndefinedServiceTag__", self.service_db_name),
    )
    for placeholder, tag in substitutions:
        query = query.replace(placeholder, tag)

    rows = execute_query_with_result(query, state=params)
    users = []
    for row in rows:
        users.append({'screen_name': row['u.screen_name'], 'id': row['u.id']})
    print("Got {} users in {} bucket".format(len(users), bucket_id))
    return users