def assign_buckets(self, client_id, bucket_cnt):
     """Hand up to ``bucket_cnt`` currently unassigned buckets to a client.

     The query selects service buckets that have no outgoing service-client
     relationship, links them to the service client reached from the
     ``ClientForService`` node whose ``id`` equals ``client_id``, updates
     that client's stats node and stamps ``assigned_datetime`` on each
     bucket.

     Returns:
         list: the ``uuid`` of every bucket returned by the query.
     """
     print("Assigning {} buckets".format(bucket_cnt))
     now = datetime.utcnow()
     state = {
         'assigned_datetime': now,
         'bucket_cnt': bucket_cnt,
         'client_id': client_id,
         # The stats SET clause adds "buckets_assigned" to the stored counter.
         'client_stats': {"last_access_time": now, "buckets_assigned": 1},
     }
     cypher_template = """
         MATCH(bucket:__UndefinedServiceTag__Bucket) WHERE NOT (bucket)-[:__UNDEFINEDSERVICETAG__CLIENT]->()
         WITH bucket, rand() as r ORDER BY r, bucket.priority ASC LIMIT $state.bucket_cnt
         MATCH(:ClientForService {id:$state.client_id})-[:__UNDEFINEDSERVICETAG__CLIENT]->(client:__UndefinedServiceTag__Client)
         MATCH(client)-[:STATS]->(stat:__UndefinedServiceTag__ClientStats)
             SET stat.buckets_assigned = stat.buckets_assigned + $state.client_stats.buckets_assigned,
                 stat.last_access_time = $state.client_stats.last_access_time
         MERGE(bucket)-[:__UNDEFINEDSERVICETAG__CLIENT]->(client)
         WITH bucket SET bucket.assigned_datetime = datetime($state.assigned_datetime)
         return bucket
     """
     # Upper-case placeholder (relationship types) is substituted first,
     # then the mixed-case placeholder (node labels).
     cypher = (
         cypher_template
         .replace("__UNDEFINEDSERVICETAG__", self.service_db_name.upper())
         .replace("__UndefinedServiceTag__", self.service_db_name)
     )
     rows = execute_query_with_result(cypher, state=state)
     assigned_ids = [row['bucket']['uuid'] for row in rows]
     print("Got {} buckets".format(len(assigned_ids)))
     return assigned_ids
 def detect_n_mark_deadbuckets(self, threshold_hours_elapsed):
     """Flag assigned buckets as dead once their assignment is too old.

     Every bucket linked to a service client whose ``assigned_datetime`` is
     earlier than "now minus ``threshold_hours_elapsed`` hours" gets
     ``dead_datetime`` set to the current UTC time; the owning client's
     stats node has ``buckets_dead`` incremented and ``last_access_time``
     refreshed.

     Returns:
         list: the ``uuid`` of every bucket matched by the query.
     """
     print("Marking buckets as dead if last access is more than {} hours".
           format(threshold_hours_elapsed))
     now = datetime.utcnow()
     cutoff = now - timedelta(hours=threshold_hours_elapsed)
     state = {
         "dead_datetime": now,
         "assigned_datetime_threshold": cutoff,
         'client_stats': {"last_access_time": now},
     }
     # NOTE(review): the WHERE clause does not exclude buckets that already
     # carry dead_datetime -- confirm whether re-marking is intended.
     cypher_template = """
         MATCH(b:__UndefinedServiceTag__Bucket)-[:__UNDEFINEDSERVICETAG__CLIENT]->(c:__UndefinedServiceTag__Client)-[:STATS]->(stat:__UndefinedServiceTag__ClientStats)
             WHERE datetime(b.assigned_datetime) < datetime($state.assigned_datetime_threshold)
             SET b.dead_datetime = datetime($state.dead_datetime),
                 stat.buckets_dead = stat.buckets_dead + 1,
                 stat.last_access_time = $state.client_stats.last_access_time
             return b.uuid
     """
     # Upper-case placeholder first, then the mixed-case one.
     cypher = (
         cypher_template
         .replace("__UNDEFINEDSERVICETAG__", self.service_db_name.upper())
         .replace("__UndefinedServiceTag__", self.service_db_name)
     )
     rows = execute_query_with_result(cypher, state=state)
     dead_ids = [row['b.uuid'] for row in rows]
     print("Got {} buckets with UUIDs as {}".format(len(dead_ids), dead_ids))
     return dead_ids
Esempio n. 3
0
 def get_nonprocessed_list(self, max_item_counts):
     """Fetch screen names of users that have not been processed yet.

     A user qualifies when it has no incoming ``DM_YES``/``DM_NO``/
     ``DM_UNKNOWN`` relationship and is not linked to a ``DMCheckBucket``
     through ``INDMCHECKBUCKET``. At most ``max_item_counts`` rows are
     requested from the database.

     Returns:
         list: the ``screen_name`` values returned by the query.
     """
     print("Finding max {} users from DB who is not processed".format(max_item_counts))
     cypher = """
         match(u:User)
         where  NOT ()-[:DM_YES|DM_NO|DM_UNKNOWN]->(u) AND NOT (u)-[:INDMCHECKBUCKET]->(:DMCheckBucket)
         return u.screen_name as screen_name LIMIT $state.limit  
     """
     rows = execute_query_with_result(cypher, state={'limit': max_item_counts})
     screen_names = [row['screen_name'] for row in rows]
     print("Got {} users".format(len(screen_names)))
     return screen_names
 def is_dead_bucket(self, bucket_id):
     """Tell whether the bucket with the given uuid has been marked dead.

     A bucket counts as dead when its node carries a ``dead_datetime``
     property.

     Args:
         bucket_id: value of the bucket node's ``uuid`` property.

     Returns:
         bool: True if the query returns at least one row, else False.
     """
     print("Checking if {} bucket is dead".format(bucket_id))
     #TODO: Try to generalize it
     state = {"uuid": bucket_id}
     # The parameter map key is "uuid", so the query must read $state.uuid.
     # The earlier $state.bucket_id was never supplied, which made the match
     # find nothing and this method always return False.
     query = """
         MATCH(b:__UndefinedServiceTag__Bucket {uuid:$state.uuid})
             WHERE EXISTS(b.dead_datetime)
             return b.uuid
     """
     query = query.replace("__UndefinedServiceTag__", self.service_db_name)
     response_json = execute_query_with_result(query, state=state)
     return bool(response_json)
Esempio n. 5
0
 def __migrate_user(self, old_linkname, new_linkname, limit=10000):
     """Replace ``old_linkname`` relationships with a ``new_linkname`` link.

     The query deletes the ``old_linkname`` relationships going out of the
     user whose ``screen_name`` is ``self.dmuser_screen_name`` and merges a
     ``new_linkname`` relationship from a ``DMCheckClient`` node.

     Returns:
         The ``usercount`` value of the first row returned by the query.
     """
     print("Migrating {} users {} link to {} with client".format(limit, old_linkname, new_linkname))
     # NOTE(review): the DMCheckClient screen_name "dpkmr" is hard-coded in
     # the query text -- confirm this is intended.
     # NOTE(review): the new link is merged onto `u` (the node matched by
     # dmuser_name), not onto `u2` -- confirm this is intended.
     cypher = """
         match(u:User {screen_name:$state.dmuser_name})-[r1:__OLD_LINK___]->(u2:User)
         DELETE r1
         WITH u LIMIT $state.limit
         merge(c:DMCheckClient {screen_name:"dpkmr"})-[:__NEW_LINK__]->(u)
         return count(u) as usercount
     """
     # Relationship types are spliced into the text via these placeholders.
     for placeholder, linkname in (("__OLD_LINK___", old_linkname),
                                   ("__NEW_LINK__", new_linkname)):
         cypher = cypher.replace(placeholder, linkname)
     params = {'dmuser_name': self.dmuser_screen_name, "limit": limit}
     rows = execute_query_with_result(cypher, state=params)
     migrated = rows[0]['usercount']
     print("Migrated {} users".format(migrated))
     return migrated
 def __get_migrate_users(self, old_linkname, new_linkname, limit):
     """List users still attached through the ``old_linkname`` relationship.

     Looks at the ``old_linkname`` relationships going out of the user whose
     ``screen_name`` is ``self.dmuser_screen_name`` and requests at most
     ``limit`` target users.

     Returns:
         list: one ``{'screen_name': ...}`` dict per returned row.
     """
     print("Getting {} users {} link to {} with client".format(
         limit, old_linkname, new_linkname))
     cypher = """
         match(dmuser:User {screen_name:$state.dmuser_name})-[r1:__OLD_LINK___]->(user:User)
         return user.screen_name as screen_name LIMIT $state.limit
     """
     # The query text holds no __NEW_LINK__ placeholder, so the second
     # replacement leaves it unchanged; it is kept to preserve behaviour.
     for placeholder, linkname in (("__OLD_LINK___", old_linkname),
                                   ("__NEW_LINK__", new_linkname)):
         cypher = cypher.replace(placeholder, linkname)
     params = {'dmuser_name': self.dmuser_screen_name, "limit": limit}
     rows = execute_query_with_result(cypher, state=params)
     pending = [{'screen_name': row['screen_name']} for row in rows]
     print("Got {} users for migration".format(len(pending)))
     return pending
    def get_all_dead_buckets(self, threshold_mins_elapsed):
        """Return uuids of buckets that have been dead longer than a threshold.

        A bucket is selected when its ``dead_datetime`` is earlier than the
        current UTC time minus ``threshold_mins_elapsed`` minutes.

        Returns:
            list: the ``uuid`` of every bucket returned by the query.
        """
        print("Getting list of dead buckets for more than {} minutes".format(
            threshold_mins_elapsed))

        cutoff = datetime.utcnow() - timedelta(minutes=threshold_mins_elapsed)
        cypher = """
            MATCH(b:__UndefinedServiceTag__Bucket)
                WHERE datetime(b.dead_datetime) < datetime($state.dead_datetime_threshold)
                return b.uuid
        """.replace("__UndefinedServiceTag__", self.service_db_name)
        rows = execute_query_with_result(
            cypher, state={"dead_datetime_threshold": cutoff})
        dead_ids = [row['b.uuid'] for row in rows]
        print("Got {} buckets".format(len(dead_ids)))
        return dead_ids
 def __get_nonprocessed_userlist_with_tweet_post_with_followers_limit(
         self, max_item_counts, check_user_followers_count_limit):
     """Fetch unprocessed users that posted a tweet, capped by follower count.

     A user qualifies when it has a ``POSTS`` relationship to a ``Tweet``,
     its ``followers`` value does not exceed
     ``check_user_followers_count_limit``, it has no incoming
     ``CHECKEDUSERFOLLOWER`` relationship and it is not linked to a
     ``UserFollowerCheckBucket``. At most ``max_item_counts`` distinct
     screen names are requested.

     Returns:
         list: the ``screen_name`` values returned by the query.
     """
     print("Finding max {} users from DB who is not processed".format(
         max_item_counts))
     cypher = """
         match(u:User)-[:POSTS]->(t:Tweet)
         match(u) where  u.followers <= $state.check_user_followers_count_limit AND  NOT ()-[:CHECKEDUSERFOLLOWER]->(u) AND NOT (u)-[:INUSERFOLLOWERCHECKBUCKET]->(:UserFollowerCheckBucket)
         return distinct(u.screen_name) as screen_name LIMIT $state.limit  
     """
     params = {
         'limit': max_item_counts,
         'check_user_followers_count_limit': check_user_followers_count_limit,
     }
     rows = execute_query_with_result(cypher, state=params)
     screen_names = [row['screen_name'] for row in rows]
     print("Got {} users".format(len(screen_names)))
     return screen_names
 def get_all_entities_for_bucket(self, bucket_id):
     """Return the users placed in a bucket and stamp the bucket as edited.

     The query matches every ``User`` linked to the service bucket whose
     ``uuid`` equals ``bucket_id`` and sets the bucket's ``edit_datetime``
     to the current UTC time.

     Returns:
         list: one ``{'screen_name': ..., 'id': ...}`` dict per returned row.
     """
     print("Getting users for {} bucket".format(bucket_id))
     #TODO: Check if it is fair assumption that entity is nothing but user
     params = {'edit_datetime': datetime.utcnow(), 'uuid': bucket_id}
     cypher_template = """
         MATCH(u:User)-[:IN__UNDEFINEDSERVICETAG__BUCKET]->(b:__UndefinedServiceTag__Bucket {uuid:$state.uuid})
         SET b.edit_datetime = datetime($state.edit_datetime)
         return u.screen_name, u.id
     """
     # Upper-case placeholder (relationship type) first, then the node label.
     cypher = (
         cypher_template
         .replace("__UNDEFINEDSERVICETAG__", self.service_db_name.upper())
         .replace("__UndefinedServiceTag__", self.service_db_name)
     )
     rows = execute_query_with_result(cypher, state=params)
     members = [
         {'screen_name': row['u.screen_name'], 'id': row['u.id']}
         for row in rows
     ]
     print("Got {} users in {} bucket".format(len(members), bucket_id))
     return members