def extract_related_users(
         self, related_user_list, input_lbsn_type, user_record):
     """Extract related users from user list"""
     for related_user in related_user_list:
         related_record = HF.new_lbsn_record_with_id(lbsn.User(),
                                                     str(related_user),
                                                     self.origin)
         self.lbsn_records.append(related_record)
         # note the switch of order here,
         # direction is important for 'isConnected',
         # and the different list each give us a
         # different view on this relationship
         if input_lbsn_type == 'friendslist':
             relationship_record =\
                 HF.new_lbsn_relation_with_id(lbsn.Relationship(),
                                              user_record.pkey.id,
                                              related_record.pkey.id,
                                              self.origin)
         elif input_lbsn_type == 'followerslist':
             relationship_record = \
                 HF.new_lbsn_relation_with_id(lbsn.Relationship(),
                                              related_record.pkey.id,
                                              user_record.pkey.id,
                                              self.origin)
         relationship_record.relationship_type = \
             lbsn.Relationship.isCONNECTED
         self.lbsn_records.add_relationship_to_dict(
             relationship_record)
 def __init__(self, relationship=None):
     """Copy the identifying fields of a relationship onto this object.

     When no relationship is given, a new lbsn.Relationship() is used.
     Stores the origin id and id of the 'relation_to' side, the id of
     the 'relation_from' side, and the lowercased name of the
     relationship type.
     """
     if relationship is None:
         relationship = lbsn.Relationship()
     relation_key = relationship.pkey
     target = relation_key.relation_to
     self.origin_id = target.origin.origin_id
     self.guid = target.id
     self.guid_rel = relation_key.relation_from.id
     # resolve the enum value to its name; the name goes through
     # HF.null_check before it is lowercased
     type_name = lbsn.Relationship().RelationshipType.Name(
         relationship.relationship_type)
     self.rel_type = HF.null_check(type_name).lower()
 def dict_type_switcher(desc_name):
     """Create a protoBuf message by descriptor name.

     Returns a new message instance of the type whose DESCRIPTOR.name
     equals desc_name (Country, City, Place, User, UserGroup, Post,
     PostReaction or Relationship), or None for any other name.
     """
     message_types = (
         lbsn.Country,
         lbsn.City,
         lbsn.Place,
         lbsn.User,
         lbsn.UserGroup,
         lbsn.Post,
         lbsn.PostReaction,
         lbsn.Relationship,
     )
     messages_by_name = {
         message_type().DESCRIPTOR.name: message_type()
         for message_type in message_types
     }
     return messages_by_name.get(desc_name)
    def update_key_hash(self, record, key=None):
        """Register the key of a record in the per-type key sets.

        self.key_hashes holds one set of keys per record type (looked
        up by record.DESCRIPTOR.name). These sets can be used to check
        for duplicates or to count the unique records of each type
        (Users, Countries, Places etc.). The origin_id is assumed to
        remain the same in each program iteration.

        If key is given (anything but None), it is stored as-is.
        Otherwise the key is derived from the record itself.
        """
        type_name = record.DESCRIPTOR.name
        type_hashes = self.key_hashes[type_name]
        if key is not None:
            type_hashes.add(key)
            return
        if type_name != lbsn.Relationship().DESCRIPTOR.name:
            # every entity except relationships is identified
            # by its local guid alone
            type_hashes.add(record.pkey.id)
            return
        # relationships can span different origin_ids, so the key is
        # built from the full ids of both sides plus the type
        target = record.pkey.relation_to
        source = record.pkey.relation_from
        type_hashes.add(
            f'{target.origin.origin_id}'
            f'{target.id}'
            f'{source.origin.origin_id}'
            f'{source.id}'
            f'{record.relationship_type}')
 def extract_mentioned_users(self, ref_user_records, user_record_id):
     """Append a MENTIONS_USER relationship for each referenced user.

     For every record in ref_user_records, a relationship is created
     from user_record_id and that record's pkey.id, marked as
     MENTIONS_USER and appended to self.lbsn_records.
     """
     for ref_user in ref_user_records:
         mention = HF.new_lbsn_relation_with_id(
             lbsn.Relationship(),
             user_record_id,
             ref_user.pkey.id,
             self.origin)
         mention.relationship_type = lbsn.Relationship.MENTIONS_USER
         # NOTE(review): other relationship code stores through
         # lbsn_records.add_relationship_to_dict; confirm that plain
         # append is intended here
         self.lbsn_records.append(mention)
 def extract_mentioned_users(self, ref_user_records, user_record_id):
     """Extract mentioned users from a list of referenced user records.

     For every record in ref_user_records, a relationship is created
     from user_record_id and that record's pkey.id, marked as
     MENTIONS_USER and handed to
     self.lbsn_records.add_relationship_to_dict.
     """
     for ref_user in ref_user_records:
         mention = HF.new_lbsn_relation_with_id(
             lbsn.Relationship(),
             user_record_id,
             ref_user.pkey.id,
             self.origin)
         mention.relationship_type = lbsn.Relationship.MENTIONS_USER
         self.lbsn_records.add_relationship_to_dict(mention)
Exemple #7
0
 def get_hll_metrics(cls, record) -> hll.HllMetrics:
     """Extract hll metrics based on record type.

     Args:
         record: Record whose DESCRIPTOR.name selects the matching
             cls.get_*_metrics function.

     Returns:
         The result of calling the selected metrics function with
         record.

     Raises:
         TypeError: If no metrics function is registered for the
             type of record.
     """
     dict_switcher = {
         lbsn.Origin().DESCRIPTOR.name: cls.get_origin_metrics,
         lbsn.Country().DESCRIPTOR.name: cls.get_country_metrics,
         lbsn.City().DESCRIPTOR.name: cls.get_city_metrics,
         lbsn.Place().DESCRIPTOR.name: cls.get_place_metrics,
         lbsn.User().DESCRIPTOR.name: cls.get_user_metrics,
         lbsn.UserGroup().DESCRIPTOR.name: cls.get_usergroup_metrics,
         lbsn.Post().DESCRIPTOR.name: cls.get_post_metrics,
         lbsn.PostReaction().DESCRIPTOR.name: cls.get_postreaction_metrics,
         lbsn.Relationship().DESCRIPTOR.name: cls.get_relationship_metrics
     }
     record_type = record.DESCRIPTOR.name
     extract_function = dict_switcher.get(record_type)
     if extract_function is None:
         # An unknown type used to surface as "'NoneType' object is
         # not callable"; keep the exception class, name the type.
         raise TypeError(
             f"No hll metrics function for record type '{record_type}'")
     return extract_function(record)
 def func_prepare_selector(self, record):
     """Select correct prepare function according to record type.

     Args:
         record: Record whose DESCRIPTOR.name selects the matching
             self.prepare_lbsn_* function.

     Returns:
         The result of calling the selected prepare function with
         record.

     Raises:
         TypeError: If no prepare function is registered for the
             type of record.
     """
     dict_switcher = {
         lbsn.Origin().DESCRIPTOR.name: self.prepare_lbsn_origin,
         lbsn.Country().DESCRIPTOR.name: self.prepare_lbsn_country,
         lbsn.City().DESCRIPTOR.name: self.prepare_lbsn_city,
         lbsn.Place().DESCRIPTOR.name: self.prepare_lbsn_place,
         lbsn.User().DESCRIPTOR.name: self.prepare_lbsn_user,
         lbsn.UserGroup().DESCRIPTOR.name: self.prepare_lbsn_usergroup,
         lbsn.Post().DESCRIPTOR.name: self.prepare_lbsn_post,
         lbsn.Event().DESCRIPTOR.name: self.prepare_lbsn_event,
         lbsn.PostReaction().DESCRIPTOR.name:
         self.prepare_lbsn_postreaction,
         lbsn.Relationship().DESCRIPTOR.name: self.prepare_lbsn_relation
     }
     record_type = record.DESCRIPTOR.name
     prepare_function = dict_switcher.get(record_type)
     if prepare_function is None:
         # An unknown type used to surface as "'NoneType' object is
         # not callable"; keep the exception class, name the type.
         raise TypeError(
             f"No prepare function for record type '{record_type}'")
     return prepare_function(record)
 def __init__(self):
     """Set up empty record dicts, key-hash sets and counters."""
     # one dict per record type
     self.lbsn_origin_dict = {}
     self.lbsn_country_dict = {}
     self.lbsn_city_dict = {}
     self.lbsn_place_dict = {}
     self.lbsn_user_group_dict = {}
     self.lbsn_user_dict = {}
     self.lbsn_post_dict = {}
     self.lbsn_post_reaction_dict = {}
     self.lbsn_relationship_dict = {}
     # one (initially empty) set of keys per record type name
     self.key_hashes = {
         record_type.DESCRIPTOR.name: set()
         for record_type in (
             lbsn.Origin,
             lbsn.Post,
             lbsn.Country,
             lbsn.City,
             lbsn.Place,
             lbsn.UserGroup,
             lbsn.User,
             lbsn.PostReaction,
             lbsn.Relationship,
         )
     }
     self.count_glob = 0  # total number of records added
     self.count_glob_total = 0
     self.count_dup_merge = 0  # number of duplicate records merged
     self.count_dup_merge_total = 0
     # all record dicts in a fixed order, each paired (as a tuple)
     # with the name of the record type it holds
     dicts_with_types = (
         (self.lbsn_origin_dict, lbsn.Origin),
         (self.lbsn_country_dict, lbsn.Country),
         (self.lbsn_city_dict, lbsn.City),
         (self.lbsn_place_dict, lbsn.Place),
         (self.lbsn_user_group_dict, lbsn.UserGroup),
         (self.lbsn_user_dict, lbsn.User),
         (self.lbsn_post_dict, lbsn.Post),
         (self.lbsn_post_reaction_dict, lbsn.PostReaction),
         (self.lbsn_relationship_dict, lbsn.Relationship),
     )
     self.all_dicts = [
         (record_dict, record_type().DESCRIPTOR.name)
         for record_dict, record_type in dicts_with_types
     ]
    def submit_lbsn_relationships(self):
        """Submit the batched relationships, one table per type.

        Every batched relationship is read by index: item [0] is the
        type of relationship that determines the table selection,
        item [1] is the PostgresQL formatted list of values.

        The types are processed in a fixed order: isfriend,
        isconnected, ingroup, followsgroup, mentions_user. For each
        type with at least one entry, the values are appended to CSV
        if self.store_csv is set, and inserted with
        ON CONFLICT ... DO NOTHING if self.db_cursor is set.
        """
        def values_of(rel_type):
            # The batch is looked up again on every call, exactly as
            # each selection did when written out inline.
            batch = self.batched_lbsn_records[
                lbsn.Relationship().DESCRIPTOR.name]
            return [entry[1] for entry in batch if entry[0] == rel_type]

        friend_values = values_of("isfriend")
        if friend_values:
            if self.store_csv:
                self.csv_output.store_append_batch_to_csv(
                    friend_values, self.count_round, '_user_friends_user')
            if self.db_cursor:
                joined_values = ','.join(friend_values)
                insert_sql = f'''
                    INSERT INTO interlinkage."_user_friends_user" (
                        {self.typeNamesHeaderDict["_user_friends_user"]})
                    VALUES {joined_values}
                    ON CONFLICT (origin_id, user_guid, friend_guid)
                    DO NOTHING
                    '''
                self.submit_batch(insert_sql)

        connected_values = values_of("isconnected")
        if connected_values:
            if self.store_csv:
                self.csv_output.store_append_batch_to_csv(
                    connected_values, self.count_round,
                    '_user_connectsto_user')
            if self.db_cursor:
                joined_values = ','.join(connected_values)
                insert_sql = f'''
                        INSERT INTO interlinkage."_user_connectsto_user" (
                            {self.typeNamesHeaderDict["_user_connectsto_user"]})
                        VALUES {joined_values}
                        ON CONFLICT (origin_id, user_guid,
                            connectedto_user_guid)
                        DO NOTHING
                    '''
                self.submit_batch(insert_sql)

        member_values = values_of("ingroup")
        if member_values:
            if self.store_csv:
                self.csv_output.store_append_batch_to_csv(
                    member_values, self.count_round,
                    '_user_memberof_group')
            if self.db_cursor:
                joined_values = ','.join(member_values)
                insert_sql = f'''
                    INSERT INTO interlinkage."_user_memberof_group" (
                        {self.typeNamesHeaderDict["_user_memberof_group"]})
                    VALUES {joined_values}
                    ON CONFLICT (origin_id, user_guid, group_guid)
                    DO NOTHING
                    '''
                self.submit_batch(insert_sql)

        follower_values = values_of("followsgroup")
        if follower_values:
            if self.store_csv:
                self.csv_output.store_append_batch_to_csv(
                    follower_values, self.count_round,
                    '_user_follows_group')
            if self.db_cursor:
                joined_values = ','.join(follower_values)
                insert_sql = f'''
                    INSERT INTO interlinkage."_user_follows_group" (
                        {self.typeNamesHeaderDict["_user_follows_group"]})
                    VALUES {joined_values}
                    ON CONFLICT (origin_id, user_guid, group_guid)
                    DO NOTHING
                    '''
                self.submit_batch(insert_sql)

        mention_values = values_of("mentions_user")
        if mention_values:
            if self.store_csv:
                self.csv_output.store_append_batch_to_csv(
                    mention_values, self.count_round,
                    '_user_mentions_user')
            if self.db_cursor:
                joined_values = ','.join(mention_values)
                insert_sql = f'''
                    INSERT INTO interlinkage."_user_mentions_user" (
                        {self.typeNamesHeaderDict["_user_mentions_user"]})
                    VALUES {joined_values}
                    ON CONFLICT (origin_id, user_guid, mentioneduser_guid)
                    DO NOTHING
                    '''
                self.submit_batch(insert_sql)