def _create_author_by_row(self, record_dict):
        """Build an Author from one Tumblr blog record.

        The record's ``tumblog_id`` is used both as the OSN id and as the
        author name; the author GUID is computed from that name.

        :param record_dict: mapping that provides the keys ``tumblog_id``,
            ``tumblr_blog_name``, ``tumblr_blog_description``,
            ``created_time_epoch``, ``tumblr_blog_url``, ``is_private``,
            ``timezone`` and ``language``.
        :return: the populated Author; this method does not store it.
        :raises KeyError: if one of the keys listed above is missing.
        """
        author = Author()

        author_osn_id = self._convert_to_unicode_value(record_dict["tumblog_id"])
        author.author_osn_id = author_osn_id
        author.name = author_osn_id

        author.domain = self._domain
        author.author_guid = compute_author_guid_by_author_name(author.name)

        author.author_screen_name = self._convert_to_unicode_value(record_dict["tumblr_blog_name"])
        author.description = self._convert_to_unicode_value(record_dict["tumblr_blog_description"])

        created_time_epoch = self._convert_to_unicode_value(record_dict["created_time_epoch"])
        if created_time_epoch is not None:
            # Only the string form is stored; the first returned value is not needed.
            _created_dt, str_datetime = convert_epoch_timestamp_to_datetime(created_time_epoch)
            author.created_at = str_datetime
        else:
            # No creation time in the record: fall back to the configured start date.
            author.created_at = self._set_start_date()

        author.url = self._convert_to_unicode_value(record_dict["tumblr_blog_url"])

        # ``protected`` is stored as 1/0. Only the exact string "TRUE" marks the
        # blog as private. (An earlier assignment via get_boolean_value() was
        # always overwritten by this comparison, so it has been dropped.)
        author.protected = 1 if record_dict["is_private"] == "TRUE" else 0

        author.time_zone = self._convert_to_unicode_value(record_dict["timezone"])
        author.language = self._convert_to_unicode_value(record_dict["language"])

        return author
예제 #2
0
    def setUp(self):
        """Prepare a DB with two authors and ten posts for each of them.

        The first author ('acquired_user') is typed as a 'bad_actor' with the
        sub type 'acquired'; the second ('TestUser1') has no type set. All
        records use the 'Microblog' domain, and everything is committed at
        the end.
        """
        self.config = getConfig()
        self._db = DB()
        self._db.setUp()
        self.timeline_overlap = TimelineOverlapVisualizationGenerator()

        def build_author(user_name, osn_id):
            # The same string serves as name, guid, screen name and full name.
            new_author = Author()
            new_author.name = user_name
            new_author.domain = 'Microblog'
            new_author.author_guid = user_name
            new_author.author_screen_name = user_name
            new_author.author_full_name = user_name
            new_author.author_osn_id = osn_id
            new_author.created_at = datetime.datetime.now()
            new_author.missing_data_complementor_insertion_date = datetime.datetime.now()
            new_author.xml_importer_insertion_date = datetime.datetime.now()
            return new_author

        def add_ten_posts(user_name, id_prefix):
            # Posts are numbered 1..10; the number is appended to id and content.
            for number in range(1, 11):
                suffix = str(number)
                new_post = Post()
                new_post.post_id = id_prefix + suffix
                new_post.author = user_name
                new_post.guid = id_prefix + suffix
                new_post.date = datetime.datetime.now()
                new_post.domain = 'Microblog'
                new_post.author_guid = user_name
                new_post.content = 'InternetTV love it' + suffix
                new_post.xml_importer_insertion_date = datetime.datetime.now()
                self._db.addPost(new_post)

        acquired = build_author('acquired_user', 1)
        acquired.author_type = 'bad_actor'
        acquired.author_sub_type = 'acquired'
        self._db.add_author(acquired)
        add_ten_posts('acquired_user', 'bad_post')

        regular = build_author('TestUser1', 2)
        self._db.add_author(regular)
        add_ten_posts('TestUser1', 'TestPost')

        self._db.commit()
    def _get_source_id_source_element_dict(self, source_ids, targeted_fields_dict):
        """Map every requested source id to an Author object.

        All rows of the source table are loaded and filtered by
        ``source_ids``. For each remaining row an Author is looked up in the
        DB's author dictionary when an author guid is available; otherwise a
        minimal Author is created from the row.

        :param source_ids: iterable of ids to keep.
        :param targeted_fields_dict: configuration whose ``'source'`` entry
            provides ``'table_name'`` and ``'id'`` (the id attribute name).
        :return: a ``defaultdict`` (without default factory) keyed by source id.
        :raises KeyError: if a looked-up author guid is not in the author
            dictionary.
        """
        source_settings = targeted_fields_dict['source']
        table_name = source_settings['table_name']
        id_attribute = source_settings['id']

        rows = self._db.get_table_elements_by_where_cluases(table_name, [])
        authors_by_guid = self._db.get_author_dictionary()
        wanted_ids = set(source_ids)

        def resolve_author(row, row_id):
            # Prefer an author from the DB dictionary whenever a guid is known.
            if isinstance(row, Author):
                return authors_by_guid[row.author_guid]
            if id_attribute == u"author_guid":
                return authors_by_guid[row_id]
            if hasattr(row, u"author_guid"):
                return authors_by_guid[row.author_guid]
            # No guid available: build a placeholder author keyed by the row id.
            placeholder = Author()
            placeholder.author_guid = row_id
            placeholder.statuses_count = len(targeted_fields_dict)
            if hasattr(row, 'created_at'):
                placeholder.created_at = row.created_at
            return placeholder

        result = defaultdict()
        for row in rows:
            row_id = getattr(row, id_attribute)
            if row_id is None or row_id not in wanted_ids:
                continue
            result[row_id] = resolve_author(row, row_id)
        return result
예제 #4
0
    def _parse_author(self, json_content):
        """Create an Author from one JSON post record.

        ``json_content["id"]`` is used as name, screen name and OSN id, and
        the author GUID is produced by ``compute_post_guid``. The dataset
        type is stored in the ``notifications`` field.

        :param json_content: mapping with the keys ``"id"``,
            ``"postTimestamp"`` and ``"postMedia"``.
        :return: the populated Author.
        """
        parsed = Author()

        record_id = json_content["id"]
        parsed.name = parsed.author_screen_name = parsed.author_osn_id = record_id
        parsed.domain = self._domain

        # Only the second value returned by the helper is used below.
        _unused, post_date_value = self._get_str_and_date_formats(
            json_content["postTimestamp"])
        parsed.created_at = str(post_date_value)
        parsed.author_guid = compute_post_guid(
            self._social_network_url, record_id, post_date_value)

        # author_type is only set for ids that have a known targeted class.
        targeted_classes = self._post_id_targeted_class_dict
        if record_id in targeted_classes:
            parsed.author_type = targeted_classes[record_id]

        # Keep the first media entry, if there is any.
        media_entries = json_content["postMedia"]
        parsed.media_path = media_entries[0] if len(media_entries) > 0 else None

        parsed.notifications = self._dataset_type

        return parsed
예제 #5
0
 def _create_defult_author(self, source_id, targeted_fields_dict, temp_author):
     """Build a minimal Author for a source that has no stored author.

     :param source_id: value stored as the author's guid.
     :param targeted_fields_dict: its length is stored as ``statuses_count``.
     :param temp_author: source element; its ``date`` attribute, when
         present, becomes ``created_at``.
     :return: the new Author.
     """
     fallback = Author()
     fallback.author_guid = source_id
     fallback.statuses_count = len(targeted_fields_dict)
     if hasattr(temp_author, 'date'):
         fallback.created_at = temp_author.date
     return fallback
 def _add_author(self, author_guid):
     """Create a test Author and keep it on ``self._author``.

     :param author_guid: used as both the guid and the screen name.
     """
     test_author = Author()
     test_author.author_guid = author_guid
     test_author.author_screen_name = author_guid
     test_author.author_full_name = u'test author'
     test_author.name = u'test'
     test_author.domain = u'tests'
     test_author.statuses_count = 0
     test_author.created_at = u"2017-06-14 05:00:00"
     # The author is only kept on the instance; the DB insert
     # (self._db.add_author) is disabled here.
     self._author = test_author
예제 #7
0
 def _convert_reddit_author_to_author(self, redditor):
     """Convert a reddit user object into an Author.

     Every attribute is read with a default, so a redditor object that
     lacks ``name``, ``created_utc``, ``id`` or ``fullname`` still yields
     an Author instead of raising ``AttributeError``.

     :param redditor: object that may expose ``name``, ``created_utc``,
         ``id`` and ``fullname``.
     :return: the populated Author in the ``reddit`` domain.
     """
     author = Author()
     author.name = getattr(redditor, 'name', '')
     author.author_screen_name = author.name
     author.author_guid = compute_author_guid_by_author_name(author.name)
     author.domain = u'reddit'
     # A missing creation time falls back to timestamp 0.
     author.created_at = datetime.fromtimestamp(getattr(redditor, 'created_utc', 0))
     author.author_osn_id = getattr(redditor, 'id', '')
     author.author_full_name = getattr(redditor, 'fullname', '')
     # Use the defaulted name; reading redditor.name directly would raise
     # AttributeError for objects without that attribute.
     author.url = u'https://www.reddit.com/user/' + author.name
     return author
 def liker_to_author(self, liker, photo_id):
     """Convert a 'liker' record of a photo into an Author of type 'like'.

     :param liker: mapping with ``'username'``, ``'favedate'`` and
         ``'nsid'``; ``'realname'`` is optional and defaults to "".
     :param photo_id: stored (as a string) in the author's ``domain``.
     :return: the populated Author; its guid is computed from the nsid.
     """
     liker_author = Author()
     liker_author.name = str(liker['username'])
     real_name = liker.get('realname', "")
     liker_author.author_full_name = str(real_name)
     liker_author.domain = str(photo_id)
     liker_author.created_at = str(liker["favedate"])
     osn_id = str(liker['nsid'])
     liker_author.author_osn_id = osn_id
     liker_author.author_guid = compute_author_guid_by_author_name(
         liker_author.author_osn_id)
     liker_author.author_type = 'like'
     return liker_author
 def commenter_to_author(self, commenter, photo_id):
     """Convert a comment record of a photo into an Author of type 'comment'.

     :param commenter: mapping with ``'authorname'``, ``'path_alias'``,
         ``'permalink'``, ``'datecreate'`` and ``'author'``; ``'realname'``
         is optional and defaults to "".
     :param photo_id: stored (as a string) in the author's ``domain``.
     :return: the populated Author; its guid is computed from the
         ``'author'`` value.
     """
     comment_author = Author()
     comment_author.name = str(commenter['authorname'])
     comment_author.author_screen_name = str(commenter['path_alias'])
     real_name = commenter.get('realname', "")
     comment_author.author_full_name = str(real_name)
     comment_author.url = str(commenter['permalink'])
     comment_author.domain = str(photo_id)
     comment_author.created_at = str(commenter["datecreate"])
     comment_author.author_osn_id = str(commenter['author'])
     comment_author.author_guid = compute_author_guid_by_author_name(
         comment_author.author_osn_id)
     comment_author.author_type = 'comment'
     return comment_author
 def owner_to_author(self, jsonresponse):
     """Convert the owner of a photo response into an Author of type 'owner'.

     :param jsonresponse: mapping whose ``'photo'`` entry holds ``'id'``,
         ``'dates'`` (with ``'posted'``) and ``'owner'`` (with
         ``'username'``, ``'realname'``, ``'nsid'`` and ``'location'``).
     :return: the populated Author; the photo id is stored in ``domain``
         and the guid is computed from the owner's nsid.
     """
     photo = jsonresponse['photo']
     owner = photo['owner']

     owner_author = Author()
     owner_author.name = str(owner['username'])
     owner_author.author_full_name = str(owner['realname'])
     owner_author.domain = str(photo['id'])
     owner_author.created_at = str(photo['dates']["posted"])
     owner_author.author_osn_id = str(owner['nsid'])
     owner_author.author_guid = compute_author_guid_by_author_name(
         owner_author.author_osn_id)
     owner_author.location = str(owner['location'])
     owner_author.author_type = 'owner'
     return owner_author
 def parse_row(self, row):
     """Turn one inspection row into an Author, or None when unusable.

     :param row: mapping with the keys ``'Risk'``, ``'AKA Name'``,
         ``'City'``, ``'Address'``, ``'Location'``, ``'DBA Name'`` and
         ``'Inspection Date'``.
     :return: the Author, or ``None`` when ``parse_type`` yields -1 or when
         the row cannot be converted (e.g. a key is missing).
     """
     try:
         author = Author()
         author.author_type = self.parse_type(row['Risk'])
         if author.author_type == -1:
             return None
         # Drop every non-ASCII character from the full name.
         author.author_full_name = unicode(row['AKA Name']).encode('ascii', 'ignore').decode('ascii')
         author.location = unicode(row['City'] + ' ' + row['Address'])
         author.geo_enabled = unicode(row['Location'])
         author.name = row['DBA Name']
         author.created_at = unicode(row['Inspection Date'])
         return author
     except Exception:
         # Best effort: a malformed row is skipped. Unlike a bare ``except``
         # this lets KeyboardInterrupt and SystemExit propagate.
         return None
 def parse_row(self, row):
     """Turn one inspection row into an Author, or None when unusable.

     :param row: mapping with the keys ``'risk_category'``,
         ``'business_name'``, ``'business_city'``, ``'business_address'``,
         ``'business_location'`` and ``'inspection_date'``.
     :return: the Author, or ``None`` when ``parse_type`` yields -1, when
         the row has no location, or when the row cannot be converted (the
         failure is logged in that case).
     """
     try:
         author = Author()
         author.author_type = self.parse_type(row['risk_category'])
         if author.author_type == -1:
             return None
         # Drop every non-ASCII character; the result is reused as the name.
         author.author_full_name = unicode(row['business_name']).encode('ascii', 'ignore').decode('ascii')
         author.name = author.author_full_name
         author.location = unicode(row['business_city'] + ' ' + row['business_address'])
         # Check the raw value: unicode(None) is u'None', so testing the
         # converted value for None could never detect a missing location.
         raw_location = row['business_location']
         if raw_location is None:
             return None
         author.geo_enabled = unicode(raw_location)
         if author.geo_enabled == "":
             return None
         author.created_at = unicode(row['inspection_date'])
         return author
     except Exception:
         # Best effort: log and skip the row. Unlike a bare ``except`` this
         # lets KeyboardInterrupt and SystemExit propagate.
         logging.info("error with row:" + str(row))
         return None
    def setUp(self):
        """Store one author with one post and run the feature generator.

        The author and post are added to a fresh DB and committed; then an
        AccountPropertiesFeatureGenerator is created for exactly that author
        and post and executed.
        """
        self._db = DB()
        self._db.setUp()
        self.author_guid = u"author_guid"

        test_author = Author()
        author_fields = (
            ('author_guid', self.author_guid),
            ('author_full_name', u'author'),
            ('name', u'author_name'),
            ('author_screen_name', u'author_screen_name'),
            ('domain', u'Microblog'),
            ('statuses_count', 10),
            ('friends_count', 5),
            ('followers_count', 6),
            ('favourites_count', 8),
            ('author_sub_type', u"bot"),
            ('author_type', u"bad"),
            ('created_at', u"2017-06-17 05:00:00"),
            ('default_profile', True),
            ('default_profile_image', True),
            ('verified', True),
        )
        for field_name, field_value in author_fields:
            setattr(test_author, field_name, field_value)
        self._db.add_author(test_author)

        test_post = Post()
        test_post.author = self.author_guid
        test_post.author_guid = self.author_guid
        test_post.content = u"content"
        test_post.title = u"title"
        test_post.domain = u"domain"
        test_post.post_id = u"post_id"
        # guid mirrors the post id; created_at mirrors the post date.
        test_post.guid = test_post.post_id
        test_post.date = convert_str_to_unicode_datetime("2017-06-14 05:00:00")
        test_post.created_at = test_post.date
        self._db.addPost(test_post)

        self._db.session.commit()
        self.feature_prefix = u"AccountPropertiesFeatureGenerator_"
        posts_by_author = {self.author_guid: [test_post]}
        self.account_properties_feature_generator = AccountPropertiesFeatureGenerator(
            self._db, authors=[test_author], posts=posts_by_author)
        self.account_properties_feature_generator.execute()
 def _convert_source_to_author(self, source_id, targeted_fields_dict):
     """Return an Author for the source element with the given id.

     The element is fetched from the source table. If it is an Author it
     is returned as is; if an author guid is available the author is
     loaded from the DB by that guid; otherwise a minimal Author is built
     from the element.

     :param source_id: id of the source element to convert.
     :param targeted_fields_dict: configuration whose ``'source'`` entry
         provides ``'table_name'`` and ``'id'`` (the id attribute name).
     :return: the resolved or newly created Author.
     :raises IndexError: if no element with ``source_id`` is found.
     """
     source_settings = targeted_fields_dict['source']
     table_name = source_settings['table_name']
     id_attribute = source_settings['id']

     matches = self._db.get_table_elements_by_ids(table_name, id_attribute, [source_id])
     element = matches[0]

     if isinstance(element, Author):
         return element
     if id_attribute == u"author_guid":
         return self._db.get_author_by_author_guid(source_id)
     if hasattr(element, u"author_guid"):
         return self._db.get_author_by_author_guid(element.author_guid)

     # No author guid available: create a placeholder keyed by the source id.
     placeholder = Author()
     placeholder.author_guid = source_id
     placeholder.statuses_count = len(targeted_fields_dict)
     if hasattr(element, 'created_at'):
         placeholder.created_at = element.created_at
     return placeholder