コード例 #1
0
    def setUp(self):
        """Prepare a database with two authors that each own ten posts.

        The first author, 'acquired_user', is stored with author type
        'bad_actor' and sub type 'acquired'; the second, 'TestUser1', gets no
        author type. Everything is committed before the method returns.
        """
        self.config = getConfig()
        self._db = DB()
        self._db.setUp()
        self.timeline_overlap = TimelineOverlapVisualizationGenerator()

        # Author flagged as an acquired bad actor, followed by its posts.
        bad_name = 'acquired_user'
        bad_actor = Author()
        bad_actor.name = bad_name
        bad_actor.domain = 'Microblog'
        bad_actor.author_guid = bad_name
        bad_actor.author_screen_name = bad_name
        bad_actor.author_full_name = bad_name
        bad_actor.author_osn_id = 1
        bad_actor.created_at = datetime.datetime.now()
        bad_actor.missing_data_complementor_insertion_date = datetime.datetime.now()
        bad_actor.xml_importer_insertion_date = datetime.datetime.now()
        bad_actor.author_type = 'bad_actor'
        bad_actor.author_sub_type = 'acquired'
        self._db.add_author(bad_actor)

        for index in range(1, 11):
            suffix = str(index)
            bad_post = Post()
            bad_post.post_id = 'bad_post' + suffix
            bad_post.author = bad_name
            bad_post.guid = 'bad_post' + suffix
            bad_post.date = datetime.datetime.now()
            bad_post.domain = 'Microblog'
            bad_post.author_guid = bad_name
            bad_post.content = 'InternetTV love it' + suffix
            bad_post.xml_importer_insertion_date = datetime.datetime.now()
            self._db.addPost(bad_post)

        # Author without an author type, followed by its posts.
        regular_name = 'TestUser1'
        regular_author = Author()
        regular_author.name = regular_name
        regular_author.domain = 'Microblog'
        regular_author.author_guid = regular_name
        regular_author.author_screen_name = regular_name
        regular_author.author_full_name = regular_name
        regular_author.author_osn_id = 2
        regular_author.created_at = datetime.datetime.now()
        regular_author.missing_data_complementor_insertion_date = datetime.datetime.now()
        regular_author.xml_importer_insertion_date = datetime.datetime.now()
        self._db.add_author(regular_author)

        for index in range(1, 11):
            suffix = str(index)
            regular_post = Post()
            regular_post.post_id = 'TestPost' + suffix
            regular_post.author = regular_name
            regular_post.guid = 'TestPost' + suffix
            regular_post.date = datetime.datetime.now()
            regular_post.domain = 'Microblog'
            regular_post.author_guid = regular_name
            regular_post.content = 'InternetTV love it' + suffix
            regular_post.xml_importer_insertion_date = datetime.datetime.now()
            self._db.addPost(regular_post)

        self._db.commit()
    def _convert_tweet_to_post(self, tweet, post_type):
        """Build a Post from a tweet object.

        Reads ``id``, ``date``, ``favorites``, ``retweets``, ``text``,
        ``username`` and ``permalink`` from ``tweet``. The guid (also used as
        post_id) is computed from the permalink, the author name and the
        creation date string. ``author_guid`` is not set here.

        :param tweet: tweet object exposing the attributes listed above.
        :param post_type: value stored as the post's ``post_type``.
        :return: the populated Post.
        """
        converted = Post()
        converted.post_osn_id = unicode(tweet.id)

        # Keep both the string form and the original date value.
        tweet_date = tweet.date
        created_at_text = unicode(date_to_str(tweet_date))
        converted.created_at = created_at_text
        converted.date = tweet_date

        converted.favorite_count = tweet.favorites
        converted.retweet_count = tweet.retweets
        converted.content = unicode(tweet.text)

        username = unicode(tweet.username)
        converted.author = username

        permalink = tweet.permalink
        converted.url = unicode(permalink)

        guid = compute_post_guid(permalink, username, created_at_text)
        converted.guid = guid
        converted.post_id = guid

        converted.domain = self._domain
        converted.post_type = post_type
        return converted
コード例 #3
0
    def _parse_post(self, json_content):
        """Build a Post from one parsed JSON record.

        Uses the keys ``postText`` (first element becomes the content),
        ``postTimestamp``, ``postMedia`` (first element becomes the media
        path, or None when empty) and ``id``. The record id is stored both as
        post_id and as author; the computed guid is stored as guid,
        author_guid and post_osn_guid.

        :param json_content: mapping holding the keys listed above.
        :return: the populated Post.
        """
        parsed = Post()
        parsed.content = json_content['postText'][0]

        raw_timestamp = json_content["postTimestamp"]
        parsed.created_at = str(raw_timestamp)

        timestamp_value, timestamp_text = self._get_str_and_date_formats(
            raw_timestamp)
        parsed.date = timestamp_value

        media_items = json_content["postMedia"]
        parsed.media_path = media_items[0] if len(media_items) > 0 else None

        record_id = json_content["id"]
        parsed.post_id = record_id
        parsed.author = record_id

        guid = compute_post_guid(self._social_network_url, record_id,
                                 timestamp_text)
        parsed.guid = guid
        parsed.domain = self._domain
        parsed.author_guid = guid
        parsed.post_osn_guid = guid
        return parsed
コード例 #4
0
    def _convert_row_to_post(self, row):
        """Build a Post from a claim row.

        Reads ``claim_id``, ``title``, ``description``, ``url``,
        ``publication_date``, ``keywords`` and ``post_type`` from ``row``.
        Title and description are decoded with ``errors='replace'``. The
        author is always ``self._author_name``.

        :param row: mapping holding the columns listed above.
        :return: the populated Post.
        """
        converted = Post()

        claim_id = unicode(row['claim_id'])
        converted.content = unicode(row['title'], errors='replace')
        converted.description = unicode(row['description'], errors='replace')
        converted.url = unicode(row['url'])

        published = row['publication_date']
        converted.date = date(published)

        guid = compute_post_guid(self._social_network_url, claim_id, published)
        converted.guid = guid
        converted.post_id = guid
        converted.domain = self._domain
        converted.author = self._author_name
        converted.author_guid = compute_author_guid_by_author_name(self._author_name)
        converted.post_osn_guid = guid

        converted.tags = unicode(row['keywords'])
        converted.post_type = unicode(row['post_type'])

        return converted
コード例 #5
0
    def _add_post(self, author, date, post_osn_id, score=0, upvote_ratio=-1):
        """Create a Post together with its RedditPost and store both.

        When ``upvote_ratio`` is -1 the entry is stored as a
        'reddit_comment' and ups, downs and upvote_ratio are all set to -1.
        Otherwise it is stored as a 'reddit_post' and ups/downs are derived
        from the score and the ratio.

        :param author: author name; converted with ``str``.
        :param date: date string in the "%d/%m/%Y %H:%M" format.
        :param post_osn_id: identifier stored as ``post_osn_id``.
        :param score: score stored on the RedditPost.
        :param upvote_ratio: upvote ratio, or -1 when there is none.
        :return: tuple ``(post, reddit_post)``.
        """
        is_submission = upvote_ratio != -1
        kind = 'reddit_post' if is_submission else 'reddit_comment'

        post = Post()
        post.post_osn_id = post_osn_id
        post.author = str(author)
        post.author_guid = compute_author_guid_by_author_name(post.author)
        post.created_at = str_to_date(date, formate="%d/%m/%Y %H:%M")
        # just for test
        post.url = 'https://www.reddit.com{}'.format(post.author)
        post.guid = compute_post_guid(post.url, post.post_osn_id,
                                      date_to_str(post.created_at))
        post.domain = kind
        post.post_type = kind
        post.post_id = post.guid

        reddit_post = RedditPost()
        reddit_post.post_id = post.post_id
        reddit_post.guid = post.guid
        reddit_post.score = score
        if is_submission:
            reddit_post.upvote_ratio = upvote_ratio
            if reddit_post.upvote_ratio != 0.5:
                estimated_ups = round(
                    (reddit_post.upvote_ratio * reddit_post.score) /
                    (2 * reddit_post.upvote_ratio - 1))
            else:
                # A ratio of exactly 0.5 would make the divisor above zero.
                estimated_ups = round(reddit_post.score / 2)
            reddit_post.ups = int(estimated_ups)
            reddit_post.downs = reddit_post.ups - reddit_post.score
        else:
            reddit_post.ups = -1
            reddit_post.downs = -1
            reddit_post.upvote_ratio = -1

        self._db.addPosts([post, reddit_post])
        return post, reddit_post
コード例 #6
0
    def _convert_row_to_post(self, row):
        """Turn one CSV row into a Post and a claim/post connection.

        Row columns used: site, social_id, username_hash, comment_time,
        comment_tokens. The Post is appended to ``self._posts`` and the
        connection to ``self._claim_tweet_connections``. Whenever the row
        counter reaches a multiple of ``self._max_posts_without_save`` the
        buffers are flushed via ``self._save_posts_and_connections()``.
        Nothing is returned.
        """
        progress = "\rInsert post to DataFrame {0}/{1}".format(
            self._current_row, len(self.posts_csv_df))
        print(progress, end="")
        self._current_row += 1

        comment_date = datetime.datetime.fromtimestamp(row['comment_time'])
        new_post = Post()
        claim_id = compute_author_guid_by_author_name(str(row['social_id']))
        guid = compute_post_guid(row['site'] + str(claim_id),
                                 row['username_hash'],
                                 date_to_str(comment_date))
        new_post.post_id = str(guid)
        new_post.content = str(row['comment_tokens'])
        new_post.author = str(row['username_hash'])
        new_post.author_guid = str(row['username_hash'])
        new_post.domain = str(row['site'])
        new_post.date = comment_date
        self._posts.append(new_post)

        connection = Claim_Tweet_Connection()
        connection.claim_id = str(claim_id)
        connection.post_id = str(new_post.post_id)
        self._claim_tweet_connections.append(connection)

        reached_batch_limit = self._current_row % self._max_posts_without_save == 0
        if reached_batch_limit:
            self._save_posts_and_connections()
コード例 #7
0
 def extract_post(self, data, post_type):
     """Build a Post from an article record.

     ``data`` must provide ``publish_date`` (None, or a mapping with a
     ``$date`` value in milliseconds), ``url``, ``source``, ``title``,
     ``text``, ``keywords`` and ``meta_data``. When ``publish_date`` is None
     the current time is used instead.

     :param data: mapping holding the keys listed above.
     :param post_type: value stored as the post's ``post_type``.
     :return: the populated Post.
     """
     article = Post()

     published = data['publish_date']
     if published is not None:
         epoch_millis = published['$date']
     else:
         epoch_millis = calendar.timegm(time.gmtime()) * 1000
     published_at = datetime.datetime.fromtimestamp(epoch_millis / 1000)
     published_text = published_at.strftime('%Y-%m-%d %H:%M:%S')

     article.post_id = compute_post_guid(data['url'], data['source'],
                                         published_text)
     article.guid = article.post_id
     article.author_guid = compute_author_guid_by_author_name(data['source'])
     article.author = str(data['source'])
     article.date = convert_str_to_unicode_datetime(published_text)
     article.title = str(data['title'])
     article.url = str(data['url'])
     article.source_url = str(data['source'])
     article.content = str(data['text'])
     article.tags = ','.join(data['keywords'])
     article.domain = self._domain
     article.post_type = post_type

     if 'description' in data['meta_data']:
         article.description = str(data['meta_data']['description'])
     else:
         article.description = ""
     return article
コード例 #8
0
    def _create_post_by_row(self, record_dict):
        """Build a Post from one record and register it in ``self._post_dict``.

        Keys read from ``record_dict``: post_id, tumblog_id, post_short_url,
        created_time_epoch, post_title, post_content, post_type, post_format,
        post_reblog_key, post_tags and post_state.

        When the creation time is None, ``post.date`` is left unset and the
        guid is computed from the value returned by ``self._set_start_date()``.
        ``post_content`` is parsed as JSON unless it equals 'NULL'; its
        'title' and 'text' entries (when present) are concatenated into the
        content.

        :return: the populated Post.
        """
        to_unicode = self._convert_to_unicode_value
        new_post = Post()

        identifier = to_unicode(record_dict["post_id"])
        new_post.post_osn_id = identifier
        new_post.post_id = identifier

        blog_name = to_unicode(record_dict["tumblog_id"])
        new_post.author = blog_name

        short_url = to_unicode(record_dict["post_short_url"])
        self._set_post_url(short_url, blog_name, new_post)

        created_epoch = to_unicode(record_dict["created_time_epoch"])
        new_post.created_at = created_epoch
        if created_epoch is None:
            created_text = self._set_start_date()
        else:
            created_date, created_text = convert_epoch_timestamp_to_datetime(
                created_epoch)
            new_post.date = created_date

        new_post.guid = compute_post_guid(new_post.url, blog_name, created_text)
        new_post.post_osn_guid = new_post.guid

        new_post.title = to_unicode(record_dict["post_title"])

        raw_content = record_dict["post_content"]
        if raw_content != 'NULL':
            parsed_content = json.loads(raw_content.decode("utf-8"))
            combined = ""
            for key in ('title', 'text'):
                if key in parsed_content.keys():
                    combined += parsed_content[key]
            new_post.content = to_unicode(combined)

        new_post.domain = self._domain
        new_post.author_guid = compute_author_guid_by_author_name(blog_name)
        new_post.post_type = to_unicode(record_dict["post_type"])
        new_post.post_format = to_unicode(record_dict["post_format"])
        new_post.reblog_key = to_unicode(record_dict["post_reblog_key"])
        new_post.tags = to_unicode(record_dict["post_tags"])
        new_post.state = to_unicode(record_dict["post_state"])

        # Only the first post seen for a given OSN id is kept in the registry.
        if new_post.post_osn_id not in self._post_dict:
            self._post_dict[new_post.post_osn_id] = new_post

        return new_post
コード例 #9
0
 def add_review_to_restorunt(self, review, api_id, json_id):
     """Build a 'Restaurant' Post from a review record.

     :param review: mapping with the keys 'text', 'date', 'useful' and
         'review_id'.
     :param api_id: value stored as the post's ``author_guid``.
     :param json_id: value stored as the post's ``author``.
     :return: the populated Post (it is not stored by this method).
     """
     review_post = Post()
     review_post.author_guid = api_id
     review_post.author = json_id
     review_post.domain = 'Restaurant'

     # Fields copied straight from the review record.
     review_post.content = review['text']
     review_post.created_at = review['date']
     review_post.favorite_count = review['useful']
     review_post.post_id = review['review_id']
     return review_post
 def _add_post(self, title, content, _domain=u'Microblog'):
     """Store a post written by ``self._author`` and remember it.

     The title doubles as post_id and guid. The post is added to the
     database and appended to ``self._posts``.
     """
     new_post = Post()
     new_post.author = self._author.author_full_name
     new_post.author_guid = self._author.author_guid
     new_post.content = content
     new_post.title = title
     new_post.domain = _domain

     # The title is reused as both identifiers.
     new_post.post_id = title
     new_post.guid = title

     self._db.addPost(new_post)
     self._posts.append(new_post)
 def _add_post(self, title, content):
     """Store a 'test' domain post written by ``self._author``.

     The current length of ``self._posts`` is used as post_id and guid.
     The post is added to the database and appended to ``self._posts``.
     """
     new_post = Post()
     new_post.author = self._author.author_full_name
     new_post.author_guid = self._author.author_guid
     new_post.content = content
     new_post.title = title
     new_post.domain = u'test'

     # Number of posts added so far serves as the identifier.
     new_post.post_id = len(self._posts)
     new_post.guid = new_post.post_id

     self._db.addPost(new_post)
     self._posts.append(new_post)
コード例 #12
0
 def _generate_comment(self, instagram_comment, post):
     """Build a comment Post from an Instagram comment record.

     :param instagram_comment: mapping with the keys 'created_at', 'id',
         'text' and 'owner' (itself holding 'username' and 'id').
     :param post: parent post; its ``url`` is the prefix of the comment url.
     :return: the populated Post with post_type 'comment'.
     """
     owner = instagram_comment['owner']

     result = Post()
     result.date = datetime.datetime.fromtimestamp(
         instagram_comment['created_at'])
     result.post_osn_id = instagram_comment['id']
     result.content = str(instagram_comment['text'])
     result.author = str(owner['username'])
     result.author_guid = str(owner['id'])
     result.url = '{}{}/'.format(post.url, result.post_osn_id)
     result.domain = 'Instagram'
     result.post_type = 'comment'
     result.post_id = str(result.post_osn_id)
     return result
コード例 #13
0
 def convert_comment_to_post(self, comment, submission, domain=u"Reddit"):
     """Convert a Reddit comment object into a Post.

     A comment with an author has that author appended to
     ``self._redditors``. A comment without one gets an empty author name,
     and its date (as a string) is appended to ``self._deleted_redditors``.

     :param comment: comment object; ``id``, ``created`` and ``permalink``
         are read directly, ``author`` and ``body`` are optional.
     :param submission: submission the comment belongs to; its ``title``
         becomes the post title.
     :param domain: value stored as both ``domain`` and ``post_type``.
     :return: the populated Post.
     """
     post = Post()
     post.post_osn_id = unicode(comment.id)

     # Convert the timestamp once; both fields hold the same moment.
     created = datetime.fromtimestamp(comment.created)
     post.created_at = created
     post.date = created

     if hasattr(comment, 'author') and comment.author:
         post.author = unicode(comment.author.name)
         self._redditors.append(comment.author)
     else:
         self._deleted_redditors.append(str(post.date))
         post.author = unicode('')
     post.author_guid = compute_author_guid_by_author_name(post.author)

     # The guid is derived from this intermediate URL (permalink segments
     # 3..6). It is kept exactly as-is so guids stay stable; the public URL
     # is assigned after the guid is computed.
     post.url = unicode('https://www.reddit.com' + '/'.join(getattr(comment, 'permalink', '').split('/')[3:7]))
     post.title = unicode(submission.title)

     # Stay in unicode: encoding to UTF-8 bytes and passing the result back
     # to unicode() decodes with the default (ASCII) codec, which raises
     # UnicodeDecodeError for any non-ASCII comment body.
     post.content = unicode(getattr(comment, 'body', u'')).strip()

     post.guid = compute_post_guid(post.url, post.post_osn_id, date_to_str(post.created_at))
     post.domain = domain
     post.post_type = domain
     post.post_id = post.guid
     post.url = u'https://www.reddit.com{}'.format(comment.permalink)
     return post
コード例 #14
0
 def _json_comment_to_db_comment_converter(self, post, domain="Instagram_comment"):
     """Convert an Instagram comment record into a Post.

     :param post: mapping with the keys 'id', 'created_at', 'owner'
         (holding 'id'), 'shortcode' and 'text'.
     :param domain: value stored as both ``domain`` and ``post_type``.
     :return: the populated Post; its guid doubles as post_id.
     """
     converted = Post()
     converted.post_osn_id = str(post['id'])
     converted.created_at = datetime.fromtimestamp(post['created_at'])

     # The owner id is used as the author name.
     converted.author = post['owner']['id']
     converted.author_guid = compute_author_guid_by_author_name(converted.author)

     media_url = 'https://www.instagram.com/p/{}/'.format(post['shortcode'])
     converted.url = str(media_url)
     converted.content = post['text']

     created_text = date_to_str(converted.created_at)
     converted.guid = compute_post_guid(converted.url, converted.post_osn_id, created_text)
     converted.domain = domain
     converted.post_type = domain
     converted.post_id = converted.guid
     return converted
 def _add_post(self, title, content, author_guid):
     """Store and commit a 'test' domain post for the given author guid.

     ``author_guid`` is used for both author and author_guid. The current
     length of ``self._posts`` becomes post_id and guid, and the date is
     fixed to '2020-01-01 23:59:59'. The post is appended to
     ``self._posts`` after the commit.
     """
     new_post = Post()
     new_post.author = author_guid
     new_post.author_guid = author_guid
     new_post.content = content
     new_post.title = title
     new_post.domain = u'test'

     # Number of posts added so far serves as the identifier.
     new_post.post_id = len(self._posts)
     new_post.guid = new_post.post_id
     new_post.date = date('2020-01-01 23:59:59')

     self._db.addPost(new_post)
     self._db.session.commit()
     self._posts.append(new_post)
 def _add_post(self, author_guid, title, content, domain=u'Microblog'):
     """Store a detailed post for the given author guid and remember it.

     ``author_guid`` is used for both author and author_guid; the title
     doubles as post_id and guid. ``is_detailed`` is set to True and
     ``is_LB`` to False.
     """
     new_post = Post()
     new_post.author = author_guid
     new_post.author_guid = author_guid
     new_post.content = content
     new_post.title = title
     new_post.domain = domain

     # The title is reused as both identifiers.
     new_post.post_id = title
     new_post.guid = new_post.post_id

     new_post.is_detailed = True
     new_post.is_LB = False

     self._db.addPost(new_post)
     self._posts.append(new_post)
 def _add_post(self, post_id, content, url, _domain=u'Microblog'):
     """Store a post by 'test_user' and index it by ``post_id``.

     ``post_id`` is used as title, post_id and guid; ``url`` is used as both
     url and source_url. The post is added to the database and stored in
     ``self._posts`` under ``post_id``.
     """
     test_user = u'test_user'

     new_post = Post()
     new_post.author = test_user
     new_post.author_guid = test_user
     new_post.content = content
     new_post.title = post_id
     new_post.domain = _domain
     new_post.post_id = post_id
     new_post.guid = post_id
     new_post.url = url
     new_post.source_url = url

     self._db.addPost(new_post)
     self._posts[post_id] = new_post
コード例 #18
0
 def _json_post_to_db_post_converter(self, post, domain="Instagram_post"):
     """Convert an Instagram post record into a Post.

     :param post: mapping with the keys 'id', 'taken_at_timestamp', 'owner'
         (holding 'id'), 'shortcode' and 'edge_media_to_caption' (holding
         'edges', each with a 'node' that has 'text').
     :param domain: value stored as both ``domain`` and ``post_type``.
     :return: the populated Post; its guid doubles as post_id.
     """
     converted = Post()
     converted.post_osn_id = str(post['id'])
     converted.created_at = datetime.fromtimestamp(post['taken_at_timestamp'])

     # The owner id is used as the author name.
     converted.author = post['owner']['id']
     converted.author_guid = compute_author_guid_by_author_name(converted.author)

     media_url = 'https://www.instagram.com/p/{}/'.format(post['shortcode'])
     converted.url = str(media_url)

     # All caption texts are joined into one comma-separated content string.
     caption_edges = post['edge_media_to_caption']['edges']
     converted.content = ', '.join(edge['node']['text'] for edge in caption_edges)

     created_text = date_to_str(converted.created_at)
     converted.guid = compute_post_guid(converted.url, converted.post_osn_id, created_text)
     converted.domain = domain
     converted.post_type = domain
     converted.post_id = converted.guid
     return converted
コード例 #19
0
 def _add_post(self, post_id, content, tags, date_str, domain=u'Microblog'):
     """Store a dated, tagged post for ``self._author``.

     The author's guid is used for both author and author_guid, and
     ``post_id`` doubles as title and guid. After the post is stored the
     author's ``statuses_count`` is incremented by one.
     """
     owner_guid = self._author.author_guid

     new_post = Post()
     new_post.author = owner_guid
     new_post.author_guid = self._author.author_guid
     new_post.content = content
     new_post.title = post_id
     new_post.domain = domain
     new_post.post_id = post_id
     new_post.guid = new_post.post_id

     # date and created_at share the parsed value.
     new_post.date = convert_str_to_unicode_datetime(date_str)
     new_post.created_at = new_post.date
     new_post.tags = tags

     self._db.addPost(new_post)
     self._author.statuses_count += 1
コード例 #20
0
 def create_post(self, _author_guid1, post_content):
     """Store and commit the fixed 'TestPost' post with the given content.

     The post date is '2016-05-05 00:00:00' plus one day.

     :param _author_guid1: value stored as the post's ``author_guid``.
     :param post_content: value stored as the post's ``content``.
     """
     base_date = datetime.datetime.strptime('2016-05-05 00:00:00',
                                            '%Y-%m-%d %H:%M:%S')
     one_day = datetime.timedelta(1)

     test_post = Post()
     test_post.post_id = 'TestPost'
     test_post.author = 'TechmarketNG'
     test_post.guid = 'TestPost'
     test_post.url = 'Url_From'
     test_post.date = base_date + one_day
     test_post.domain = 'Microblog'
     test_post.author_guid = _author_guid1
     test_post.content = post_content
     test_post.xml_importer_insertion_date = datetime.datetime.now()

     self._db.addPost(test_post)
     self._db.commit()
コード例 #21
0
def convert_claim_to_post(claim):
    """Build a 'Claim' domain Post from a claim object.

    Reads ``claim_id``, ``title``, ``description``, ``url``,
    ``verdict_date``, ``keywords`` and ``verdict`` from ``claim``. Author
    and author_guid are both the literal 'no author'.

    :param claim: claim object exposing the attributes listed above.
    :return: the populated Post.
    """
    from DB.schema_definition import Post

    converted = Post()
    converted.post_id = claim.claim_id
    converted.content = claim.title
    converted.description = claim.description
    converted.url = claim.url
    converted.date = claim.verdict_date
    converted.domain = 'Claim'

    placeholder_author = 'no author'
    converted.author = placeholder_author
    converted.author_guid = placeholder_author

    date_text = date_to_str(converted.date)
    converted.guid = compute_post_guid(claim.url, converted.author, date_text)
    converted.post_osn_guid = converted.guid
    converted.tags = claim.keywords
    converted.post_type = claim.verdict
    return converted
    def setUp(self):
        """Store one author and one post, then run the feature generator.

        The author and the post are committed first. An
        AccountPropertiesFeatureGenerator is then built with that author and
        post and executed.
        """
        self._db = DB()
        self._db.setUp()
        self.author_guid = u"author_guid"

        test_author = Author()
        test_author.author_guid = self.author_guid
        test_author.author_full_name = u'author'
        test_author.name = u'author_name'
        test_author.author_screen_name = u'author_screen_name'
        test_author.domain = u'Microblog'
        # Counters
        test_author.statuses_count = 10
        test_author.friends_count = 5
        test_author.followers_count = 6
        test_author.favourites_count = 8
        # Classification
        test_author.author_sub_type = u"bot"
        test_author.author_type = u"bad"
        test_author.created_at = u"2017-06-17 05:00:00"
        # Profile flags
        test_author.default_profile = True
        test_author.default_profile_image = True
        test_author.verified = True
        self._db.add_author(test_author)

        test_post = Post()
        test_post.author = self.author_guid
        test_post.author_guid = self.author_guid
        test_post.content = u"content"
        test_post.title = u"title"
        test_post.domain = u"domain"
        test_post.post_id = u"post_id"
        test_post.guid = test_post.post_id
        test_post.date = convert_str_to_unicode_datetime("2017-06-14 05:00:00")
        test_post.created_at = test_post.date
        self._db.addPost(test_post)

        self._db.session.commit()
        self.feature_prefix = u"AccountPropertiesFeatureGenerator_"

        posts_by_author = {self.author_guid: [test_post]}
        self.account_properties_feature_generator = AccountPropertiesFeatureGenerator(
            self._db, authors=[test_author], posts=posts_by_author)
        self.account_properties_feature_generator.execute()
コード例 #23
0
 def _add_post(self,
               title,
               content,
               date_str,
               domain='Microblog',
               post_type=None):
     """Store a dated post for ``self._author`` and remember it.

     The author's guid is used for both author and author_guid, and the
     title doubles as post_id and guid. The post is added to the database
     and appended to ``self._posts``.
     """
     owner_guid = self._author.author_guid

     new_post = Post()
     new_post.author = owner_guid
     new_post.author_guid = self._author.author_guid
     new_post.content = content
     new_post.title = title
     new_post.domain = domain
     new_post.post_id = title
     new_post.guid = new_post.post_id

     # date and created_at share the parsed value.
     new_post.date = convert_str_to_unicode_datetime(date_str)
     new_post.created_at = new_post.date
     new_post.post_type = post_type

     self._db.addPost(new_post)
     self._posts.append(new_post)
コード例 #24
0
    def _create_post_citation_by_row(self, reblogged_from_metadata):
        """Build the Post for the parent (reblogged-from) post.

        Keys read from ``reblogged_from_metadata``: parent_post_id,
        parent_post_blog_id and parent_post_short_url. The post is registered
        in ``self._post_dict`` under the parent post id unless that id is
        already present.

        :return: the populated Post.
        """
        parent_id = reblogged_from_metadata["parent_post_id"]
        parent_blog_id = reblogged_from_metadata["parent_post_blog_id"]

        parent_post = Post()
        parent_post.post_osn_id = parent_id
        parent_post.post_id = parent_id

        parent_post.author = parent_blog_id
        parent_post.author_guid = compute_author_guid_by_author_name(parent_blog_id)
        parent_post.domain = self._domain

        short_url = self._convert_to_unicode_value(
            reblogged_from_metadata["parent_post_short_url"])
        self._set_post_url(short_url, parent_blog_id, parent_post)

        # Keep the first post registered for this id.
        if parent_id not in self._post_dict:
            self._post_dict[parent_id] = parent_post
        return parent_post
    def _create_post(self, original_liar_dataset_id, speaker, targeted_label,
                     statement):
        """Build a Post for one dataset statement.

        Every post gets the fixed date '2007-01-01 00:00:00', which is also
        the date string used for the guid.

        :param original_liar_dataset_id: dataset id; its ``str`` form is the
            post_id and the raw value feeds the guid.
        :param speaker: author name.
        :param targeted_label: value stored as ``post_type``.
        :param statement: value stored as ``content``.
        :return: the populated Post.
        """
        fixed_date_text = '2007-01-01 00:00:00'

        new_post = Post()
        new_post.post_id = str(original_liar_dataset_id)

        guid = compute_post_guid(self._social_network_url,
                                 original_liar_dataset_id, fixed_date_text)
        new_post.guid = guid
        new_post.domain = self._domain
        new_post.author = speaker
        new_post.author_guid = compute_author_guid_by_author_name(speaker)
        new_post.post_osn_guid = guid
        new_post.date = date(fixed_date_text)
        new_post.post_type = targeted_label
        new_post.content = statement

        return new_post
    def create_dummy_post(self):
        """Return a Post filled with fixed dummy values.

        post_id and author_guid come from ``self.post_id`` and
        ``self.author_guid``; the guid is freshly generated on every call.
        The post is not stored by this method.
        """
        dummy = Post()

        # Identity
        dummy.post_id = unicode(self.post_id)
        dummy.guid = unicode(generate_random_guid())
        dummy.post_osn_id = 123455678
        dummy.author = u"author"
        dummy.author_guid = unicode(self.author_guid)

        # Content
        dummy.title = u"title"
        dummy.content = u"text"
        dummy.url = u"http://google.com"
        dummy.domain = u"Google"

        # Dates
        dummy.date = str_to_date("2016-08-24 10:00:15")
        dummy.created_at = u"2016-08-24 10:00:15"

        # Flags and counters
        dummy.is_detailed = True
        dummy.is_LB = False
        dummy.is_valid = True
        dummy.retweet_count = 11
        dummy.favorite_count = 10

        return dummy
コード例 #27
0
 def photo_xml_to_post(self, child):
     """Build a 'flickr' domain Post from a photo XML element.

     Sub-elements read from ``child``: title, urls/url, tags/tag (optional),
     dates (``posted`` attribute), owner (``nsid`` attribute), comments and
     labels/label (optional). The element's ``id`` attribute becomes
     ``post_osn_id`` and the comment count is stored in ``retweet_count``.

     :param child: photo element supporting ``find``/``findall``/``get``.
     :return: the populated Post.
     """
     p = Post()
     p.title = str(child.find('title').text)
     p.url = str(child.find('urls').find('url').text)
     try:
         p.tags = ','.join(tag.text
                           for tag in child.find('tags').findall('tag'))
     except (AttributeError, TypeError):
         # Tags are best effort: a missing <tags> element (find() returns
         # None) or a <tag> without text leaves p.tags unset. Only these
         # failures are ignored, so KeyboardInterrupt/SystemExit and
         # unrelated errors are no longer swallowed.
         pass
     p.created_at = str(child.find('dates').get('posted'))
     # The posted value is converted with int() and treated as an epoch
     # timestamp.
     p.date = datetime.datetime.fromtimestamp(int(p.created_at))
     p.author = str(child.find('owner').get('nsid'))
     p.domain = 'flickr'
     p.author_guid = compute_author_guid_by_author_name(p.author)
     p.retweet_count = int(child.find('comments').text)
     p.post_id = compute_post_guid(p.url, p.author, date_to_str(p.date))
     p.post_osn_id = str(child.get('id'))
     if child.find('labels') is not None:
         p.post_type = ','.join(
             tag.text for tag in child.find('labels').findall('label'))
     return p
コード例 #28
0
    def _convert_tweet_dict_to_post(self, tweet_dict):
        """Build a Post from a tweet dictionary.

        Keys read from ``tweet_dict``: id_str, author_osn_id, created_at,
        favorite_count, retweet_count, reply_count, full_text and lang. The
        author is looked up in ``self._author_osn_id_author_dict`` by its
        OSN id, and its screen name is used as author name, in the post url
        and in the guid.

        :return: the populated Post; its guid doubles as post_id.
        """
        converted = Post()

        tweet_id = tweet_dict['id_str']
        converted.post_osn_id = tweet_id

        known_author = self._author_osn_id_author_dict[tweet_dict['author_osn_id']]
        screen_name = known_author.author_screen_name
        converted.author = screen_name
        converted.author_guid = compute_author_guid_by_author_name(screen_name)

        raw_created_at = tweet_dict['created_at']
        converted.created_at = raw_created_at

        published_text = extract_tweet_publiction_date(raw_created_at)
        converted.date = str_to_date(published_text)

        converted.favorite_count = tweet_dict['favorite_count']
        converted.retweet_count = tweet_dict['retweet_count']
        converted.reply_count = tweet_dict['reply_count']
        converted.content = str(tweet_dict['full_text'])
        converted.domain = self._domain
        converted.language = str(tweet_dict['lang'])

        tweet_url = "https://twitter.com/{0}/status/{1}".format(
            screen_name, tweet_id)
        converted.url = tweet_url

        guid = compute_post_guid(tweet_url, screen_name, published_text)
        converted.guid = guid
        converted.post_id = guid

        return converted