def extract_post(self, data, post_type):
     """Build a ``Post`` from one article record.

     ``data`` is indexed with the keys ``publish_date``, ``url``,
     ``source``, ``title``, ``text``, ``keywords`` and ``meta_data``.
     When ``publish_date`` is ``None`` the current time is used instead;
     otherwise its ``'$date'`` entry is used.  In both cases the value is
     divided by 1000 before formatting, i.e. it is handled as
     milliseconds since the epoch.

     :param data: the article record to convert.
     :param post_type: stored unchanged on the resulting post.
     :return: the populated ``Post``.
     """
     post = Post()

     # Pick the timestamp: the record's own date, or "now" as a fallback.
     published = data['publish_date']
     if published is not None:
         epoch_millis = published['$date']
     else:
         epoch_millis = calendar.timegm(time.gmtime()) * 1000
     moment = datetime.datetime.fromtimestamp(epoch_millis / 1000)
     date_str = moment.strftime('%Y-%m-%d %H:%M:%S')

     # The post id and the guid are the same computed value.
     guid = compute_post_guid(data['url'], data['source'], date_str)
     post.post_id = guid
     post.guid = guid

     post.author_guid = compute_author_guid_by_author_name(data['source'])
     post.author = str(data['source'])
     post.date = convert_str_to_unicode_datetime(date_str)
     post.title = str(data['title'])
     post.url = str(data['url'])
     post.source_url = str(data['source'])
     post.content = str(data['text'])
     post.tags = ','.join(data['keywords'])
     post.domain = self._domain
     post.post_type = post_type

     # The description is optional inside the meta data.
     meta = data['meta_data']
     post.description = (
         str(meta['description']) if 'description' in meta else "")
     return post
Ejemplo n.º 2
0
    def _convert_row_to_post(self, row):
        """Translate one claim row into a ``Post``.

        ``row`` is indexed with the keys ``claim_id``, ``title``,
        ``description``, ``url``, ``publication_date``, ``keywords`` and
        ``post_type``.  The row's title becomes the post *content*.  The
        author and domain come from this instance, not from the row.

        :param row: the row to convert.
        :return: the populated ``Post``.
        """
        post = Post()

        claim_id = unicode(row['claim_id'])
        post.content = unicode(row['title'], errors='replace')
        post.description = unicode(row['description'], errors='replace')
        post.url = unicode(row['url'])

        publication_date = row['publication_date']
        post.date = date(publication_date)

        # One guid is shared by the guid, post_id and post_osn_guid fields.
        guid = compute_post_guid(self._social_network_url, claim_id, publication_date)
        post.guid = guid
        post.post_id = guid
        post.domain = self._domain
        post.author = self._author_name
        post.author_guid = compute_author_guid_by_author_name(self._author_name)
        post.post_osn_guid = guid

        post.tags = unicode(row['keywords'])
        post.post_type = unicode(row['post_type'])

        return post
Ejemplo n.º 3
0
    def _create_post_by_row(self, record_dict):
        """Create a ``Post`` from one record and remember it by its OSN id.

        Most fields are copied from ``record_dict`` through
        ``self._convert_to_unicode_value``.  The post content is assembled
        from the ``title`` and ``text`` entries of the JSON document stored
        under ``post_content``, unless that value is the string ``'NULL'``.
        The post is stored in ``self._post_dict`` under its
        ``post_osn_id`` only if that id is not already present.

        :param record_dict: mapping holding the raw record fields.
        :return: the populated ``Post``.
        """
        post = Post()
        to_unicode = self._convert_to_unicode_value

        osn_id = to_unicode(record_dict["post_id"])
        post.post_osn_id = osn_id
        post.post_id = osn_id

        author_name = to_unicode(record_dict["tumblog_id"])
        post.author = author_name

        short_url = to_unicode(record_dict["post_short_url"])
        # NOTE(review): presumably this fills in post.url, which is read
        # below when computing the guid -- confirm in _set_post_url.
        self._set_post_url(short_url, author_name, post)

        created_epoch = to_unicode(record_dict["created_time_epoch"])
        post.created_at = created_epoch
        if created_epoch is None:
            # No creation time: use the configured start date string.
            # post.date is left unset on this path.
            date_str = self._set_start_date()
        else:
            post.date, date_str = convert_epoch_timestamp_to_datetime(
                created_epoch)

        post.guid = compute_post_guid(post.url, author_name, date_str)
        post.post_osn_guid = post.guid

        post.title = to_unicode(record_dict["post_title"])

        raw_content = record_dict["post_content"]
        if raw_content != 'NULL':
            parsed = json.loads(raw_content.decode("utf-8"))
            # Concatenate the title and then the text, when present.
            combined = ""
            for key in ('title', 'text'):
                if key in parsed.keys():
                    combined += parsed[key]
            post.content = to_unicode(combined)

        post.domain = self._domain
        post.author_guid = compute_author_guid_by_author_name(author_name)
        post.post_type = to_unicode(record_dict["post_type"])
        post.post_format = to_unicode(record_dict["post_format"])
        post.reblog_key = to_unicode(record_dict["post_reblog_key"])
        post.tags = to_unicode(record_dict["post_tags"])
        post.state = to_unicode(record_dict["post_state"])

        # The first post seen for a given OSN id wins.
        cache_key = post.post_osn_id
        if cache_key not in self._post_dict:
            self._post_dict[cache_key] = post

        return post
Ejemplo n.º 4
0
 def _add_post(self, post_id, content, tags, date_str, domain=u'Microblog'):
     """Create a post for ``self._author``, store it and bump the counter.

     ``post_id`` is used as the post id, the guid and the title.  Both the
     ``author`` and ``author_guid`` fields receive the author's guid.  The
     post is handed to ``self._db.addPost`` and the author's
     ``statuses_count`` is then incremented by one.

     :param post_id: identifier used for post_id, guid and title.
     :param content: text stored as the post content.
     :param tags: value stored as the post tags.
     :param date_str: date string, converted with
         ``convert_str_to_unicode_datetime``.
     :param domain: domain stored on the post.
     """
     new_post = Post()

     author_guid = self._author.author_guid
     new_post.author = author_guid
     new_post.author_guid = author_guid

     new_post.content = content
     new_post.title = post_id
     new_post.domain = domain
     new_post.post_id = post_id
     new_post.guid = post_id

     # date and created_at share the same parsed value.
     parsed_date = convert_str_to_unicode_datetime(date_str)
     new_post.date = parsed_date
     new_post.created_at = parsed_date

     new_post.tags = tags

     self._db.addPost(new_post)
     self._author.statuses_count += 1
Ejemplo n.º 5
0
def convert_claim_to_post(claim):
    """Return a ``Post`` that mirrors the given claim.

    The claim's title becomes the post content, its verdict date the post
    date, its keywords the tags and its verdict the post type.  The author
    and author guid are both the placeholder ``'no author'`` and the
    domain is ``'Claim'``.

    :param claim: object exposing ``claim_id``, ``title``, ``description``,
        ``url``, ``verdict_date``, ``keywords`` and ``verdict``.
    :return: the populated ``Post``.
    """
    # Imported inside the function, as in the original code.
    from DB.schema_definition import Post

    placeholder_author = 'no author'
    verdict_date = claim.verdict_date

    post = Post()
    post.post_id = claim.claim_id
    post.content = claim.title
    post.description = claim.description
    post.url = claim.url
    post.date = verdict_date
    post.domain = 'Claim'
    post.author = placeholder_author
    post.author_guid = placeholder_author

    # The guid is derived from the url, the author and the date string.
    guid = compute_post_guid(
        claim.url, placeholder_author, date_to_str(verdict_date))
    post.guid = guid
    post.post_osn_guid = guid

    post.tags = claim.keywords
    post.post_type = claim.verdict
    return post
 def photo_xml_to_post(self, child):
     """Convert one photo XML element into a ``Post`` in the 'flickr' domain.

     Reads the ``title``, ``urls/url``, ``tags/tag``, ``dates``, ``owner``,
     ``comments`` and optional ``labels/label`` children of ``child``, plus
     its ``id`` attribute.  Tags are best-effort: if they cannot be read,
     ``p.tags`` is left untouched.  The comment count is stored in
     ``retweet_count``.

     :param child: XML element exposing ``find``/``findall``/``get``
         (presumably an ElementTree element -- confirm against the caller).
     :return: the populated ``Post``.
     :raises AttributeError: if a required child element is missing.
     :raises ValueError: if the posted date or the comment count is not
         an integer string.
     """
     p = Post()
     p.title = str(child.find('title').text)
     p.url = str(child.find('urls').find('url').text)
     try:
         p.tags = ','.join(tag.text
                           for tag in child.find('tags').findall('tag'))
     except (AttributeError, TypeError):
         # A missing <tags> element raises AttributeError; a <tag> with no
         # text makes join() raise TypeError.  Only those are ignored, so
         # KeyboardInterrupt, SystemExit and unrelated bugs still propagate.
         pass
     p.created_at = str(child.find('dates').get('posted'))
     # 'posted' is parsed as an integer epoch timestamp.
     p.date = datetime.datetime.fromtimestamp(int(p.created_at))
     p.author = str(child.find('owner').get('nsid'))
     p.domain = 'flickr'
     p.author_guid = compute_author_guid_by_author_name(p.author)
     p.retweet_count = int(child.find('comments').text)
     p.post_id = compute_post_guid(p.url, p.author, date_to_str(p.date))
     p.post_osn_id = str(child.get('id'))
     # Labels are optional; look the element up once and reuse it.
     labels = child.find('labels')
     if labels is not None:
         p.post_type = ','.join(
             label.text for label in labels.findall('label'))
     return p