def _create_post_by_row(self, record_dict):
    """Build a ``Post`` from one input record and register it in ``self._post_dict``.

    Args:
        record_dict: Mapping holding a single record. The keys read here are
            ``post_id``, ``tumblog_id``, ``post_short_url``,
            ``created_time_epoch``, ``post_title``, ``post_content``,
            ``post_type``, ``post_format``, ``post_reblog_key``,
            ``post_tags`` and ``post_state``.

    Returns:
        The newly created ``Post``. It is also stored in ``self._post_dict``
        under its ``post_osn_id`` unless that id is already present; an
        existing entry is never overwritten.

    Raises:
        KeyError: If ``record_dict`` lacks one of the keys listed above.
        ValueError: If ``post_content`` is neither ``'NULL'`` nor valid JSON.
    """
    post = Post()

    post_id = self._convert_to_unicode_value(record_dict["post_id"])
    post.post_osn_id = post_id
    post.post_id = post_id

    author_name = self._convert_to_unicode_value(record_dict["tumblog_id"])
    post.author = author_name

    post_short_url = self._convert_to_unicode_value(
        record_dict["post_short_url"])
    # NOTE(review): presumably this populates post.url, which is read when
    # the guid is computed below -- confirm against _set_post_url.
    self._set_post_url(post_short_url, author_name, post)

    post_creation_date = self._convert_to_unicode_value(
        record_dict["created_time_epoch"])
    post.created_at = post_creation_date
    if post_creation_date is not None:
        post_formatted_creation_date, str_post_formatted_creation_date = \
            convert_epoch_timestamp_to_datetime(post_creation_date)
        post.date = post_formatted_creation_date
    else:
        # No creation time in the record: post.date is left untouched and
        # the value from _set_start_date() is used for the guid instead.
        str_post_formatted_creation_date = self._set_start_date()

    post.guid = compute_post_guid(post.url, author_name,
                                  str_post_formatted_creation_date)
    post.post_osn_guid = post.guid

    post.title = self._convert_to_unicode_value(record_dict["post_title"])

    raw_content = record_dict["post_content"]
    # The literal string 'NULL' means "no content"; post.content is then
    # left untouched.
    if raw_content != 'NULL':
        # Only byte strings need decoding. Calling .decode() unconditionally
        # raises AttributeError on a Python 3 ``str``.
        if isinstance(raw_content, bytes):
            raw_content = raw_content.decode("utf-8")
        content = json.loads(raw_content)
        # Concatenate title and text, in that order. Missing, empty or JSON
        # ``null`` parts are skipped rather than raising on ``str + None``.
        parts = (content.get('title'), content.get('text'))
        final_content = "".join(part for part in parts if part)
        post.content = self._convert_to_unicode_value(final_content)

    post.domain = self._domain
    post.author_guid = compute_author_guid_by_author_name(author_name)
    post.post_type = self._convert_to_unicode_value(
        record_dict["post_type"])
    post.post_format = self._convert_to_unicode_value(
        record_dict["post_format"])
    post.reblog_key = self._convert_to_unicode_value(
        record_dict["post_reblog_key"])
    post.tags = self._convert_to_unicode_value(record_dict["post_tags"])
    post.state = self._convert_to_unicode_value(record_dict["post_state"])

    # The first post seen for a given OSN id wins; later duplicates are
    # returned to the caller but not stored.
    if post.post_osn_id not in self._post_dict:
        self._post_dict[post.post_osn_id] = post
    return post
def _generate_post(self, instagram_post):
    """Convert one Instagram media dict into a ``Post``.

    Args:
        instagram_post: Dict describing a single media item. The values read
            are ``['owner']['id']``, ``['taken_at_timestamp']``, ``['id']``,
            ``['edge_media_to_caption']`` (optional caption),
            ``['edge_media_to_comment']['count']``,
            ``['edge_media_preview_like']['count']``, ``['shortcode']`` and
            ``['display_url']``.

    Returns:
        The populated ``Post``. ``content`` is assigned only when a caption
        can be extracted; otherwise it is left as ``Post()`` created it.

    Raises:
        KeyError: If any key other than the caption path is missing.
    """
    post = Post()
    post.author_guid = str(instagram_post['owner']['id'])
    # fromtimestamp() without a tz argument yields a naive local-time datetime.
    post.date = datetime.datetime.fromtimestamp(
        instagram_post['taken_at_timestamp'])
    post.post_osn_id = instagram_post['id']

    # The caption is best-effort: a missing key, an empty ``edges`` list or
    # a ``None`` along the path simply leaves post.content unset. Only those
    # lookup failures are caught, so KeyboardInterrupt/SystemExit and real
    # bugs are no longer swallowed as they were by the bare ``except``.
    try:
        post.content = (
            instagram_post['edge_media_to_caption']['edges'][0]['node']['text'])
    except (KeyError, IndexError, TypeError):
        pass

    # The comment count is stored in ``retweet_count`` and the like count in
    # ``favorite_count``.
    post.retweet_count = instagram_post['edge_media_to_comment']['count']
    post.favorite_count = instagram_post['edge_media_preview_like']['count']
    post.url = 'https://www.instagram.com/p/{}/'.format(
        instagram_post['shortcode'])

    image_name = self._get_image_name_from_url(instagram_post['display_url'])
    post.media_path = str(instagram_post['display_url'])
    # post_format carries the image name as a string.
    post.post_format = '{}'.format(image_name)
    post.domain = 'Instagram'
    post.post_type = 'post'
    post.post_id = str(post.post_osn_id)
    return post