def test_get_page_count(self):
    """get_page_count returns the expected page count for each URL fixture.

    For each (url, expected_pages) case: create an Article pointing at the
    URL, upload its pre-rendered HTML fixture to the HTML bucket, then
    assert the computed page count.
    """
    cases = [
        (self.url1, 6),
        (self.url2, 7),
        (self.url3, 4),
    ]
    for url, expected_pages in cases:
        # subTest keeps the remaining cases running if one fails.
        with self.subTest(url=url):
            article = baker.make('reading_list.Article')
            article.permalink = url
            article.save()
            article_id = get_article_id(url)
            article_key = '{}.html'.format(article_id)
            path = os.path.join('reading_list', 'tests', 'html', article_key)
            put_object(HTML_BUCKET, article_key, path)
            # assertEqual: assertEquals is a deprecated alias.
            self.assertEqual(expected_pages, get_page_count(url))
def test_html_to_s3(self, mock_inject_json_into_html):
    """html_to_s3 uploads the injected HTML under the article's derived key."""
    mock_inject_json_into_html.return_value = "test html"
    # Any baked Article will do; the mock supplies the rendered HTML.
    article = baker.make('Article')
    article.save()
    expected_key = "{}.html".format(get_article_id(article.permalink))
    html_to_s3(article)
    self.assertTrue(check_file(expected_key, HTML_BUCKET))
def test_no_call_celery(self):
    """delegate_task returns False when the article's HTML already exists in S3.

    Seeds the bucket with a pre-existing HTML object for the article, then
    checks that delegate_task declines to re-render and that the stored
    object is left untouched.
    """
    article = baker.make('Article')
    article.page_count = 3
    article_created = False
    article_id = get_article_id(article.permalink)
    article_key = "{}.html".format(article_id)
    # Context manager guarantees the handle is closed before upload.
    with open(article_key, "w+") as f:
        f.write("test html")
    put_object(HTML_BUCKET, article_key, article_key)
    os.remove(article_key)
    self.assertFalse(delegate_task(article, article_created))
    # Confirm the object in S3 was not overwritten.
    self.s3_client.download_file(HTML_BUCKET, article_key, article_key)
    try:
        # Original leaked this handle via open(...).read(); close it properly.
        with open(article_key, "r") as f:
            self.assertEqual(f.read(), "test html")
    finally:
        # Clean up the scratch file even if the assertion fails.
        os.remove(article_key)
def create_user_magazine(email):
    """Assemble a magazine HTML file from a user's staged articles and upload it.

    Looks up the user by email, renders each staged article's content into
    the assembly template, writes the combined HTML to a scratch file,
    uploads it to the 'pulpmagazines' bucket, and removes the scratch file.
    Returns None in all cases.
    """
    try:
        user = User.objects.get(email=email)
    except User.DoesNotExist:
        logging.warning('User {} does not exist'.format(email))
        return
    staged = get_staged_articles(user)
    if not staged:
        # Guard: the original raised IndexError on staged[0] for an empty list.
        logging.warning('User {} has no staged articles'.format(email))
        return
    # Close the template handle promptly (original leaked it to BeautifulSoup).
    with open('./pdf/assembly.html') as template:
        assembly_soup = BeautifulSoup(template, 'html.parser')
    assembly_body = assembly_soup.find('body')
    magazine_id = get_magazine_id(staged[0].permalink)
    for item in staged:
        permalink = item.permalink
        article_id = get_article_id(permalink)
        # Get the article record, creating it if this is its first appearance.
        article, article_created = Article.objects.get_or_create(
            permalink=permalink)
        # Render the article's HTML into S3 if it is not there yet.
        if not check_file('{}.html'.format(article_id), HTML_BUCKET):
            html_to_s3(article)
        # Populate the template soup with the article content.
        injected_soup = inject_json_into_html(article)
        injected_container = injected_soup.select_one('.container')
        injected_container['id'] = article_id
        # Append the populated container into the assembly document.
        assembly_body.append(injected_container)
    magazine_path = "./{}.html".format(magazine_id)
    with open(magazine_path, "w+") as f:
        f.write(str(assembly_soup))
    try:
        put_object('pulpmagazines', "{}.html".format(magazine_id),
                   magazine_path)
    finally:
        # Remove the scratch file even if the upload raises.
        os.remove(magazine_path)
    return
def handle(self, *args, **options):
    """Backfill custom_id on every Article from its permalink."""
    for article in Article.objects.all():
        article.custom_id = get_article_id(article.permalink)
        article.save()
def test_puppeteer_file_not_in_s3(self):
    """The article's HTML key is absent from S3 before any upload occurs."""
    expected_key = '{}.html'.format(get_article_id(self.url1))
    self.assertFalse(check_file(expected_key, HTML_BUCKET))