def scrape_and_add_fanfic(url, current_user):
    """Scrape an external fanfic page and add the fanfic to the system.

    Intended to run as a queued task: it reads its own retry counter from
    ``scrape_and_add_fanfic.request.retries`` and re-queues itself through
    ``scrape_and_add_fanfic.retry`` on any failure. The user is notified
    of the outcome so they can configure the imported data.

    :param url: address of the fanfic to scrape.
    :param current_user: id of the ``CustomUser`` to notify.
    """
    problem_verb = ("has encountered a problem trying "
                    "to add the "
                    "fanfic you specified,")
    try:
        new_fanfic = Fanfic(url)
        cleaned_url = new_fanfic.get_cleaned_url()
        fanfic_in_sys = fanfic_model.objects.filter(
            web__icontains=cleaned_url)
        url_in_system = fanfic_in_sys.exists()
        number_of_retries_done = int(scrape_and_add_fanfic.request.retries)

        user = CustomUser.objects.get(id=current_user)
        # Account used as the "subject" of notifications that are not
        # about an already stored fanfic.
        subject = CustomUser.objects.filter(
            username="******").first()

        if url_in_system is True:
            # Nothing to scrape: point the user at the existing fanfic.
            already_existing_fanfic = fanfic_in_sys.first()
            send_notification(already_existing_fanfic, user,
                              "has already been added,",
                              already_existing_fanfic.get_url())
        else:
            new_fanfic.set_appropiate_scraper()
            page = new_fanfic.load_page_html()

            if page is None:
                if number_of_retries_done == 5:
                    # Tell the user once, on the fifth retry; earlier
                    # failures are retried silently.
                    send_notification(subject, user, problem_verb, url)
                # Raising sends control to the handler below, which
                # schedules the retry.
                raise Exception("Error fetching fanfic page")
            else:
                is_fanfic = new_fanfic.check_if_is_fanfic_or_chapter()
                if is_fanfic is False:
                    # it's a chapter
                    send_notification(subject, user, problem_verb, url)
                else:
                    created_fanfic = new_fanfic.scrape_and_save()
                    send_notification(subject, user,
                                      "has added successfully the "
                                      "fanfic you specified,",
                                      created_fanfic.get_url())
    except Exception as e:
        logger.error("Error in celery task scraping website {}".format(e))
        # traceback.print_exc() returns None (it only prints to stderr),
        # so logging its result recorded "None"; format_exc() returns the
        # traceback text.
        logger.error(traceback.format_exc())
        scrape_and_add_fanfic.retry(exc=e, countdown=60)
def post(self, request):
    """Validate a submitted fanfic URL and queue it for scraping.

    Re-renders the form with an error message when the URL is reported
    as invalid, is already stored, or is not reported as online.
    Otherwise the scraping task is queued and the user is redirected to
    the "done" page. Unexpected errors are logged and the user is sent
    back to the form with an apology.
    """
    submitted_url = request.POST.get("url_fanfic")
    fanfic = Fanfic(submitted_url)
    validation_result = fanfic.url_without_errors()

    def render_form_with_error(shown_url):
        # Every rejection path re-renders the same template and context.
        context = {"url_fanfic": shown_url, "error": "error"}
        return render(request, self.template_name, context)

    if "Error" in validation_result:
        messages.error(request, validation_result)
        logger.error("Error in fanfic url {}".format(validation_result))
        return render_form_with_error(submitted_url)

    # On success the validator's return value is used as the URL from
    # here on.
    url_fanfic = validation_result

    try:
        already_stored = fanfic_model.objects.filter(
            web__icontains=fanfic.get_cleaned_url()).exists()
        if already_stored is True:
            messages.error(request, "This fanfic is already in the "
                                    "system")
            return render_form_with_error(url_fanfic)

        if fanfic.check_if_online() is False:
            # url not working anymore
            messages.error(
                request,
                "Sorry, the url doesn't seem to"
                " be working "
                "right now.")
            return render_form_with_error(url_fanfic)

        scrape_and_add_fanfic.delay(url_fanfic, request.user.id)
        logger.info("Fanfic added to celery queue")
        return redirect(reverse('fanfics:external_done'))
    except Exception as e:
        # couldn't parse well, server error, it's our fault
        logger.error("Error adding external fanfic: {}".format(e))
        messages.error(
            request,
            "We have a server error and we'll "
            " fix it soon. It's our fault and we "
            "apologize.")
        return redirect(reverse('fanfics:external_add'))
def setUp(self):
    """Give each test a ``Fanfic`` built from the placeholder string "url"."""
    placeholder_fanfic = Fanfic("url")
    self.fanfic = placeholder_fanfic
def setUpClass(cls):
    """Build the class-wide FicWad ``Fanfic`` fixture.

    The fixture is stored on ``cls.fanfic_ficwad``, then its scraper is
    selected and its page HTML loaded once for all tests in the class.
    """
    super(FicWadTests, cls).setUpClass()
    ficwad_story = Fanfic("https://ficwad.com/story/204190")
    # Publish the fixture before preparing it, so the attribute exists
    # even if a preparation step raises.
    cls.fanfic_ficwad = ficwad_story
    ficwad_story.set_appropiate_scraper()
    ficwad_story.load_page_html()
def setUpClass(cls):
    """Build the class-wide Archive of Our Own ``Fanfic`` fixture.

    The fixture is stored on ``cls.fanfic``, then its scraper is selected
    and its page HTML loaded once for all tests in the class.
    """
    super(ArchiveOfOurOwnTests, cls).setUpClass()
    ao3_work = Fanfic("https://archiveofourown.org/works/8109805")
    # Publish the fixture before preparing it, so the attribute exists
    # even if a preparation step raises.
    cls.fanfic = ao3_work
    ao3_work.set_appropiate_scraper()
    ao3_work.load_page_html()
def setUpClass(cls):
    """Build the class-wide Avengers Fanfiction ``Fanfic`` fixture.

    The fixture is stored on ``cls.fanfic_avengers``, then its scraper is
    selected and its page HTML loaded once for all tests in the class.
    """
    super(AvengersFanfictionTests, cls).setUpClass()
    avengers_story = Fanfic(
        "http://www.avengersfanfiction.com/Story/86623/The-silvers-tears")
    # Publish the fixture before preparing it, so the attribute exists
    # even if a preparation step raises.
    cls.fanfic_avengers = avengers_story
    avengers_story.set_appropiate_scraper()
    avengers_story.load_page_html()
def update_chapters_of_fanfics():
    """Check if some fanfics have been updated, aka have new chapters.

    Takes the fanfics not marked complete, least recently checked first,
    and processes about half of them plus one per run. For each one the
    source page is loaded again; chapters beyond those already stored are
    created, and word count, status and last-updated time are refreshed.
    ``last_time_checked`` is set and the fanfic saved on every iteration,
    including when scraping failed, so a broken fanfic does not stay at
    the head of the queue.
    """
    fanfics_to_update = fanfic_model.objects.filter(
        complete=False).order_by('last_time_checked')
    # round(count / 2) + 1 is always at least 1, so no emptiness guard is
    # needed before slicing.
    how_many = round(fanfics_to_update.count() / 2) + 1

    for one_fanfic in fanfics_to_update[:how_many]:
        current_chapters_count = one_fanfic.get_num_of_chapters()
        try:
            # Chapter rows created for this fanfic are rolled back together
            # if anything inside the block raises.
            with transaction.atomic():
                fanfic_obj = Fanfic(one_fanfic.web)
                fanfic_obj.set_appropiate_scraper()
                page = fanfic_obj.load_page_html()

                if page is not None:
                    chapters = fanfic_obj.get_chapters()

                    if chapters and len(chapters) > current_chapters_count:
                        # chapters is not empty
                        stored_chapters = Chapter.objects.filter(
                            fanfic=one_fanfic).order_by('-num_chapter')

                        if stored_chapters.exists():
                            last_chapter = stored_chapters.first()
                            num_chapter_new = last_chapter.num_chapter + 1

                            if "ficwad" in fanfic_obj.get_site() and \
                                    current_chapters_count == 1:
                                # it's from ficwad: the single stored
                                # chapter gets the URL of the first
                                # scraped chapter.
                                last_chapter.url_chapter = chapters[0]['url']
                                last_chapter.save()
                        else:
                            # there weren't any chapters
                            num_chapter_new = 1

                        # Scraped entries past the stored count are the
                        # new chapters; number them consecutively.
                        new_chapters = chapters[current_chapters_count:]
                        for num_chapter, new_chapter in enumerate(
                                new_chapters, start=num_chapter_new):
                            Chapter.objects.create(
                                fanfic=one_fanfic,
                                title=new_chapter['title'],
                                num_chapter=num_chapter,
                                url_chapter=new_chapter['url'])

                        # update word count & status(complete, in progress)
                        # NOTE(review): the original nesting of this refresh
                        # was ambiguous; it is kept in the new-chapters
                        # branch. Confirm whether it should instead run
                        # whenever the page loads.
                        one_fanfic.num_words = fanfic_obj.get_num_words()
                        one_fanfic.complete = fanfic_obj.get_status()
                        one_fanfic.last_time_updated = \
                            fanfic_obj.get_last_time_updated()
        except Exception as e:
            logger.error(
                "Error in celery task updating chapters {}".format(e))
            # traceback.print_exc() returns None (it only prints to
            # stderr), so logging its result recorded "None"; format_exc()
            # returns the traceback text.
            logger.error(traceback.format_exc())

        one_fanfic.last_time_checked = datetime.now()
        one_fanfic.save()