def search(self, msg):
    """Look up an already-imported article by link and attach it to a search.

    Expects ``msg['body']`` to contain ``search_id``, ``link`` (primary key of
    an ``ImportedArticle``) and a serialized ``parent``; ``msg['sender']`` names
    the requesting worker.  Reports progress/success/failure through the
    per-crawler status updater.
    """
    self.log(logging.INFO, 'Starting')
    # Fresh event loop per task: this runs on a worker thread that has no
    # default loop — NOTE(review): presumably required by a downstream
    # async helper; confirm against save_or_skip.
    asyncio.set_event_loop(asyncio.new_event_loop())
    main_search_id = msg['body']['search_id']
    updater = statusUpdate.get(self.name)
    updater.in_progress(main_search_id)
    if search_cancelled(main_search_id):
        self.log(logging.INFO, 'Search cancelled, finishing')
        # Cancelled work is reported as success so counters still balance.
        updater.success(main_search_id)
        return
    if not self.search_parameters_correct(msg):
        self.log(logging.INFO, 'Parameters incorrect, finishing')
        updater.success(main_search_id)
        return
    try:
        main_search = get_main_search(main_search_id)
        link = msg['body']['link']
        parent = Parent.from_dict(msg['body']['parent'])
        sender = msg['sender']
        # The link doubles as the article's primary key here.
        result_article = ImportedArticle.objects.get(pk=link)
        self.save_or_skip(result_article, main_search, parent, sender)
        updater.success(main_search_id)
        self.log(logging.INFO, 'Finished')
    except Exception as e:
        # Fix: send the traceback through the class logger instead of a bare
        # print(), so it reaches the same sink as every other message.
        self.log(logging.WARNING, traceback.format_exc())
        updater.failure(main_search_id)
        self.log(logging.WARNING, 'Failed: {0}'.format(str(e)))
def process_link(self, msg):
    """Route a discovered link: re-dispatch known links to the DB searcher,
    otherwise persist the link and optionally fan out to the Twitter searcher.

    ``msg['body']`` carries ``search_id``, ``link`` and optional ``date``
    (epoch seconds), ``title``, ``snippet``, plus a serialized ``parent``;
    ``msg['sender']`` identifies the requester (worker name or websocket).
    """
    main_search_id = msg['body']['search_id']
    updater = statusUpdate.get(self.name)
    updater.in_progress(main_search_id)
    if search_cancelled(main_search_id):
        self.log(logging.INFO, 'Search cancelled, finishing')
        # Cancelled searches are counted as success so counters balance.
        updater.success(main_search_id)
        return
    try:
        # Worker thread has no default event loop; install one.
        asyncio.set_event_loop(asyncio.new_event_loop())
        link = msg['body']['link']
        # Optional epoch-seconds timestamp -> naive datetime, else None.
        # NOTE(review): fromtimestamp uses local time — confirm UTC is not expected.
        date = datetime.fromtimestamp(int(msg['body'].get('date'))) if msg['body'].get('date') else None
        title = msg['body'].get('title') or ''
        snippet = msg['body'].get('snippet') or ''
        main_search = get_main_search(main_search_id)
        parent = Parent.from_dict(msg['body']['parent'])
        sender = msg['sender']
        # Already-imported link + DB search enabled: hand off to the DB URL
        # searcher instead of importing again, then stop.
        if ImportedArticle.objects.filter(link=link).exists() and main_search.db_search:
            statusUpdate.get(DB_URL_SEARCHER_NAME).queued(main_search_id)
            send_to_worker(self.channel_layer, sender=sender, where=DB_URL_SEARCHER_NAME, method='search', body={
                'link': link,
                'search_id': main_search.id,
                'parent': parent.to_dict()
            })
            return
        # New, valid link that is not the search's own seed link: persist it.
        if is_valid(link) and main_search.link != link:
            try:
                # Domain + result row are created together or not at all.
                with transaction.atomic():
                    domain_str = get_domain(link)
                    domain, _ = Domain.objects.get_or_create(link=domain_str)
                    result = get_or_create(link, date, domain_str, domain, title, snippet)
                    add_parent(result, parent)
                    # Optionally fan out the stored link to the Twitter searcher,
                    # with this worker recorded as the new parent.
                    if main_search.twitter_search:
                        statusUpdate.get(TWITTER_URL_SEARCHER_NAME).queued(main_search_id)
                        send_to_worker(self.channel_layer, sender=sender, where=TWITTER_URL_SEARCHER_NAME, method='search', body={
                            'link': result.link,
                            'search_id': main_search.id,
                            'parent': Parent(id=result.link, type=self.name).to_dict()
                        })
            except Exception as e:
                # Best-effort persistence: a DB failure is logged, not fatal.
                self.log(logging.WARNING, 'Object was not added to database: {}'.format(str(e)))
        # Senders that are not workers are websocket clients awaiting an ack.
        if sender not in WORKER_NAMES:
            send_to_websocket(self.channel_layer, where=sender, method='success', message='')
    except Exception as e:
        print(traceback.format_exc())
        self.log(logging.ERROR, 'Failed: {0}'.format(str(e)))
def search(self, msg):
    """Run a Twitter search for the message's title and store the tweets.

    Expects ``msg['body']`` to contain ``search_id``, ``title`` and a
    serialized ``parent``; ``msg['sender']`` names the requester.  Each saved
    tweet's outgoing links are forwarded to the internet search manager with
    the tweet as their parent.
    """
    self.log(logging.INFO, 'Starting')
    # Worker thread has no default event loop; twint needs one installed.
    asyncio.set_event_loop(asyncio.new_event_loop())
    main_search_id = msg['body']['search_id']
    updater = statusUpdate.get(self.name)
    updater.in_progress(main_search_id)
    if search_cancelled(main_search_id):
        self.log(logging.INFO, 'Search cancelled, finishing')
        # Cancelled work is reported as success so counters still balance.
        updater.success(main_search_id)
        return
    if not self.search_parameters_correct(msg):
        self.log(logging.INFO, 'Parameters incorrect, finishing')
        updater.success(main_search_id)
        return
    try:
        main_search = get_main_search(main_search_id)
        title = msg['body']['title']
        parent = Parent.from_dict(msg['body']['parent'])
        sender = msg['sender']
        # Configure twint to collect results into a local list.
        tweets = []
        c = get_twint_configuration(tweets)
        # Search
        c.Search = title
        twint.run.Search(c)
        self.log(logging.INFO, f'{len(tweets)} tweets were downloaded.')
        for tweet in tweets:
            tweet_id, links = self.save_tweet(tweet, parent, sender)
            # save_tweet returns a falsy id when the tweet was skipped.
            if tweet_id:
                self.send_to_internet_search_manager(
                    links, Parent(type=self.name, id=tweet_id), main_search.id)
        updater.success(main_search_id)
    except Exception as e:
        # Fix: send the traceback through the class logger instead of a bare
        # print(), matching the ERROR level of the message that follows.
        self.log(logging.ERROR, traceback.format_exc())
        self.log(logging.ERROR, 'Failed: {0}'.format(str(e)))
        updater.failure(main_search_id)
def failure(self, search_id):
    """Move one unit of work from in-progress to failed for this crawler.

    No-op when the search has been cancelled, so cancelled work does not
    skew the counters.
    """
    if not search_cancelled(search_id):
        # Fix: one atomic UPDATE with both F-expressions instead of two
        # separate queries — halves the round-trips and removes the window
        # where in_progress is decremented but failure not yet incremented.
        SearcherStatus.objects.filter(pk=self.crawler).update(
            in_progress=F('in_progress') - 1,
            failure=F('failure') + 1,
        )
def in_progress(self, search_id):
    """Move one unit of work from queued to in-progress for this crawler.

    No-op when the search has been cancelled, so cancelled work does not
    skew the counters.
    """
    if not search_cancelled(search_id):
        # Fix: one atomic UPDATE with both F-expressions instead of two
        # separate queries — halves the round-trips and removes the window
        # where in_progress is incremented but queued not yet decremented.
        SearcherStatus.objects.filter(pk=self.crawler).update(
            in_progress=F('in_progress') + 1,
            queued=F('queued') - 1,
        )
def queued(self, search_id):
    """Add one unit of queued work to this crawler's status counters.

    Does nothing when the search has already been cancelled.
    """
    if search_cancelled(search_id):
        return
    status_row = SearcherStatus.objects.filter(pk=self.crawler)
    status_row.update(queued=F('queued') + 1)