コード例 #1
0
ファイル: dbSearcher.py プロジェクト: matbut/Locus
    def search(self, msg):
        """Handle a database search request for a single link.

        Reads ``msg['body']['search_id']``, ``msg['body']['link']``,
        ``msg['body']['parent']`` and ``msg['sender']``. The status updater
        registered under ``self.name`` is moved to "in progress" first and is
        then marked success or failure depending on the outcome.

        The request is finished early (and reported as success) when the
        search has been cancelled or when ``search_parameters_correct``
        rejects the message.
        """
        self.log(logging.INFO, 'Starting')
        # Install a fresh event loop as the current one before doing any work.
        asyncio.set_event_loop(asyncio.new_event_loop())

        main_search_id = msg['body']['search_id']
        updater = statusUpdate.get(self.name)
        updater.in_progress(main_search_id)

        # Work out whether there is a reason to stop before touching the DB.
        if search_cancelled(main_search_id):
            skip_reason = 'Search cancelled, finishing'
        elif not self.search_parameters_correct(msg):
            skip_reason = 'Parameters incorrect, finishing'
        else:
            skip_reason = None

        if skip_reason is not None:
            self.log(logging.INFO, skip_reason)
            updater.success(main_search_id)
            return

        try:
            main_search = get_main_search(main_search_id)
            body = msg['body']
            link = body['link']
            parent = Parent.from_dict(body['parent'])
            sender = msg['sender']

            # The link is used as the primary key of the imported article.
            result_article = ImportedArticle.objects.get(pk=link)
            self.save_or_skip(result_article, main_search, parent, sender)

            updater.success(main_search_id)
            self.log(logging.INFO, 'Finished')

        except Exception as exc:
            # Any error (including a missing article) is reported as a failure.
            print(traceback.format_exc())
            updater.failure(main_search_id)
            self.log(logging.WARNING, 'Failed: {0}'.format(str(exc)))
コード例 #2
0
    def process_link(self, msg):
        """Process one link found during a search.

        Reads ``msg['body']`` (required keys ``search_id``, ``link``,
        ``parent``; optional ``date``, ``title``, ``snippet``) and
        ``msg['sender']``.

        If an ``ImportedArticle`` with this link already exists and the main
        search has ``db_search`` enabled, the link is forwarded to the
        database URL searcher and nothing else is done. Otherwise, when the
        link is valid and differs from the main search's own link, the result
        is stored together with its domain and parent, optionally forwarded to
        the Twitter URL searcher, and a websocket success message is sent when
        the sender is not one of the workers.
        """
        main_search_id = msg['body']['search_id']
        updater = statusUpdate.get(self.name)
        updater.in_progress(main_search_id)

        if search_cancelled(main_search_id):
            self.log(logging.INFO, 'Search cancelled, finishing')
            updater.success(main_search_id)
            return

        try:
            # Install a fresh event loop as the current one.
            asyncio.set_event_loop(asyncio.new_event_loop())

            body = msg['body']
            link = body['link']
            raw_date = body.get('date')
            # A missing/falsy date stays None; otherwise it is parsed as a
            # timestamp.
            date = datetime.fromtimestamp(int(raw_date)) if raw_date else None
            title = body.get('title') or ''
            snippet = body.get('snippet') or ''
            main_search = get_main_search(main_search_id)
            parent = Parent.from_dict(body['parent'])
            sender = msg['sender']

            already_imported = ImportedArticle.objects.filter(link=link).exists()
            if already_imported and main_search.db_search:
                # Hand the link over to the database URL searcher.
                statusUpdate.get(DB_URL_SEARCHER_NAME).queued(main_search_id)
                db_request = {
                    'link': link,
                    'search_id': main_search.id,
                    'parent': parent.to_dict()
                }
                send_to_worker(self.channel_layer, sender=sender,
                               where=DB_URL_SEARCHER_NAME, method='search',
                               body=db_request)
                return

            # Skip invalid links and the link the main search started from.
            if not (is_valid(link) and main_search.link != link):
                return

            try:
                with transaction.atomic():
                    domain_str = get_domain(link)
                    domain, _ = Domain.objects.get_or_create(link=domain_str)
                    result = get_or_create(link, date, domain_str, domain, title, snippet)
                    add_parent(result, parent)

                    if main_search.twitter_search:
                        statusUpdate.get(TWITTER_URL_SEARCHER_NAME).queued(main_search_id)
                        # The stored result becomes the parent of whatever
                        # the Twitter searcher finds.
                        twitter_request = {
                            'link': result.link,
                            'search_id': main_search.id,
                            'parent': Parent(id=result.link, type=self.name).to_dict()
                        }
                        send_to_worker(self.channel_layer, sender=sender,
                                       where=TWITTER_URL_SEARCHER_NAME,
                                       method='search', body=twitter_request)
            except Exception as exc:
                # Storing the result is best effort: log and carry on.
                self.log(logging.WARNING, 'Object was not added to database: {}'.format(str(exc)))

            if sender not in WORKER_NAMES:
                send_to_websocket(self.channel_layer, where=sender, method='success', message='')

        except Exception as exc:
            print(traceback.format_exc())
            self.log(logging.ERROR, 'Failed: {0}'.format(str(exc)))
コード例 #3
0
ファイル: twitterSearcher.py プロジェクト: matbut/Locus
    def search(self, msg):
        """Run a Twitter search for the title carried by ``msg``.

        Reads ``msg['body']['search_id']``, ``msg['body']['title']``,
        ``msg['body']['parent']`` and ``msg['sender']``. Each downloaded tweet
        is saved with ``save_tweet``; for tweets that yield an id, the
        returned links are passed on via ``send_to_internet_search_manager``
        with the tweet as their parent.

        The request is finished early (and reported as success) when the
        search has been cancelled or when ``search_parameters_correct``
        rejects the message.
        """
        self.log(logging.INFO, 'Starting')
        # Install a fresh event loop as the current one before doing any work.
        asyncio.set_event_loop(asyncio.new_event_loop())

        main_search_id = msg['body']['search_id']
        updater = statusUpdate.get(self.name)
        updater.in_progress(main_search_id)

        # Work out whether there is a reason to stop before searching.
        if search_cancelled(main_search_id):
            skip_reason = 'Search cancelled, finishing'
        elif not self.search_parameters_correct(msg):
            skip_reason = 'Parameters incorrect, finishing'
        else:
            skip_reason = None

        if skip_reason is not None:
            self.log(logging.INFO, skip_reason)
            updater.success(main_search_id)
            return

        try:
            main_search = get_main_search(main_search_id)
            body = msg['body']
            title = body['title']
            parent = Parent.from_dict(body['parent'])
            sender = msg['sender']

            # The configuration is built around this list; it is read back
            # after the search has run.
            tweets = []
            config = get_twint_configuration(tweets)
            config.Search = title
            twint.run.Search(config)

            self.log(logging.INFO, f'{len(tweets)} tweets were downloaded.')

            for tweet in tweets:
                tweet_id, links = self.save_tweet(tweet, parent, sender)
                if not tweet_id:
                    # No id came back for this tweet, so nothing to forward.
                    continue
                tweet_parent = Parent(type=self.name, id=tweet_id)
                self.send_to_internet_search_manager(
                    links, tweet_parent, main_search.id)

            updater.success(main_search_id)

        except Exception as exc:
            print(traceback.format_exc())
            self.log(logging.ERROR, 'Failed: {0}'.format(str(exc)))
            updater.failure(main_search_id)
コード例 #4
0
 def failure(self, search_id):
     """Record that one unit of work for this crawler has failed.

     Does nothing when the search identified by ``search_id`` has been
     cancelled. Otherwise the ``SearcherStatus`` row whose primary key is
     ``self.crawler`` has ``in_progress`` decremented and ``failure``
     incremented.
     """
     if search_cancelled(search_id):
         return
     # Both counters change in a single update() call so the row is never
     # observable with one counter moved and the other not, and only one
     # query is issued instead of two.
     SearcherStatus.objects.filter(pk=self.crawler).update(
         in_progress=F('in_progress') - 1,
         failure=F('failure') + 1,
     )
コード例 #5
0
 def in_progress(self, search_id):
     """Record that one queued unit of work for this crawler has started.

     Does nothing when the search identified by ``search_id`` has been
     cancelled. Otherwise the ``SearcherStatus`` row whose primary key is
     ``self.crawler`` has ``in_progress`` incremented and ``queued``
     decremented.
     """
     if search_cancelled(search_id):
         return
     # Both counters change in a single update() call so the row is never
     # observable with one counter moved and the other not, and only one
     # query is issued instead of two.
     SearcherStatus.objects.filter(pk=self.crawler).update(
         in_progress=F('in_progress') + 1,
         queued=F('queued') - 1,
     )
コード例 #6
0
 def queued(self, search_id):
     """Record that one unit of work for this crawler has been queued.

     Does nothing when the search identified by ``search_id`` has been
     cancelled; otherwise increments ``queued`` on the ``SearcherStatus``
     row whose primary key is ``self.crawler``.
     """
     if search_cancelled(search_id):
         return
     status_rows = SearcherStatus.objects.filter(pk=self.crawler)
     status_rows.update(queued=F('queued') + 1)