Exemple #1
0
    def run(self):
        """Gather scores for ``self.url`` and return them as a payload dict.

        Returns ``self.url`` unchanged when it is falsy or already the
        ``'ConnectionError'`` marker, the string ``'ConnectionError'`` or
        ``'LanguageError'`` when a later step reports that failure, and
        otherwise a dict with the keys ``scores``, ``url``, ``name_clean``
        and ``n_articles``.
        """
        import mongo_query_results

        if not self.url or self.url == 'ConnectionError':
            return self.url

        self.articles = []

        # NOTE(review): check_age presumably reports whether the stored
        # results are old enough to justify a new crawl -- confirm.
        age_check_passed = mongo_query_results.check_age(self.name_clean)
        self.article_objs = (
            self.get_newspaper() if age_check_passed else "Recent cache"
        )

        # Marker strings that mean "nothing new to download".
        use_stored_scores = ("No articles found!", "Empty list", "Recent cache")

        if self.article_objs not in use_stored_scores:
            self.articles = self.download_articles()

            # download_articles signals failure by returning a marker string.
            for failure in ('ConnectionError', 'LanguageError'):
                if self.articles == failure:
                    return failure

            if len(self.articles) > 0:
                self.num_articles = self.API.nlp_api_endpoint(
                    self.articles, self.url, self.name_clean)
            else:
                # Nothing was downloaded: fall back to the stored scores.
                try:
                    stored = mongo_query_results.get_scores(self.name_clean)
                    LambdaWhisperer.json_results, self.num_articles = stored
                except IndexError:
                    return 'ConnectionError'
        else:
            logger.info(self.article_objs)
            try:
                stored = mongo_query_results.get_scores(self.url)
                LambdaWhisperer.json_results, self.num_articles = stored
            except IndexError:
                return 'ConnectionError'

        logger.info(self.url)
        logger.info(f"NAME {self.name_clean}")
        return {
            'scores': LambdaWhisperer.json_results,
            'url': self.url,
            'name_clean': self.name_clean,
            'n_articles': self.num_articles
        }
Exemple #2
0
    def run(self):
        """Score the articles for ``self.url`` and return a summary tuple.

        Returns ``False`` for a falsy URL, the URL itself when it is the
        ``'ConnectionError'`` marker, ``'ConnectionError'`` when the stored
        score lookup raises ``IndexError``, and otherwise the tuple
        ``(num_articles, 0, 0, num_articles, hash)``.
        """
        # The two guards are mutually exclusive, so their order is free.
        if self.url == 'ConnectionError':
            return self.url
        if not self.url:
            return False

        # Marker strings get_newspaper may return instead of article objects.
        nothing_fetched = ("No articles found!", "Empty list")
        self.article_objs = self.get_newspaper()

        if self.article_objs not in nothing_fetched:
            # Cap the number of articles processed at self.limit.
            self.article_objs = islice(self.article_objs, self.limit)
            self.articles = self.articles_gen()
            self.num_articles = self.API.send(self.articles, self.url)
        else:
            # No new articles: reuse the scores already stored for this URL.
            try:
                stored = mongo_query_results.get_scores(self.url)
                LambdaWhisperer.json_results, self.num_articles = stored
            except IndexError:
                return 'ConnectionError'

        if self.API.json_results:
            self.dump()
            self.save_plot()

        # Print the scores from highest to lowest value, then the URL.
        ranked = sorted(
            self.API.json_results.items(),
            key=lambda pair: pair[1],
            reverse=True,
        )
        print(ranked)
        print(self.url)

        # NOTE(review): the two zeros appear to stand in for polarity and
        # subjectivity values that are no longer computed -- confirm.
        return self.num_articles, 0, 0, self.num_articles, self.hash
Exemple #3
0
    def run(self):
        """Fetch or reuse article scores for ``self.url``; return a summary.

        Result is one of:
          * ``False`` when ``self.url`` is falsy;
          * ``self.url`` when it equals ``'ConnectionError'``;
          * ``'ConnectionError'`` when the stored-score lookup raises
            ``IndexError``;
          * ``(num_articles, 0, 0, num_articles, hash)`` otherwise.
        """
        if not self.url:
            return False
        if self.url == 'ConnectionError':
            return self.url

        self.article_objs = self.get_newspaper()
        got_marker = self.article_objs in ("No articles found!", "Empty list")

        if got_marker:
            # get_newspaper returned a marker string; use the stored scores.
            try:
                scores_and_count = mongo_query_results.get_scores(self.url)
                LambdaWhisperer.json_results, self.num_articles = scores_and_count
            except IndexError:
                return 'ConnectionError'
        else:
            # Take at most self.limit article objects before sending them on.
            self.article_objs = islice(self.article_objs, self.limit)
            self.articles = self.articles_gen()
            self.num_articles = self.API.send(self.articles, self.url)

        if self.API.json_results:
            self.dump()
            self.save_plot()

        def by_score(item):
            # Sort key: the value half of a (key, value) pair.
            return item[1]

        print(sorted(self.API.json_results.items(), key=by_score, reverse=True))
        print(self.url)

        # NOTE(review): positions 1 and 2 look like placeholders for
        # polarity/subjectivity figures that are not computed -- confirm.
        summary = (self.num_articles, 0, 0, self.num_articles, self.hash)
        return summary
Exemple #4
0
    def nlp_api_endpoint(self, url_text: dict, url: str):
        """Send ``url_text`` to the NLP API, store the reply, and reload scores.

        Args:
            url_text: JSON-serialisable dict sent as the request body.
                Presumably maps article URLs to article text -- confirm
                against the caller.
            url: Key under which the API response is inserted and under
                which the scores are looked up afterwards.

        Returns:
            The second element returned by ``mongo_query_results.get_scores``
            (bound to ``n_articles``). The first element is stored on
            ``LambdaWhisperer.json_results``.
        """
        # Snapshot the outgoing payload. The context manager closes the file
        # deterministically; the original left the handle open.
        with open('./latest.json', 'w') as snapshot:
            json.dump(url_text, snapshot)

        response = json.loads(requests.put(nlp_api, json=url_text).text)
        mongo_query_results.insert(response, url)
        LambdaWhisperer.json_results, n_articles = mongo_query_results.get_scores(url)
        return n_articles
Exemple #5
0
    def nlp_api_endpoint(self, url_text: dict, url: str):
        """PUT ``url_text`` to the NLP API, persist the reply, refresh scores.

        Args:
            url_text: JSON-serialisable dict used as the request body.
                Presumably article URL -> article text; verify against
                the caller.
            url: Key passed to ``mongo_query_results.insert`` and
                ``mongo_query_results.get_scores``.

        Returns:
            The article count reported by ``mongo_query_results.get_scores``
            (its second return value). The first return value is assigned
            to ``LambdaWhisperer.json_results``.
        """
        # Write a copy of the payload to disk. Using ``with`` guarantees the
        # file is flushed and closed, which the bare ``open(...)`` did not.
        with open('./latest.json', 'w') as payload_file:
            json.dump(url_text, payload_file)

        response = json.loads(requests.put(nlp_api, json=url_text).text)
        mongo_query_results.insert(response, url)
        LambdaWhisperer.json_results, n_articles = mongo_query_results.get_scores(
            url)
        return n_articles
Exemple #6
0
    def run(self):
        """Refresh or reuse the scores for this source and report the count.

        Returns ``self.url`` unchanged when it is falsy or equals
        ``'ConnectionError'``; returns ``'ConnectionError'`` when downloading
        reports it or a stored-score lookup raises ``IndexError``; otherwise
        returns the tuple ``(num_articles, name_clean)``.
        """
        if self.url == 'ConnectionError' or not self.url:
            return self.url

        self.articles = []

        # NOTE(review): check_age presumably decides whether stored results
        # are stale enough to crawl again -- confirm.
        age_check_passed = mongo_query_results.check_age(self.name_clean)
        if age_check_passed:
            self.article_objs = self.get_newspaper()
        else:
            self.article_objs = 'Recent cache'  # TODO: enum.Enum

        stored_score_markers = ('No articles found!', 'Empty list', 'Recent cache')

        if self.article_objs not in stored_score_markers:
            self.articles = self.download_articles()

            if self.articles == 'ConnectionError':
                return 'ConnectionError'  # TODO: enum.Enum

            if len(self.articles) > 0:
                self.num_articles = self.API.nlp_api_endpoint(
                    self.articles, self.url, self.name_clean
                )
            else:
                # Download produced nothing: fall back to the stored scores.
                try:
                    stored = mongo_query_results.get_scores(self.name_clean)
                    LambdaWhisperer.json_results, self.num_articles = stored
                except IndexError:
                    return 'ConnectionError'  # TODO: enum.Enum
        else:
            print(self.article_objs)
            try:
                stored = mongo_query_results.get_scores(self.url)
                LambdaWhisperer.json_results, self.num_articles = stored
            except IndexError:
                return 'ConnectionError'  # TODO: enum.Enum

        # Only plot when this call actually downloaded articles.
        if self.articles:
            self.save_plot()

        print(self.url)
        print('NAME', self.name_clean)
        return self.num_articles, self.name_clean
Exemple #7
0
 def nlp_api_endpoint(self, url_text: dict, url: str, name_clean: str):
     """POST article text to the NLP API, store the reply, and reload scores.

     Args:
         url_text: Dict whose string-valued entries are sent as the JSON
             request body; entries with non-string values are dropped.
         url: Not used by this method; kept so the signature stays
             compatible with existing callers.
         name_clean: Key under which the API response is inserted and
             under which scores are looked up afterwards.

     Returns:
         The second element returned by ``mongo_query_results.get_scores``
         (bound to ``n_articles``). The first element is stored on
         ``LambdaWhisperer.json_results``.

     Raises:
         Exception: With the message ``'Lambda Error'`` when the decoded
             response contains a ``'message'`` key.
     """
     # Keep only string values; isinstance is the idiomatic type check and
     # also accepts str subclasses.
     url_text = {k: v for k, v in url_text.items() if isinstance(v, str)}
     import mongo_query_results
     response = json.loads(requests.post(nlp_api, json=url_text).text)
     # A 'message' key is treated as an error reply rather than scores.
     if 'message' in response:
         raise Exception('Lambda Error')
     mongo_query_results.insert(response, name_clean)
     LambdaWhisperer.json_results, n_articles = mongo_query_results.get_scores(
         name_clean)
     return n_articles
Exemple #8
0
    def nlp_api_endpoint(self, url_text: dict, url: str, name_clean: str):
        """PUT article text to the NLP API, store the reply, and reload scores.

        Args:
            url_text: Dict whose string-valued entries are sent as the JSON
                request body; entries with non-string values are dropped.
            url: Not used by this method; kept so the signature stays
                compatible with existing callers.
            name_clean: Key passed to ``mongo_query_results.insert`` and
                ``mongo_query_results.get_scores``.

        Returns:
            The second element returned by ``mongo_query_results.get_scores``
            (bound to ``n_articles``). The first element is stored on
            ``LambdaWhisperer.json_results``.

        Raises:
            Exception: With the message ``'Lambda Error'`` when the decoded
                response contains a ``'message'`` key. ``raise_for_status``
                and ``json`` on the response may also raise.
        """
        # Keep only string values; isinstance is the idiomatic type check and
        # also accepts str subclasses.
        url_text = {k: v for k, v in url_text.items() if isinstance(v, str)}

        req = requests.put(NLP_API, json=url_text)
        req.raise_for_status()
        response = req.json()
        # A 'message' key is treated as an error reply rather than scores.
        if 'message' in response:
            raise Exception('Lambda Error')  # TODO: custom exception class

        mongo_query_results.insert(response, name_clean)

        LambdaWhisperer.json_results, n_articles = mongo_query_results.get_scores(name_clean)
        return n_articles