def run(self):
    """Score the site at ``self.url`` and return a summary payload.

    Articles are only fetched via ``self.get_newspaper()`` when
    ``mongo_query_results.check_age(self.name_clean)`` is truthy; otherwise
    the run is marked ``"Recent cache"`` and scores are read back with
    ``mongo_query_results.get_scores``. The same read-back happens when the
    download yields nothing.

    Returns:
        ``self.url`` unchanged when it is falsy or equals
        ``'ConnectionError'``; the string ``'ConnectionError'`` or
        ``'LanguageError'`` when a later step reports that status; otherwise
        a dict with the keys ``'scores'``, ``'url'``, ``'name_clean'`` and
        ``'n_articles'``.
    """
    if not self.url or self.url == 'ConnectionError':
        return self.url

    # Function-scope import, deferred past the guard so the early return
    # above does not depend on it.
    import mongo_query_results

    # Get list of newspaper.Article objs
    self.articles = []
    if mongo_query_results.check_age(self.name_clean):
        self.article_objs = self.get_newspaper()
    else:
        self.article_objs = "Recent cache"

    if self.article_objs in ("No articles found!", "Empty list", "Recent cache"):
        logger.info(self.article_objs)
        # NOTE(review): this lookup is keyed by self.url while the
        # empty-download fallback below uses self.name_clean -- confirm
        # which key get_scores expects.
        try:
            LambdaWhisperer.json_results, self.num_articles = (
                mongo_query_results.get_scores(self.url))
        except IndexError:
            return 'ConnectionError'
    else:
        # Threadpool for getting articles
        self.articles = self.download_articles()
        if self.articles == 'ConnectionError':
            return 'ConnectionError'
        elif self.articles == 'LanguageError':
            return 'LanguageError'
        elif not self.articles:
            # Nothing was downloaded: fall back to the stored scores.
            try:
                LambdaWhisperer.json_results, self.num_articles = (
                    mongo_query_results.get_scores(self.name_clean))
            except IndexError:
                return 'ConnectionError'
        else:
            self.num_articles = self.API.nlp_api_endpoint(
                self.articles, self.url, self.name_clean)

    # if self.articles:
    #     self.save_plot()
    logger.info(self.url)
    logger.info("NAME %s", self.name_clean)

    payload = {
        'scores': LambdaWhisperer.json_results,
        'url': self.url,
        'name_clean': self.name_clean,
        'n_articles': self.num_articles,
    }
    return payload
def run(self):
    """Fetch, score and plot the site at ``self.url``.

    Returns:
        ``False`` when ``self.url`` is falsy; the string
        ``'ConnectionError'`` when the URL equals that sentinel or when
        ``mongo_query_results.get_scores`` raises ``IndexError``; otherwise
        the tuple ``(num_articles, 0, 0, num_articles, hash)``.
    """
    if not self.url:
        return False
    if self.url == 'ConnectionError':
        return self.url

    # Get list of newspaper.Article objs
    self.article_objs = self.get_newspaper()
    nothing_fetched = self.article_objs in ["No articles found!", "Empty list"]

    if not nothing_fetched:
        # Threadpool for getting articles
        self.article_objs = islice(self.article_objs, self.limit)
        self.articles = self.articles_gen()
        self.num_articles = self.API.send(self.articles, self.url)
    else:
        # No articles to send: read back whatever get_scores has for the URL.
        try:
            stored = mongo_query_results.get_scores(self.url)
            LambdaWhisperer.json_results, self.num_articles = stored
        except IndexError:
            return 'ConnectionError'

    if self.API.json_results:
        self.dump()
        self.save_plot()

    ranked = sorted(
        self.API.json_results.items(),
        key=lambda pair: pair[1],
        reverse=True,
    )
    print(ranked)
    print(self.url)

    # A commented-out earlier variant returned rounded polarity and
    # subjectivity in the two middle slots; they are fixed at 0 here.
    count = self.num_articles
    return count, 0, 0, count, self.hash
def run(self):
    """Run the scrape/score pipeline for ``self.url``.

    A falsy URL yields ``False`` and the ``'ConnectionError'`` sentinel is
    passed straight back. When ``self.get_newspaper()`` reports that there
    are no articles, scores are read with
    ``mongo_query_results.get_scores`` (an ``IndexError`` there yields
    ``'ConnectionError'``); otherwise at most ``self.limit`` article objects
    are handed to ``self.API.send``.

    Returns:
        ``(num_articles, 0, 0, num_articles, hash)`` on the normal path.
    """
    if not self.url:
        return False
    if self.url == 'ConnectionError':
        return self.url

    def by_score(item):
        # Sort key: the value half of a (key, value) pair.
        return item[1]

    # Get list of newspaper.Article objs
    self.article_objs = self.get_newspaper()

    if self.article_objs in ("No articles found!", "Empty list"):
        try:
            (LambdaWhisperer.json_results,
             self.num_articles) = mongo_query_results.get_scores(self.url)
        except IndexError:
            return 'ConnectionError'
    else:
        # Threadpool for getting articles
        self.article_objs = islice(self.article_objs, self.limit)
        self.articles = self.articles_gen()
        self.num_articles = self.API.send(self.articles, self.url)

    if self.API.json_results:
        self.dump()
        self.save_plot()

    print(sorted(self.API.json_results.items(), key=by_score, reverse=True))
    print(self.url)

    # The two zeros stand in for the polarity and subjectivity values that a
    # commented-out earlier variant computed with analyzer().
    result = (self.num_articles, 0, 0, self.num_articles, self.hash)
    return result
def nlp_api_endpoint(self, url_text: dict, url: str):
    """Send ``url_text`` to the NLP API and store the returned scores.

    The payload is first written to ``./latest.json``. It is then sent with
    an HTTP PUT to ``nlp_api``; the decoded JSON response is passed to
    ``mongo_query_results.insert`` under ``url`` and read back with
    ``mongo_query_results.get_scores``, whose first value is published on
    ``LambdaWhisperer.json_results``.

    Args:
        url_text: JSON-serialisable payload for the API (presumably a
            mapping of article URL to text -- verify against the caller).
        url: Key used for both the insert and the score lookup.

    Returns:
        The second value returned by ``get_scores`` (bound to
        ``n_articles``).
    """
    # Use a context manager so the dump file is flushed and closed
    # deterministically instead of leaking the handle.
    with open('./latest.json', 'w') as dump_file:
        json.dump(url_text, dump_file)

    response = json.loads(requests.put(nlp_api, json=url_text).text)
    mongo_query_results.insert(response, url)
    LambdaWhisperer.json_results, n_articles = mongo_query_results.get_scores(url)
    return n_articles
def nlp_api_endpoint(self, url_text: dict, url: str):
    """Score ``url_text`` through the NLP API and persist the result.

    Steps, in order: dump the payload to ``./latest.json``, PUT it to
    ``nlp_api``, hand the decoded response to
    ``mongo_query_results.insert(response, url)``, then call
    ``mongo_query_results.get_scores(url)`` and store its first value on
    ``LambdaWhisperer.json_results``.

    Args:
        url_text: JSON-serialisable request body.
        url: Key passed to ``insert`` and ``get_scores``.

    Returns:
        The article count as reported by ``get_scores`` (its second value).
    """
    # The original passed a bare open() to json.dump and never closed it;
    # the with-block guarantees the file is closed even if dumping fails.
    with open('./latest.json', 'w') as latest:
        json.dump(url_text, latest)

    response = json.loads(requests.put(nlp_api, json=url_text).text)
    mongo_query_results.insert(response, url)
    LambdaWhisperer.json_results, n_articles = mongo_query_results.get_scores(
        url)
    return n_articles
def run(self):
    """Score the site at ``self.url`` and return its article count and name.

    Articles are fetched with ``self.get_newspaper()`` only when
    ``mongo_query_results.check_age(self.name_clean)`` is truthy; otherwise
    the run is marked ``'Recent cache'`` and scores are read back with
    ``mongo_query_results.get_scores``. A plot is saved only when articles
    were actually downloaded.

    Returns:
        ``self.url`` unchanged when it is falsy or equals
        ``'ConnectionError'``; the string ``'ConnectionError'`` when the
        download reports it or ``get_scores`` raises ``IndexError``;
        otherwise the tuple ``(self.num_articles, self.name_clean)``.
    """
    if not self.url or self.url == 'ConnectionError':
        return self.url

    # Get list of newspaper.Article objs
    self.articles = []
    if mongo_query_results.check_age(self.name_clean):
        self.article_objs = self.get_newspaper()
    else:
        self.article_objs = 'Recent cache'  # TODO: enum.Enum

    if self.article_objs in ('No articles found!', 'Empty list', 'Recent cache'):
        print(self.article_objs)
        # NOTE(review): this lookup is keyed by self.url while the
        # empty-download fallback below uses self.name_clean -- confirm
        # which key get_scores expects.
        try:
            LambdaWhisperer.json_results, self.num_articles = (
                mongo_query_results.get_scores(self.url))
        except IndexError:
            return 'ConnectionError'  # TODO: enum.Enum
    else:
        # Threadpool for getting articles
        self.articles = self.download_articles()
        if self.articles == 'ConnectionError':
            return 'ConnectionError'  # TODO: enum.Enum
        elif not self.articles:
            # Truthiness instead of len() == 0: an empty (or None) result
            # falls back to the stored scores rather than raising.
            try:
                LambdaWhisperer.json_results, self.num_articles = (
                    mongo_query_results.get_scores(self.name_clean))
            except IndexError:
                return 'ConnectionError'  # TODO: enum.Enum
        else:
            self.num_articles = self.API.nlp_api_endpoint(
                self.articles, self.url, self.name_clean
            )

    # self.articles is still the empty list on the cache path, so the plot
    # is skipped there.
    if self.articles:
        self.save_plot()

    print(self.url)
    print('NAME', self.name_clean)
    return self.num_articles, self.name_clean
def nlp_api_endpoint(self, url_text: dict, url: str, name_clean: str):
    """POST article text to the NLP API and store the scores it returns.

    Only string-valued entries of ``url_text`` are sent. The decoded
    response is inserted with ``mongo_query_results.insert`` under
    ``name_clean`` and read back with ``mongo_query_results.get_scores``,
    whose first value is published on ``LambdaWhisperer.json_results``.

    Args:
        url_text: Mapping whose non-string values are dropped before
            sending.
        url: Not used by this method; kept so existing callers keep working.
        name_clean: Key used for both the insert and the score lookup.

    Returns:
        The second value returned by ``get_scores`` (bound to
        ``n_articles``).

    Raises:
        Exception: With the message ``'Lambda Error'`` when the response
            contains a ``'message'`` key.
    """
    # isinstance() rather than an exact type() comparison, so str subclasses
    # are kept as well.
    url_text = {k: v for k, v in url_text.items() if isinstance(v, str)}
    import mongo_query_results

    # The original re-serialised url_text with json.dumps() after this call
    # and never used the result; that dead store has been removed.
    response = json.loads(requests.post(nlp_api, json=url_text).text)
    # response = json.loads(requests.put(nlp_api, json=url_text).text)
    if 'message' in response:
        raise Exception('Lambda Error')

    mongo_query_results.insert(response, name_clean)
    LambdaWhisperer.json_results, n_articles = mongo_query_results.get_scores(
        name_clean)
    return n_articles
def nlp_api_endpoint(self, url_text: dict, url: str, name_clean: str):
    """PUT article text to the NLP API and store the scores it returns.

    Only string-valued entries of ``url_text`` are sent to ``NLP_API``. The
    JSON response is inserted with ``mongo_query_results.insert`` under
    ``name_clean`` and read back with ``mongo_query_results.get_scores``,
    whose first value is published on ``LambdaWhisperer.json_results``.

    Args:
        url_text: Mapping whose non-string values are dropped before
            sending.
        url: Not used by this method; kept so existing callers keep working.
        name_clean: Key used for both the insert and the score lookup.

    Returns:
        The second value returned by ``get_scores`` (bound to
        ``n_articles``).

    Raises:
        requests.HTTPError: From ``raise_for_status()`` on an error status.
        Exception: With the message ``'Lambda Error'`` when the response
            contains a ``'message'`` key.
    """
    # isinstance() rather than an exact type() comparison, so str subclasses
    # are kept as well.
    url_text = {k: v for k, v in url_text.items() if isinstance(v, str)}

    req = requests.put(NLP_API, json=url_text)
    req.raise_for_status()
    response = req.json()
    if 'message' in response:
        raise Exception('Lambda Error')  # TODO: custom exception class

    mongo_query_results.insert(response, name_clean)
    LambdaWhisperer.json_results, n_articles = mongo_query_results.get_scores(name_clean)
    return n_articles