def parse_url():
    """Handle a request that supplies a news article via a ``url`` query parameter.

    Reads ``url`` from the query string, runs the preprocessing pipeline and
    the model aggregator, and returns the JSON payload built by
    ``return_result``.  Every failure path is reported through
    ``return_result`` with the matching ``ApplicationError``.
    """
    print("Got request", request.args)
    url = request.args.get('url', None)
    print(url)
    # No URL found in the query string — report the error directly instead of
    # raising and immediately catching our own exception.
    if url is None:
        return return_result(ApplicationError(*error_list["URL_NT_FND"]))
    news_obj, twitter_obj, error = preprocessor(url, published=True)
    if error is not None:
        return return_result(error)
    # Reject articles whose body is too short for the models to be meaningful.
    if len(news_obj.content.split(' ')) < MIN_CONTENT_LEN:
        return return_result(ApplicationError(*error_list["CONTENT_TOO_SHORT"]))
    aggregator = Aggregator(news=news_obj, tweet=twitter_obj,
                            is_twitter=twitter_obj is not None)
    try:
        aggregator.run_models()
    except ApplicationError as error:
        return return_result(error)
    # Success: pass an explicit None instead of relying on the leftover
    # `error` variable (which is provably None at this point).
    return return_result(None, True, aggregator, twitter_obj, news_obj)
def parse_file():
    """Handle a request that supplies an unpublished article as an uploaded file.

    Expects a ``file`` entry in the multipart form data with a ``.doc`` or
    ``.docx`` name, feeds its bytes through the preprocessing pipeline and the
    model aggregator, and returns the JSON payload built by ``return_result``.
    Every failure path is reported through ``return_result`` with the matching
    ``ApplicationError``.
    """
    print("Got request", request.args)
    # If file not found, report the error directly instead of raising and
    # immediately catching our own exception.
    if 'file' not in request.files:
        return return_result(ApplicationError(*error_list["FILE_NT_FND"]))
    filest = request.files['file']
    # Tuple form of endswith is equivalent to the two chained checks.
    if not filest.filename.endswith(('doc', 'docx')):
        return return_result(ApplicationError(*error_list["FILE_NT_SUP"]))
    file_obj = io.BytesIO(filest.read())
    news_obj, twitter_obj, error = preprocessor(file_obj, published=False)
    if error is not None:
        return return_result(error)
    # Reject articles whose body is too short for the models to be meaningful.
    if len(news_obj.content.split(' ')) < MIN_CONTENT_LEN:
        return return_result(ApplicationError(*error_list["CONTENT_TOO_SHORT"]))
    aggregator = Aggregator(news=news_obj, tweet=twitter_obj, is_twitter=False)
    try:
        aggregator.run_models()
    except ApplicationError as error:
        return return_result(error)
    # Success: pass an explicit None instead of relying on the leftover
    # `error` variable (which is provably None at this point).
    return return_result(None, False, aggregator, twitter_obj, news_obj)
def get_news_from_file(path: str) -> "tuple[NewsObject | None, ApplicationError | None]":
    """Build a ``NewsObject`` from a local word-document path (or file object).

    :param path: Path or file-like object understood by ``NewsObject``.
    :return: ``(news, None)`` on success, ``(None, error)`` on failure.

    NOTE: the original annotation ``[NewsObject, ApplicationError]`` was a
    list literal, not a valid type hint; it is now a (string) tuple annotation
    that is never evaluated at runtime.
    """
    try:
        word_file = NewsObject(path)
        word_file.fetch_from_file()
        # NewsObject signals fetch problems via its `error` attribute;
        # convert that into the application's error convention.
        if word_file.error:
            raise ApplicationError(*error_list["UNBL_FTCH_NEWS"])
        return word_file, None
    except ApplicationError as err:
        return None, err
def get_news_from_url(url: str) -> "tuple[NewsObject | None, ApplicationError | None]":
    """Build a ``NewsObject`` by downloading the article behind *url*.

    :param url: Web URL of the published article.
    :return: ``(news, None)`` on success, ``(None, error)`` on failure.

    NOTE: the original annotation ``[NewsObject, ApplicationError]`` was a
    list literal, not a valid type hint; it is now a (string) tuple annotation
    that is never evaluated at runtime.
    """
    try:
        news = NewsObject(url)
        news.fetch_from_url()
        # NewsObject signals fetch problems via its `error` attribute;
        # convert that into the application's error convention.
        if news.error:
            raise ApplicationError(*error_list["UNBL_FTCH_NEWS"])
        return news, None
    except ApplicationError as err:
        return None, err
def is_whitelisted_url(url):
    """
    Parse a given url to determine whether it is a valid url and whether the
    source is supported.

    :param url: A url given by user.
    :return: Return the source (the registered domain extracted by tldextract).
    :raises ApplicationError: If the url is malformed (``MAL_URL``) or the
        source is not in ``supported_sources`` (``UNSUP_SRC``).
    """
    # Guard clause: malformed URLs are rejected before any extraction.
    if not validators.url(url):
        print('invalid url')
        raise ApplicationError(*error_list["MAL_URL"])
    # tldextract returns (subdomain, domain, suffix); the domain is the source.
    _, source, _ = tldextract.extract(url)
    if source not in supported_sources:
        print('unsupported source')
        raise ApplicationError(*error_list["UNSUP_SRC"])
    return source
def return_result(error: ApplicationError, published=None, aggregator=None, tweet=None, news_obj=None):
    """Serialize a pipeline outcome into the JSON response.

    :param error: ``ApplicationError`` to report, or ``None`` on success.
    :param published: Truthy for published (web) articles, falsy for uploads.
    :param aggregator: Model-results holder; serialized via ``to_dict``.
    :param tweet: Twitter metadata object, if the input came from Twitter.
    :param news_obj: Parsed article object; serialized via ``to_dict``.
    :return: Flask JSON response — either an error payload or the full result.
    """
    # Error path first: nothing else is serialized when an error is present.
    if error is not None:
        return jsonify({"error": error.to_dict()})

    # Classify the input for the client.
    if published:
        input_type = 'Twitter' if tweet is not None else "NonTwitter"
    else:
        input_type = "UnPub"

    return jsonify({
        "input_type": input_type,
        "models": aggregator.to_dict() if aggregator is not None else None,
        "details": news_obj.to_dict() if news_obj is not None else None,
        "metrics": tweet.to_dict() if tweet is not None else None,
        "error": "",
    })
def preprocessor(url, published):
    """
    Given a url (a web url or a file object), the preprocessor identifies the
    type of the url and generates news article data and/or twitter data.

    :param url: Web URL string when ``published`` is truthy; otherwise a
        file path / file-like object passed to ``get_news_from_file``.
    :param published: Truthy when the article is a published web page.
    :returns: ``(news, tweet, error)`` — on any failure the first two are
        ``None`` and ``error`` carries the ``ApplicationError``; on success
        ``error`` is ``None`` (and ``tweet`` is ``None`` for non-Twitter input).
    """
    news, tweet, error = None, None, None
    # article is published
    if published:
        # Normalize bare domains to https; presumably users often omit the
        # scheme — NOTE(review): an http:// URL also gets https:// prepended.
        if url is not None and not url.startswith("https://"):
            url = "https://" + url
        try:
            source = is_whitelisted_url(url)
        except ApplicationError as error:
            return None, None, error
        # if the url is from twitter, resolve the embedded article first
        if source == "twitter":
            tweet, error = get_tweet(url)
            if error is not None:
                return None, None, error
            # check expanded url to make sure it is supported; any
            # ApplicationError here is reported as UNSUP_EMB_URL regardless
            # of which call raised it.
            try:
                is_whitelisted_url(tweet.expanded_url)
                news, error = get_news_from_url(tweet.expanded_url)
            except ApplicationError as error:
                error = ApplicationError(*error_list["UNSUP_EMB_URL"])
                return None, None, error
        else:
            news, error = get_news_from_url(url)
            if error is not None:
                return None, None, error
    # article is not published — read it from the uploaded file instead
    else:
        news, error = get_news_from_file(url)
        if error is not None:
            return None, None, error
    # On success `error` is None here; in the Twitter branch a non-raising
    # fetch failure also falls through to this return with error set.
    return news, tweet, error
def ratelimit_handler(e):
    """Error handler invoked when a client exceeds the rate limit.

    :param e: The rate-limit exception raised by the framework (unused).
    :return: JSON error payload built from ``RATE_LIMIT_EXCEEDED``.
    """
    rate_error = ApplicationError(*error_list["RATE_LIMIT_EXCEEDED"])
    return return_result(rate_error)