def test_single_word_not_keyword():
    """A single input word that is not a keyword yields no tokens.

    ``extract.gcloud_syntax_extraction`` is patched to return a canned
    response (language ``'es'``, one token: ``constants.EL_TOKEN``), and
    the test checks that ``get_keywords`` reports an empty token list.
    """
    expected = {'lan': 'es', 'tokens': []}
    canned_syntax = munchify(
        {'language': 'es', 'tokens': [constants.EL_TOKEN]}
    )

    with mock.patch(
        'extract.gcloud_syntax_extraction',
        return_value=canned_syntax,
    ):
        # Build the request and call the handler while the patch is active.
        request = make_flask_request({'text': 'el'})
        actual = get_keywords(request)

    assert expected == actual
def test_wrong_input():
    """A request whose JSON body has no ``'text'`` field returns an error.

    The expected payload is an ``error`` object whose ``message`` carries
    the ``ValueError`` description.
    """
    expected = {
        "error": {
            "message": "ValueError: Expected 'text' field in json body is missing",
        },
    }

    # The body deliberately contains only an empty key, so 'text' is absent.
    request = make_flask_request({'': ''})
    actual = get_keywords(request)

    assert expected == actual
def test_no_words():
    """Empty input text yields an empty token list.

    ``extract.gcloud_syntax_extraction`` is patched to return a canned
    response with no tokens and language ``'es'``.
    """
    expected = {'lan': 'es', 'tokens': []}
    canned_syntax = munchify({'tokens': [], 'language': 'es'})

    with mock.patch(
        'extract.gcloud_syntax_extraction',
        return_value=canned_syntax,
    ):
        # Build the request and call the handler while the patch is active.
        request = make_flask_request({'text': ''})
        actual = get_keywords(request)

    assert expected == actual
def test_single_word_keyword():
    """A single input word that is a keyword is returned as one token.

    ``extract.gcloud_syntax_extraction`` is patched to return a canned
    response (language ``'es'``, one token: ``constants.HABLANDO_TOKEN``).
    The test checks that ``get_keywords`` reports that token with its
    word, lemma and part of speech.
    """
    expected_token = {
        'word': 'hablando',
        'lemma': 'hablar',
        'part_of_speech': 'VERB',
    }
    expected = {'lan': 'es', 'tokens': [expected_token]}
    canned_syntax = munchify(
        {'language': 'es', 'tokens': [constants.HABLANDO_TOKEN]}
    )

    with mock.patch(
        'extract.gcloud_syntax_extraction',
        return_value=canned_syntax,
    ):
        # Build the request and call the handler while the patch is active.
        request = make_flask_request({'text': 'hablando'})
        actual = get_keywords(request)

    assert expected == actual
# Install the 'ignore' warning filter before the imports below execute.
warnings.filterwarnings('ignore')

from dash.dependencies import (
    Input,
    Output,
)
from multiprocessing import (
    Process,
    Queue,
)
from utils import (
    read_mongo,
    json_pandas,
)
from main import get_keywords
from utils_app import (
    get_tpm,
    create_graph,
    create_wc,
    get_username_list,
    create_wc2,
)
from npl_utils import (
    init_counter,
    process,
)

# ---------------------------------------------------------------------------
# Global variables
# ---------------------------------------------------------------------------

# CSV files from which the two username lists are loaded.
dir_noticias = 'data/Noticieros Twitter.csv'
dir_politicos = 'data/Politicos-Twitter.csv'

# Only the first 50 entries returned by get_keywords() are kept.
keywords = get_keywords()[:50]
noticieros = get_username_list(dir_noticias)
politicos = get_username_list(dir_politicos)

time_interval = 30  # seconds

# DataFrame holding the starting database contents.
df = json_pandas(
    read_mongo(
        'dbTweets',
        'tweets_chile',
        query_fields={"dateTweet": 1, "tweet": 1, "screenName": 1},
        json_only=True,
        num_limit=10 ** 5,
    )
)

# `map` is lazy: `process` is applied to each tweet only as the iterator is
# consumed.
# NOTE(review): init_counter presumably exhausts `twiterator` -- confirm
# before reusing it elsewhere.
twiterator = map(process, df['tweet'])
word_counter = init_counter(twiterator)
def main():
    """Fetch the search keywords and start reading tweets with them.

    Calls ``get_keywords()`` and hands the result, together with
    ``config.region_CHILE``, to ``read_tweets``.
    """
    # Search parameters
    terms = get_keywords()

    # Get tweets
    read_tweets(config.region_CHILE, terms)