def test_analyzer():
    """Analyzer tests: health-claim detection on three representative inputs."""
    language_data = text_analyzer.language_data_loader(
        GRAMMAR_PATH, COUNTER_GRAMMAR_PATH, START_WORDS_PATH, STOP_WORDS_PATH)

    def run(message):
        # Shared call wrapper so each test case only varies the input text.
        return text_analyzer.analyzer(message,
                                      language_data['start_words'],
                                      language_data['grammar'],
                                      language_data['counter_grammar'],
                                      language_data['stop_words'],
                                      language_data['magic_bullet_grammar'])

    # Claim-like sentence: both the grammar match and the start word are found.
    analysis = run("This is a new medicine for hyperthyroidism")
    assert analysis[1] == 'hyperthyroidism'
    assert analysis[0] == 'a new medicine'

    # Unrelated text: nothing should match.
    analysis = run("some unrelated message that only talks about watching tv")
    assert analysis[0] == '<nothing_found>'

    # Hashtag plus plain mention: the start word is detected, but no
    # grammar match is produced.
    analysis = run("#hyperthyroidism for hyperthyroidism")
    assert analysis[0] == '<nothing_found>'
    assert analysis[1] == 'hyperthyroidism'
def test_counter_analyzer():
    """Counter analyzer tests: a plain claim must not trigger the counter grammar."""
    language_data = text_analyzer.language_data_loader(
        GRAMMAR_PATH, COUNTER_GRAMMAR_PATH, START_WORDS_PATH, STOP_WORDS_PATH)
    # "A new medicine for obesity" contains no counter-grammar language,
    # so counter_analyzer must report False for the 'obesity' start word.
    result = text_analyzer.counter_analyzer("A new medicine for obesity",
                                            'obesity',
                                            language_data['counter_grammar'])
    assert result is False
def test_language_data_loader():
    """Language data loader tests.

    Checks that representative entries from each language-data file are
    present under the expected keys of the loaded mapping.
    """
    language_data = text_analyzer.language_data_loader(
        GRAMMAR_PATH, COUNTER_GRAMMAR_PATH, START_WORDS_PATH, STOP_WORDS_PATH)
    assert 'eczema' in language_data['start_words'].keys()
    assert '^@\\w+$' in language_data['stop_words']
    assert '[s] \\w+ed to (healthier|better)( \\S+){0,7} [p]' in language_data[
        'grammar']
    # Raw string: '\S' inside a plain literal is an invalid escape sequence
    # (SyntaxWarning since Python 3.6, scheduled to become an error).  The
    # raw form has the identical runtime value.
    assert r'chances for ( \S+){0,5} [p]' in language_data['counter_grammar']
    assert r'[s] effective( \w+){0,2} (in|for|to)( \w+){0,5} [p]' in language_data[
        'grammar']
from datetime import datetime

from analyzer.engines import user_analyzer
from analyzer.engines import text_analyzer

## Initialization ##

# User analysis: dictionary, lexicon, and Twitter query strings used to
# classify user profiles.
DICTIONARY = user_analyzer.dictionary_parser(
    './language_data/user_dictionary.txt')
LEXICON = user_analyzer.lexicon_generator('./language_data/user_grammar.txt',
                                          DICTIONARY)
STRING_TWITTER_QUERIES = user_analyzer.string_twitter_queriesParser(
    './language_data/string_twitter_queries.txt')

# Text analysis: grammars, start words, and stop words used by the
# message analyzer.
LANGUAGE_DATA = text_analyzer.language_data_loader(
    './language_data/grammar.txt',
    './language_data/counter_grammar.txt',
    './language_data/start_words.txt',
    './language_data/stop_words.txt')


def nlp_analysis(job_json):
    """ It takes a job as an input and returns an analysis. """
    # NOTE(review): this function appears to continue beyond the visible
    # chunk (no return statement is shown); only the profile-analysis step
    # is documented here.
    analysis = dict()

    # Get 'profile' and 'health_related'
    # assumes job_json carries 'user_name' and 'user_description' keys —
    # TODO confirm against the job producer.
    user_analysis = user_analyzer.user_analyzer(job_json['user_name'],
                                                job_json['user_description'],
                                                STRING_TWITTER_QUERIES,
                                                LEXICON)
    # user_analyzer returns an indexable result; element 1 is stored as the
    # profile classification.
    analysis['profile'] = user_analysis[1]