def tweebo(texts):
    '''
    Given a list of Strings will tokenise, POS tag and then dependency parse
    the text using `Tweebo <https://github.com/ikekonglp/TweeboParser>`_, a
    Tweet specific parser.

    The Tweebo parser cannot handle empty strings, therefore a special empty
    string symbol is required. If one of the texts is an empty String then an
    empty list will be returned for that index of the returned list.

    :param texts: The texts that are to be parsed
    :type texts: list
    :returns: A list of a list of DependencyToken instances. A list per text \
    in the texts argument.
    :rtype: list
    '''

    def no_text(text):
        '''
        Given a String checks if it is empty, if so returns an empty_token
        else the text that was given.

        :param text: Text to be checked
        :type text: String
        :returns: The text if it is not empty or the empty token if it is.
        :rtype: String
        '''

        empty_token = '$$$EMPTY$$$'
        if text.strip() == '':
            return empty_token
        return text

    with tempfile.TemporaryDirectory() as working_dir:
        with tempfile.TemporaryDirectory() as temp_dir:
            text_file_path = os.path.join(temp_dir, 'text_file.txt')
            result_file_path = os.path.join(temp_dir, 'text_file.txt.predict')
            tweebo_dir = full_path(
                read_config('depdency_parsers')['tweebo_dir'])
            with open(text_file_path, 'w+') as text_file:
                for text in texts:
                    text = no_text(text)
                    text_file.write(text)
                    text_file.write('\n')
            run_script = os.path.join(tweebo_dir, 'python_run.sh')
            # `subprocess.run` always returns a (truthy) CompletedProcess, so
            # success has to be checked through the return code.
            return_code = subprocess.run(['bash', run_script, text_file_path,
                                          working_dir]).returncode
            if return_code == 0:
                with open(result_file_path, 'r') as result_file:
                    return tweebo_post_process(result_file.read())
            raise SystemError('Could not run the Tweebo run script {}'
                              .format(run_script))
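# A minimal usage sketch, assuming the `tweebo_dir` entry of the
# `depdency_parsers` configuration points at a working TweeboParser install.
# The texts used here are made up for illustration.
texts = ['I love the weather today!', '', 'Cooool tweet parser :)']
parsed = tweebo(texts)
assert len(parsed) == len(texts)
# The empty String at index 1 comes back as an empty list of DependencyTokens.
assert parsed[1] == []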
def get_lexicon(self):
    '''
    Overrides :py:func:`bella.lexicons.Lexicon.get_lexicon`
    '''
    sentiment_folder = full_path(read_config('lexicons')['hu_liu'])
    cats = ['positive', 'negative']
    word_cat = []
    for cat in cats:
        file_path = os.path.join(sentiment_folder,
                                 '{}-words.txt'.format(cat))
        with open(file_path, 'r', encoding='cp1252') as senti_file:
            for line in senti_file:
                # Skip the `;` comment header and any non-word lines
                if re.search('^;', line) or re.search(r'^\W+', line):
                    continue
                word_cat.append((line.strip(), cat))
    return word_cat
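# Illustrative sketch of the comment filtering above. The sample lines are
# assumptions about the layout of the Hu and Liu `positive-words.txt` file,
# which starts with `;` prefixed header lines.
import re

sample_lines = ['; Opinion Lexicon: Positive\n', ';\n', 'a+\n', 'abound\n']
word_cat = []
for line in sample_lines:
    if re.search('^;', line) or re.search(r'^\W+', line):
        continue
    word_cat.append((line.strip(), 'positive'))
assert word_cat == [('a+', 'positive'), ('abound', 'positive')]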
def get_lexicon(self):
    '''
    Overrides :py:func:`bella.lexicons.Lexicon.get_lexicon`
    '''
    emotion_file_path = full_path(read_config('lexicons')['nrc_emotion'])
    word_cat = []

    with open(emotion_file_path, 'r', newline='') as emotion_file:
        tsv_reader = csv.reader(emotion_file, delimiter='\t')
        for row in tsv_reader:
            if len(row):
                word = row[0]
                cat = row[1]
                association = int(row[2])
                if association:
                    word_cat.append((word, cat))
    return word_cat
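# Illustrative sketch of the row handling above, assuming the NRC emotion
# lexicon's tab separated `word<TAB>category<TAB>association` layout. The
# sample rows are made up for illustration.
import csv
import io

sample_tsv = 'abandon\tanger\t0\nabandon\tfear\t1\n'
word_cat = []
for row in csv.reader(io.StringIO(sample_tsv), delimiter='\t'):
    if len(row) and int(row[2]):
        word_cat.append((row[0], row[1]))
assert word_cat == [('abandon', 'fear')]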
def tweebo_install(tweebo_func):
    '''
    Python decorator that ensures `TweeboParser
    <https://github.com/ikekonglp/TweeboParser>`_ is installed before the
    function it decorates is run. Returns the given function unchanged.

    :param tweebo_func: A function that uses the Tweebo Parser.
    :type tweebo_func: function
    :returns: The given function
    :rtype: function
    '''
    tweebo_dir = full_path(read_config('depdency_parsers')['tweebo_dir'])
    # If the models file exists then Tweebo has been installed or failed to
    # install
    tweebo_models = os.path.join(tweebo_dir, 'pretrained_models.tar.gz')
    if not os.path.isfile(tweebo_models):
        install_script = os.path.join(tweebo_dir, 'install.sh')
        subprocess.run(['bash', install_script])
    return tweebo_func
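# A minimal usage sketch. Because the decorator does its work at decoration
# time and returns the function unchanged, the install check runs once when
# the module is imported rather than on every call. `parse_tweets` is a
# hypothetical function used only for illustration.
@tweebo_install
def parse_tweets(texts):
    ...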
def get_lexicon(self):
    '''
    Overrides :py:func:`bella.lexicons.Lexicon.get_lexicon`
    '''
    mpqa_file_path = full_path(read_config('lexicons')['mpqa'])
    word_cats = []

    with open(mpqa_file_path, 'r') as mpqa_file:
        for line in mpqa_file:
            line = line.strip()
            if line:
                key_values = {}
                for data in line.split():
                    if '=' in data:
                        key, value = data.split('=')
                        key_values[key] = value
                word = key_values['word1']
                cat = key_values['priorpolarity']
                if cat == 'weakneg':
                    cat = key_values['polarity']
                word_cats.append((word, cat))
    return word_cats
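# Illustrative sketch of the `key=value` parsing above on one sample entry;
# the exact fields shown are assumptions about the MPQA subjectivity
# lexicon's layout.
sample = ('type=weaksubj len=1 word1=abandoned pos1=adj '
          'stemmed1=n priorpolarity=negative')
key_values = dict(data.split('=') for data in sample.split() if '=' in data)
assert (key_values['word1'], key_values['priorpolarity']) == \
    ('abandoned', 'negative')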