def tweebo(texts):
    '''
    Given a list of Strings will tokenise, pos tag and then dependency parse
    the text using `Tweebo <https://github.com/ikekonglp/TweeboParser>`_
    a Tweet specific parser.

    The Tweebo parser cannot handle empty strings, therefore every text that
    is empty (or only whitespace) is replaced by a special empty string symbol
    before it is written to the parser's input file.

    If one of the texts is an empty String then an empty list will be returned
    for that index of the returned list.

    :param texts: The texts that are to be parsed
    :type texts: list
    :returns: A list of of a list of DependencyToken instances. A list per text \
    in the texts argument.
    :rtype: list
    :raises SystemError: If the Tweebo run script exits with a non-zero \
    return code.
    '''
    def no_text(text):
        '''
        Given a String checks if it is empty if so returns an empty_token else
        the text that was given.

        :param text: Text to be checked
        :type text: String
        :returns: The text if it is not empty or empty token if it is.
        :rtype: String
        '''

        empty_token = '$$$EMPTY$$$'
        if text.strip() == '':
            return empty_token
        return text

    # working_dir is handed to the run script, temp_dir holds the input file
    # and the `.predict` result file; both are removed when the block exits.
    with tempfile.TemporaryDirectory() as working_dir, \
            tempfile.TemporaryDirectory() as temp_dir:
        text_file_path = os.path.join(temp_dir, 'text_file.txt')
        result_file_path = os.path.join(temp_dir, 'text_file.txt.predict')
        tweebo_dir = full_path(
            read_config('depdency_parsers')['tweebo_dir'])
        # One text per line, empty texts swapped for the placeholder token.
        with open(text_file_path, 'w+') as text_file:
            for text in texts:
                text_file.write(no_text(text))
                text_file.write('\n')
        run_script = os.path.join(tweebo_dir, 'python_run.sh')
        completed = subprocess.run(
            ['bash', run_script, text_file_path, working_dir])
        # A CompletedProcess object is always truthy, so the exit status has
        # to be inspected explicitly to detect a failed run.
        if completed.returncode != 0:
            raise SystemError('Could not run the Tweebo run script {}'\
                              .format(run_script))
        with open(result_file_path, 'r') as result_file:
            return tweebo_post_process(result_file.read())
Beispiel #2
0
    def get_lexicon(self):
        '''
        Overrides :py:func:`bella.lexicons.Lexicon.get_lexicon`

        Reads the `positive-words.txt` and `negative-words.txt` files from the
        configured `hu_liu` folder. Lines that start with `;` or with any
        other non-word character (which includes blank lines) are ignored.

        :returns: A list of (word, category) tuples where category is either \
        `positive` or `negative`. All positive words come first.
        :rtype: list
        '''

        lexicon_dir = full_path(read_config('lexicons')['hu_liu'])
        entries = []
        for polarity in ('positive', 'negative'):
            words_path = os.path.join(lexicon_dir,
                                      '{}-words.txt'.format(polarity))
            with open(words_path, 'r', encoding='cp1252') as words_file:
                # The generator is fully consumed by extend while the file is
                # still open.
                entries.extend(
                    (raw_line.strip(), polarity)
                    for raw_line in words_file
                    if not (re.search('^;', raw_line)
                            or re.search(r'^\W+', raw_line))
                )
        return entries
Beispiel #3
0
    def get_lexicon(self):
        '''
        Overrides :py:func:`bella.lexicons.Lexicon.get_lexicon`

        Parses the configured `nrc_emotion` file as tab separated values.
        Empty rows are skipped. For every other row the first column is taken
        as the word, the second as the category and the third is converted to
        an int; the pair is kept only when that int is non-zero.

        :returns: A list of (word, category) tuples.
        :rtype: list
        '''

        lexicon_path = full_path(read_config('lexicons')['nrc_emotion'])

        with open(lexicon_path, 'r', newline='') as lexicon_file:
            rows = csv.reader(lexicon_file, delimiter='\t')
            return [
                (row[0], row[1])
                for row in rows
                if row and int(row[2])
            ]
def tweebo_install(tweebo_func):
    '''
    Python decorator that checks whether
    `TweeboParser <https://github.com/ikekonglp/TweeboParser>`_ looks
    installed and, if not, runs its install script. The check happens once,
    when the decorator is applied, and the given function is returned
    unchanged (it is not wrapped).

    The exit status of the install script is not inspected.

    :param tweebo_func: A function that uses the Tweebo Parser.
    :type tweebo_func: function
    :returns: The given function
    :rtype: function
    '''

    parser_root = full_path(read_config('depdency_parsers')['tweebo_dir'])
    # The presence of the models archive is used as the marker that an
    # install has already been attempted (successfully or not).
    models_archive = os.path.join(parser_root, 'pretrained_models.tar.gz')
    if os.path.isfile(models_archive):
        return tweebo_func

    subprocess.run(['bash', os.path.join(parser_root, 'install.sh')])
    return tweebo_func
Beispiel #5
0
 def get_lexicon(self):
     '''
     Overrides :py:func:`bella.lexicons.Lexicon.get_lexicon`

     Reads the configured `mpqa` file line by line. Each non-blank line is
     split on whitespace and every token containing `=` is treated as a
     `key=value` pair; tokens without `=` are ignored. The word is read from
     `word1` and the category from `priorpolarity`, except that when
     `priorpolarity` is `weakneg` the value of `polarity` is used instead.

     :returns: A list of (word, category) tuples, one per non-blank line.
     :rtype: list
     '''
     lexicon_path = full_path(read_config('lexicons')['mpqa'])
     entries = []
     with open(lexicon_path, 'r') as lexicon_file:
         for raw_line in lexicon_file:
             stripped = raw_line.strip()
             if not stripped:
                 continue
             fields = {}
             for token in stripped.split():
                 if '=' not in token:
                     continue
                 name, content = token.split('=')
                 fields[name] = content
             word = fields['word1']
             polarity = fields['priorpolarity']
             if polarity == 'weakneg':
                 polarity = fields['polarity']
             entries.append((word, polarity))
     return entries