Example 1
    def run(self, execution_context):
        filtered_word_types = self.get_config('train.filtered_word_types')

        data_name = execution_context.event.get('data_name', 'default')
        text = execution_context.event.get('text')
        if text is None:
            raise ValueError('text cannot be null')

        config = persist.get_data_config(data_name)
        tokenizer = TokenizerFactory.get_tokenizer(config.get('tokenizer'))

        # clean text if requested
        if config.get('clean_text'):
            text = pre_process.clean_text(text, filtered_word_types)

        # tokenize words and predict
        result_word, result_proba = classifier.predict(text, data_name)
        result_proba *= 100

        # pos tagging the text
        tagged_text = self.tag_text(tokenizer(text))

        # send the prediction to the bot and decode its JSON response
        _, content = self.send_msg(result_word, result_proba, tagged_text)
        content_obj = json.loads(content.decode('utf-8'))

        result = ('predict: ' + result_word + ' with probability: '
                  + str(int(result_proba)) + '%. response from bot: '
                  + str(content_obj.get('msg'))
                  + ' [tagged words: '
                  + ' '.join('(' + w[0] + ' ' + w[1] + ')' for w in tagged_text)
                  + ']')
        execution_context.finish({
            'raw': result,
            'bot_response': content_obj.get('msg')
        })
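
All of the run methods in these examples rely on the same small contract: an execution context exposing an event dict and a finish callback. The stub below is an illustrative assumption (the class name and wiring are not part of the original project) that makes the snippets easy to exercise in isolation:

# Hypothetical stand-in for the execution context consumed by the run()
# methods above; the real object is supplied by the surrounding framework.
class FakeExecutionContext:
    def __init__(self, event):
        self.event = event    # run() reads data_name / text from this dict
        self.result = None

    def finish(self, result):
        # run() hands its final payload to finish(); the stub just stores it
        self.result = result

# Example wiring (action stands for whatever object defines run() above):
# ctx = FakeExecutionContext({'data_name': 'default', 'text': 'hello bot'})
# action.run(ctx)
# print(ctx.result)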
Example 2
def run(execution_context):
    """run the action"""
    filtered_word_type = app.get_config('predict.filtered_word_types')
    remove_stop_words = app.get_config('train.remove_stop_words')

    data_name = execution_context.event.get('data_name', 'default')
    text = execution_context.event.get('text')
    if text is None:
        raise ValueError('text cannot be null')

    config = persist.get_data_config(data_name)
    tokenizer = TokenizerFactory.get_tokenizer(config.get('tokenizer'))

    if config.get('clean_text'):
        text = pre_process.clean_text(text, remove_stop_words)

    tokenized_text = tokenizer(text)
    result_word, result_proba = pre_process.predict(tokenized_text, data_name)

    LOGGER.warning('predict %s with probability %.2f %%', result_word,
                   result_proba * 100)

    # POS-tag the text and drop the word types listed in the config
    pos_tagged_text = nltk.pos_tag(nltk.word_tokenize(text))
    filtered_text = [(w, word_type) for w, word_type in pos_tagged_text
                     if word_type not in filtered_word_type]

    send_msg(result_word, result_proba, filtered_text)
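
The POS-tag filtering step from Example 2 can be tried on its own. The sketch below uses a made-up sentence and tag list; in the example above, filtered_word_type actually comes from app.get_config('predict.filtered_word_types'):

import nltk

# one-time NLTK data downloads needed by word_tokenize / pos_tag:
# nltk.download('punkt')
# nltk.download('averaged_perceptron_tagger')

text = 'book a table for two tomorrow evening'
filtered_word_types = ['DT', 'IN']  # illustrative tag list, not a project default

# same two calls as in Example 2: tokenize, then tag each token
pos_tagged_text = nltk.pos_tag(nltk.word_tokenize(text))

# keep only the (word, tag) pairs whose tag is not filtered out
filtered_text = [(w, word_type) for w, word_type in pos_tagged_text
                 if word_type not in filtered_word_types]
print(filtered_text)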
Example 3
    def run(self, execution_context):
        data_name = execution_context.event.get('data_name', 'default')

        config = persist.get_data_config(data_name)

        reader = self.open_test_file(data_name)
        result = self.test_data(reader, data_name, config)
        execution_context.finish(result)
Example 4
def run(execution_context):
    """run the action"""
    data_name = execution_context.event.get('data_name', 'default')
    config = persist.get_data_config(data_name)

    with open('cache/data/' + data_name + '/raw.csv') as data_file:
        reader = csv.reader(data_file)
        train_data(reader, data_name, config)
Example 5
    def run(self, execution_context, app_context):
        """run the action"""
        data_name = execution_context.event.get('data_name', 'default')
        config = persist.get_data_config(data_name)

        with open('cache/data/' + data_name + '/raw.csv') as data_file:
            reader = csv.reader(data_file)
            self.train_data(reader, data_name, config, app_context)

        execution_context.finish('train done')
Example 6
    def run(self, execution_context, app_context):
        """run the action"""
        data_name = execution_context.event.get('data_name', 'default')

        config = persist.get_data_config(data_name)

        filtered_word_types = app_context.get_config(
            'train.filtered_word_types')

        with open('cache/data/' + data_name + '/test.csv') as data_file:
            reader = csv.reader(data_file)
            result = self.test_data(reader, data_name, config,
                                    filtered_word_types)
            execution_context.finish(result)
Example 7
    def run(self, execution_context, app_context):
        """run the action"""
        event = execution_context.event
        data_name = event.get('data_name', 'default')
        text = event.get('text')

        self.validate(text, app_context)

        data_config = persist.get_data_config(data_name)
        if not data_config.get('allow_teaching'):
            raise ValueError('data_name ' + data_name +
                             ' is not allowed to teach')

        with open("cache/data/" + data_name + '/raw.csv', 'a') as data_file:
            data_file.write(text + '\n')

        with open("cache/data/" + data_name + '/test.csv', 'a') as data_file:
            data_file.write(text + '\n')

        execution_context.finish('teach done')
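
Across the examples, persist.get_data_config(data_name) is only ever read with .get(...), so it behaves like a per-dataset dict. The sketch below is a guess at that shape for illustration: the key names come from the snippets above, but the concrete values (and the assumption that it is a plain dict) are not taken from the project.

# keys observed in the examples above; values are illustrative only
example_data_config = {
    'tokenizer': 'default',   # passed to TokenizerFactory.get_tokenizer (Examples 1 and 2)
    'clean_text': True,       # gates pre_process.clean_text (Examples 1 and 2)
    'allow_teaching': True,   # gates the teach action (Example 7)
}

# Example 7 rejects teaching only when the flag is missing or falsy:
if not example_data_config.get('allow_teaching'):
    raise ValueError('teaching is not allowed for this data_name')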