Beispiel #1
0
def intents_list():
    """Return the distinct intent names found in a data file, sorted.

    Reads the ``file`` query parameter, checks that it names a regular file
    under ``DATA_DIR`` and loads it through ``cache.get_df_from_file``.

    Returns:
        A JSON array with the distinct non-missing values of the ``Intent``
        column in ascending order, or a ``BadRequest`` instance when the
        file cannot be found.
    """
    file_name = request.args.get("file", "")
    # NOTE(review): the existence check uses the escaped name while the
    # loader receives the raw one, and escape() presumably does not remove
    # ".." segments -- confirm the loader confines reads to DATA_DIR.
    file_path = path.join(DATA_DIR, escape(file_name))
    # isfile() is already False for a missing path, so exists() is redundant.
    if not path.isfile(file_path):
        return BadRequest('File not found')

    df = cache.get_df_from_file(file_name)
    # Drop missing values first: NaN mixed with strings cannot be ordered
    # and would make sorted() raise TypeError.
    return jsonify(sorted(df['Intent'].dropna().unique().tolist()))
def intents_trend():
    """Build a scatter-plot payload of trigger counts over time per intent.

    Query parameters:
        file: name of the data file under ``DATA_DIR``.
        intents: comma-separated intent names; blank entries are ignored and
            matching is case-insensitive and whitespace-trimmed.
        period: ``D`` (default) or ``M``, case-insensitive.

    Returns:
        A JSON object with ``data`` (one ``scatter`` trace per intent, where
        ``x`` holds the period start formatted as ``%Y-%m-%d %H:%M:%S`` and
        ``y`` the number of matching rows in that period) and ``layout``.
        A ``BadRequest`` instance is returned when the file is missing, the
        period is not supported, or no intent was supplied.
    """
    file_name = request.args.get("file", "")
    period = request.args.get("period", "D").upper()

    # NOTE(review): the existence check uses the escaped name while the
    # loader receives the raw one, and escape() presumably does not remove
    # ".." segments -- confirm the loader confines reads to DATA_DIR.
    file_path = path.join(DATA_DIR, escape(file_name))
    if not path.isfile(file_path):
        return BadRequest('File not found')

    if period not in ('D', 'M'):
        return BadRequest('Invalid frequency in request')

    intents = [
        intent.lower().strip()
        for intent in request.args.get("intents", "").split(",")
        if intent.strip()
    ]
    if not intents:
        return BadRequest('No intent specified in request')

    df = cache.get_df_from_file(file_name)

    # Normalise the column once; it does not depend on the requested intent,
    # so there is no need to copy the frame and re-apply it per iteration.
    normalized = df['Intent'].str.lower().str.strip()

    data = []
    for intent in intents:
        matching = normalized[normalized == intent]

        # Assumes the frame is indexed by a DatetimeIndex (to_period is
        # called on it) -- TODO confirm against cache.get_df_from_file.
        counts = matching.groupby(matching.index.to_period(period)).count()

        # Format the period starts directly on the index rather than going
        # through DataFrame.apply and unwrapping one-element rows.
        stamps = counts.index.to_timestamp().strftime("%Y-%m-%d %H:%M:%S")

        data.append({
            'x': stamps.tolist(),
            'y': counts.tolist(),
            'type': 'scatter',
            'name': f'"{intent}" triggers',
        })

    return jsonify({
        'data': data,
        'layout': {
            'title': 'Triggers over time',
            'displaylogo': False,
        },
    })
Beispiel #3
0
def words_trend():
    """Build a scatter-plot payload of word mention counts over time.

    Query parameters:
        file: name of the data file under ``DATA_DIR``.
        words: comma-separated words; blank entries are ignored. Each word is
            lower-cased, trimmed and passed through
            ``vn_utils.remove_tone_marks`` before counting.
        period: ``D`` (default) or ``M``, case-insensitive.

    Returns:
        A JSON object with ``data`` (one ``scatter`` trace per word, where
        ``x`` holds the period start formatted as ``%Y-%m-%d %H:%M:%S`` and
        ``y`` the summed ``count_occurences`` results over the
        ``User Message`` column for that period) and ``layout``.
        A ``BadRequest`` instance is returned when the file is missing, the
        period is not supported, or no word was supplied.
    """
    file_name = request.args.get("file", "")
    period = request.args.get("period", "D").upper()

    # NOTE(review): the existence check uses the escaped name while the
    # loader receives the raw one, and escape() presumably does not remove
    # ".." segments -- confirm the loader confines reads to DATA_DIR.
    file_path = path.join(DATA_DIR, escape(file_name))
    if not path.isfile(file_path):
        return BadRequest('File not found')

    if period not in ('D', 'M'):
        return BadRequest('Invalid frequency in request')

    words = [
        word for word in request.args.get("words", "").split(",")
        if word.strip()
    ]
    if not words:
        return BadRequest('No word specified in request')

    df = cache.get_df_from_file(file_name)

    # Both are independent of the word being counted, so compute them once.
    messages = df['User Message']
    # Assumes the frame is indexed by a DatetimeIndex (to_period is called
    # on it) -- TODO confirm against cache.get_df_from_file.
    periods = df.index.to_period(period)

    data = []
    for word in words:
        word = vn_utils.remove_tone_marks(word.lower().strip())

        # Work on the message column only instead of copying the whole frame.
        occurrences = messages.apply(
            lambda message: count_occurences(message, word))

        # Sum just the counts; summing every column of the frame would also
        # aggregate unrelated (possibly non-numeric) columns.
        totals = occurrences.groupby(periods).sum()

        # Format the period starts directly on the index rather than going
        # through DataFrame.apply and unwrapping one-element rows.
        stamps = totals.index.to_timestamp().strftime("%Y-%m-%d %H:%M:%S")

        data.append({
            'x': stamps.tolist(),
            'y': totals.tolist(),
            'type': 'scatter',
            'name': f'"{word}" mentions',
        })

    return jsonify({
        'data': data,
        'layout': {
            'title': 'Mentions over time',
            'displaylogo': False,
        },
    })
def top_intents():
    """Return the most frequent intents in a data file plus a pie-chart spec.

    Query parameters:
        file: name of the data file under ``DATA_DIR``.
        only: comma-separated intent names; only these are reported.
        top_n: keep at most this many entries when positive. Missing or
            non-numeric values mean "no limit".

    Returns:
        A JSON object with ``list`` (``{'name', 'count'}`` entries ordered by
        descending count, ties broken by name) and ``plot`` (a pie trace and
        layout built from the same entries), or a ``BadRequest`` instance
        when the file cannot be found.
    """
    file_name = request.args.get("file", "")
    # NOTE(review): when ``only`` is omitted this is {''}, so no intent
    # matches and the result is empty -- confirm whether an absent filter
    # should mean "all intents" instead.
    only = set(request.args.get("only", "").split(','))
    # type=int makes a malformed value fall back to the default rather than
    # raising ValueError inside the view.
    top_n = request.args.get("top_n", default=-1, type=int)

    # NOTE(review): the existence check uses the escaped name while the
    # loader receives the raw one, and escape() presumably does not remove
    # ".." segments -- confirm the loader confines reads to DATA_DIR.
    file_path = path.join(DATA_DIR, escape(file_name))
    if not path.isfile(file_path):
        return BadRequest('File not found')

    df = cache.get_df_from_file(file_name)

    # A single value_counts() pass replaces one boolean scan of the frame
    # per intent. int() keeps the counts JSON-serialisable.
    ranked = [
        {'name': name, 'count': int(count)}
        for name, count in df['Intent'].value_counts().items()
        if name in only
    ]
    # Secondary sort on the name makes the order of ties deterministic.
    ranked.sort(key=lambda item: (-item['count'], item['name']))

    if top_n > 0:
        ranked = ranked[:top_n]

    return jsonify({
        'list': ranked,
        'plot': {
            'data': [{
                'values': [item['count'] for item in ranked],
                'labels': [item['name'] for item in ranked],
                'type': 'pie',
                'textinfo': 'label+percent',
                'textposition': 'outside',
                'automargin': True,
            }],
            'layout': {
                'showlegend': False,
                'displaylogo': False,
            },
        },
    })
Beispiel #5
0
def top_words():
    """Return the most frequent words in the messages of a data file.

    Query parameters:
        file: name of the data file under ``DATA_DIR``.
        only: comma-separated values forwarded as ``is_in`` to
            ``utils.get_text_list_from_df``.
        top_n: keep at most this many entries when positive. Missing or
            non-numeric values mean "no limit".

    Each token is trimmed and lower-cased; tokens that are empty or made up
    solely of punctuation characters are skipped, and the rest are counted
    after ``vn_utils.remove_tone_marks``.

    Returns:
        A JSON array of ``{'word', 'count'}`` objects ordered by descending
        count (first-seen order among ties), or a ``BadRequest`` instance
        when the file cannot be found.
    """
    from collections import Counter

    file_name = request.args.get("file", "")
    only = request.args.get("only", "").split(',')
    # type=int makes a malformed value fall back to the default rather than
    # raising ValueError inside the view.
    top_n = request.args.get("top_n", default=-1, type=int)

    # NOTE(review): the existence check uses the escaped name while the
    # loader receives the raw one, and escape() presumably does not remove
    # ".." segments -- confirm the loader confines reads to DATA_DIR.
    file_path = path.join(DATA_DIR, escape(file_name))
    if not path.isfile(file_path):
        return BadRequest('File not found')

    df = cache.get_df_from_file(file_name)
    # Presumably a list of token lists (it is iterated two levels deep).
    texts = utils.get_text_list_from_df(df, is_in=only)

    punctuation = set(string.punctuation)
    wcount = Counter()
    for sent in texts:
        for word in sent:
            word = word.strip().lower()

            # Character-wise test: ``word in string.punctuation`` is a
            # substring check, which lets tokens such as "..." or "!!"
            # through. issuperset() is also True for the empty string.
            if punctuation.issuperset(word):
                continue

            wcount[vn_utils.remove_tone_marks(word)] += 1

    # most_common(None) returns every entry; ties keep insertion order.
    limit = top_n if top_n > 0 else None
    response = [
        {'word': word, 'count': count}
        for word, count in wcount.most_common(limit)
    ]

    return jsonify(response)
    x_min, x_max = np.min(X_tsne, 0), np.max(X_tsne, 0)
    X_tsne = (X_tsne - x_min) / (x_max - x_min)

    return X_tsne, y_pred


if __name__ == '__main__':
    print(args)

    file_name = args.file_name
    only_fallback = args.only_fallback
    only_fallback = only_fallback.lower() in ['1', 'true']

    # file_path = path.join(DATA_DIR, file_name)

    df = cache.get_df_from_file(file_name)

    if only_fallback:
        text_list = utils.get_text_list_from_df(
            df, is_in=ignore_lists.FALLBACK_INTENTS_LIST)
    else:
        text_list = utils.get_text_list_from_df(df)

    X = utils.get_sentence_vectors(text_list)

    X_tsne, y_pred = visualize_matrix(X)

    viz_groups = {}

    for i in range(X_tsne.shape[0]):
        # item_value = {