Ejemplo n.º 1
0
def get_point_chart_data():
    """Return the scatter-plot points of the requested corpus as JSON.

    Reads the ``corpus`` query-string argument, loads the first
    ``token_positions`` document and every ``themes`` document whose
    ``corpus_id`` matches it, and emits one entry per token stored in the
    positions document.

    Returns:
        The ``jsonify`` response wrapping a list of
        ``{"theme", "token", "x", "y"}`` entries, or an empty list when no
        ``token_positions`` document matches the corpus.
    """
    selected_corpus_id = request.args.get('corpus')
    criteria = {"corpus_id": selected_corpus_id}

    # Only the first match is needed; ``next`` with a default avoids both
    # materialising every result and an IndexError on an empty result.
    positions_docu = next(
        iter(repo_find_documents("token_positions", criteria)), None)
    if positions_docu is None:
        return jsonify([])

    # theme name -> tokens, the lookup table service_get_theme_by_token needs.
    dict_theme_tokens = {
        theme_doc["theme"]: theme_doc["tokens"]
        for theme_doc in repo_find_documents("themes", criteria)
    }

    point_data = []
    for token, position in positions_docu.items():
        # Underscore-prefixed keys and "corpus_id" are fields of the document
        # itself (presumably storage metadata), not tokens with coordinates.
        if token.startswith("_") or token == "corpus_id":
            continue

        point_data.append({
            "theme": service_get_theme_by_token(token, dict_theme_tokens),
            "token": token,
            "x": position[0],
            "y": position[1],
        })

    return jsonify(point_data)
Ejemplo n.º 2
0
def get_corpus_queries():
    """Return the stored queries of the requested corpus as JSON.

    Reads the ``corpus`` query-string argument and fetches every ``queries``
    document whose ``corpus_id`` matches it.

    Returns:
        The ``jsonify`` response wrapping a list of
        ``{"query_id", "question"}`` entries, one per matching document
        (an empty list when nothing matches).
    """
    selected_corpus_id = request.args.get('corpus')
    criteria = {"corpus_id": selected_corpus_id}

    # NOTE(review): "_id" is forwarded unchanged; confirm the configured JSON
    # encoder can serialise whatever type the repository stores there.
    query_infos = [
        {"query_id": one_query["_id"], "question": one_query["question"]}
        for one_query in repo_find_documents('queries', criteria)
    ]

    return jsonify(query_infos)
Ejemplo n.º 3
0
def get_line_chart_data():
    """Return the cluster-analysis series of the requested corpus as JSON.

    Reads the ``corpus`` query-string argument and loads the first
    ``cluster_analysis`` document whose ``corpus_id`` matches it. The
    document's ``clusters``, ``elbow_values`` and ``sc_scores`` lists are
    combined position by position into one entry per element of ``clusters``.

    Returns:
        The ``jsonify`` response wrapping a list of
        ``{"num_cluster", "elobow", "SC_score"}`` entries, or an empty list
        when no ``cluster_analysis`` document matches the corpus.

    Raises:
        IndexError: If ``elbow_values`` or ``sc_scores`` is shorter than
            ``clusters``.
    """
    selected_corpus_id = request.args.get('corpus')
    criteria = {"corpus_id": selected_corpus_id}

    # Only the first match is used; the default keeps an unknown corpus from
    # surfacing as an IndexError.
    docu = next(iter(repo_find_documents("cluster_analysis", criteria)), None)
    if docu is None:
        return jsonify([])

    lst_cluster = docu["clusters"]
    lst_elbow_val = docu["elbow_values"]
    lst_sc_scores = docu["sc_scores"]

    # Indexed on purpose instead of zip(): a metric list that is shorter than
    # "clusters" must keep failing loudly rather than truncate the chart.
    # NOTE(review): the key is spelled "elobow"; it is kept as-is because
    # consumers of this JSON may depend on that exact name.
    line_data = [
        {
            "num_cluster": num_cluster,
            "elobow": lst_elbow_val[i],
            "SC_score": lst_sc_scores[i],
        }
        for i, num_cluster in enumerate(lst_cluster)
    ]

    return jsonify(line_data)
Ejemplo n.º 4
0
def get_bar_chart_data():
    """Return per-token posting counts for one theme of a corpus as JSON.

    Reads the ``corpus`` and ``theme`` query-string arguments, loads the first
    ``themes`` document matching both, and counts for each of its tokens the
    postings returned by ``service_get_postings_by_token`` against the
    corpus's ``index_en`` and ``index_fr`` inverted indexes.

    Returns:
        The ``jsonify`` response wrapping a list of ``{"token", "count"}``
        entries after ``service_bubble_sort`` has been applied on ``"count"``,
        or an empty list when no ``themes`` document matches.
    """
    selected_corpus_id = request.args.get('corpus')
    selected_theme = request.args.get('theme')

    # Resolve the theme before touching the indexes: an unknown theme/corpus
    # then yields an empty chart instead of an IndexError, and the index
    # documents are not fetched for nothing.
    criteria = {"theme": selected_theme, "corpus_id": selected_corpus_id}
    docu = next(iter(repo_find_documents("themes", criteria)), None)
    if docu is None:
        return jsonify([])

    en_index_doc = repo_find_document_by_id("index_en", selected_corpus_id)
    fr_index_doc = repo_find_document_by_id("index_fr", selected_corpus_id)
    en_inversed_index = en_index_doc.get("inversed_index")
    fr_inversed_index = fr_index_doc.get("inversed_index")

    bar_data = [
        {
            "token": t,
            "count": len(service_get_postings_by_token(
                t, en_inversed_index, fr_inversed_index)),
        }
        for t in docu["tokens"]
    ]

    # The helper's return value is not used, so it presumably reorders
    # bar_data in place by "count" (direction is defined by the helper).
    service_bubble_sort(bar_data, "count")

    # No explicit ``del`` of the indexes: the index documents still reference
    # them, and every local is dropped when the function returns anyway.
    return jsonify(bar_data)
Ejemplo n.º 5
0
def get_analysis_data():
    """Build the combined pie-chart / bar-chart payload for one corpus.

    The corpus is taken from the ``corpus`` query-string argument. The JSON
    response carries three keys:

    * ``pie_chart_data`` - one ``{"item", "count"}`` entry per ``themes``
      document, where ``count`` sums the posting-list lengths of the theme's
      tokens;
    * ``largest_theme`` - the theme with the highest such count (the first
      one on ties, ``''`` when no theme has a positive count);
    * ``bar_chart_data`` - one ``{"token", "count"}`` entry per token of that
      theme, after ``service_bubble_sort`` has been applied on ``"count"``.
    """
    corpus_id = request.args.get('corpus')

    # Both lookups run before either document is dereferenced.
    english_doc = repo_find_document_by_id("index_en", corpus_id)
    french_doc = repo_find_document_by_id("index_fr", corpus_id)
    english_index = english_doc.get("inversed_index")
    french_index = french_doc.get("inversed_index")

    theme_docs = list(repo_find_documents("themes", {"corpus_id": corpus_id}))

    # Pie chart: total number of postings per theme.
    pie_slices = []
    for theme_doc in theme_docs:
        theme_name = theme_doc["theme"]
        total_postings = sum(
            len(service_get_postings_by_token(
                token, english_index, french_index))
            for token in theme_doc["tokens"]
        )
        pie_slices.append({"item": theme_name, "count": total_postings})

    # Largest slice: strict comparison, so the earliest theme wins a tie and
    # the '' placeholder survives when every count is zero.
    top_theme, top_count = '', 0
    for pie_slice in pie_slices:
        if pie_slice["count"] > top_count:
            top_count = pie_slice["count"]
            top_theme = pie_slice["item"]

    # Tokens of the first document carrying the winning theme name.
    top_tokens = next(
        (doc["tokens"] for doc in theme_docs if doc["theme"] == top_theme),
        [],
    )

    # Bar chart: posting count of every token in the largest theme.
    bar_rows = [
        {
            "token": token,
            "count": len(service_get_postings_by_token(
                token, english_index, french_index)),
        }
        for token in top_tokens
    ]
    # Return value unused; the helper presumably reorders bar_rows in place.
    service_bubble_sort(bar_rows, "count")

    payload = {
        "pie_chart_data": pie_slices,
        "largest_theme": top_theme,
        "bar_chart_data": bar_rows,
    }

    # Drop the local index references before serialising.
    del english_index
    del french_index

    return jsonify(payload)