Beispiel #1
0
def barplot(id):
    '''
        Render the 25 most frequent tokens of a document as a horizontal
        bar chart and send it back as a PNG image.

        :param id: document id handed to
                   SelectionAnalytics.get_document_by_id
                   (route noted by the author: '/barplot/<string:id>')
        :type id: str
        :return: Response(png_bytes, mimetype='image/png')
        :rtype: Response
    '''
    # fetch the document and pull out its token list
    document = SelectionAnalytics().get_document_by_id(id)
    doc_tokens = document['_source']['doc_token']

    # token frequencies, limited to the 25 most common
    top_tokens = Counter(doc_tokens).most_common(25)
    frame = pd.DataFrame(top_tokens, columns=['token', 'score'])
    labels = frame['token']
    counts = frame['score']

    # one horizontal bar per token
    positions = np.arange(len(labels))
    figure = Figure(figsize=(8, 8))
    axes = figure.add_subplot(111)
    axes.barh(positions, counts, align='center')
    axes.set_yticks(positions)
    axes.set_yticklabels(labels)
    axes.invert_yaxis()  # most frequent token ends up at the top
    axes.set_title('Words frequencies')
    axes.set_xlabel('Scores')

    # serialise the figure to PNG in memory and return the raw bytes
    png_buffer = io.BytesIO()
    FigureCanvas(figure).print_png(png_buffer)
    return Response(png_buffer.getvalue(), mimetype='image/png')
Beispiel #2
0
def wordcloud_png(id):
    '''
        Build a word cloud from a document's tokens and send it back
        as a PNG image.

        :param id: document id handed to
                   SelectionAnalytics.get_document_by_id
                   (route noted by the author: '/wordcloud/<string:id>')
        :type id: str
        :return: Response(png_bytes, mimetype='image/png')
        :rtype: Response
    '''
    # get document by id
    selection_obj = SelectionAnalytics()
    res = selection_obj.get_document_by_id(id)
    tokens_list = res['_source']['doc_token']

    # the tokens are joined into one space-separated string for generate()
    text_wordcloud = ' '.join(tokens_list)
    wordcloud = WordCloud(max_font_size=50,
                          max_words=100,
                          width=800,
                          height=600,
                          margin=0,
                          background_color="white").generate(text_wordcloud)

    # encode the rendered image as PNG in memory
    img = BytesIO()
    wordcloud.to_image().save(img, 'PNG')

    # Send the raw bytes (as barplot does) instead of the buffer object,
    # so the body is set in one piece rather than iterated as a stream.
    return Response(img.getvalue(), mimetype='image/png')
Beispiel #3
0
def statistics(id):
    '''
        Render the statistics page of a document: its source data plus
        the counts of its 25 most frequent tokens.

        :param id: document id handed to
                   SelectionAnalytics.get_document_by_id
                   (route noted by the author: '/statistics/<string:id>')
        :type id: str
        :return: render_template("statistics.html") with data, id and
                 scores_list, or the string "ERROR" when id is falsy
        :rtype: html page or str
    '''
    if not id:
        return "ERROR"

    # get document by id
    selection_obj = SelectionAnalytics()
    res = selection_obj.get_document_by_id(id)

    # 25 most frequent tokens as (token, count) pairs
    tokens_list = res['_source']['doc_token']
    most_common = Counter(tokens_list).most_common(25)

    # One record per token, sorted by ascending count; the sort is stable,
    # so tokens with equal counts keep their most_common() order.
    scores_list = sorted(
        ({'token': token, 'score': score} for token, score in most_common),
        key=itemgetter('score'))

    return render_template("statistics.html",
                           data=res['_source'],
                           id=id,
                           scores_list=scores_list)
Beispiel #4
0
def exploration():
    '''
        Page 'Exploration': look up documents matching the search
        tokens stored in the session and list them.

        Reads session['tokens_search'], joins the tokens with commas and
        asks SelectionAnalytics.get_documents for up to 50 documents.
        The result is indexed as [0] -> hits, [1] -> wanted, [2] -> docs.

        :return: render_template("exploration.html") with data, hits,
                 wanted and docs, or the string "null" when the session
                 holds no 'tokens_search'
        :rtype: html page or str
    '''
    # nothing to explore without search tokens in the session
    if 'tokens_search' not in session:
        return "null"

    search_string = ','.join(session['tokens_search'])
    max_docs = 50
    result = SelectionAnalytics().get_documents(search_string, max_docs)

    return render_template("exploration.html",
                           data=search_string,
                           hits=result[0],
                           wanted=result[1],
                           docs=result[2])
Beispiel #5
0
def infos():
    '''
        Page 'Infos': render the per-section and per-date document counts.

        :return: render_template('infos.html') with sections_scores and
                 dates_scores
        :rtype: html page
    '''
    analytics = SelectionAnalytics()
    context = {
        # data for piechart
        'sections_scores': analytics.count_by_sections(),
        # data for linegraph
        'dates_scores': analytics.count_by_dates(),
    }
    return render_template('infos.html', **context)
def get_document_by_id():
    '''
        Check that SelectionAnalytics.get_document_by_id returns a dict
        for a known document id.

        Method under test:
        :param id_doc: id_doc
        :type id_doc: str
        :return: doc
        :rtype: dict
    '''
    # params get_document_by_id()
    param_id_doc = 'YPETBXYBFL8Y9aYU1wrA'
    # use function get_document_by_id() with the prepared id
    # (previously the builtin `id` function was passed by mistake)
    selection_obj = SelectionAnalytics()
    res = selection_obj.get_document_by_id(param_id_doc)
    # test res is dict
    assert isinstance(res, dict)
def get_documents():
    '''
        Check the shape of the value returned by
        SelectionAnalytics.get_documents.

        Method under test:
        :param string_search: tokens to search
        :type string_search: str
        :param nb_wanted: total docs wanted
        :type nb_wanted: int
        :return: (hits, nb_wanted, documents_list)
        :rtype: tuple
    '''
    # params get_documents()
    param_string_search = 'enseignants, écoles, éducation'
    param_nb_wanted = 10
    # SelectionAnalytics instance
    process_doc = SelectionAnalytics()
    # use function get_documents() on the instance and params defined above
    # (the call previously referenced names that did not exist here)
    res = process_doc.get_documents(param_string_search, param_nb_wanted)
    # test res is tuple
    assert isinstance(res, tuple)
    # test res[0] is int
    assert isinstance(res[0], int)
    # test res[1] is int
    assert isinstance(res[1], int)
    # test res[2] is list
    assert isinstance(res[2], list)
Beispiel #8
0
def home():
    '''
        Page 'Home': render the per-section and per-date document counts.

        :return: render_template('home.html') with sections_scores and
                 dates_scores
        :rtype: html page
    '''
    analytics = SelectionAnalytics()
    by_section = analytics.count_by_sections()  # data for piechart
    by_date = analytics.count_by_dates()  # data for linegraph
    return render_template('home.html',
                           sections_scores=by_section,
                           dates_scores=by_date)
Beispiel #9
0
def document(id):
    '''
        Render the page of a single document.

        :param id: document id handed to
                   SelectionAnalytics.get_document_by_id
                   (route noted by the author: '/document/<string:id>')
        :type id: str
        :return: render_template("document.html") with data and id,
                 or the string "ERROR" when id is falsy
        :rtype: html page or str
    '''
    # no id, no document
    if not id:
        return "ERROR"

    found = SelectionAnalytics().get_document_by_id(id)
    return render_template("document.html", data=found['_source'], id=id)
Beispiel #10
0
def modeling():
    '''
        Page 'Modeling': render the topic-modeling form.

        Fetches the list of sections and picks the 'key' of one of the
        first eight at random as the pre-selected value of the form.

        :return: render_template("modeling.html") with sections and
                 random_key (None when no section is available)
        :rtype: html page
    '''
    # select sections lists
    selection_obj = SelectionAnalytics()
    sections_list = selection_obj.get_elements_list("section")
    # random key for select form; random.choice raises IndexError on an
    # empty sequence, so fall back to None when there is nothing to pick
    candidates = sections_list[:8]
    random_item_key = random.choice(candidates)['key'] if candidates else None
    return render_template("modeling.html",
                           sections=sections_list,
                           random_key=random_item_key)
def test_get_elements_list():
    '''
        Check the shape of the value returned by
        SelectionAnalytics.get_elements_list.

        Method under test:
        :param element_name: element_name
        :type element_name: str
        :return: elements_list
        :rtype: list of dicts (doc_count & key)
    '''
    # param get_elements_list()
    param = 'section'
    # SelectionAnalytics instance
    process_doc = SelectionAnalytics()
    # use function get_elements_list()
    elements = process_doc.get_elements_list(param)
    # test result of function get_elements_list is list
    assert isinstance(elements, list)
    # an empty list should fail as an assertion, not as an IndexError below
    assert elements, "get_elements_list returned no element"
    first = elements[0]
    # test elements result are dict
    assert isinstance(first, dict)
    # test 'doc_count' entry is int
    assert isinstance(first['doc_count'], int)
    # test 'key' entry is string
    assert isinstance(first['key'], str)
def test_get_custom_corpus_list():
    '''
        Check the shape of the value returned by
        SelectionAnalytics.get_custom_corpus.

        Method under test:
        :param section_name: section_name
        :type section_name: str
        :param query_size: query_size
        :type query_size: int
        :return: (custom_corpus, total_hits)
        :rtype: tuple (custom_corpus, total_hits)
    '''
    # params get_custom_corpus(); query_size is documented as an int
    param_section_name = 'emploi'
    param_query_size = 10
    # SelectionAnalytics instance
    process_doc = SelectionAnalytics()
    # use function get_custom_corpus()
    # NOTE(review): topics() calls get_custom_corpus_list while this test
    # calls get_custom_corpus - confirm which name the class exposes.
    corpus = process_doc.get_custom_corpus(param_section_name,
                                           param_query_size)
    # test result of function get_custom_corpus is tuple
    assert isinstance(corpus, tuple)
    # test corpus[0] is list
    assert isinstance(corpus[0], list)
    # test corpus[1] is int
    assert isinstance(corpus[1], int)
Beispiel #13
0
def topics():
    '''
        Corpus creation & topic modeling from the submitted form.

        On POST, reads the form fields sel_twords, sel_model, sel_section,
        sel_topics and sel_docs, builds a custom corpus for the section,
        fits the selected model ('LDA' or 'NMF') and returns its topics.
        The parameters are stored in session['topics_params'] on success.

        A missing form field raises KeyError and a non-integer
        sel_twords / sel_topics / sel_docs raises ValueError; neither is
        handled here. Requests other than POST fall through without an
        explicit return value.

        :return: jsonify({'topics': topics}), with topics set to None when
                 sel_model is neither 'LDA' nor 'NMF'
        :rtype: json object
    '''
    if request.method == "POST":
        req = request.form.to_dict()
        params = {
            "sel_twords": int(req['sel_twords']),
            "sel_model": req['sel_model'],
            "sel_section": req['sel_section'],
            "sel_topics": int(req['sel_topics']),
            "sel_docs": int(req['sel_docs'])
        }

        # pick the model class; both models share the same workflow below
        if params['sel_model'] == 'LDA':
            model_cls = TopicsModelingLDA
        elif params['sel_model'] == 'NMF':
            model_cls = TopicsModelingNMF
        else:
            return jsonify({'topics': None})

        # custom corpus
        selection_obj = SelectionAnalytics()
        corpus = selection_obj.get_custom_corpus_list(
            params["sel_section"], params["sel_docs"])
        # model obj
        model_obj = model_cls(params["sel_twords"],
                              params["sel_topics"], corpus)
        # data fit (return value is not needed here)
        model_obj.fit_data()
        # get topics
        found_topics = model_obj.get_topics()
        # set session variable topics_params
        session['topics_params'] = params
        return jsonify({'topics': found_topics})