Esempio n. 1
0
def simulate_main(triple):
    """Run the full comparison pipeline for one (id, objectA, objectB, aspect)
    triple and return the per-object scores for that aspect.

    Returns a two-element list: [object A's aspect score, object B's score];
    a score defaults to 0 when the aspect is absent from the result points.
    """
    fast_search = 'true'
    obj_a = Argument(triple[1].lower().strip())
    obj_b = Argument(triple[2].lower().strip())
    aspects = [Aspect(triple[3].lower(), 5)]
    model = 'bow'
    load_config()

    # Fetch candidate sentences: aspect-specific triples (when aspects are
    # present) plus the general ML query for the object pair.
    if aspects:
        triple_hits = request_es_triple(obj_a, obj_b, aspects)
    json_compl = request_es_ML(fast_search, obj_a, obj_b)

    if not aspects:
        all_sentences = extract_sentences(json_compl)
    else:
        all_sentences = extract_sentences(triple_hits)
        all_sentences.extend(extract_sentences(json_compl))

    remove_questions(all_sentences)

    prepared_sentences = prepare_sentence_DF(all_sentences, obj_a, obj_b)
    classification_results = classify_sentences(prepared_sentences, model)
    final_dict = evaluate(all_sentences, prepared_sentences,
                          classification_results, obj_a, obj_b, aspects)

    # Pull the score for the requested aspect from each object's points,
    # falling back to 0 when the aspect was not scored.
    aspect_key = triple[3]
    scores = []
    for object_key in ('object1', 'object2'):
        points = final_dict[object_key]['points']
        scores.append(points[aspect_key] if aspect_key in points else 0)
    return scores
Esempio n. 2
0
def make_scores_cam(query, titles, answers):
    """Count comparative sentences in each answer for the object pair
    extracted from *query*.

    Each answer is sentence-tokenized and prefixed with its (cleaned) title;
    a sentence counts as comparative when the classifier scores BETTER or
    WORSE at 0.2 or higher.

    Returns a list of counts aligned with *answers*.
    """
    print("make_scores_cam")
    query = cleanhtml(query)
    answers_clean = [cleanhtml(answer) for answer in answers]
    titles_clean = [cleanhtml(title) for title in titles]
    # NOTE(review): `extr` is a module-level extractor not visible here --
    # confirm it is initialized before this function is called.
    obj1, obj2, pred, asp = extract_objs_asp(extr, query)
    print("obj1, obj2, pred, asp", obj1, obj2, pred, asp, "\n")
    number_of_comparative_sentences = []
    for ind, answer in enumerate(answers_clean):
        # Bug fix: the cleaned title was computed but the raw title was used.
        # (Also renamed the variable: the original identifier contained a
        # Cyrillic homoglyph 'с', an invisible maintenance trap.)
        sentences = [titles_clean[ind]] + nltk.tokenize.sent_tokenize(answer)
        dframe = prepare_sentence_DF(sentences, obj1, obj2)
        answ = classify_sentences(dframe, 'infersent')
        # Keep only rows confidently classified as comparative.
        filt = (answ["BETTER"] >= 0.2) | (answ["WORSE"] >= 0.2)
        comparative = answ.where(filt).dropna()
        number_of_comparative_sentences.append(len(comparative))
    return number_of_comparative_sentences
Esempio n. 3
0
def cam():
    '''
    to be visited after a user clicked the 'compare' button.

    Reads the request parameters (objectA, objectB, fs, model, statusID and
    aspects), runs either the marker-based default pipeline or the
    ML-classifier pipeline, and returns the comparison result as JSON.
    '''
    load_config()

    fast_search = request.args.get('fs')
    obj_a = Argument(request.args.get('objectA').lower().strip())
    obj_b = Argument(request.args.get('objectB').lower().strip())
    aspects = extract_aspects(request)
    model = request.args.get('model')
    statusID = request.args.get('statusID')

    if model == 'default' or model is None:
        # json obj with all ES hits containing obj_a, obj_b and a marker.
        setStatus(statusID, 'Request ES')
        json_compl = request_es(fast_search, obj_a, obj_b)

        # list of all sentences containing obj_a, obj_b and a marker.
        setStatus(statusID, 'Extract sentences')
        all_sentences = extract_sentences(json_compl)

        # removing sentences that can't be properly analyzed
        setStatus(statusID, 'Clear sentences')
        all_sentences = clear_sentences(all_sentences, obj_a, obj_b)

        # find the winner of the two objects
        setStatus(statusID, 'Find winner')
        return jsonify(find_winner(all_sentences, obj_a, obj_b, aspects))

    else:
        setStatus(statusID, 'Request all sentences containing the objects')
        if aspects:
            json_compl_triples = request_es_triple(obj_a, obj_b, aspects)
        json_compl = request_es_ML(fast_search, obj_a, obj_b)

        setStatus(statusID, 'Extract sentences')
        if aspects:
            all_sentences = extract_sentences(json_compl_triples)
            # Deduplicate by sentence text with a set: the original rebuilt a
            # list of seen texts for every candidate (O(n^2)) and never
            # deduped duplicates within the second result set against each
            # other.
            seen_texts = {sentence.text for sentence in all_sentences}
            for sentence in extract_sentences(json_compl):
                if sentence.text not in seen_texts:
                    all_sentences.append(sentence)
                    seen_texts.add(sentence.text)
        else:
            all_sentences = extract_sentences(json_compl)

        # Nothing to classify; let find_winner produce the empty result.
        if not all_sentences:
            return jsonify(find_winner(all_sentences, obj_a, obj_b, aspects))

        remove_questions(all_sentences)

        setStatus(statusID, 'Prepare sentences for classification')
        prepared_sentences = prepare_sentence_DF(all_sentences, obj_a, obj_b)

        setStatus(statusID, 'Classify sentences')
        classification_results = classify_sentences(prepared_sentences, model)

        setStatus(statusID, 'Evaluate classified sentences; Find winner')
        final_dict = evaluate(all_sentences, prepared_sentences,
                              classification_results, obj_a, obj_b, aspects)

        return jsonify(final_dict)
Esempio n. 4
0
def one_liner(obj_a, obj_b, user, password, w2v_model=None):
    """End-to-end comparison of two objects: fetch sentences, classify them,
    extract aspect keyphrases, pick a winner and build a textual summary.

    Parameters
    ----------
    obj_a, obj_b : str
        Names of the two objects to compare.
    user, password : str
        Elasticsearch credentials.
    w2v_model : optional
        Pre-loaded word2vec KeyedVectors; loaded from disk when None.

    Returns
    -------
    tuple
        (generated response text, summary text).
    """
    obj_a = Argument(obj_a.lower().strip())
    obj_b = Argument(obj_b.lower().strip())
    print("Requesting Elasticsearch")
    # Bug fix: the caller-supplied credentials were silently ignored in
    # favour of a hard-coded 'reader'/'reader' pair.
    json_compl = request_elasticsearch(obj_a, obj_b, user, password)

    print("Preparing sentences")

    all_sentences = extract_sentences(json_compl)
    remove_questions(all_sentences)
    prepared_sentences = prepare_sentence_DF(all_sentences, obj_a, obj_b)

    print("Classifying comparative sentences")

    classification_results = classify_sentences(prepared_sentences, 'bow')

    # Rows the classifier did not label NONE are comparative sentences.
    is_comparative = classification_results['max'] != 'NONE'
    # .copy() prevents pandas' SettingWithCopy warning on the next line.
    comparative_sentences = prepared_sentences[is_comparative].copy()
    comparative_sentences['max'] = (
        classification_results[is_comparative]['max'])

    print("Looking for keyphrases")

    text = prepared_sentences[is_comparative]['sentence'].str.cat(sep=' ')

    extractor = MultipartiteRank()
    extractor.load_document(input=text,
                            language="en",
                            normalization='stemming')

    extractor.candidate_selection(pos={'NOUN', 'PROPN', 'ADJ'})

    extractor.candidate_weighting()

    keyphrases = extractor.get_n_best(n=-1, stemming=False)

    if w2v_model is None:
        print("Loading w2v")
        w2v_model = gensim.models.KeyedVectors.load_word2vec_format(
            'GoogleNews-vectors-negative300.bin', binary=True)

    print("Preparing keyphrases for classification")

    # Object names and the marker words themselves are never aspects.
    forbidden_phrases = {obj_a.name, obj_b.name, 'better', 'worse'}

    # Collect rows in a plain list and build the frame once:
    # DataFrame.append inside a loop is quadratic and removed in pandas 2.x.
    asp_rows = []
    for _, row in comparative_sentences.iterrows():
        sentence = row['sentence']
        for keyphrase, _score in keyphrases:
            if keyphrase in forbidden_phrases:
                continue
            if keyphrase in sentence:
                asp_rows.append({
                    'OBJECT A': row['object_a'],
                    'OBJECT B': row['object_b'],
                    'ASPECT': keyphrase,
                    'SENTENCE': row['sentence'],
                    'max': row['max'],
                })
    asp_df = pd.DataFrame(
        asp_rows,
        columns=['OBJECT A', 'OBJECT B', 'ASPECT', 'SENTENCE', 'max'])

    asp_df['TOKENS'] = pd.Series(get_list_of_tokens(asp_df))
    X_asp = to_w2v_matrix(asp_df, w2v_model)

    print("Classifying keyphrases")

    # Bug fix: pickle.load(open(...)) leaked the file handle.
    with open('asp_clf.pkl', 'rb') as clf_file:
        model = pickle.load(clf_file)

    y_pred = model.predict(X_asp)
    aspects = asp_df.iloc[np.nonzero(y_pred)[0].tolist()]['ASPECT'].unique()

    print("Determining the winner")

    obj_a_aspects = []
    obj_b_aspects = []
    for aspect in aspects:
        rows = asp_df[asp_df['ASPECT'] == aspect]
        # The aspect's first occurrence decides which object it belongs to.
        if obj_a.name == rows.iloc[0]['OBJECT A']:
            obj_a_aspects.append(aspect)
        else:
            obj_b_aspects.append(aspect)

    # The object with more supporting aspects wins; ties go to object B,
    # matching the original branch order.
    comparing_pair = {}
    if len(obj_a_aspects) > len(obj_b_aspects):
        comparing_pair['winner_aspects'] = obj_a_aspects
        comparing_pair['loser_aspects'] = obj_b_aspects
        comparing_pair['winner'] = obj_a.name
        comparing_pair['loser'] = obj_b.name
    else:
        comparing_pair['winner_aspects'] = obj_b_aspects
        comparing_pair['loser_aspects'] = obj_a_aspects
        comparing_pair['winner'] = obj_b.name
        comparing_pair['loser'] = obj_a.name

    print("Generating response")

    response = generate_template(comparing_pair, mode="extended")

    print("Generating summary")

    rows = asp_df[asp_df.ASPECT.isin(aspects)]

    # Concatenate unique sentences mentioning a predicted aspect.
    sentences = ""
    for i in range(rows.shape[0]):
        # Bug fix: the original indexed asp_df here instead of the filtered
        # `rows`, so it summarized the wrong (unfiltered) sentences.
        sentence = rows.iloc[i]['SENTENCE'] + " "
        if sentence not in sentences:
            sentences += sentence

    if len(split_sentences(sentences)) > 10:
        summary = str(summarize(sentences, split=False, word_count=50))
    else:
        summary = sentences

    return response, summary