Beispiel #1
0
    def bleu_evaluation(self, candidate):
        """Score ``candidate`` with BLEU against the manual summaries.

        Two independent scores are computed with ``sentence_bleu``:

        * abstractive: the references are the tokenized texts of
          ``./manual_resume_abstractive/<name>.txt`` for every name in
          ``self.resume_abstractive``;
        * extractive: the references are the tokenized texts of
          ``./axilaury_extractive/<name>.txt`` for every name in
          ``self.resume_extractive``.

        :param candidate: passed unchanged to ``sentence_bleu`` as the
            hypothesis (presumably a list of tokens -- confirm with callers).
        :return: tuple ``(abstractive_score, extractive_score)``; the first
            element belongs to the abstractive summaries and the second one
            to the extractive summaries. An element is ``None`` when the
            corresponding list of summary names is empty.
        """
        abstractive_score = None
        extractive_score = None

        # Only score when there is at least one abstractive reference.
        if self.resume_abstractive:
            abstractive_refs = []
            for name in self.resume_abstractive:
                raw_text = tools.text_to_string(
                    './manual_resume_abstractive/' + name + '.txt',
                    is_resume_abstract=True)
                abstractive_refs.append(tools.tokenize(raw_text))
            abstractive_score = sentence_bleu(abstractive_refs, candidate)

        # Only score when there is at least one extractive reference.
        # NOTE: a cleaning step (self.clean_extractive(self.corpus)) used to be
        # called here and is currently disabled.
        if self.resume_extractive:
            extractive_refs = []
            for name in self.resume_extractive:
                raw_text = tools.text_to_string(
                    './axilaury_extractive/' + name + '.txt')
                extractive_refs.append(tools.tokenize(raw_text))
            extractive_score = sentence_bleu(extractive_refs, candidate)

        return (abstractive_score, extractive_score)
    ['meet_116', 'meet_117', 'meet_118', 'meet_119'],
    ['meet_120', 'meet_121', 'meet_122', 'meet_123'],
    ['meet_124', 'meet_125', 'meet_126', 'meet_127'],
    ['meet_128', 'meet_129', 'meet_130', 'meet_131'],
    ['meet_132', 'meet_133', 'meet_134', 'meet_135'],
    ['meet_136', 'meet_137', 'meet_138', 'meet_139'],
    ['meet_140', 'meet_141', 'meet_142', 'meet_143'],
    ['meet_144', 'meet_145', 'meet_146', 'meet_147'],
    ['meet_148', 'meet_149', 'meet_150', 'meet_151']
]

# Names of the 34 block documents: "block_1" ... "block_34".
meet_list = ["block_" + str(i + 1) for i in xrange(34)]
print 'meet_list', meet_list

# Load every block document from ./manual_blocks/<name>.txt through the
# project helper tools.text_to_string (presumably returns the file's text --
# confirm against the helper).
adress_ = "./manual_blocks/"
all_documents = [tools.text_to_string(adress_ + i + '.txt') for i in meet_list]

# Build a single tf-idf model over all block documents.
example = tf_idf_class(all_documents)
tfidf_representation = example.tfidf()

# Index into classification_scenario_based for the block being processed.
counter = 0

for meet in meet_list:

    print '***********'
    print 'meet', meet

    # The group of meetings stored at the current position.
    meet_block = classification_scenario_based[counter]

    # `file` is an output handle opened outside this fragment.
    file.write(str(meet_block) + '\n')
    file.write('*****************' + '\n')
    # NOTE(review): the loop body appears to be cut off here; `counter` is
    # not incremented anywhere in the visible part.
Beispiel #3
0
#     file.flush()
#
# file.close()

# Output file for this run. NOTE(review): the name `file` shadows the
# Python 2 builtin, and no matching close() is visible in this fragment.
file = open('./result/Row_results/result_relative_1.txt', 'w')
adress_ = "./manuel_corpus/"

# Three-line header describing the contents of the result file.
file.write(
    'this file includes the tf-idf scores for 138 scenario-based meetings' +
    '\n' +
    'For each meeting, the tf-idf scores are computed by comparing this meeting w.r.t other scenario-based '
    + '\n' +
    'meetings esxcept the meetins with the same order in their bloks.' + '\n')

# Load the documents of each group; m1..m4 are mappings defined outside this
# fragment whose values are document names (without the '.txt' suffix).
all_documents_group_1 = [
    tools.text_to_string(adress_ + i + '.txt') for i in m1.itervalues()
]
all_documents_group_2 = [
    tools.text_to_string(adress_ + i + '.txt') for i in m2.itervalues()
]
all_documents_group_3 = [
    tools.text_to_string(adress_ + i + '.txt') for i in m3.itervalues()
]
# Only m4 is filtered for None values before building the path.
all_documents_group_4 = [
    tools.text_to_string(adress_ + i + '.txt') for i in m4.itervalues()
    if i is not None
]

# all_documents__N is the concatenation of every group except group N.
# Note the order for __3: group 2 comes before group 1.
all_documents__1 = all_documents_group_2 + all_documents_group_3 + all_documents_group_4
all_documents__2 = all_documents_group_1 + all_documents_group_3 + all_documents_group_4
all_documents__3 = all_documents_group_2 + all_documents_group_1 + all_documents_group_4
Beispiel #4
0
def blocks_4_4_ami(n_words, length=None):
    """Write tf-idf candidates and BLEU scores for every meeting, group by group.

    For each group of meetings in ``classification_scenario_based`` a tf-idf
    model is built over that group's documents only (read from
    ``path + '/manual_corpus/'``). For each meeting of the group, the
    candidate returned by ``get_best_k_tfidf`` and the two scores returned by
    ``bleu_evaluation`` are written to ``path + 'result_ami/result_4_4.txt'``.

    :param n_words: forwarded as the first argument of ``get_best_k_tfidf``.
    :param length: when ``None`` every group is processed and ``tfidf()`` is
        called without arguments; otherwise only the first ``length`` groups
        are processed and ``length`` is also forwarded to ``tfidf(length)``.
    :return: None
    """
    # The context manager closes the result file even when a step below raises.
    with open(path + 'result_ami/result_4_4.txt', 'w') as out:
        corpus_dir = path + '/manual_corpus/'

        if length is None:
            groups = classification_scenario_based
        else:
            groups = classification_scenario_based[0:length]

        for meet_list in groups:
            out.write('************************' + str(meet_list) + '*************************************\n')
            all_documents = [tools.text_to_string(corpus_dir + name + '.txt') for name in meet_list]
            example = tf_idf_class(all_documents)
            # Keep the two call forms distinct: tfidf() and tfidf(None) are
            # not known to be equivalent.
            if length is None:
                tfidf_representation = example.tfidf()
            else:
                tfidf_representation = example.tfidf(length)

            # enumerate() gives the true position of each meeting, unlike
            # list.index() which is linear and returns the first match only.
            for position, meet in enumerate(meet_list):
                out.write(meet + '\n')
                out.write('*****************\n')

                c = compare_with_AMI_results(meet)
                c.get_resumes()

                candidate = c.get_best_k_tfidf(n_words, tfidf_representation[position])
                out.write('candidate = ' + str(candidate) + '\n')
                # Flush before the BLEU step so partial results can be
                # inspected while the run is still in progress.
                out.flush()

                bleu_measure = c.bleu_evaluation(candidate)

                out.write('score_abstractive = ' + str(bleu_measure[0]) + '\n')
                out.write('score_extractive = ' + str(bleu_measure[1]) + '\n')
                out.flush()

            out.write('\n')
            out.flush()

    return
Beispiel #5
0
def blocks_ami(n_words, length=None):
    """Write tf-idf candidates and BLEU scores for the 34 block documents.

    One tf-idf model is built over the documents ``block_1`` ... ``block_34``
    read from ``path + "manual_blocks/"``. For each processed block, the entry
    of ``classification_scenario_based`` at the same position is used to
    build the comparison object; the candidate returned by
    ``get_best_k_tfidf`` and the two scores returned by ``bleu_evaluation``
    are written to ``path + 'result_ami/result_4_block_scen.txt'``.

    :param n_words: forwarded as the first argument of ``get_best_k_tfidf``.
    :param length: when ``None`` all blocks are processed and ``tfidf()`` is
        called without arguments; otherwise only the first ``length`` blocks
        are processed and ``length`` is also forwarded to ``tfidf(length)``.
    :return: None
    """
    # The context manager closes the result file even when a step below raises.
    with open(path + 'result_ami/result_4_block_scen.txt', 'w') as out:
        block_dir = path + "manual_blocks/"
        # range() yields the same 34 names as xrange() and also runs on Python 3.
        meet_list = ["block_" + str(i + 1) for i in range(34)]

        all_documents = [tools.text_to_string(block_dir + name + '.txt') for name in meet_list]
        example = tf_idf_class(all_documents)

        # Keep the two call forms distinct: tfidf() and tfidf(None) are not
        # known to be equivalent.
        if length is None:
            tfidf_representation = example.tfidf()
            selected = meet_list
        else:
            tfidf_representation = example.tfidf(length)
            selected = meet_list[0:length]

        # Only the position is needed: it indexes both the meeting groups and
        # the tf-idf rows.
        for counter, _ in enumerate(selected):
            meet_block = classification_scenario_based[counter]

            out.write(str(meet_block) + '\n')
            out.write('*****************\n')
            out.flush()

            c = compare_with_AMI_results(meet_block)
            c.get_resumes()

            candidate = c.get_best_k_tfidf(n_words, tfidf_representation[counter])
            out.write('candidate = ' + str(candidate) + '\n')
            # Flush before the BLEU step so partial results can be inspected
            # while the run is still in progress.
            out.flush()

            bleu_measure = c.bleu_evaluation(candidate)

            out.write('score_abstractive = ' + str(bleu_measure[0]) + '\n')
            out.write('score_extractive = ' + str(bleu_measure[1]) + '\n')
            out.write('\n')
            out.flush()
Beispiel #6
0
    ['meet_116', 'meet_117', 'meet_118', 'meet_119'],
    ['meet_120', 'meet_121', 'meet_122', 'meet_123'],
    ['meet_124', 'meet_125', 'meet_126', 'meet_127'],
    ['meet_128', 'meet_129', 'meet_130', 'meet_131'],
    ['meet_132', 'meet_133', 'meet_134', 'meet_135'],
    ['meet_136', 'meet_137', 'meet_138', 'meet_139'],
    ['meet_140', 'meet_141', 'meet_142', 'meet_143'],
    ['meet_144', 'meet_145', 'meet_146', 'meet_147'],
    ['meet_148', 'meet_149', 'meet_150', 'meet_151']
]

# For every group of meetings: build a tf-idf model over that group's
# documents only, then write one candidate per meeting to the result file.
# `file` and `adress` are defined outside this fragment.
for meet_list in classification_scenario_based:
    # Group header: the list of meeting names framed by asterisks.
    file.write('************************' + str(meet_list) +
               '*************************************\n')
    all_documents = [
        tools.text_to_string(adress + i + '.txt') for i in meet_list
    ]
    example = tf_idf_class(all_documents)  # , document_0)
    tfidf_representation = example.tfidf()

    for meet in meet_list:
        file.write(meet + '\n')
        file.write('*****************\n')

        c = compare_with_AMI_results(meet)
        c.get_resumes()

        # The tf-idf row is selected by the meeting's position in its group;
        # list.index() returns the first match, so names are assumed to be
        # unique within a group. The number of words is fixed to 20 here.
        candidate = c.get_best_k_tfidf(
            20, tfidf_representation[meet_list.index(meet)])
        file.write('candidate = ' + str(candidate) + '\n')