Example #1
def ini_mss2015_data(root_path, out_path):
    #__processing_using_ros()
    #__processing_using_nltk()
    os.makedirs(out_path)
    #__stop_word = read_file("./third_part/dict/stop_list.txt")
    for cur_file in os.listdir(root_path):
        out_dir_name = cur_file
        out_dir_path = os.path.join("%s/%s" % (out_path, out_dir_name))
        os.mkdir(out_dir_path)
        content = read_file(root_path + "/" + cur_file + "/" + cur_file + ".txt")
        write_file(content, out_dir_path + "/" + out_dir_name + ".txt", False)
        # start generating temp files
        tokenized_paper = read_file(root_path + "/" + cur_file + "/lemmatized_body.temp")
        remove_stop = []
        segmented_paper = []
        no_bracket_str = []
        section_set = []
        tmp_str = ""
        tmp_removed_str = ""
        tmp_no_bracket_str = ""
        __brackets = False
        tmp_int = 0
        for word in tokenized_paper:
            if word == "(" or word == u"(":
                __brackets = True
            elif word == ")" or word == u")":
                __brackets = False
            #if word not in __stop_word:
            tmp_removed_str += word + " "
            if __brackets:
                tmp_str += word + " "
                continue
            if word != "#":
                tmp_no_bracket_str += word + " "
                tmp_str += word + " "
            if word.endswith(".") or word in [u"。", u"!", u"?", u";", u"#"]:
                if tmp_removed_str != "":
                    segmented_paper.append(tmp_str)
                    remove_stop.append(tmp_removed_str)
                    no_bracket_str.append(tmp_no_bracket_str)
                    tmp_int += 1
                if word == "#":
                    section_set.append(str(tmp_int - 1))
                tmp_str = ""
                tmp_removed_str = ""
                tmp_no_bracket_str = ""
        section_set.append(str(len(segmented_paper)))
        write_file(remove_stop, out_dir_path + "/RemoveStop.temp", False)
        write_file(segmented_paper, out_dir_path + "/word_segment.temp", False)
        write_file(no_bracket_str, out_dir_path + "/word_remove_bracket.temp", False)
        titles = read_file(root_path + "/" + cur_file + "/tokenized_title.temp")
        write_file([" ".join(titles)], out_dir_path + "/titles.temp", False)
        write_file(section_set, out_dir_path + "/sec_idx.temp", False)
        model_temp(segmented_paper, out_dir_path)

    return ""
Example #2
    def test(self):
        """
        Parse hLDA results under self.__root_path, which must contain a runXXX directory.
        :return:
        """
        run_path = self.__root_path + "/run000"
        # path assign
        mode_assign = read_file(run_path + "/mode.assign")
        # word level assign
        mode_levels = read_file(run_path + "/mode.levels")
        # word list
        word_list = read_file(self.__root_path + "/words.temp")
        # model.temp
        model_temp = read_file(self.__root_path + "/model.temp")

        # sentences paths
        self.path_list = dict()
        self.nodes = dict()
        self.node_word_freq = dict()
        for idx in range(len(mode_assign)):
            line = mode_assign[idx]
            new_path = " ".join(line.split(" ")[2:])
            if new_path not in self.path_list:
                self.path_list[new_path] = []
            if idx in self.__candidata:
                self.path_list[new_path].append(idx)
        self.__ori_allocation = np.array(
            [float(len(self.path_list[i])) for i in self.path_list])
        self.__ori_allocation /= (np.sum(self.__ori_allocation))
        self.__cur_allocation = np.zeros(self.__ori_allocation.shape).tolist()
        # print path
        # for path in sorted(self.path_list.items(), key=lambda x: len(x[1])):
        #     print path[0], "\t: ", str(len(path[1])), path[1]

        for level in range(3):
            for path in mode_assign:
                cur_node = path.split(" ")[2:level + 3]
                if " ".join(cur_node) not in self.nodes:
                    self.nodes[" ".join(cur_node)] = []
                    self.node_word_freq[" ".join(cur_node)] = []
                for i in range(
                        len(mode_levels[int(path.split(" ")[0])].split(" "))):
                    word = mode_levels[int(path.split(" ")[0])].split(" ")[i]
                    if int(word.split(":")[1]) == level:
                        self.nodes[" ".join(cur_node)].append(word_list[int(
                            word.split(":")[0])])
                        # self.node_word_freq[" ".join(cur_node)].append(model_temp[int(path.split(" ")[0])].split(" ")[i + 1])
        for node in self.nodes:
            print node, ": ", "\t".join(
                self.nodes[node])  # , "\t".join(self.node_word_freq[node])
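For reference, the parsing above implies a simple line format for the hLDA output: each mode.assign line starts with a document index, followed by one field that is skipped, followed by the node ids of the document's path, while each mode.levels line holds word_index:level pairs. A small hedged sketch, using made-up values, of how those lines are pulled apart:

# hypothetical sample lines, mirroring how the code above splits them
sample_assign = "17 -1234.56 0 3 9"      # doc index, skipped field, then the path node ids
sample_levels = "42:0 108:1 7:2"         # word_index:level pairs for that document

doc_idx = int(sample_assign.split(" ")[0])         # -> 17
path = " ".join(sample_assign.split(" ")[2:])      # -> "0 3 9"
level_of_word = {int(p.split(":")[0]): int(p.split(":")[1])
                 for p in sample_levels.split(" ")}  # -> {42: 0, 108: 1, 7: 2}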
Example #3
def __processing_using_nltk(original_path,
                            data_backup_path):
    px = ParseXML()
    for cur_file in os.listdir(original_path):
        dir_name = cur_file.split(".")[0]
        if dir_name == "test":
            continue
        out_path = os.path.join("%s/%s/" % (data_backup_path, dir_name))
        # note: the trailing "and False" disables this skip, so the temp files are always regenerated
        if os.path.exists(out_path + "/tokenized_body.temp") and \
                os.path.exists(out_path + "/tokenized_title.temp") and False:
            continue
        if not os.path.exists(out_path):
            os.makedirs(out_path)
        #px.parse(os.path.join("%s/%s" % (original_path, cur_file)))
        cur_content = read_file(os.path.join("%s/%s" % (original_path, cur_file)))
        contents = cur_content
        #print contents
        titles = cur_content[:1]  # first line is the title; keep it as a list so " ".join(titles) joins lines, not characters
        print titles
        word_segmented = word_tokenize(" ".join(contents))
        write_file(contents, os.path.join("%s/%s.txt" % (out_path, dir_name)), False)
        write_file(word_segmented, out_path + "/tokenized_paper.temp", False)
        write_file(word_segmented, out_path + "/lemmatized_paper.temp", False)
        write_file(word_tokenize(" ".join(titles)), out_path + "/tokenized_title.temp", False)
        write_file(word_tokenize(" ".join(titles)), out_path + "/lemmatized_title.temp", False)
        write_file(word_tokenize(" ".join(cur_content)), out_path + "/tokenized_body.temp", False)
        write_file(word_tokenize(" ".join(cur_content)), out_path + "/lemmatized_body.temp", False)
Example #4
 def launch_multiling_single_summary(self, dic_path):
     self.__rouge_path = ini_rouge_data(name_suffix=self.feature_merge +
                                        "-" + "-" + self.summary_method)
     path_dir = os.listdir(dic_path)
     for cur_lang in path_dir:
         if cur_lang not in ["zh", "en"]:
             continue
         lang_dir = os.path.join("%s/%s" % (dic_path, cur_lang))
         self.__all_conf = []
         # get target length of current language
         self.__target_len = dict()
         for line in read_file(self.__target_len_dir + cur_lang + ".txt"):
             self.__target_len[line.split("_")[0]] = int(line.split(",")[1])
         # get summary of current file(cur_file)
         for cur_file in os.listdir(lang_dir):
             self.max_sum_len__ = self.__target_len[cur_file]
             child_path = os.path.join('%s/%s/%s/' %
                                       (dic_path, cur_lang, cur_file))
             self.__child_path = child_path
             log.info(child_path)
             self.get_mss_paper_summary(cur_lang, cur_file)
         # write_file(self.__all_conf, self.__rouge_path + cur_lang + "/configure/.configure_all_" + cur_lang + ".txt", False)
         # if not os.path.exists(self.__rouge_path + cur_lang + "/output"):
         #     os.makedirs(self.__rouge_path + cur_lang + "/output")
     return self.__rouge_path
Example #5
def get_lda_input_format():
    file_content = read_file(
        "..\\data\\sample_datas\\evasampledata4-TaskAA.txt")
    lda_format = list()
    cur_target = ""
    count = 0
    for cur_line in file_content:
        if cur_line == "":
            continue
        (id_, target, text, stance) = cur_line.split("\t")
        if cur_target == "":
            cur_target = target
            count = 0
        if cur_target != target:
            cur_target = target
            count = 0
        if count < 500:
            lda_format.append(text)
            count += 1


#        if target != "IphoneSE":
#            continue
#        lda_format.append(" ".join(jieba.cut(text)))
    write_file(lda_format, "../nlpcc2016.txt")
Example #6
def __extract_rouge_result(result_path):
    log.info("extracting rouge result to log.")
    if not os.path.isdir(result_path + "output/"):
        log.error("result path is not a directory")
        return
    for cur_file in os.listdir(result_path + "output/"):
        cur_rouge_value = read_file(
            os.path.join("%s/%s/%s" % (result_path, 'output', cur_file)))
        log.debug(cur_file + "\n" + "\n".join(cur_rouge_value))
Example #7
    def get_mss_paper_summary(self, file_name, if_write_file=True):
        """
        generate summary for one paper, single document summarization
        :param lang:
        :param file_name: current file name, used for write summary answer
        :param if_write_file: whether write generated summary to answer file named file_name
        :return:
        """
        # initial
        self.__quality, self.__paper_name = None, file_name
        self.quality_method__ = ""

        if self.stop_word_method == "remove_stop":
            self.__paper = read_file(self.__child_path + "RemoveStop.temp")
        elif self.stop_word_method == "with_stop":
            self.__paper = read_file(self.__child_path + "word_segment.temp")
        self.__titles = read_file(self.__child_path + "titles.temp")
        self.__paper_original = read_file(self.__child_path +
                                          "word_segment.temp")
        self.__sub_paper_len = [
            int(i) for i in read_file(self.__child_path + "sec_idx.temp")
        ]

        # extract sentence
        feature_subset, eig = self.__cal_candidate_set()
        #print len(feature_subset)
        #        feature_subset = range(len(self.__paper_original))
        #        eig = []
        log.info(feature_subset)
        log.debug(eig)
        # use feature list to extract summary
        summary = self.__construct_summary(feature_subset, eig)

        if if_write_file:
            if file_name == '':
                log.error("file name is empty")
                return ""
            # write answer to file for ROUGE
            #print self.__rouge_path
            answer_path = self.__child_path
            write_file(
                summary,
                os.path.join('%s/%s.txt' %
                             (answer_path, file_name + '_result')), False)

        return "".join(summary)
Example #8
    def __init(self, root_path):
        """
        :param root_path: directory containing hLDA results; must contain a runXXX subdirectory (e.g. run000)
        :return:
        """
        run_path = root_path + "/run000"
        # path assign
        mode_assign = read_file(run_path + "/mode.assign")
        # word level assign
        mode_levels = read_file(run_path + "/mode.levels")
        # word list
        word_list = read_file(root_path + "/words.temp")

        # sentences paths
        self.path_list = dict()
        for idx in range(len(mode_assign)):
            line = mode_assign[idx]
            new_path = " ".join(line.split(" ")[2:])
            if new_path not in self.path_list:
                self.path_list[new_path] = []
            if idx in self.__candidata:
                self.path_list[new_path].append(idx)
        self.__ori_allocation = np.array(
            [float(len(self.path_list[i])) for i in self.path_list])
        self.__ori_allocation /= (np.sum(self.__ori_allocation))
        self.__cur_allocation = np.zeros(self.__ori_allocation.shape).tolist()
        # print path
        # for path in sorted(self.path_list.items(), key=lambda x: len(x[1])):
        #     print path[0], "\t: ", str(len(path[1])), path[1]
        # for path in self.path_list:
        # if self.path_list[path]
        # print path, len(self.path_list[path]), self.path_list[path]
        # sentence levels
        self.sen_levels = []
        for i in range(len(mode_levels)):
            self.sen_levels.append([])
            word_level = mode_levels[i].split(" ")
            for j in range(3):
                tmp = []
                for word in word_level:
                    w2l = word.split(":")
                    if w2l[1] == str(j):
                        tmp.append(w2l[0])
                self.sen_levels[i].append(tmp)
Example #9
def get_rouge_ans():
    memog_res = read_file("../data/2017_results/MSS2017_ROUGE_1.5.7_CI.csv")
    ori_res_value = np.array(
        [line.replace(" ", "").split(",")[4] for line in memog_res])
    ori_team = np.array(
        [line.replace(" ", "").split(",")[0] for line in memog_res])
    ori_priority = np.array(
        [line.replace(" ", "").split(",")[1] for line in memog_res])
    ori_lang = np.array(
        [line.replace(" ", "").split(",")[2] for line in memog_res])
    rouge_n = np.array(
        [line.replace(" ", "").split(",")[3] for line in memog_res])
    for cur_rouge in ["ROUGE-1", "ROUGE-2", "ROUGE-3", "ROUGE-4"]:
        ans = []
        tmp_set = set()
        rouge_idx = np.where(rouge_n == cur_rouge)[0]
        lang = ori_lang[rouge_idx]
        priority = ori_priority[rouge_idx]
        team = ori_team[rouge_idx]
        res_value = ori_res_value[rouge_idx]
        for cur_lang in set(lang):
            print cur_lang
            idx = np.where(lang == cur_lang)[0]
            cur_value = res_value[idx]
            cur_priority = priority[idx]
            cur_team = team[idx]
            tmp_ans = cur_lang + '\t'
            tmp_len = 0
            while tmp_len < len(cur_value):
                # rank systems by score: take the current maximum (ties grouped together),
                # then mask those entries out and repeat
                idx_max = np.where(cur_value == max(cur_value))[0]
                tmp_len += len(idx_max)
                for idx_1 in idx_max:
                    tmp_ans += cur_team[idx_1] + "-" + cur_priority[
                        idx_1] + '\t'
                cur_value[idx_max] = -1
            ans.append(tmp_ans)
        tmp_set = set([team[i] + "-" + priority[i] for i in range(len(team))])
        write_file(ans, "../" + cur_rouge + "_ans.txt")
        final_ans = []
        final_value = []
        tmp_set = sorted(list(tmp_set))
        final_ans.append("lang," + ",".join(tmp_set))
        for cur_ans in ans:
            ans_list = cur_ans.split('\t')
            print ans_list
            final_tmp_ans = ["" for i in range(len(tmp_set) + 1)]
            final_tmp_ans[0] = ans_list[0]
            for i in range(len(tmp_set)):
                if tmp_set[i] in ans_list:
                    final_tmp_ans[i + 1] = str(
                        np.where(np.array(ans_list) == tmp_set[i])[0][0])
            final_ans.append(",".join(final_tmp_ans))
        write_file(final_ans, "../final_" + cur_rouge + "_ans.csv")
    print "end"
Example #10
 def __get_doc2vec_matrix(self, path):
     log.info('use word2vec')
     self.quality_method__ = "word2vec"
     self.distance_method__ = "100"
     word2vec_matrix = read_file(path)
     word2vec_matrix = word2vec_matrix[2:len(word2vec_matrix) - 1]
     self.__key_word = [vec_.split(u" ")[0] for vec_ in word2vec_matrix]
     log.debug("word2vec key words: \n" + "\t".join(self.__key_word))
     word2vec = np.array([(vec_.encode("utf-8")).split(" ")[1:]
                          for vec_ in word2vec_matrix])
     word2vec = word2vec.astype(np.float64)
     return word2vec.dot(word2vec.transpose()) * 1000
Example #11
def sentence_cluster(mode_path, run_name):
    assign_path = mode_path + "/" + run_name + "/mode.assign"
    mode_assign = read_file(assign_path)
    cluster_set = [
        " ".join(cur_line.split(" ")[2:]) for cur_line in mode_assign
    ]
    ans = []
    for cluster_id in set(cluster_set):
        idx = np.where(np.array(cluster_set) == cluster_id)[0]
        sen_cluster = [
            int(line.split(" ")[0]) for line in np.array(mode_assign)[idx]
        ]
        ans.append(sen_cluster)
    ans.sort(key=len, reverse=True)
    print ans
    return ans
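A hypothetical usage of sentence_cluster, assuming an hLDA output directory laid out as <mode_path>/<run_name>/mode.assign; the returned clusters are sorted largest first:

# "./hlda_out" and "run000" are placeholder values for illustration only
clusters = sentence_cluster("./hlda_out", "run000")
for i, sentence_ids in enumerate(clusters):
    print "cluster %d: %d sentences" % (i, len(sentence_ids))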
Example #12
def get_memog_ans():
    memog_res = read_file("../data/2017_results/MSS2017_MeMoG_CI_March30.csv")
    res_value = np.array(
        [line.replace(" ", "").split(",")[4] for line in memog_res])
    team = np.array(
        [line.replace(" ", "").split(",")[0] for line in memog_res])
    priority = np.array(
        [line.replace(" ", "").split(",")[1] for line in memog_res])
    lang = np.array(
        [line.replace(" ", "").split(",")[2] for line in memog_res])
    ans = []
    memog_answer_value = []
    tmp_set = set()
    for cur_lang in set(lang):
        print cur_lang
        idx = np.where(lang == cur_lang)[0]
        cur_value = res_value[idx]
        cur_priority = priority[idx]
        cur_team = team[idx]
        tmp_ans = cur_lang + '\t'
        tmp_len = 0
        while tmp_len < len(cur_value):
            # rank systems by score: take the current maximum (ties grouped together),
            # then mask those entries out and repeat
            idx_max = np.where(cur_value == max(cur_value))[0]
            tmp_len += len(idx_max)
            for idx_1 in idx_max:
                tmp_ans += cur_team[idx_1] + "-" + cur_priority[idx_1] + '\t'
            cur_value[idx_max] = -1
        ans.append(tmp_ans)
    tmp_set = set([team[i] + "-" + priority[i] for i in range(len(team))])
    write_file(ans, "../memog_ans_march30.txt")
    final_ans = []
    tmp_set = sorted(list(tmp_set))
    final_ans.append("lang," + ",".join(tmp_set))
    for cur_ans in ans:
        ans_list = cur_ans.split('\t')
        print ans_list
        final_tmp_ans = ["" for i in range(len(tmp_set) + 1)]
        final_tmp_ans[0] = ans_list[0]
        for i in range(len(tmp_set)):
            if tmp_set[i] in ans_list:
                final_tmp_ans[i + 1] = str(
                    np.where(np.array(ans_list) == tmp_set[i])[0][0])
        final_ans.append(",".join(final_tmp_ans))
    write_file(final_ans, "../final_memog_ans_march30.csv")
    print "end"
Example #13
def get_rouge_value():
    memog_res = read_file("../data/2017_results/MSS2017_ROUGE_1.5.7_CI.csv")
    ori_res_value = np.array(
        [line.replace(" ", "").split(",")[4] for line in memog_res])
    ori_team_priority = np.array([
        line.replace(" ", "").split(",")[0] + "-" +
        line.replace(" ", "").split(",")[1] for line in memog_res
    ])
    ori_lang = np.array(
        [line.replace(" ", "").split(",")[2] for line in memog_res])
    rouge_n = np.array(
        [line.replace(" ", "").split(",")[3] for line in memog_res])
    for cur_rouge in ["ROUGE-1", "ROUGE-2", "ROUGE-3", "ROUGE-4"]:
        ans = []
        # get team priority
        tmp_team_priority = []
        rouge_idx = np.where(rouge_n == cur_rouge)[0]
        lang = ori_lang[rouge_idx]
        team_priority = ori_team_priority[rouge_idx]
        res_value = ori_res_value[rouge_idx]
        for cur_lang in set(lang):
            idx = np.where(lang == cur_lang)[0]
            if len(tmp_team_priority) < len(team_priority[idx]):
                tmp_team_priority = team_priority[idx]
        tmp_team_priority = sorted(tmp_team_priority)
        print tmp_team_priority
        print len(tmp_team_priority)

        ans.append("lang\t" + "\t".join(tmp_team_priority))
        for cur_lang in set(lang):
            print cur_lang
            tmp_ans = cur_lang
            idx = np.where(lang == cur_lang)[0]
            cur_lang_value = res_value[idx]
            print team_priority[idx]
            for cur_team_priority in tmp_team_priority:
                idx_1 = np.where(team_priority[idx] == cur_team_priority)[0]
                if len(idx_1) == 0:
                    tmp_ans += "\t-"
                    continue
                tmp_ans += "\t" + str(float(cur_lang_value[idx_1[0]]) * 100)
            ans.append(tmp_ans)
        write_file(ans, "../final_" + cur_rouge + "_value.csv")
    print "end"
Example #14
def analyse_rouge_value(file_path, rouge_n):
    log.info("analysing rouge result ...")
    analysed_log = []
    for cur_log_file in os.listdir(file_path):
        if not cur_log_file.endswith(".log"):
            continue
        log_content = read_file(
            os.path.join("%s/%s" % (file_path, cur_log_file)))
        file_name = os.path.basename(cur_log_file)
        tmp_log = ""
        for i in range(len(log_content)):
            if log_content[i].endswith("configure_all.out"):
                tmp_log = "all\t"
                tmp_log += "\t".join(
                    file_name.replace(".log", "").split(".")[3:])
                tmp_log += "\t" + log_content[i + 2].split(" ")[3]
                tmp_log += "\t" + log_content[i + 3].split(" ")[3]
                tmp_log += "\t" + log_content[i + 4].split(" ")[3]
                break
        analysed_log.append(tmp_log)
    write_file(analysed_log, "./data/log_analysis/ana.log", False)
Example #15
def get_memog_value():
    memog_res = read_file("../data/2017_results/MSS2017_MeMoG_CI_March30.csv")
    res_value = np.array(
        [line.replace(" ", "").split(",")[4] for line in memog_res])
    team_priority = np.array([
        line.replace(" ", "").split(",")[0] + "-" +
        line.replace(" ", "").split(",")[1] for line in memog_res
    ])
    lang = np.array(
        [line.replace(" ", "").split(",")[2] for line in memog_res])
    ans = []

    # get team priority
    tmp_team_priority = []
    for cur_lang in set(lang):
        idx = np.where(lang == cur_lang)[0]
        if len(tmp_team_priority) < len(team_priority[idx]):
            tmp_team_priority = team_priority[idx]
    tmp_team_priority = sorted(tmp_team_priority)
    print tmp_team_priority
    print len(tmp_team_priority)

    ans.append("lang\t" + "\t".join(tmp_team_priority))
    for cur_lang in set(lang):
        print cur_lang
        tmp_ans = cur_lang
        idx = np.where(lang == cur_lang)[0]
        cur_lang_value = res_value[idx]
        print team_priority[idx]
        for cur_team_priority in tmp_team_priority:
            idx_1 = np.where(team_priority[idx] == cur_team_priority)[0]
            if len(idx_1) == 0:
                tmp_ans += "\t-"
                continue
            tmp_ans += "\t" + str(float(cur_lang_value[idx_1[0]]) * 100)
        ans.append(tmp_ans)
    write_file(ans, "../final_memog_value_march30.csv")
    print "end"
Example #16
def get_hlda_message(path):
    mode = read_file(path + "/mode")
    mode_assign = read_file(path + "/mode.assign")
Example #17
    def get_mss_paper_summary(self, lang, file_name, if_write_file=True):
        """
        generate summary for one paper, single document summarization
        :param lang:
        :param file_name: current file name, used for write summary answer
        :param if_write_file: whether write generated summary to answer file named file_name
        :return:
        """
        # initial
        self.__quality, self.__paper_name = None, file_name
        self.quality_method__ = ""
        '''
        if DATA == "mms2015":
            self.__all_file.merge_mms_2015(os.path.dirname(self.__child_path), "chinese")
        elif DATA == "mss2017":
            if lang in ["vi", "ka"]:
                self.__all_file.merge_mss_2017(os.path.dirname(self.__child_path))
            else:
                self.__all_file.merge_mss_2017_ros(os.path.dirname(self.__child_path))
        self.__paper_original = self.__all_file.get_merged_paper()
        if self.stop_word_method == "remove_stop":
            self.__paper = self.__all_file.get_filtered_paper()
        elif self.stop_word_method == "with_stop":
            self.__paper = self.__all_file.get_merged_paper()
        self.__titles = self.__all_file.get_titles()
        # used for generate hLDA input file and calculate level method.
        if (not os.path.exists(self.__child_path + "model.temp")) or False:
            write_file(self.__paper, self.__child_path + "RemoveStop.temp", False)
            write_file(self.__paper_original, self.__child_path + "word_segment.temp", False)
            model_temp(self.__paper, self.__child_path)
            return ""
        '''
        if self.stop_word_method == "remove_stop":
            self.__paper = read_file(self.__child_path + "RemoveStop.temp")
        elif self.stop_word_method == "with_stop":
            self.__paper = read_file(self.__child_path + "word_segment.temp")
        self.__titles = read_file(self.__child_path + "titles.temp")
        self.__paper_original = read_file(self.__child_path +
                                          "word_segment.temp")
        self.__sub_paper_len = [
            int(i) for i in read_file(self.__child_path + "sec_idx.temp")
        ]
        # extract sentence
        feature_subset, eig = self.__cal_candidate_set()
        #        feature_subset = range(len(self.__paper_original))
        #        eig = []
        log.error("results is: ")
        log.info(feature_subset)
        log.debug(eig)
        # use feature list to extract summary
        summary = self.__construct_summary(feature_subset, eig, lang)
        if if_write_file:
            if file_name == '':
                log.error("file name is empty")
                return ""
            # write answer to file for ROUGE
            answer_path = self.__rouge_path + lang + "/systems/"
            write_file(summary,
                       os.path.join('%s%s.txt' % (answer_path, file_name)),
                       False)
            '''
            # generate gold summary split by CHAR
            gold_path = self.__rouge_path + lang + "/models/"
            if not os.path.exists(gold_path):
                os.makedirs(gold_path)
            tmp_name = lang + "/" + file_name + "_summary.txt"
            abs_human = read_file('./data/MultiLing2015-MSS/multilingMss2015Eval/summary/' + tmp_name)
            if not os.path.exists(gold_path + file_name + "_summary.txt") and lang != "vi" and lang != 'ka':
                write_file([" ".join(api.tokenize("\n".join(abs_human)))],
                           gold_path + file_name + "_summary.txt", False)
            if lang == "vi":
                write_file(abs_human, gold_path + file_name + "_summary.txt", False)
            # generate configure file of each document for ROUGE
            conf_path = self.__rouge_path + lang + "/configure/"
            if not os.path.exists(conf_path):
                os.makedirs(conf_path)
            tmp_conf_ = answer_path + file_name + ".txt " + gold_path + file_name + "_summary.txt"
            self.__all_conf.append(tmp_conf_)
            write_file([tmp_conf_], os.path.join('%s/%s.txt' % (conf_path, file_name)), False)
            '''

        return "".join(summary)