コード例 #1
0
 def _get_working_year_words(self, year_convert_file=None):
     """Count the working-year requirements found in ``self.raw_position``.

     The year patterns are extracted with ``TextHelper.get_years_pattern``.
     When nothing is found, the placeholder ``"[0]"`` is appended to
     ``self.new_words_list`` and counted as the only entry.

     Args:
         year_convert_file: Optional location handed to
             ``StoreHelper.load_data``. When given (and at least one year
             pattern was found), each pattern is replaced by its mapped
             value and patterns missing from the mapping are dropped.

     Returns:
         Whatever ``DictHelper.dict_from_count_list`` builds from the final
         list of year words (presumably a word -> count dict; confirm
         against the helper).
     """
     found_years = TextHelper.get_years_pattern(self.raw_position)

     # No explicit requirement: record the placeholder and count only that.
     if len(found_years) == 0:
         placeholder = "[0]"
         self.new_words_list.append(placeholder)
         return DictHelper.dict_from_count_list([placeholder])

     # Conversion is applied only when years were found and a file is given.
     if year_convert_file is not None:
         conversion = StoreHelper.load_data(year_convert_file, {})
         found_years = [
             conversion[year] for year in found_years if year in conversion
         ]

     return DictHelper.dict_from_count_list(found_years)
コード例 #2
0
 def print_label(label, index_list, cluster_number=None):
     """Print a summary of clustering labels.

     Args:
         label: Sequence of cluster labels, one per position.
         index_list: Sequence parallel to ``label``; each entry is converted
             with ``int`` before being grouped. Only used when
             ``cluster_number`` is ``None``.
         cluster_number: When given, only the positions (indices into
             ``label``) belonging to clusters ``0 .. cluster_number - 1``
             are printed, one line per cluster. When ``None``, the labels,
             their counts, the smallest/largest label and the label ->
             indices grouping are printed, and the grouping is persisted to
             ``position_tag.dat`` and ``position_tag.txt`` through
             ``StoreHelper``.
     """
     if cluster_number is not None:
         # Collect every cluster first, then report, so nothing is printed
         # if building any of the groups fails.
         members_per_cluster = []
         for cluster_id in range(cluster_number):
             members = [str(position) for position, tag in enumerate(label)
                        if tag == cluster_id]
             members_per_cluster.append(members)
         for cluster_id, members in enumerate(members_per_cluster):
             print("Cluster %i has %i position, position: %s" %
                   (cluster_id, len(members), members))
         return

     label_counts = DictHelper.dict_from_count_list(label)
     print("\t".join(str(tag) for tag in label))
     print(label_counts)
     print("max cluster number: %i" % max(label_counts))
     print("min cluster number: %i" % min(label_counts))

     # Group the (integer) entries of index_list under their cluster label.
     position_tag = {}
     for position, tag in enumerate(label):
         DictHelper.append_dic_key(position_tag, tag,
                                   int(index_list[position]))
     for tag, positions in position_tag.items():
         print("%s: %s" % (tag, positions))

     StoreHelper.store_data(position_tag, 'position_tag.dat')
     StoreHelper.save_file(position_tag, 'position_tag.txt')
コード例 #3
0
 def get_frequency_dict(content):
     """Count the lemmatized words of ``content``, processed line by line.

     Each line from ``content.splitlines()`` is passed through
     ``SegmentHelper.segment_text`` and then ``SegmentHelper.lemmatization``;
     the words from all lines are gathered into one list.

     Args:
         content: Text to analyse; must provide ``splitlines()``.

     Returns:
         Whatever ``DictHelper.dict_from_count_list`` builds from the
         gathered words (presumably a word -> count dict; confirm against
         the helper).
     """
     all_words = [
         word
         for line in content.splitlines()
         for word in SegmentHelper.lemmatization(
             SegmentHelper.segment_text(line))
     ]
     return DictHelper.dict_from_count_list(all_words)
コード例 #4
0
 def convert_2(self, probability_dict):
     """Count the phrases of this position together with its year words.

     Args:
         probability_dict: Passed unchanged to
             ``self._remove_conjunction_segment``.

     Returns:
         Whatever ``DictHelper.dict_from_count_list`` builds from the
         phrases returned by ``_remove_conjunction_segment`` extended with
         the items yielded by ``_get_working_year_words``.
     """
     # Keep this call first: the year lookup may update state on ``self``
     # that the phrase extraction reads (NOTE(review): confirm).
     working_year_words = self._get_working_year_words()
     phrases = self._remove_conjunction_segment(probability_dict)
     phrases.extend(working_year_words)
     return DictHelper.dict_from_count_list(phrases)
コード例 #5
0
 def __init__(self, raw_position, word_list=None):
     """Initialise the converter state for one position description.

     Args:
         raw_position: Text of the position; stored lower-cased in
             ``self.raw_position``.
         word_list: Optional list of words. It is stored as-is (not
             copied) in ``self.word_list`` and counted into
             ``self.phrase_dict`` via ``DictHelper.dict_from_count_list``.
             When omitted, a fresh empty list is created for this instance.
     """
     # A ``[]`` default would be created once and shared by every instance
     # built without an explicit word_list; use a None sentinel instead.
     if word_list is None:
         word_list = []
     self.raw_position = raw_position.lower()
     self.word_list = word_list
     self.phrase_dict = DictHelper.dict_from_count_list(self.word_list)
     # Starts empty for every instance.
     self.new_words_list = []