def __init__(self, elog=sys.stdout):
    """Create an empty model; every table and codebook starts unpopulated.

    Args:
        elog: stream kept on ``self.elog`` (defaults to ``sys.stdout``);
            presumably the destination for log output -- confirm against
            the methods that write to it.
    """
    self.elog = elog

    # Weight storage is allocated later; avg_weight holds the averaged weights.
    self.weight = self.aux_weight = self.avg_weight = None

    # Feature-template configuration and the generator derived from it.
    self._feats_templates_file = _FEATURE_TEMPLATES_FILE
    self._feature_templates_list = []
    self._feats_gen_filename = None
    self.feats_generator = None

    # Corpus-level lookup tables and counters, all initially empty.
    self.token_to_concept_table = defaultdict(set)
    self.pp_count_dict = defaultdict(int)
    self.total_num_words = 0
    self.token_label_set = defaultdict(set)

    # Class and feature codebooks stay unset until they are built elsewhere.
    self.class_codebook = self.feature_codebook = None

    # One relation codebook, then one fresh codebook per tag category.
    self.rel_codebook = Alphabet()
    self.tag_codebook = {
        category: Alphabet()
        for category in ('Concept', 'ETag', 'ConstTag', 'ABTTag')
    }
    self.abttag_count = defaultdict(int)
def decode_Alphabet(line):
    """Rebuild an ``Alphabet`` from its single-line textual dump.

    Three fields are read from ``line`` in order: ``_index_to_label``,
    ``_label_to_index`` and ``num_labels``. The search for each field is
    handed the counter returned by the previous search.

    Args:
        line: text containing the dumped alphabet.

    Returns:
        The populated ``Alphabet``, or ``None`` when ``line`` starts with
        ``'(None'``.

    Note:
        The raw line is echoed to stdout before decoding.
    """
    if line.startswith('(None'):
        return None
    print(line)

    def _read_field(marker, closer, resume_at):
        # findSubstring is defined elsewhere; it is expected to return a
        # (text, counter) pair. The marker prefix is stripped from the text
        # and the counter is handed back for the next search.
        (chunk, counter) = findSubstring(line, marker, closer, 0, resume_at)
        return chunk[len(marker):counter], counter

    decoded = Alphabet()

    forward_text, position = _read_field('_index_to_label= {', '), }', 0)
    decoded._index_to_label = decode_dictionary_v2(forward_text)

    reverse_text, position = _read_field('_label_to_index= {', '), }', position)
    decoded._label_to_index = decode_dictionary_v2(reverse_text)

    count_text, position = _read_field('num_labels= ', ')', position)
    decoded.num_labels = int(count_text)

    return decoded
def setup(self, action_type, instances, parser, feature_templates_file=None):
    """Build the codebooks, weights and statistics needed before training.

    Args:
        action_type: key into ``ACTION_TYPE_TABLE`` selecting the action set.
        instances: passed on to ``_set_rel_tag_codebooks`` and
            ``_set_statistics``.
        parser: passed on to ``_set_rel_tag_codebooks``.
        feature_templates_file: when truthy, replaces the stored
            feature-templates file path.
    """
    if feature_templates_file:
        self._feats_templates_file = feature_templates_file

    # Number the first element of each table entry in table order.
    index_to_action = {
        position: action
        for position, (action, _unused) in enumerate(ACTION_TYPE_TABLE[action_type])
    }
    self.class_codebook = Alphabet.from_dict(index_to_action, True)

    # One independent feature alphabet per class index.
    self.feature_codebook = {
        class_index: Alphabet()
        for class_index in self.class_codebook._index_to_label.keys()
    }

    self.read_templates()

    subclass_count = self._set_rel_tag_codebooks(instances, parser)
    class_count = self.class_codebook.size()
    self._set_class_weight(class_count, subclass_count)

    self._set_statistics(instances)
    self.output_feature_generator()
def _pruning_abttag(self, threshold=8):
    """Replace the 'ABTTag' codebook with one holding only frequent tags.

    A label is kept when its count in ``self.abttag_count`` is at least
    ``threshold``. Previously the comparison used a hard-coded 8 and the
    ``threshold`` argument was silently ignored; the default is still 8, so
    callers that pass nothing get the same result as before.

    Args:
        threshold: minimum count a label needs in order to be kept.
    """
    kept_tags = Alphabet()
    for tag in self.tag_codebook['ABTTag'].labels():
        # Honour the caller-supplied cutoff instead of a literal 8.
        if self.abttag_count[tag] >= threshold:
            kept_tags.add(tag)
    self.tag_codebook['ABTTag'] = kept_tags