Ejemplo n.º 1
0
 def __init__(self,elog=sys.stdout):
     """Create a model whose weights and codebooks are still unset/empty.

     elog -- object stored as ``self.elog``; defaults to ``sys.stdout``.
     """
     self.elog = elog

     # Weight slots all start as None; avg_weight is the slot for the
     # averaged weights.
     self.weight = self.aux_weight = self.avg_weight = None

     # Feature-template bookkeeping: the template file defaults to the
     # module-level _FEATURE_TEMPLATES_FILE constant.
     self._feats_templates_file = _FEATURE_TEMPLATES_FILE
     self._feature_templates_list = []
     self._feats_gen_filename = None
     self.feats_generator = None

     # Tables and counters, all empty at construction time.
     self.token_to_concept_table = defaultdict(set)
     self.pp_count_dict = defaultdict(int)
     self.total_num_words = 0
     self.token_label_set = defaultdict(set)

     # Class and feature codebooks start as None; the relation codebook
     # and one codebook per tag kind are fresh, empty Alphabets.
     self.class_codebook = None
     self.feature_codebook = None
     self.rel_codebook = Alphabet()
     self.tag_codebook = {
         tag_kind: Alphabet()
         for tag_kind in ('Concept', 'ETag', 'ConstTag', 'ABTTag')
     }

     # Per-label counter keyed by ABT tag; missing keys read as 0.
     self.abttag_count = defaultdict(int)
Ejemplo n.º 2
0
def decode_Alphabet(line):
    """Rebuild an ``Alphabet`` from its one-line textual dump.

    Returns ``None`` when ``line`` starts with ``'(None'``.  Otherwise the
    line is echoed to stdout and three fields are read from it in order:
    ``_index_to_label= {``, ``_label_to_index= {`` and ``num_labels= ``.
    The two dictionary fields are parsed with ``decode_dictionary_v2`` and
    the label count with ``int``.
    """
    if line.startswith('(None'):
        return None

    print(line)

    def _field(prefix, terminator, start):
        # Locate the field via findSubstring, then drop the leading prefix.
        # The second value returned by findSubstring is used both as the
        # slice end and as the start argument for the next lookup.
        # NOTE(review): exact meaning of that value depends on
        # findSubstring, which is defined outside this block.
        chunk, end = findSubstring(line, prefix, terminator, 0, start)
        return chunk[len(prefix):end], end

    result = Alphabet()

    index_text, pos = _field('_index_to_label= {', '), }', 0)
    result._index_to_label = decode_dictionary_v2(index_text)

    label_text, pos = _field('_label_to_index= {', '), }', pos)
    result._label_to_index = decode_dictionary_v2(label_text)

    count_text, pos = _field('num_labels= ', ')', pos)
    result.num_labels = int(count_text)

    return result
Ejemplo n.º 3
0
    def setup(self,action_type,instances,parser,feature_templates_file=None):
        """Prepare codebooks, templates, weights and statistics.

        action_type            -- key into ACTION_TYPE_TABLE; the first
                                  element of each entry becomes a class label.
        instances, parser      -- forwarded to _set_rel_tag_codebooks;
                                  instances is also passed to _set_statistics.
        feature_templates_file -- when truthy, replaces the stored
                                  feature-template file path.
        """
        if feature_templates_file:
            self._feats_templates_file = feature_templates_file

        # Map each position in the action table to its first element.
        index_to_action = {
            idx: action
            for idx, (action, _) in enumerate(ACTION_TYPE_TABLE[action_type])
        }
        self.class_codebook = Alphabet.from_dict(index_to_action, True)

        # One fresh feature Alphabet per class index.
        class_indices = self.class_codebook._index_to_label.keys()
        self.feature_codebook = {idx: Alphabet() for idx in class_indices}

        self.read_templates()

        n_subclass = self._set_rel_tag_codebooks(instances, parser)
        n_class = self.class_codebook.size()
        self._set_class_weight(n_class, n_subclass)
        self._set_statistics(instances)
        self.output_feature_generator()
Ejemplo n.º 4
0
 def _pruning_abttag(self,threshold=8):
     """Replace the 'ABTTag' codebook with one holding only frequent labels.

     A label from ``self.tag_codebook['ABTTag']`` is kept when its entry in
     ``self.abttag_count`` is at least ``threshold``.

     threshold -- minimum count required to keep a label (default 8).
     """
     pruned_abttag_codebook = Alphabet()
     for v in self.tag_codebook['ABTTag'].labels():
         # Compare against the caller-supplied threshold; the previous code
         # hard-coded 8 here and silently ignored the parameter.
         if self.abttag_count[v] >= threshold:
             pruned_abttag_codebook.add(v)
     self.tag_codebook['ABTTag'] = pruned_abttag_codebook