def __init__(self, pos, chunk, event, classify, mallet_memory='256m'):
    """Create the taggers, NER model and lookup tables selected by the flags.

    Args:
        pos: truthy to create a ``PosTagger``. The chunk and event taggers
            are only created when this is truthy as well.
        chunk: truthy (together with ``pos``) to create a ``ChunkTagger``.
        event: truthy (together with ``pos``) to create an ``EventTagger``.
        classify: truthy to obtain ``self.llda`` from ``GetLLda()``; when
            ``self.llda`` is truthy, ``self.dictionaries`` and
            ``self.entityMap`` are populated too.
        mallet_memory: forwarded to ``GetNer`` as its ``memory`` argument.
    """
    self.clear_line_counter()

    # The chunk and event taggers are only built when POS tagging is enabled.
    self.posTagger = pos_tagger_stdin.PosTagger() if pos else None
    self.chunkTagger = chunk_tagger_stdin.ChunkTagger() if chunk and pos else None
    self.eventTagger = event_tagger_stdin.EventTagger() if event and pos else None
    self.llda = GetLLda() if classify else None

    # The NER model file name depends on which taggers are enabled.
    if pos and chunk:
        self.ner_model = 'ner.model'
    elif pos:
        self.ner_model = 'ner_nochunk.model'
    else:
        self.ner_model = 'ner_nopos_nochunk.model'

    self.ner = GetNer(self.ner_model, memory=mallet_memory)
    self.fe = Features.FeatureExtractor('%s/data/dictionaries' % (BASE_DIR))
    self.capClassifier = cap_classifier.CapClassifier()
    self.vocab = Vocab('%s/hbc/data/vocab' % (BASE_DIR))

    # 1-based line number -> dictionary name, one name per line of the file.
    self.dictMap = {}
    with open('%s/hbc/data/dictionaries' % (BASE_DIR)) as dict_file:
        for index, line in enumerate(dict_file, 1):
            self.dictMap[index] = line.rstrip('\n')

    # Reverse lookup: dictionary name -> line number.
    self.dict2index = {}
    for index, name in self.dictMap.items():
        self.dict2index[name] = index

    # Entity text -> 0-based line number; stays empty when llda is disabled.
    self.entityMap = {}
    if self.llda:
        self.dictionaries = Dictionaries('%s/data/LabeledLDA_dictionaries3' % (BASE_DIR), self.dict2index)
        with open('%s/hbc/data/entities' % (BASE_DIR)) as entity_file:
            for index, line in enumerate(entity_file):
                self.entityMap[line.rstrip('\n')] = index

    # Each line must hold exactly two fields separated by a single space:
    # "<dictionary> <label>". Any other shape raises ValueError on unpacking.
    self.dict2label = {}
    with open('%s/hbc/data/dict-label3' % (BASE_DIR)) as label_file:
        for line in label_file:
            (dictionary, label) = line.rstrip('\n').split(' ')
            self.dict2label[dictionary] = label
# Example #2
# 0
    def __init__(self):
        """Set up the NER-only configuration and load its lookup tables.

        No POS, chunk or event tagger is created; the NER model used is
        ``'ner_nopos_nochunk.model'``. ``self.llda`` comes from ``GetLLda()``
        and, when truthy, ``self.dictionaries`` and ``self.entityMap`` are
        populated as well.
        """
        self.numberLines = 0
        self.eventTagger = None
        self.posTagger = None
        self.chunkTagger = None
        self.llda = GetLLda()
        self.ner = GetNer('ner_nopos_nochunk.model')
        self.fe = Features.FeatureExtractor('%s/data/dictionaries' %
                                            (BASE_DIR))
        self.capClassifier = cap_classifier.CapClassifier()

        self.vocab = Vocab('%s/hbc/data/vocab' % (BASE_DIR))

        # 1-based line number -> dictionary name, one name per line.
        self.dictMap = {}
        with open('%s/hbc/data/dictionaries' % (BASE_DIR)) as dict_file:
            for index, line in enumerate(dict_file, 1):
                self.dictMap[index] = line.rstrip('\n')

        # Reverse lookup (name -> line number); only needed locally, to
        # build the Dictionaries object below.
        dict2index = {}
        for index, name in self.dictMap.items():
            dict2index[name] = index

        # Entity text -> 0-based line number; stays empty without llda.
        self.entityMap = {}
        if self.llda:
            self.dictionaries = Dictionaries(
                '%s/data/LabeledLDA_dictionaries3' % (BASE_DIR), dict2index)
            with open('%s/hbc/data/entities' % (BASE_DIR)) as entity_file:
                for index, line in enumerate(entity_file):
                    self.entityMap[line.rstrip('\n')] = index

        # Each line must hold exactly two fields separated by one space:
        # "<dictionary> <label>"; anything else raises ValueError.
        self.dict2label = {}
        with open('%s/hbc/data/dict-label3' % (BASE_DIR)) as label_file:
            for line in label_file:
                (dictionary, label) = line.rstrip('\n').split(' ')
                self.dict2label[dictionary] = label
# Example #3
# 0
def displayScores():
    """Print every entry of the module-level ``computedScores`` mapping.

    Each entry is written on its own line as ``<key> : <value>``, with both
    parts converted through ``str``.
    """
    # Indented with spaces only: the original mixed a tab with 4-space
    # indentation, which Python 3 rejects with TabError.
    for item in computedScores:
        print(str(item) + " : " + str(computedScores[item]))


# Module-level tagger, classifier and model instances, created at import time.
posTagger = pos_tagger_stdin.PosTagger()
chunkTagger = chunk_tagger_stdin.ChunkTagger()
eventTagger = event_tagger_stdin.EventTagger()
llda = GetLLda()

ner_model = 'ner.model'
ner = GetNer(ner_model)
fe = Features.FeatureExtractor('%s/data/dictionaries' % (BASE_DIR))


capClassifier = cap_classifier.CapClassifier()

vocab = Vocab('%s/hbc/data/vocab' % (BASE_DIR))

# 1-based line number -> dictionary name, one name per line of the file.
# The file is opened in a ``with`` block so the handle is closed promptly.
dictMap = {}
with open('%s/hbc/data/dictionaries' % (BASE_DIR)) as dict_file:
    for i, line in enumerate(dict_file, 1):
        dictionary = line.rstrip('\n')
        dictMap[i] = dictionary

# Reverse lookup: dictionary name -> line number.
dict2index = {}
for i in dictMap:
    dict2index[dictMap[i]] = i

if llda: