def test_add(self):
    """Tokens can be added one at a time or per sentence, without duplicates."""
    manager = IdManager()

    # A fresh manager does not know the token yet.
    assert 'a' not in manager
    manager.add_token('a')
    assert 'a' in manager

    # Adding the same token a second time must not grow the manager.
    manager.add_token('a')
    assert len(manager) == 1

    # A sentence contributes each of its (new) tokens.
    manager.add_sent(['b', 'c'])
    assert len(manager) == 3
    assert all(token in manager for token in ('b', 'c'))
class SimpleIntent(object):
    """General intent used to match sentences or phrases.

    Wraps a small FANN neural network together with the ``IdManager`` that
    maps tokens to input slots. ``net`` stays ``None`` until ``train()`` or
    ``from_file()`` has populated it.
    """

    # Target output used for training samples padded with unknown tokens.
    LENIENCE = 0.6

    def __init__(self, name=''):
        """Create an untrained intent called ``name``."""
        self.name = name
        self.ids = IdManager(Ids)
        self.net = None

    def match(self, sent):
        """Return the network's output for ``sent``, clamped to be >= 0.

        Args:
            sent: sequence of tokens to score.

        Requires ``self.net`` to be set (via ``train()`` or ``from_file()``).
        """
        return max(0, self.net.run(self.vectorize(sent))[0])

    def vectorize(self, sent):
        """Convert a token sequence into a network input vector.

        Every token known to ``self.ids`` gets its slot set to 1.0. For a
        non-empty sentence, the fraction of unknown tokens and the sentence
        length divided by 1, 2, 3 and 4 are stored in the ``Ids`` feature
        slots. An empty sentence yields the vector exactly as returned by
        ``self.ids.vector()``.
        """
        vector = self.ids.vector()
        unknown = 0
        for token in sent:
            if token in self.ids:
                self.ids.assign(vector, token, 1.0)
            else:
                unknown += 1

        # Float length keeps every ratio a true division on Python 2 as well.
        length = float(len(sent))
        if length > 0:
            self.ids.assign(vector, Ids.unknown_tokens, unknown / length)
            self.ids.assign(vector, Ids.w_1, length)
            self.ids.assign(vector, Ids.w_2, length / 2.)
            self.ids.assign(vector, Ids.w_3, length / 3.)
            self.ids.assign(vector, Ids.w_4, length / 4.)
        return vector

    def configure_net(self):
        """Replace ``self.net`` with a fresh, untrained network.

        Layers are ``[len(self.ids), 10, 1]``; hidden and output layers use
        the symmetric stepwise sigmoid activation. Training stops on the
        bit-fail criterion with a bit fail limit of 0.1.
        """
        self.net = fann.neural_net()
        self.net.create_standard_array([len(self.ids), 10, 1])
        self.net.set_activation_function_hidden(
            fann.SIGMOID_SYMMETRIC_STEPWISE)
        self.net.set_activation_function_output(
            fann.SIGMOID_SYMMETRIC_STEPWISE)
        self.net.set_train_stop_function(fann.STOPFUNC_BIT)
        self.net.set_bit_fail_limit(0.1)

    @staticmethod
    def _word_weights(sent):
        """Return ``(word, weight)`` pairs for single-word training samples.

        A word's weight is its length cubed divided by the sum of cubed
        lengths over the whole sentence. Words starting with ``'{'`` still
        count towards that sum but receive weight 0.0 themselves.

        Returns an empty list when the total is zero (empty sentence, or
        every token is an empty string) instead of dividing by zero.
        """
        cubes = [pow(len(word), 3.0) for word in sent]
        total = sum(cubes)
        if total == 0:
            return []
        return [
            (word, 0.0 if word.startswith('{') else cube / total)
            for word, cube in zip(sent, cubes)
        ]

    def train(self, train_data):
        """Build the training set for this intent and train the network.

        Args:
            train_data: object providing ``my_sents(name)`` and
                ``other_sents(name)``, each yielding token lists.

        Samples generated:
          * each own sentence                        -> 1.0
          * each single word of an own sentence      -> its relative weight
          * own sentence padded with ``':null:'``    -> ``LENIENCE``
          * each sentence of other intents, and ``[]`` -> 0.0
        """
        # Register every token first so vectors have their final size.
        for sent in train_data.my_sents(self.name):
            self.ids.add_sent(sent)

        inputs = []
        outputs = []

        def add(tokens, target):
            inputs.append(self.vectorize(tokens))
            outputs.append([target])

        def pollute(sent, position):
            # Insert roughly one ':null:' per three tokens at `position`.
            padded = sent[:]
            for _ in range((len(sent) + 2) // 3):
                padded.insert(position, ':null:')
            add(padded, self.LENIENCE)

        for sent in train_data.my_sents(self.name):
            add(sent, 1.0)
            for word, share in self._word_weights(sent):
                add([word], share)

            # Sentences containing a ':'-prefixed token are not padded.
            # NOTE(review): presumably such tokens are special markers that
            # already allow unknown words - confirm against the tokenizer.
            if not any(word.startswith(':') for word in sent):
                pollute(sent, 0)
                pollute(sent, len(sent))

        for sent in train_data.other_sents(self.name):
            add(sent, 0.0)
        add([], 0.0)

        inputs, outputs = resolve_conflicts(inputs, outputs)

        # Separate name: do not rebind the `train_data` parameter.
        fann_data = fann.training_data()
        fann_data.set_train_data(inputs, outputs)

        # Retry with a freshly configured network up to 10 times, stopping
        # as soon as no output fails the bit test on the training data.
        for _ in range(10):
            self.configure_net()
            self.net.train_on_data(fann_data, 1000, 0, 0)
            self.net.test_data(fann_data)
            if self.net.get_bit_fail() == 0:
                break

    def save(self, prefix):
        """Save the network to ``<prefix>.intent.net`` and the ids via
        ``self.ids.save('<prefix>.intent')``."""
        prefix += '.intent'
        self.net.save(str(prefix + '.net'))  # Must have str()
        self.ids.save(prefix)

    @classmethod
    def from_file(cls, name, prefix):
        """Alternate constructor: load an intent written by ``save()``.

        Args:
            name: intent name given to the new instance.
            prefix: the same prefix that was passed to ``save()``.
        """
        prefix += '.intent'
        self = cls(name)
        self.net = fann.neural_net()
        self.net.create_from_file(str(prefix + '.net'))  # Must have str()
        self.ids.load(prefix)
        return self