def predict(self, data):
    """Predict labels for every sentence in ``data``.

    Features are extracted per sentence with ``self.features_for_sentence``,
    translated to vocabulary indices, written out via
    ``libml.write_features`` (with no labels) and classified with
    ``libml.predict``. The raw labels read back by ``libml.read_labels``
    are then regrouped so they line up with the sentences in ``data``.

    Args:
        data: Iterable of sentences. It is iterated more than once and
            ``len(sentence)`` labels are consumed per sentence, so each
            sentence presumably is a sequence of tokens -- confirm
            against callers.

    Returns:
        The dict returned by ``libml.read_labels``, updated in place: each
        value becomes a list with one entry per sentence, and each entry is
        a list of ``Model.reverse_labels[int(raw_label)]`` values.

    Raises:
        IndexError: If fewer raw labels were read back than the sentences
            in ``data`` require.
    """
    rows = [self.features_for_sentence(sentence) for sentence in data]

    vocab = self.vocab
    # Build real lists rather than lazy ``map`` objects so the rows can be
    # consumed by libml on both Python 2 and Python 3. Features that are
    # not in the vocabulary are dropped.
    rows = [
        [
            {vocab[name]: feats[name] for name in feats if name in vocab}
            for feats in row
        ]
        for row in rows
    ]

    libml.write_features(self.filename, rows, None, self.type)
    libml.predict(self.filename, self.type)
    labels_list = libml.read_labels(self.filename, self.type)

    # Only values are replaced (no keys added or removed), so updating the
    # dict while iterating over its items is safe.
    for key, labels in labels_list.items():
        per_sentence = []
        start = 0
        for sentence in data:
            stop = start + len(sentence)
            # Slice by offset instead of repeated ``pop(0)``, which is
            # O(n) per call and made the regrouping quadratic.
            chunk = labels[start:stop]
            if len(chunk) != stop - start:
                raise IndexError(
                    "not enough predicted labels: needed %d, got %d"
                    % (stop - start, len(chunk))
                )
            per_sentence.append(
                [Model.reverse_labels[int(raw.strip())] for raw in chunk]
            )
            start = stop
        labels_list[key] = per_sentence

    return labels_list
def train(self, data, labels):
    """Train the model on ``data`` with the matching ``labels``.

    Steps, in order:
      1. Extract features for every sentence with
         ``self.features_for_sentence``.
      2. Extend ``self.vocab`` so every feature seen has an integer index;
         a new feature is assigned ``len(self.vocab) + 1``.
      3. Encode labels through ``Model.labels`` and features through
         ``self.vocab``.
      4. Write the encoded data with ``libml.write_features``.
      5. Pickle this object to ``self.filename``.
      6. Call ``libml.train``.

    Args:
        data: Iterable of sentences accepted by
            ``self.features_for_sentence``.
        labels: Iterable of per-sentence label sequences; every label must
            be a key of ``Model.labels``. Presumably parallel to ``data``
            -- confirm against callers.

    Raises:
        KeyError: If a label is not present in ``Model.labels``.
    """
    rows = [self.features_for_sentence(sentence) for sentence in data]

    vocab = self.vocab
    for row in rows:
        for features in row:
            for feature in features:
                if feature not in vocab:
                    vocab[feature] = len(vocab) + 1

    # Build real lists rather than lazy ``map`` objects so libml receives
    # concrete sequences on both Python 2 and Python 3.
    labels = [
        [Model.labels[label] for label in sentence_labels]
        for sentence_labels in labels
    ]
    rows = [
        [{vocab[name]: feats[name] for name in feats} for feats in row]
        for row in rows
    ]

    libml.write_features(self.filename, rows, labels, self.type)

    # pickle writes bytes, so the file must be opened in binary mode; text
    # mode fails on Python 3 and can corrupt the stream on Windows.
    with open(self.filename, "wb") as model:
        pickle.dump(self, model)

    libml.train(self.filename, self.type)