def __init__(self, reader, extractors, estimator, epochs, learning_rate,
             window_size, name_model, reader_file, optimizer):
    """Store the pipeline components and the estimator hyper-parameters.

    Besides copying every argument onto the instance, this builds a
    ``LabelExtractor`` for the column the reader reports for ``'LABEL'``.
    ``optimizer`` is stored under the attribute name ``optimizer_type``.
    """
    # pipeline components
    self.reader = reader
    self.extractors = extractors
    self.estimator = estimator

    label_column = self.reader.getPosition('LABEL')
    self.label_extractor = LabelExtractor(label_column)

    # estimator params
    self.epochs = epochs
    self.learning_rate = learning_rate
    self.window_size = window_size
    self.name_model = name_model
    self.reader_file = reader_file
    self.optimizer_type = optimizer
class Classifier(object):
    """Tie a reader, feature extractors and an estimator into train/predict.

    ``estimator`` is a factory (typically a class): it is called with the
    keyword arguments ``epochs``, ``num_labels``, ``learning_rate``,
    ``window_size``, ``num_feats`` and ``name_model`` every time ``train()``
    or ``predict()`` runs.  After such a call ``self.estimator`` holds the
    freshly built instance; the factory itself is kept separately so that
    the methods can be called repeatedly (e.g. ``train()`` then
    ``predict()``) on the same object.
    """

    # Share of the examples, taken from the head of the data, that train()
    # holds out as the development set.
    DEV_FRACTION = 0.3

    def __init__(self, reader, extractors, estimator, epochs, learning_rate,
                 window_size, name_model, reader_file, optimizer):
        """Store the pipeline components and the estimator hyper-parameters.

        Args:
            reader: object providing ``read()``, ``dump()``, ``load()``,
                ``getPosition()`` and a ``vocabulary`` attribute.
            extractors: feature extractors, each exposing
                ``extract(sentence, vocabulary)``.
            estimator: factory called to build the model (see class doc).
            epochs, learning_rate, window_size, name_model: forwarded to
                the estimator factory.
            reader_file: path where the reader state is pickled by
                ``train()`` and read back by ``predict()``.
            optimizer: stored as ``optimizer_type``; not forwarded to the
                estimator by this class.
        """
        self.reader = reader
        self.extractors = extractors
        self.estimator = estimator
        # Keep the factory: self.estimator is rebound to an instance later.
        self._estimator_factory = estimator
        self.label_extractor = LabelExtractor(self.reader.getPosition('LABEL'))

        # estimator params
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.window_size = window_size
        self.name_model = name_model
        self.reader_file = reader_file
        self.optimizer_type = optimizer

    def _extract_features(self, sentence):
        """Run every extractor on ``sentence`` and zip the results together."""
        vocabulary = self.reader.vocabulary
        feats = [extractor.extract(sentence, vocabulary)
                 for extractor in self.extractors]
        return list(zip(*feats))

    def _build_estimator(self):
        """Build a new estimator from the factory and expose it as ``self.estimator``."""
        label_position = self.reader.getPosition('LABEL')
        self.estimator = self._estimator_factory(
            epochs=self.epochs,
            num_labels=len(self.reader.vocabulary[label_position]),
            learning_rate=self.learning_rate,
            window_size=self.window_size,
            num_feats=len(self.extractors),
            name_model=self.name_model,
        )
        return self.estimator

    def predict(self):
        """Restore the reader state, extract features and return predictions.

        Returns:
            Whatever the estimator's ``predict(X)`` returns.
        """
        # NOTE: unpickling executes arbitrary code if the file is malicious;
        # reader_file must come from a trusted source.
        # Binary mode is required by pickle and matches the 'wb' in train().
        with open(self.reader_file, 'rb') as reader_state:
            self.reader.load(pickle.load(reader_state))

        sentences, _ = self.reader.read()
        X = [self._extract_features(sentence) for sentence in sentences]

        return self._build_estimator().predict(X)

    def train(self):
        """Extract features and labels, persist the reader and fit the model.

        The first ``DEV_FRACTION`` of the examples becomes the development
        set, the remainder the training set.

        Returns:
            Whatever the estimator's ``train(...)`` returns.
        """
        sentences, labels = self.reader.read()

        X = []
        y = []
        for sentence, sentence_labels in zip(sentences, labels):
            X.append(self._extract_features(sentence))
            y.append(self.label_extractor.extract(sentence_labels,
                                                  self.reader.vocabulary))

        with open(self.reader_file, 'wb') as reader_state:
            pickle.dump(self.reader.dump(), reader_state)

        # splitting in train and dev
        split = int(len(X) * self.DEV_FRACTION)
        dev_dataset, train_dataset = X[:split], X[split:]
        dev_labels, train_labels = y[:split], y[split:]

        return self._build_estimator().train(train_dataset, train_labels,
                                             dev_dataset, dev_labels)