def __init__(self, words, pos, rels, cpos, langs, w2i, ch, options):
    # import here so we don't load Dynet if just running parser.py --help for example
    global dy
    import dynet as dy

    self.model = dy.ParameterCollection()
    self.trainer = dy.AdamTrainer(self.model, alpha=options.learning_rate)
    self.activations = {
        'tanh': dy.tanh,
        'sigmoid': dy.logistic,
        'relu': dy.rectify,
        'tanh3': (lambda x: dy.tanh(dy.cwise_multiply(dy.cwise_multiply(x, x), x)))
    }
    self.activation = self.activations[options.activation]

    self.oracle = options.oracle
    self.headFlag = options.headFlag
    self.rlMostFlag = options.rlMostFlag
    self.rlFlag = options.rlFlag
    self.k = options.k

    # dimensions depending on extended features
    self.nnvecs = (1 if self.headFlag else 0) + (2 if self.rlFlag or self.rlMostFlag else 0)

    self.feature_extractor = FeatureExtractor(self.model, options, words, rels,
                                              langs, w2i, ch, self.nnvecs)
    self.irels = self.feature_extractor.irels

    mlp_in_dims = options.lstm_output_size * 2 * self.nnvecs * (self.k + 1)
    self.unlabeled_MLP = MLP(self.model, 'unlabeled', mlp_in_dims,
                             options.mlp_hidden_dims, options.mlp_hidden2_dims,
                             4, self.activation)
    self.labeled_MLP = MLP(self.model, 'labeled', mlp_in_dims,
                           options.mlp_hidden_dims, options.mlp_hidden2_dims,
                           2 * len(self.irels) + 2, self.activation)
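# A worked example of the mlp_in_dims formula used above. The option values
# below are hypothetical, chosen only to make the arithmetic concrete; a
# minimal sketch, not actual defaults.
lstm_output_size = 125                      # assumed options.lstm_output_size
k = 3                                       # assumed options.k
headFlag, rlFlag, rlMostFlag = True, True, False
nnvecs = (1 if headFlag else 0) + (2 if rlFlag or rlMostFlag else 0)
mlp_in_dims = lstm_output_size * 2 * nnvecs * (k + 1)
print(nnvecs, mlp_in_dims)                  # 3 3000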
def main():
    import math
    import random
    from pylab import plot, show

    def func(x):
        return math.pow(x, 2.0) - 10.0 * x + 21

    # Sample x from 1.0 to 9.0 (around the roots x' = 3 and x'' = 7), split into 40 parts
    train_set = tuple(([i], [func(i)]) for i in util.divide_arange(1.0, 9.0, 40))

    mlp = MLP(1, [10, 30, 1],
              ACTIVATIONS_FUNCTIONS['sigmoid'],
              ACTIVATIONS_FUNCTIONS['custom'])
    mlp.randomise_weights(lambda: random.uniform(-0.05, 0.05))

    sup = Supervisor(mlp, 0.001)
    sup.train_set(train_set, 0.0005, 10000)

    validation = tuple(([x], [func(x)]) for x in util.divide_arange(-1.0, 11.0, 200))
    plot([i[0][0] for i in validation], [i[1][0] for i in validation], 'b',
         [i[0][0] for i in validation], [mlp.predict(i[0]) for i in validation], 'r')
    show()
def main():
    # https://www.mathworks.com/help/deeplearning/ug/improve-neural-network-generalization-and-avoid-overfitting.html
    import math
    import random
    from pylab import plot, show

    def func(x):
        return 2 * math.pow(x, 3) - math.pow(x, 2) + 10 * x - 4

    train_set = tuple(([i], [func(i)]) for i in util.divide_arange(-3.0, 3.0, 40))

    mlp = MLP(1, [10, 30, 1],
              ACTIVATIONS_FUNCTIONS['sigmoid'],
              ACTIVATIONS_FUNCTIONS['linear'])
    mlp.randomise_weights(lambda: random.uniform(-1.0, 1.0))

    sup = Supervisor(mlp, 0.01)
    sup.train_set(train_set, 0.005, 3000)

    validation = tuple(([x], [func(x)]) for x in util.divide_arange(-4.0, 4.0, 200))
    plot([i[0][0] for i in validation], [i[1][0] for i in validation], 'b',
         [i[0][0] for i in validation], [mlp.predict(i[0]) for i in validation], 'r')
    show()
def __init__(self, vocab, options):
    # import here so we don't load Dynet if just running parser.py --help for example
    from multilayer_perceptron import MLP
    from feature_extractor import FeatureExtractor
    global dy
    import dynet as dy

    global LEFT_ARC, RIGHT_ARC, SHIFT, SWAP
    LEFT_ARC, RIGHT_ARC, SHIFT, SWAP = 0, 1, 2, 3

    self.model = dy.ParameterCollection()
    self.trainer = dy.AdamTrainer(self.model, alpha=options.learning_rate)
    self.activations = {
        'tanh': dy.tanh,
        'sigmoid': dy.logistic,
        'relu': dy.rectify,
        'tanh3': (lambda x: dy.tanh(dy.cwise_multiply(dy.cwise_multiply(x, x), x)))
    }
    self.activation = self.activations[options.activation]

    self.oracle = options.oracle
    self.headFlag = options.headFlag
    self.rlMostFlag = options.rlMostFlag
    self.rlFlag = options.rlFlag
    self.k = options.k
    self.recursive_composition = options.use_recursive_composition  # ugly hack

    # dimensions depending on extended features
    self.nnvecs = ((1 if self.headFlag else 0)
                   + (2 if self.rlFlag or self.rlMostFlag else 0)
                   + (1 if self.recursive_composition else 0))

    self.feature_extractor = FeatureExtractor(self.model, options, vocab, self.nnvecs)
    self.irels = self.feature_extractor.irels

    if options.no_bilstms > 0:
        mlp_in_dims = options.lstm_output_size * 2 * self.nnvecs * (self.k + 1)
    else:
        mlp_in_dims = options.lstm_input_size * self.nnvecs * (self.k + 1)

    self.unlabeled_MLP = MLP(self.model, 'unlabeled', mlp_in_dims,
                             options.mlp_hidden_dims, options.mlp_hidden2_dims,
                             4, self.activation)
    self.labeled_MLP = MLP(self.model, 'labeled', mlp_in_dims,
                           options.mlp_hidden_dims, options.mlp_hidden2_dims,
                           2 * len(self.irels) + 2, self.activation)
def main():
    data = load_images_data()
    data = convert_data(data)

    input_layer = len(data[0][0])
    output_layer = len(data[0][1])
    train_set = tuple((pixels, answer) for pixels, answer, name in data)

    mlp = MLP(input_layer, [input_layer, 8, output_layer],
              ACTIVATIONS_FUNCTIONS['sigmoid'])
    sup = Supervisor(mlp)
    sup.train_set(train_set, 0.001, 100)

    for input_array, target_array, name in data:
        output = [round(result) for result in mlp.predict(input_array)]
        print(f"{name} - Expected={target_array} :: Predicted={output}")
def main():
    mlp = MLP(2, [2, 1], ACTIVATIONS_FUNCTIONS['sigmoid'])
    sup = Supervisor(mlp)

    # XOR truth table
    train_set = (
        ([0, 0], [0]),
        ([0, 1], [1]),
        ([1, 0], [1]),
        ([1, 1], [0]),
    )

    start_time = time.time()
    sup.train_set(train_set, 0.0005, 10000)
    end_time = time.time()
    print(f"Spent time {end_time - start_time}")

    buffer = [''] * len(train_set)
    for idx, (input_array, target_array) in enumerate(train_set):
        output = mlp.predict(input_array)
        buffer[idx] = (f"{input_array[0]} ^ {input_array[1]} = "
                       f"{output[0]} :: {target_array[0]}")
    print('\n'.join(buffer))
def finetuning(self, x, labels):
    # layer sizes: input layer, one layer per stacked RBM, then the label layer
    layers = [x.shape[1]] + [rbm.b.shape[1] for rbm in self.rbms] + [self.n_labels]
    mlp = MLP(act_type='Sigmoid', opt_type='Adam', layers=layers,
              epochs=20, learning_rate=0.01, lmbda=1e-2)
    # assign the pretrained RBM weights to the MLP and add a randomly
    # initialised last feed-forward layer
    mlp.w = [rbm.w for rbm in self.rbms] + \
        [np.random.randn(self.rbms[-1].w.shape[1], self.n_labels)]
    mlp.b = [rbm.b for rbm in self.rbms] + \
        [np.random.randn(1, self.n_labels)]
    mlp.fit(x, labels)

    # give the fine-tuned weights back to the RBMs
    for rbm, w, b in zip(self.rbms, mlp.w[:-1], mlp.b[:-1]):
        rbm.w = w
        rbm.b = b
    self.dense = {'w': mlp.w[-1], 'b': mlp.b[-1]}
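# A minimal usage sketch for the fine-tuning step above, assuming a
# hypothetical DBN wrapper that owns the pretrained `rbms` list; the `DBN`
# name, its `pretrain` method, and the data shapes are illustrative
# assumptions, not taken from the source.
import numpy as np

x = np.random.rand(1000, 784)                        # fake inputs
labels = np.eye(10)[np.random.randint(0, 10, 1000)]  # fake one-hot labels

dbn = DBN(layer_sizes=[784, 256, 64], n_labels=10)   # hypothetical wrapper
dbn.pretrain(x)            # layer-wise RBM pretraining (assumed API)
dbn.finetuning(x, labels)  # wraps the RBM weights in an MLP and copies them back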
def multilayer_perceptron(tweet_features, train_labels):
    clf_mlp = MLP(n_hidden=100)
    clf_mlp.fit(tweet_features, train_labels)
    return clf_mlp
def __init__(self, words, pos, rels, w2i, options):
    self.model = Model()
    self.trainer = AdamTrainer(self.model)
    random.seed(1)

    # available activation functions
    self.activations = {
        'tanh': tanh,
        'sigmoid': logistic,
        'relu': rectify,
        'cube': cube
    }
    self.activation = self.activations[options.activation]

    self.oracle = options.oracle
    self.ldims = options.lstm_dims * 2
    self.wdims = options.wembedding_dims
    self.pdims = options.pembedding_dims
    self.rdims = options.rembedding_dims
    self.layers = options.lstm_layers
    self.wordsCount = words
    self.vocab = {word: ind + 3 for word, ind in w2i.iteritems()}
    self.pos = {word: ind + 3 for ind, word in enumerate(pos)}
    self.rels = {word: ind for ind, word in enumerate(rels)}
    self.irels = rels

    self.headFlag = options.headFlag
    self.rlMostFlag = options.rlMostFlag
    self.rlFlag = options.rlFlag
    self.k = options.window

    self.nnvecs = (1 if self.headFlag else 0) + (2 if self.rlFlag or self.rlMostFlag else 0)

    self.external_embedding = None
    if options.external_embedding is not None:
        external_embedding_fp = open(options.external_embedding, 'r')
        external_embedding_fp.readline()
        self.external_embedding = {
            line.split(' ')[0]: [float(f) for f in line.strip().split(' ')[1:]]
            for line in external_embedding_fp
        }
        external_embedding_fp.close()

        self.edim = len(self.external_embedding.values()[0])
        self.noextrn = [0.0 for _ in xrange(self.edim)]
        self.extrnd = {word: i + 3 for i, word in enumerate(self.external_embedding)}
        self.elookup = self.model.add_lookup_parameters(
            (len(self.external_embedding) + 3, self.edim))
        for word, i in self.extrnd.iteritems():
            self.elookup.init_row(i, self.external_embedding[word])
        self.extrnd['*PAD*'] = 1
        self.extrnd['*INITIAL*'] = 2

        print 'Loaded external word embeddings. Vector dimension:', self.edim
        # logger.info('Loaded external word embeddings. Vector dimension: %s', self.edim)

    dims = self.wdims + self.pdims + (self.edim if self.external_embedding is not None else 0)
    self.blstmFlag = options.blstmFlag
    self.bibiFlag = options.bibiFlag
    self.attentionFlag = options.attentionFlag

    if self.bibiFlag:
        self.surfaceBuilders = [
            VanillaLSTMBuilder(1, dims, self.ldims * 0.5, self.model),
            VanillaLSTMBuilder(1, dims, self.ldims * 0.5, self.model)
        ]
        self.bsurfaceBuilders = [
            VanillaLSTMBuilder(1, self.ldims, self.ldims * 0.5, self.model),
            VanillaLSTMBuilder(1, self.ldims, self.ldims * 0.5, self.model)
        ]
    elif self.blstmFlag:
        if self.layers > 0:
            self.surfaceBuilders = [
                VanillaLSTMBuilder(self.layers, dims, self.ldims * 0.5, self.model),
                LSTMBuilder(self.layers, dims, self.ldims * 0.5, self.model)
            ]
        else:
            self.surfaceBuilders = [
                SimpleRNNBuilder(1, dims, self.ldims * 0.5, self.model),
                LSTMBuilder(1, dims, self.ldims * 0.5, self.model)
            ]

    self.hidden_units = options.hidden_units
    self.hidden2_units = options.hidden2_units

    self.vocab['*PAD*'] = 1
    self.pos['*PAD*'] = 1
    self.vocab['*INITIAL*'] = 2
    self.pos['*INITIAL*'] = 2

    self.wlookup = self.model.add_lookup_parameters((len(words) + 3, self.wdims))
    self.plookup = self.model.add_lookup_parameters((len(pos) + 3, self.pdims))
    self.rlookup = self.model.add_lookup_parameters((len(rels), self.rdims))

    self.word2lstm = self.model.add_parameters(
        (self.ldims, self.wdims + self.pdims +
         (self.edim if self.external_embedding is not None else 0)))
    self.word2lstmbias = self.model.add_parameters((self.ldims))
    self.lstm2lstm = self.model.add_parameters(
        (self.ldims, self.ldims * self.nnvecs + self.rdims))
    self.lstm2lstmbias = self.model.add_parameters((self.ldims))

    # forward LSTM
    # first hidden layer
    self.hidLayer = self.model.add_parameters(
        (self.hidden_units, self.ldims * self.nnvecs * (self.k + 1)))
    self.hidBias = self.model.add_parameters((self.hidden_units))
    # second hidden layer
    self.hid2Layer = self.model.add_parameters(
        (self.hidden2_units, self.hidden_units))
    self.hid2Bias = self.model.add_parameters((self.hidden2_units))
    # output layer
    self.outLayer = self.model.add_parameters(
        (3, self.hidden2_units if self.hidden2_units > 0 else self.hidden_units))
    self.outBias = self.model.add_parameters((3))

    # backward LSTM
    self.rhidLayer = self.model.add_parameters(
        (self.hidden_units, self.ldims * self.nnvecs * (self.k + 1)))
    self.rhidBias = self.model.add_parameters((self.hidden_units))
    self.rhid2Layer = self.model.add_parameters(
        (self.hidden2_units, self.hidden_units))
    self.rhid2Bias = self.model.add_parameters((self.hidden2_units))
    self.routLayer = self.model.add_parameters(
        (2 * (len(self.irels) + 0) + 1,
         self.hidden2_units if self.hidden2_units > 0 else self.hidden_units))
    self.routBias = self.model.add_parameters((2 * (len(self.irels) + 0) + 1))

    # attention layers
    encoder_input_dim = options.encoder_output_size
    self.stack_encoder = AttentionNetwork(
        model=self.model,
        input_dim=encoder_input_dim,
        output_dim=options.encoder_output_size,
        rnn_dropout_rate=0.33)
    self.buffer_encoder = AttentionNetwork(
        model=self.model,
        input_dim=encoder_input_dim,
        output_dim=options.encoder_output_size,
        rnn_dropout_rate=0.33)

    self.unlabeled_MLP = MLP(self.model, 'unlabeled', encoder_input_dim * 2,
                             options.hidden_units, options.hidden2_units,
                             4, self.activation)
    self.labeled_MLP = MLP(self.model, 'labeled', encoder_input_dim * 2,
                           options.hidden_units, options.hidden2_units,
                           2 * len(self.irels) + 2, self.activation)
from mnist import MNIST
from multilayer_perceptron import MLP

mnist = MNIST(one_hot_encoding=True, z_score=True)
X_train = mnist.train_images
y_train = mnist.train_labels
X_test = mnist.test_images
y_test = mnist.test_labels

clf = MLP(hidden_layer_sizes=(500, 300), activation='swish', verbose=True)
clf.fit(X_train, y_train)

test_loss = clf._compute_loss(X_test, y_test)
test_acc = clf.score(X_test, y_test)
print('\nTest loss: {:.3}\tTest accuracy: {:.3}'.format(test_loss, test_acc))
def test_MLP_model_mnist(dataset_name='mnist.pkl.gz', learning_rate=0.01,
                         L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
                         batch_size=20, n_hidden=500):
    # Set up the dataset
    dataset = load_data(dataset_name)

    # Split the data into a training, validation and test set
    train_data, train_labels = dataset[0]
    test_data, test_labels = dataset[1]
    validation_data, validation_labels = dataset[2]

    # Compute the number of minibatches for each set
    n_train_batches = train_data.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = validation_data.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_data.get_value(borrow=True).shape[0] / batch_size

    data_dim = (28, 28)  # The dimension of each image in the dataset
    data_classes = 10    # The number of classes within the data

    # Build the model
    # ---------------
    # Allocate symbolic variables for the data
    index = T.lscalar()  # This is the index to a minibatch
    x = T.matrix('x')    # Data (rasterized images)
    y = T.ivector('y')   # Labels (1d vector of ints)

    rng = np.random.RandomState(1234)

    # Construct the MLP
    classifier = MLP(rng=rng, input=x, n_in=data_dim[0] * data_dim[1],
                     n_hidden=n_hidden, n_out=data_classes)

    # Cost to minimize during training, with the regularization terms added
    cost = (classifier.negative_log_likelihood(y)
            + L1_reg * classifier.L1
            + L2_reg * classifier.L2_sqr)

    # Compile Theano functions that compute the mistakes made by the model on a minibatch
    test_model = th.function(
        inputs=[index],  # This function is for the test data
        outputs=classifier.errors(y),
        givens={x: test_data[index * batch_size:(index + 1) * batch_size],
                y: test_labels[index * batch_size:(index + 1) * batch_size]})

    validate_model = th.function(
        inputs=[index],  # This function is for the validation data
        outputs=classifier.errors(y),
        givens={x: validation_data[index * batch_size:(index + 1) * batch_size],
                y: validation_labels[index * batch_size:(index + 1) * batch_size]})

    # Compute the gradient of the cost with respect to theta
    grad_params = [T.grad(cost, param) for param in classifier.params]

    # Specify how to update the model parameters as a list of (variable, update expression) pairs
    updates = [(param, param - learning_rate * grad_param)
               for param, grad_param in zip(classifier.params, grad_params)]

    # Compile a Theano function that returns the cost and updates the parameters
    train_model = th.function(
        inputs=[index],  # Index of the minibatch that defines x with label y
        outputs=cost,    # Cost/loss associated with x, y
        updates=updates,
        givens={x: train_data[index * batch_size:(index + 1) * batch_size],
                y: train_labels[index * batch_size:(index + 1) * batch_size]})

    # Train the model
    # ---------------
    # Set up the early-stopping parameters
    patience = 10000               # Minimum number of examples to examine
    patience_increase = 2          # How much longer to wait once a new best is found
    improvement_threshold = 0.995  # Value of a significant relative improvement
    validation_frequency = min(n_train_batches, patience / 2)  # Minibatches between validations

    best_validation_loss = np.inf
    test_score = 0
    start_time = time.clock()

    # Set up the training loop
    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)
            # Set the iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # Compute the zero-one loss on the validation set
                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # Check if the current validation score is the best so far
                if this_validation_loss < best_validation_loss:
                    # Increase the patience if the loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    best_validation_loss = this_validation_loss

                    # Test on the test set
                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_score = np.mean(test_losses)
                    print('epoch %i, minibatch %i/%i, test error of best model %f %%' %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            # Stop the loop if we have exhausted our patience
            if patience <= iter:
                done_looping = True
                break

    # The loop has ended, so record the time it took
    end_time = time.clock()

    # Print out the results and timing information
    print('Optimization complete with best validation score of %f %%, '
          'with test performance %f %%' %
          (best_validation_loss * 100., test_score * 100.))
    print 'The code ran for %d epochs with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.1fs' % (end_time - start_time))
def __init__(self, vocab, options):
    # import here so we don't load Dynet if just running parser.py --help for example
    from multilayer_perceptron import MLP
    from feature_extractor import FeatureExtractor
    global dy
    import dynet as dy

    global LEFT_ARC, RIGHT_ARC, SHIFT, SWAP
    LEFT_ARC, RIGHT_ARC, SHIFT, SWAP = 0, 1, 2, 3
    global NO_COMP, SOFT_COMP, HARD_COMP, GEN_COMP
    NO_COMP, HARD_COMP, SOFT_COMP, GEN_COMP = 0, 1, 2, 3

    self.composition = options.nucleus_composition

    all_rels = vocab[5]
    functional_rels = ['det', 'case', 'clf', 'cop', 'mark', 'aux', 'cc']
    if self.composition in [HARD_COMP, SOFT_COMP]:
        self.compositional_relations = functional_rels
    elif self.composition in [GEN_COMP]:
        self.compositional_relations = all_rels
    else:
        self.compositional_relations = []
    self.compositional_relations_dict = {
        rel: idx for idx, rel in enumerate(self.compositional_relations)
    }

    self.model = dy.ParameterCollection()
    self.trainer = dy.AdamTrainer(self.model, alpha=options.learning_rate)
    self.activations = {
        'tanh': dy.tanh,
        'sigmoid': dy.logistic,
        'relu': dy.rectify,
        'tanh3': (lambda x: dy.tanh(dy.cwise_multiply(dy.cwise_multiply(x, x), x)))
    }
    self.activation = self.activations[options.activation]

    self.oracle = options.oracle
    self.headFlag = options.headFlag
    self.rlMostFlag = options.rlMostFlag
    self.rlFlag = options.rlFlag
    self.k = options.k

    # dimensions depending on extended features
    self.nnvecs = (1 if self.headFlag else 0) + (2 if self.rlFlag or self.rlMostFlag else 0)

    self.feature_extractor = FeatureExtractor(self.model, options, vocab, self.nnvecs)
    self.irels = self.feature_extractor.irels

    if options.no_bilstms > 0:
        mlp_in_dims = options.lstm_output_size * 2 * self.nnvecs * (self.k + 1)
    else:
        mlp_in_dims = self.feature_extractor.lstm_input_size * self.nnvecs * (self.k + 1)
    print("The size of the MLP input layer is {0}".format(mlp_in_dims))

    if self.composition in [SOFT_COMP, GEN_COMP]:
        rel_emb_sz = 10
        self.cmp_rel_lookup = self.model.add_lookup_parameters(
            (len(self.compositional_relations), rel_emb_sz))
        cmb_sz = 2 * 2 * options.lstm_output_size + rel_emb_sz
        out_sz = 2 * options.lstm_output_size
        self.combiner_W1 = self.model.add_parameters((out_sz, cmb_sz), name='cmbW1')
        self.combiner_b1 = self.model.add_parameters(out_sz, name='cmbb1')

    self.unlabeled_MLP = MLP(self.model, 'unlabeled', mlp_in_dims,
                             options.mlp_hidden_dims, options.mlp_hidden2_dims,
                             4, self.activation)
    self.labeled_MLP = MLP(self.model, 'labeled', mlp_in_dims,
                           options.mlp_hidden_dims, options.mlp_hidden2_dims,
                           2 * len(self.irels) + 2, self.activation)
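# The combiner parameters above imply a composition step that this snippet
# does not show. A minimal sketch of what it might look like, assuming DyNet
# expressions for the head and dependent vectors; the `compose` name and the
# tanh nonlinearity are assumptions, only the shapes come from cmb_sz/out_sz.
def compose(self, head_vec, dep_vec, rel):
    rel_emb = self.cmp_rel_lookup[self.compositional_relations_dict[rel]]
    inp = dy.concatenate([head_vec, dep_vec, rel_emb])  # size cmb_sz
    # recent DyNet versions convert Parameters to expressions implicitly
    return dy.tanh(self.combiner_W1 * inp + self.combiner_b1)  # size out_sz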
def __init__(self, words, pos, rels, cpos, langs, w2i, ch, options):
    """
    0 = LA, 1 = RA, 2 = SH, 3 = SW
    """
    # import here so we don't load Dynet if just running parser.py --help for example
    global dy
    import dynet as dy

    self.model = dy.ParameterCollection()
    self.trainer = dy.AdamTrainer(self.model, alpha=options.learning_rate)
    self.activations = {
        'tanh': dy.tanh,
        'sigmoid': dy.logistic,
        'relu': dy.rectify,
        'tanh3': (lambda x: dy.tanh(dy.cwise_multiply(dy.cwise_multiply(x, x), x)))
    }
    self.activation = self.activations[options.activation]

    self.oracle = options.oracle
    self.shareMLP = options.shareMLP
    self.config_lembed = options.lembed_config

    # vectors used
    self.headFlag = options.headFlag
    self.rlMostFlag = options.rlMostFlag
    self.rlFlag = options.rlFlag
    self.k = options.k

    # dimensions depending on extended features
    self.nnvecs = (1 if self.headFlag else 0) + (2 if self.rlFlag or self.rlMostFlag else 0)

    self.feature_extractor = FeatureExtractor(self.model, words, rels, langs,
                                              w2i, ch, self.nnvecs, options)
    self.irels = self.feature_extractor.irels

    # MLPs
    mlp_in_dims = options.lstm_output_size * 2 * self.nnvecs * (self.k + 1)
    if self.config_lembed:
        mlp_in_dims += options.lang_emb_size
    h1 = options.mlp_hidden_dims
    h2 = options.mlp_hidden2_dims
    if not options.multiling or self.shareMLP:
        self.unlabeled_MLP = MLP(self.model, mlp_in_dims, h1, h2, 4, self.activation)
        self.labeled_MLP = MLP(self.model, mlp_in_dims, h1, h2,
                               2 * len(rels) + 2, self.activation)
    else:
        self.labeled_mlpdict = {}
        self.unlabeled_mlpdict = {}
        for lang in self.feature_extractor.langs:
            self.labeled_mlpdict[lang] = MLP(self.model, mlp_in_dims, h1, h2,
                                             2 * len(rels) + 2, self.activation)
            self.unlabeled_mlpdict[lang] = MLP(self.model, mlp_in_dims, h1, h2,
                                               4, self.activation)
class Classifier(object):
    def __init__(self, task_type='transitivity', classifier_type='mlp'):
        self.task_type = task_type
        self.task_undefined = False
        self.classifier_type = classifier_type

    def get_xys(self, x_dict, y_dict):
        x_list = []
        y_list = []
        x_ids = []
        for key in x_dict:
            y_avc = y_dict[key[:2]]
            yItem = self.get_task_y(y_avc, self.task_type)
            if yItem is not None:
                y_list.append(yItem)
                x_list.append(x_dict[key])
                x_ids.append(key)
        return x_ids, x_list, y_list

    def get_y_gold(self, x_ids, y_gold):
        return [self.get_task_y(y_gold[x_id[:2]], self.task_type)
                for x_id in x_ids]

    def get_task_y(self, avc, task_type):
        # this should not be here, or the class is misnamed
        if task_type == "transitivity":
            return sum([avc.has_dobj, avc.has_iobj])
        elif task_type == "intransitive":
            return sum([avc.has_dobj, avc.has_iobj]) == 0
        elif task_type == "dobj":
            return int(avc.has_dobj)
        elif task_type == "iobj":
            return int(avc.has_iobj)
        elif task_type == "subj":
            return int(avc.has_subj)
        elif task_type == "subj_num":
            return avc.subj_num
        elif task_type == "subj_pers":
            return avc.subj_pers
        elif task_type == 'subj_n_pers':
            if avc.subj_num is None and avc.subj_pers is None:
                return None
            else:
                return "".join([str(i) for i in
                                filter(None, [avc.subj_num, avc.subj_pers])])
        else:
            raise Exception("Task unknown: %s" % task_type)

    def get_train_info(self, x_train, y_train):
        # check that we have at least 2 classes and 10 training examples
        if len(set(y_train)) < 2 or len(x_train) < 10:
            self.task_undefined = True
            self.trainsize = 0
        else:
            self.trainsize = len(x_train)

    def train(self, train_x, train_y):
        x_ids, x_train, y_train = self.get_xys(train_x, train_y)
        self.get_train_info(x_train, y_train)
        if self.task_undefined:
            return
        if self.classifier_type == 'perceptron':
            # averaged perceptron
            from sklearn.linear_model import SGDClassifier
            self.model = SGDClassifier(loss="perceptron", eta0=1,
                                       learning_rate="constant",
                                       penalty=None, average=10)
            self.model.fit(x_train, y_train)
        elif self.classifier_type == 'mlp':
            from multilayer_perceptron import MLP
            data = list(zip(x_train, y_train))  # materialise for repeated epochs
            labels = set(y_train)
            input_size = len(x_train[0])
            out_size = len(labels)
            hidden_size = input_size  # same as Adi et al.
            self.model = MLP(input_size, hidden_size, out_size, labels,
                             epochs=100)
            self.model.train(data)

    def predict(self, test_x, test_y):
        if not self.task_undefined:
            x_ids, x_test, y_test = self.get_xys(test_x, test_y)
            y_pred = self.model.predict(x_test)
            self.testsize = len(x_test)
            # write pred to file
            return x_ids, y_pred
        else:
            self.testsize = 0
            return None, None

    def evaluate(self, x_ids, pred, all_y_gold):
        if x_ids is not None and pred is not None:
            y_gold = self.get_y_gold(x_ids, all_y_gold)
            return accuracy_score(y_gold, pred)
        else:
            return np.nan

    def majority_baseline(self, train_x, train_y, test_x, test_y):
        x_ids, x_train, y_train = self.get_xys(train_x, train_y)
        x_ids, x_test, y_test = self.get_xys(test_x, test_y)
        y_maj = max(y_train, key=y_train.count)
        y_maj_pred = [y_maj for i in range(len(y_test))]
        return accuracy_score(y_test, y_maj_pred)
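# A minimal usage sketch for the Classifier above; `AVC` and the toy
# dictionaries are hypothetical stand-ins for the real feature pipeline
# (x keys are tuples whose first two elements index the gold record).
from collections import namedtuple

AVC = namedtuple('AVC', 'has_dobj has_iobj has_subj subj_num subj_pers')

train_y = {('s1', 'v1'): AVC(True, False, True, 'Sing', '3'),
           ('s2', 'v2'): AVC(False, False, True, 'Plur', '3')}
train_x = {('s1', 'v1', 0): [0.1, 0.4], ('s2', 'v2', 0): [0.9, 0.2]}

clf = Classifier(task_type='dobj', classifier_type='perceptron')
clf.train(train_x, train_y)  # with this few examples the task is marked undefined
x_ids, y_pred = clf.predict(train_x, train_y)  # then returns (None, None)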
def __init__(self, vocab, options):
    # import here so we don't load Dynet if just running parser.py --help for example
    from multilayer_perceptron import MLP
    from feature_extractor import FeatureExtractor
    global dy
    import dynet as dy

    global LEFT_ARC, RIGHT_ARC, SHIFT, SWAP
    LEFT_ARC, RIGHT_ARC, SHIFT, SWAP = 0, 1, 2, 3

    self.model = dy.ParameterCollection()
    self.trainer = dy.AdamTrainer(self.model, alpha=options.learning_rate)
    self.activations = {
        'tanh': dy.tanh,
        'sigmoid': dy.logistic,
        'relu': dy.rectify,
        'tanh3': (lambda x: dy.tanh(dy.cwise_multiply(dy.cwise_multiply(x, x), x)))
    }
    self.activation = self.activations[options.activation]

    self.oracle = options.oracle
    self.headFlag = options.headFlag
    self.rlMostFlag = options.rlMostFlag
    self.rlFlag = options.rlFlag
    self.k = options.k

    # The probe looks at distances between tokens ahead, normalized by the
    # smallest, among:
    #   s0 - b0
    #   s0 - b1
    #   b0 - closest bi: if < s0-b0, do a Shift
    #   closest si - b0: if ~= s0-b0, do a Reduce
    self.distances = 4

    # dimensions depending on extended features
    self.nnvecs = (1 if self.headFlag else 0) + (2 if self.rlFlag or self.rlMostFlag else 0)

    self.feature_extractor = FeatureExtractor(self.model, options, vocab, self.nnvecs)
    self.irels = self.feature_extractor.irels

    if options.no_bilstms > 0:  # number of bilstms
        mlp_in_dims = options.lstm_output_size * 2 * self.nnvecs * (self.k + 1)
    else:
        mlp_in_dims = self.feature_extractor.lstm_input_size * self.nnvecs * (self.k + 1)

    # use attention
    if options.bert and options.attention:
        # add attention vectors for stack to top of buffer and vice versa
        attention_size = self.k * 2
        # all layers:
        # layers = self.feature_extractor.bert.model.config.num_hidden_layers
        # attention_size = layers * layers * self.k  # * 2
        mlp_in_dims += attention_size

    # Sartiano
    if options.distance_probe_conf:
        print('Distance Probe enabled', file=sys.stderr)
        from distance_probe import DistanceProbe
        self.distance_probe = DistanceProbe(options.distance_probe_conf,
                                            options.dynet_seed)
        mlp_in_dims += self.distances
    else:
        self.distance_probe = None

    self.attention_indices = [int(x) for x in options.attention.split(',')] \
        if options.attention else []

    self.unlabeled_MLP = MLP(self.model, 'unlabeled', mlp_in_dims,
                             options.mlp_hidden_dims, options.mlp_hidden2_dims,
                             SWAP + 1, self.activation)
    self.labeled_MLP = MLP(self.model, 'labeled', mlp_in_dims,
                           options.mlp_hidden_dims, options.mlp_hidden2_dims,
                           2 * len(self.irels) + 2, self.activation)
    print('MLP size: (%d, %d)' % (mlp_in_dims, options.mlp_hidden_dims),
          file=sys.stderr)
def run(num, epochs=33, layer_sizes=None, activation=nn.ELU,
        use_main_effect_nets=True, num_samples=30000, num_features=10,
        valid_size=5, test_size=5, std_scale=True, my_data_norm=False,
        lr=1e-3, l1_const=5e-5, dropout_p=0, early_stopping=False,
        patience=5, abs_val=True, verbose=True, order=2, o=2,
        greedy_heuristic=False, gelu_final_layer=False,
        gelu_last_layer=False, gelu_alt_layer=False,
        gelu_main_effects=False):
    # Params
    device = torch.device("cuda" if args.cuda else "cpu")
    if layer_sizes is None:
        layer_sizes = [140, 100, 60, 20]

    # Data
    # set_seed(42)
    X = generate_X(num_samples, num)
    Y, ground_truth = globals()["f_{}".format(num)](X.transpose())
    if my_data_norm:
        X = np.array(X)
        X = (X - X.min(0)) / X.ptp(0)
    data_loaders = preprocess_data(X, Y, valid_size=valid_size,
                                   test_size=test_size, std_scale=std_scale,
                                   get_torch_loaders=True)
    X_train = np.concatenate([data[0] for data in data_loaders["train"]], 0)

    # Model and training
    model = MLP(num_features, layer_sizes,
                use_main_effect_nets=use_main_effect_nets,
                activation=activation, dropout_p=dropout_p,
                gelu_final_layer=gelu_final_layer,
                gelu_last_layer=gelu_last_layer,
                gelu_alt_layer=gelu_alt_layer,
                gelu_main_effects=gelu_main_effects).to(device)
    model, mlp_loss = train(model, data_loaders, nepochs=epochs,
                            device=device, learning_rate=lr,
                            l1_const=l1_const, verbose=verbose,
                            early_stopping=early_stopping, patience=patience)

    # NID AUC
    model_weights = get_weights(model)
    pairwise_interactions, _ = get_interactions(model_weights, pairwise=True,
                                                one_indexed=True)
    # Automatically selects the top 100 excluding redundant subsets, and
    # unpruned -- can use the internal func to prune mine
    anyorder_interactions_pruned, anyorder_interactions_unpruned = \
        get_interactions(model_weights, one_indexed=True)
    anyorder_interactions_unpruned = [
        inter for inter in anyorder_interactions_unpruned
        if len(inter[0]) <= order
    ]
    # auc_nid = get_auc(pairwise_interactions,
    #                   [{i + 1 for i in inter} for inter in ground_truth],
    #                   verbose=verbose)
    if order == 2:
        anyorder_interactions_unpruned = pairwise_interactions

    # My AUC
    n_way_NID = set([
        tuple([inr - 1 for inr in inter[0]])
        for inter in anyorder_interactions_unpruned
        if len(inter[0]) <= order
    ])
    auc_mine, interactions = test_inputs_n_way(
        X_train, model, ground_truth, device, abs_val, order, o, n_way_NID,
        verbose, greedy_heuristic=greedy_heuristic)
    # auc_mine = aucs1

    aucs_nid = []
    for nth in interactions:
        new = copy.deepcopy(anyorder_interactions_unpruned)
        for interaction in [inr[0] for inr in nth]:
            if interaction not in n_way_NID:
                # use a tuple, not a generator, so it can be inspected repeatedly
                new.append((tuple(inr + 1 for inr in interaction), 0))
        # two_and_three_way = [inter for inter in anyorder_interactions_unpruned
        #                      if len(inter[0]) <= order]
        auc_nid = [
            get_auc([item for item in list(new)
                     if len(tuple(tuple(item)[0])) == oth + 1]
                    if oth + 1 > 2 else pairwise_interactions,
                    [{i + 1 for i in inter} for inter in ground_truth],
                    verbose=verbose)
            if max([len(g) for g in ground_truth]) >= oth + 1 else 0
            for oth in range(1, order)
        ]
        aucs_nid.append(auc_nid)

    # Requires a subset of "detected" higher-order interactions and computes
    # precision (% of those that are real)
    r_prec = get_anyorder_R_precision(
        anyorder_interactions_pruned,
        [{i + 1 for i in inter} for inter in ground_truth])
    return auc_mine, aucs_nid
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             dataset='mnist.pkl.gz', batch_size=20, n_hidden=500):
    """
    Run MLP SGD on MNIST

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic gradient)

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                    http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    rng = numpy.random.RandomState(1234)

    classifier = MLP(
        rng=rng,
        input=x,
        n_in=28 * 28,  # MNIST specific
        n_hidden=n_hidden,
        n_out=10
    )

    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # classification errors
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compute the gradient of the cost with respect to all params
    gparams = [T.grad(cost, param) for param in classifier.params]

    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
    patience = 10000       # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
                                   # go through this many minibatches before
                                   # checking the network on the validation
                                   # set; in this case we check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:  # '%' (not '&'): validate periodically
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                if this_validation_loss < best_validation_loss:
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test on test set
                    test_losses = [test_model(i)
                                   for i in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' + os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)),
          file=sys.stderr)
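# The patience schedule above, distilled into a framework-free sketch;
# `validate` is a hypothetical callable returning the current validation loss.
def early_stopping_loop(validate, n_iters, patience=10000,
                        patience_increase=2, improvement_threshold=0.995,
                        validation_frequency=100):
    best = float('inf')
    for it in range(n_iters):
        if (it + 1) % validation_frequency == 0:
            loss = validate()
            if loss < best:
                # only a *significant* improvement buys more patience
                if loss < best * improvement_threshold:
                    patience = max(patience, it * patience_increase)
                best = loss
        if patience <= it:
            break  # patience exhausted: stop early
    return best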
def main(): """ Task Main """ # download and parse Fashion MNIST training set train_set = datasets.FashionMNIST( root='./data', train=True, download=True, transform=transforms.Compose([ transforms.ToTensor() ]) ) # download and parse Fashion MNIST testing set test_set = datasets.FashionMNIST( root='./data', train=False, download=True, transform=transforms.Compose([ transforms.ToTensor() ]) ) # instantiate train data loader train_data_loader = torch.utils.data.DataLoader(train_set, batch_size=1000) # instantiate test data loader test_data_loader = torch.utils.data.DataLoader(test_set, batch_size=1000) # instantiate multilayer perceptron neural network mlp = MLP() # train the multilayer perceptron neural network mlp_performance = mlp.train( train_data_loader, learning_rate=0.01, n_epochs=20) # test the multilayer perceptron neural network mlp.test(test_data_loader) # instantiate base architecture convolutional neural network base_cnn = BaseCNN() # train the base architecture convolutional neural network base_cnn_performance = base_cnn.train( train_data_loader, learning_rate=0.01, n_epochs=20) # test the base architecture convolutional neural network base_cnn.test(test_data_loader) # instantiate the variant 1 architecture convolutional neural network cnn1 = CNN1() # train the variant 1 architecture convolutional neural network cnn1_performance = cnn1.train( train_data_loader, learning_rate=0.01, n_epochs=20) # test the variant 1 architecture convolutional neural network cnn1.test(test_data_loader) # instantiate the variant 2 architecture convolutional neural network cnn2 = CNN2() # train the variant 2 architecture convolutional neural network cnn2_performance = cnn2.train( train_data_loader, learning_rate=0.01, n_epochs=20) # test the variant 2 architecture convolutional neural network cnn2.test(test_data_loader) # plot the test results plt.plot(range(20), mlp_performance, color='black', label='MLP') plt.plot(range(20), base_cnn_performance, color='red', label='Base CNN') plt.plot(range(20), cnn1_performance, color='green', label='CNN 1') plt.plot(range(20), cnn2_performance, color='blue', label='CNN 2') plt.title('Neural Network Image Classification Accuracy') plt.xlabel('epochs') plt.ylabel('accuracy') plt.legend() plt.show()