Example 1
    def test(self,
             test_data,
             batch_size,
             eval_params=None,
             print_batches=False):
        epoch_loss = 0
        batches_eval = batcher.batch_iter(test_data,
                                          batch_size,
                                          1,
                                          shuffle=False)
        eval_batch_counter = 1

        for batch_eval in batches_eval:
            feed_dict_eval, golds_batch_eval = self.feed_dict_function(
                self.model, batch_eval, None, predict=True)
            preds_batch_eval = self.predict(feed_dict_eval)
            batch_eval_loss = self.model.loss.eval(session=self.session,
                                                   feed_dict=feed_dict_eval)
            epoch_loss += batch_eval_loss

            # Accumulate gold labels and predictions across batches,
            # initializing them on the first batch.
            if eval_batch_counter == 1:
                golds = golds_batch_eval
                preds = preds_batch_eval
            else:
                golds = np.concatenate((golds, golds_batch_eval), axis=0)
                preds = np.concatenate((preds, preds_batch_eval), axis=0)
            if print_batches:
                print(eval_batch_counter)
            eval_batch_counter += 1

        if self.eval_func is not None:
            score = self.eval_func(golds, preds, eval_params)
            return preds, epoch_loss, score
        else:
            return preds, epoch_loss
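
All of the examples on this page draw mini-batches from batcher.batch_iter, whose implementation is not shown here. A minimal sketch of what such a helper typically looks like, inferred from the call sites above (the shuffling strategy and the exact batch-count arithmetic are assumptions, not the source's code):

import numpy as np

def batch_iter(data, batch_size, num_epochs, shuffle=True):
    """Yield mini-batches over `data` for `num_epochs` passes."""
    data = list(data)
    data_size = len(data)
    num_batches_per_epoch = int((data_size - 1) / batch_size) + 1
    for _ in range(num_epochs):
        # Reshuffle (a reordered copy of) the data at the start of each epoch.
        order = np.random.permutation(data_size) if shuffle else range(data_size)
        epoch_data = [data[i] for i in order]
        for batch_num in range(num_batches_per_epoch):
            start = batch_num * batch_size
            end = min(start + batch_size, data_size)
            yield epoch_data[start:end]

Under this sketch the last batch of an epoch can be smaller than batch_size, which is why several examples below guard their loops with len(batch) == batch_size checks. Note also that, relative to this sketch, the training loops' batches_in_epoch = int(len(data) / batch_size) + 1 overcounts by one whenever the data size divides evenly by batch_size.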
Example 2
    def train(self,
              train_data,
              batch_size,
              max_num_epochs,
              num_epochs_not_better_end=5,
              epoch_diff_smaller_end=1e-5,
              print_batch_losses=True,
              configuration=None,
              eval_params=None,
              shuffle_data=True):
        batch_counter = 0
        epoch_counter = 0
        epoch_losses = []
        epoch_loss = 0
        batches_in_epoch = int(len(train_data) / batch_size) + 1

        batches = batcher.batch_iter(train_data,
                                     batch_size,
                                     max_num_epochs,
                                     shuffle=shuffle_data)
        for batch in batches:
            batch_counter += 1

            feed_dict, gold_labels = self.feed_dict_function(
                self.model, batch, config=configuration)
            if print_batch_losses:
                print("Batch " + str(batch_counter) +
                      ": running single iteration training...")
            self.train_model_single_iteration(feed_dict)

            batch_loss = self.model.loss.eval(session=self.session,
                                              feed_dict=feed_dict)
            if print_batch_losses:
                print("Batch " + str(batch_counter) + ": " + str(batch_loss))

            if batch_counter % batches_in_epoch == 0:
                epoch_counter += 1
                print("Evaluating the epoch loss for epoch " +
                      str(epoch_counter))

                # NOTE: this unpacking assumes self.eval_func is None;
                # with an eval_func set, test() also returns a score.
                preds, epoch_loss = self.test(train_data, batch_size,
                                              eval_params, False)

                print("Epoch " + str(epoch_counter) + ": " + str(epoch_loss))
                print("Previous epochs: " + str(epoch_losses))

                if len(epoch_losses) == num_epochs_not_better_end and (
                        epoch_losses[0] - epoch_loss < epoch_diff_smaller_end):
                    break
                else:
                    epoch_losses.append(epoch_loss)
                    epoch_loss = 0
                    if len(epoch_losses) > num_epochs_not_better_end:
                        epoch_losses.pop(0)
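
The stopping rule in the epoch block above can be read in isolation: training ends once the window of retained epoch losses is full and the oldest loss beats the newest by less than epoch_diff_smaller_end. A standalone restatement of that test (the function name and signature are mine, not the source's):

def loss_converged(epoch_losses, new_loss, window=5, min_diff=1e-5):
    """True once `window` losses are retained and the improvement of
    `new_loss` over the oldest retained loss is below `min_diff`."""
    return (len(epoch_losses) == window
            and epoch_losses[0] - new_loss < min_diff)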
Example 3
    def test(self,
             test_data,
             batch_size,
             eval_params=None,
             print_batches=False,
             batch_size_irrelevant=True,
             compute_loss=False):
        if compute_loss:
            epoch_loss = 0
        batches_eval = batcher.batch_iter(test_data,
                                          batch_size,
                                          1,
                                          shuffle=False)
        eval_batch_counter = 1

        for batch_eval in batches_eval:
            if (batch_size_irrelevant or len(batch_eval) == batch_size):
                feed_dict_eval, golds_batch_eval = self.feed_dict_function(
                    self.model, batch_eval, None, predict=True)
                preds_batch_eval = self.predict(feed_dict_eval)
                if compute_loss:
                    batch_eval_loss = self.model.loss.eval(
                        session=self.session, feed_dict=feed_dict_eval)
                    epoch_loss += batch_eval_loss

                if eval_batch_counter == 1:
                    golds = golds_batch_eval
                    preds = preds_batch_eval
                else:
                    golds = np.concatenate((golds, golds_batch_eval), axis=0)
                    preds = np.concatenate((preds, preds_batch_eval), axis=0)
                if print_batches:
                    print("Eval batch counter: " + str(eval_batch_counter),
                          flush=True)
            eval_batch_counter += 1

        if self.eval_func is not None:
            score = self.eval_func(golds, preds, eval_params)
            if compute_loss:
                return preds, score, epoch_loss
            else:
                return preds, score
        else:
            if compute_loss:
                return preds, epoch_loss
            else:
                return preds
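
Because the arity of test()'s return value depends on both compute_loss and on whether self.eval_func is set, callers must unpack it accordingly. A hypothetical call site (trainer stands in for an instance of the class above; the argument values are placeholders):

# eval_func configured and loss computation enabled: three return values.
preds, score, epoch_loss = trainer.test(test_data, batch_size=32,
                                        eval_params=None, compute_loss=True)

# No eval_func and no loss computation: only the predictions come back.
preds = trainer.test(test_data, batch_size=32)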
Example 4
    def train_and_test(self,
                       session,
                       class_labels,
                       x_train,
                       y_train,
                       x_test,
                       y_test,
                       batch_size,
                       num_epochs,
                       cl_perf=None,
                       overall_perf=True,
                       num_epochs_not_better_end=10,
                       manual_features=False):
        batch_counter = 0
        epoch_loss = 0
        epoch_counter = 0
        last_epoch_results = []
        best_f = 0
        best_epoch = 0
        best_conf_mat = None
        best_predictions = []

        num_batches_per_epoch = int(
            (len(x_train) if not manual_features else len(x_train[0])) /
            batch_size) + 1

        if manual_features:
            batches = batcher.batch_iter(
                list(zip(x_train[0], x_train[1], y_train)), batch_size,
                num_epochs)
        else:
            batches = batcher.batch_iter(
                list(zip(x_train, y_train)), batch_size, num_epochs)
        for batch in batches:
            if manual_features:
                x_b, x_b_man, y_b = zip(*batch)
                batch_loss = self.classifier.train(session,
                                                   x_b,
                                                   y_b,
                                                   man_feats=x_b_man)
            else:
                x_b, y_b = zip(*batch)
                x_b = np.array(x_b)
                y_b = np.array(y_b)
                batch_loss = self.classifier.train(session, x_b, y_b)
            epoch_loss += batch_loss

            batch_counter += 1

            #if batch_counter % 50 == 0:
            #print("Batch " + str(batch_counter) + " loss: " + str(batch_loss))
            # evaluating current model's performance on test
            #preds, gold = self.classifier.predict(session, x_test, y_test)
            #self.evaluate_performance(class_labels, preds, gold, cl_perf, overall_perf, " (test set) ")

            if batch_counter % num_batches_per_epoch == 0:
                epoch_counter += 1
                print("Epoch " + str(epoch_counter) + " loss: " +
                      str(epoch_loss))
                last_epoch_results.append(epoch_loss)
                epoch_loss = 0

                if manual_features:
                    x_test_text = x_test[0]
                    x_test_manual = x_test[1]
                    preds, gold = self.classifier.predict(
                        session, x_test_text, y_test, man_feats=x_test_manual)

                else:
                    preds, gold = self.classifier.predict(
                        session, x_test, y_test)

                cm = self.evaluate_performance(class_labels, preds, gold,
                                               cl_perf, overall_perf,
                                               " (test set) ")

                fepoch = cm.accuracy  # cm.get_class_performance("1")[2]
                if fepoch > best_f:
                    best_f = fepoch
                    best_epoch = epoch_counter
                    best_conf_mat = cm
                    best_predictions = preds

                if len(last_epoch_results) > num_epochs_not_better_end:
                    last_epoch_results.pop(0)
                print("Last epochs: " + str(last_epoch_results))

                if (len(last_epoch_results) == num_epochs_not_better_end
                        and last_epoch_results[0] < last_epoch_results[-1]):
                    print("End condition satisfied, training finished.")
                    break

        #preds, gold = self.classifier.predict(session, x_train, y_train)
        #self.evaluate_performance(class_labels, preds, gold, cl_perf, overall_perf, " (train set) ")

        #preds, gold = self.classifier.predict(session, x_test, y_test)
        #conf_mat = self.evaluate_performance(class_labels, preds, gold, cl_perf, overall_perf, " (test set) ")
        #return conf_mat
        return best_conf_mat, best_epoch, best_predictions
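
A hypothetical driver for train_and_test, assuming the enclosing class (called helper here) wraps a TensorFlow 1.x classifier; the Session setup is standard TF1 boilerplate and the data variables are placeholders for the reader's own arrays:

import tensorflow as tf

with tf.Session() as session:
    # Initialize the classifier's variables before training.
    session.run(tf.global_variables_initializer())
    conf_mat, best_epoch, best_preds = helper.train_and_test(
        session, class_labels, x_train, y_train, x_test, y_test,
        batch_size=64, num_epochs=20, num_epochs_not_better_end=10)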
Example 5
    def train_dev(self,
                  train_data,
                  dev_data,
                  batch_size,
                  max_num_epochs,
                  num_devs_not_better_end=5,
                  batch_dev_perf=100,
                  print_batch_losses=True,
                  dev_score_maximize=True,
                  configuration=None,
                  print_training=False,
                  shuffle_data=True):
        batch_counter = 0
        epoch_counter = 0
        epoch_losses = []
        dev_performances = []
        dev_losses = []
        epoch_loss = 0

        best_model = None
        best_performance = -1
        best_preds_dev = None
        batches_in_epoch = int(len(train_data) / batch_size) + 1

        batches = batcher.batch_iter(train_data,
                                     batch_size,
                                     max_num_epochs,
                                     shuffle=shuffle_data)
        for batch in batches:
            batch_counter += 1

            if (len(batch) == batch_size):
                feed_dict, gold_labels = self.feed_dict_function(
                    self.model, batch, configuration)
                self.train_model_single_iteration(feed_dict)

                batch_loss = self.model.pure_loss.eval(session=self.session,
                                                       feed_dict=feed_dict)
                #batch_dist_loss = self.model.distance_loss.eval(session = self.session, feed_dict = feed_dict)
                epoch_loss += batch_loss

                if print_training and print_batch_losses:
                    print("Batch loss" + str(batch_counter) + ": " +
                          str(batch_loss))
                    #print("Batch distance loss" + str(batch_counter) + ": " + str(batch_dist_loss))

            if batch_counter % batches_in_epoch == 0:
                epoch_counter += 1
                if print_training:
                    print("\nEpoch " + str(epoch_counter) + ": " +
                          str(epoch_loss))
                    print("Previous epochs: " + str(epoch_losses) + "\n")
                epoch_losses.append(epoch_loss)
                epoch_loss = 0
                if len(epoch_losses) > num_devs_not_better_end:
                    epoch_losses.pop(0)

            if batch_counter % batch_dev_perf == 0:
                if print_training:
                    print("\n### Evaluation of development set, after batch " +
                          str(batch_counter) + " ###")
                batches_dev = batcher.batch_iter(dev_data,
                                                 batch_size,
                                                 1,
                                                 shuffle=False)
                dev_batch_counter = 1
                dev_loss = 0
                for batch_dev in batches_dev:
                    if (len(batch_dev) == batch_size):
                        feed_dict_dev, golds_batch_dev = self.feed_dict_function(
                            self.model, batch_dev, configuration, predict=True)
                        dev_batch_loss = self.model.pure_loss.eval(
                            session=self.session, feed_dict=feed_dict_dev)
                        dev_loss += dev_batch_loss
                        if print_training and print_batch_losses:
                            print("Dev batch: " + str(dev_batch_counter) +
                                  ": " + str(dev_batch_loss))
                        preds_batch_dev = self.predict(feed_dict_dev)
                        if dev_batch_counter == 1:
                            golds = golds_batch_dev
                            preds = preds_batch_dev
                        else:
                            golds = np.concatenate((golds, golds_batch_dev),
                                                   axis=0)
                            preds = np.concatenate((preds, preds_batch_dev),
                                                   axis=0)
                    dev_batch_counter += 1
                print("Development pure loss: " + str(dev_loss))
                score = self.eval_func(golds, preds, self.labels)
                if self.additional_results_function:
                    self.additional_results_function(self.model, self.session)
                if print_training:
                    print("Peformance: " + str(score) + "\n")
                    print("Previous performances: " + str(dev_performances))
                    print("\nLoss: " + str(dev_loss) + "\n")
                    print("Previous losses: " + str(dev_losses))
                if score > best_performance:
                    best_model = self.model.get_model(self.session)
                    best_preds_dev = preds
                    best_performance = score

                #if len(dev_performances) == num_devs_not_better_end and ((dev_score_maximize and dev_performances[0] >= score) or (not dev_score_maximize and dev_performances[0] <= score)):
                if (len(dev_losses) == num_devs_not_better_end
                        and dev_losses[0] < dev_loss):
                    break
                else:
                    dev_performances.append(score)
                    dev_losses.append(dev_loss)
                    if len(dev_performances) > num_devs_not_better_end:
                        dev_performances.pop(0)
                        dev_losses.pop(0)
        return (best_model, best_performance, best_preds_dev, golds)
Example 6
	def train_dev(self, train_data, dev_data, batch_size, max_num_epochs, num_devs_not_better_end = 5, batch_dev_perf = 100, print_batch_losses = True, dev_score_maximize = True, configuration = None, print_training = False, shuffle_data = True, addit_eval_num_batches = None):
		batch_counter = 0
		epoch_counter = 0
		epoch_losses = []
		dev_performances = []
		dev_losses = []
		epoch_loss = 0
		
		best_model = None
		best_performance = -1
		best_dev_loss = float("inf")	
		best_preds_dev = None	
		batches_in_epoch = int(len(train_data)/batch_size) + 1

		print("Am I shuffling? " + str(shuffle_data))
		batches = batcher.batch_iter(train_data, batch_size, max_num_epochs, shuffle = shuffle_data)
		for batch in batches:
			batch_counter += 1

			if (len(batch) == batch_size):
				feed_dict, gold_labels = self.feed_dict_function(self.model, batch, configuration)
				self.train_model_single_iteration(feed_dict)
			
				batch_loss = self.model.pure_loss.eval(session = self.session, feed_dict = feed_dict)
				epoch_loss += batch_loss

				if print_training and print_batch_losses:
					print("Batch " + str(batch_counter) + ": " + str(batch_loss))
			else: 
				raise ValueError("Train batch not of expected size!")

			if batch_counter % batches_in_epoch == 0:
				epoch_counter += 1
				if print_training: 
					print("\nEpoch " + str(epoch_counter) + ": " + str(epoch_loss))
					print("Previous epochs: " + str(epoch_losses) + "\n")
				epoch_losses.append(epoch_loss)
				epoch_loss = 0
				if len(epoch_losses) > num_devs_not_better_end:
					epoch_losses.pop(0)

			if (self.additional_results_function and addit_eval_num_batches
					and batch_counter % addit_eval_num_batches == 0):
				self.additional_results_function(self.model, self.session)
				
			if batch_counter % batch_dev_perf == 0:
				if print_training:
					print("\n### Evaluation of development set, after batch " + str(batch_counter) + " ###")
				batches_dev = batcher.batch_iter(dev_data, batch_size, 1, shuffle = False)
				dev_batch_counter = 1
				dev_loss = 0
				for batch_dev in batches_dev:
					if (len(batch_dev) == batch_size):
						feed_dict_dev, golds_batch_dev = self.feed_dict_function(self.model, batch_dev, configuration, predict = True)	
						dev_batch_loss = self.model.pure_loss.eval(session = self.session, feed_dict = feed_dict_dev)
						dev_loss += dev_batch_loss
						print("Dev batch: " + str(dev_batch_counter) + ": " + str(dev_batch_loss))
						if self.eval_func:
							preds_batch_dev = self.predict(feed_dict_dev) 
							if dev_batch_counter == 1:
								golds = golds_batch_dev
								preds = preds_batch_dev
							else:
								golds = np.concatenate((golds, golds_batch_dev), axis = 0)
								preds = np.concatenate((preds, preds_batch_dev), axis = 0)
					#else: 
					#	raise ValueError("Dev batch not of expected size!")
					dev_batch_counter += 1

				print("Development pure loss: " + str(dev_loss))
				if self.eval_func: 
					score = self.eval_func(golds, preds, self.labels)
				if print_training:
					if self.eval_func:
						print("Peformance: " + str(score) + "\n")
						print("Previous performances: " + str(dev_performances))
					print("\nDev Loss: " + str(dev_loss) + "\n")
					print("Previous losses: " + str(dev_losses))
				if self.eval_func:
					if score > best_performance:
						best_model = self.model.get_model(self.session)
						best_performance = score
						best_preds_dev = preds  # keep the predictions of the best model
						
					if len(dev_performances) == num_devs_not_better_end and ((dev_score_maximize and dev_performances[0] >= score) or (not dev_score_maximize and dev_performances[0] <= score)):
						break
					else: 
						dev_performances.append(score)
						if len(dev_performances) > num_devs_not_better_end:
							dev_performances.pop(0)
				else:
					if dev_loss < best_dev_loss:
						print("Getting the model hyperparameters and variables...")
						best_model = self.model.get_model(self.session)
						#self.print_objects(best_model)							
						best_dev_loss = dev_loss
						if self.model_serialization_path:
							print("Serializing new best model...")
							io_helper.serialize(best_model, self.model_serialization_path)
							print("Serialization done!")
		
					if len(dev_losses) == num_devs_not_better_end and dev_losses[0] <= dev_loss:
						break
					else:	
						dev_losses.append(dev_loss)
						if len(dev_losses) > num_devs_not_better_end:
							dev_losses.pop(0) 			
		return (best_model, best_performance, best_preds_dev, golds)
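
Example 6 persists the best model through io_helper.serialize. A minimal sketch of such a helper (the module and function names come from the call site; the use of pickle, and the deserialize counterpart, are assumptions):

import pickle

def serialize(obj, path):
    """Write `obj` to `path` as a pickle file."""
    with open(path, "wb") as f:
        pickle.dump(obj, f)

def deserialize(path):
    """Load a previously serialized object from `path`."""
    with open(path, "rb") as f:
        return pickle.load(f)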
Example 7
    def train(self,
              train_data,
              batch_size,
              max_num_epochs,
              num_epochs_not_better_end=5,
              epoch_diff_smaller_end=1e-5,
              print_batch_losses=True,
              configuration=None,
              eval_params=None,
              shuffle_data=True,
              batch_size_irrelevant=True):
        batch_counter = 0
        epoch_counter = 0
        epoch_losses = []
        epoch_loss = 0
        batches_in_epoch = int(len(train_data) / batch_size) + 1

        batches = batcher.batch_iter(train_data,
                                     batch_size,
                                     max_num_epochs,
                                     shuffle=shuffle_data)
        for batch in batches:
            batch_counter += 1

            if (batch_size_irrelevant or len(batch) == batch_size):
                feed_dict, gold_labels = self.feed_dict_function(
                    self.model, batch, config=configuration, predict=False)
                self.train_model_single_iteration(feed_dict)
                batch_loss = self.model.loss.eval(session=self.session,
                                                  feed_dict=feed_dict)
                if print_batch_losses:
                    print("Batch " + str(batch_counter) + ": " +
                          str(batch_loss),
                          flush=True)

            if batch_counter % batches_in_epoch == 0:
                epoch_counter += 1
                print("Evaluating the epoch loss for epoch " +
                      str(epoch_counter),
                      flush=True)

                if self.eval_func:
                    preds, score, epoch_loss = self.test(
                        train_data,
                        batch_size,
                        eval_params,
                        False,
                        batch_size_irrelevant=batch_size_irrelevant,
                        compute_loss=True)
                else:
                    preds, epoch_loss = self.test(
                        train_data,
                        batch_size,
                        None,
                        False,
                        batch_size_irrelevant=batch_size_irrelevant,
                        compute_loss=True)

                print("Epoch " + str(epoch_counter) + ": " + str(epoch_loss),
                      flush=True)
                if self.eval_func:
                    print("Epoch (train) performance: " + str(score),
                          flush=True)
                print("Previous epochs: " + str(epoch_losses), flush=True)

                if len(epoch_losses) == num_epochs_not_better_end and (
                        epoch_losses[0] - epoch_loss < epoch_diff_smaller_end):
                    break
                else:
                    epoch_losses.append(epoch_loss)
                    epoch_loss = 0
                    if len(epoch_losses) > num_epochs_not_better_end:
                        epoch_losses.pop(0)
Example 8
	def train_dev(self, train_data, dev_data, batch_size, max_num_epochs, num_devs_not_better_end = 5, batch_dev_perf = 100, print_batch_losses = True, dev_score_maximize = True, configuration = None, print_training = False, shuffle_data = True):
		batch_counter = 0
		epoch_counter = 0
		epoch_losses = []
		dev_losses = []
		epoch_loss = 0
		
		best_model = None
		best_performance = 1000000.00
		batches_in_epoch = int(len(train_data)/batch_size) + 1

		batches = batcher.batch_iter(train_data, batch_size, max_num_epochs, shuffle = shuffle_data)
		for batch in batches:
			batch_counter += 1

			if (len(batch) == batch_size):
				feed_dict, gold_labels = self.feed_dict_function(self.model, batch, configuration)
				self.train_model_single_iteration(feed_dict)
				
				batch_loss = self.model.pure_loss.eval(session = self.session, feed_dict = feed_dict)
				batch_dist_loss = self.model.distance_loss.eval(session = self.session, feed_dict = feed_dict) 
				epoch_loss += batch_loss

				if print_training and print_batch_losses:
					print("Batch pure loss " + str(batch_counter) + ": " + str(batch_loss))
					print("Batch distance loss " + str(batch_counter) + ": " + str(batch_dist_loss))

			if batch_counter % batches_in_epoch == 0:
				epoch_counter += 1
				if print_training: 
					print("\nEpoch " + str(epoch_counter) + ": " + str(epoch_loss))
					print("Previous epochs: " + str(epoch_losses) + "\n")
				epoch_losses.append(epoch_loss)
				epoch_loss = 0
				if len(epoch_losses) > num_devs_not_better_end:
					epoch_losses.pop(0)
		
			if batch_counter % batch_dev_perf == 0:
				if print_training:
					print("\n### Evaluation of development set, after batch " + str(batch_counter) + " ###")
				batches_dev = batcher.batch_iter(dev_data, batch_size, 1, shuffle = False)
				dev_batch_counter = 1
				dev_loss_pure = 0
				dev_loss_distance = 0
				for batch_dev in batches_dev:
					if (len(batch_dev) == batch_size):
						feed_dict_dev, golds_batch_dev = self.feed_dict_function(self.model, batch_dev, configuration, predict = True)	
						dev_batch_pure_loss = self.model.pure_loss.eval(session = self.session, feed_dict = feed_dict_dev)
						dev_batch_distance_loss = self.model.distance_loss.eval(session = self.session, feed_dict = feed_dict_dev)

						if print_training and print_batch_losses:
							print("Dev batch: " + str(dev_batch_counter) + ": ")
							print("Pure loss: " + str(dev_batch_pure_loss))
							print("Distance loss: " + str(dev_batch_distance_loss))

						dev_loss_pure += dev_batch_pure_loss
						dev_loss_distance += dev_batch_distance_loss

					dev_batch_counter += 1
				dev_loss = dev_loss_distance + dev_loss_pure
				if print_training:
					print("Development loss pure: " + str(dev_loss_pure))
					print("Development loss distance: " + str(dev_loss_distance))
					print("Development loss TOTAL: " + str(dev_loss_distance + dev_loss_pure))
					print("Previous losses: " + str(dev_losses))

				if dev_loss < best_performance:
					best_performance = dev_loss
					if self.model_serialization_path:
						self.model.save_model(self.session, self.model_serialization_path)
			
				if len(dev_losses) == num_devs_not_better_end and dev_losses[0] <= dev_loss:
					break
				else: 
					dev_losses.append(dev_loss)
					if len(dev_losses) > num_devs_not_better_end:
						dev_losses.pop(0) 			

		# NOTE: best_model is never reassigned in the loop above; the best model
		# is only persisted to disk via save_model, so None is returned here.
		return (best_model, best_performance)
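
Example 8 saves the best model to disk via self.model.save_model instead of keeping it in memory. A plausible minimal implementation on top of TensorFlow 1.x (the method name comes from the call site above; the use of tf.train.Saver is an assumption):

import tensorflow as tf

class Model(object):
    # ... graph construction elided ...

    def save_model(self, session, path):
        """Persist all graph variables of `session` to `path`."""
        saver = tf.train.Saver()
        saver.save(session, path)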