def __init__(self):
    # Assumes module-level `import pandas as pd` and a LogisticRegression class
    # (e.g. sklearn.linear_model.LogisticRegression) are available.
    bank_full = pd.read_csv('data/bank_full_w_dummy_vars.csv')
    # DataFrame.ix was removed from pandas; use positional indexing with .iloc.
    # Columns 18-36 are the dummy-variable features, column 17 is the target.
    X = bank_full.iloc[:, 18:37].values
    y = bank_full.iloc[:, 17].values
    LogReg = LogisticRegression()
    LogReg.fit(X, y)
    self.model = LogReg
def test_logreg():
    X_train, Y_train, X_test, Y_test = import_census(CENSUS_FILE_PATH)
    num_features = X_train.shape[1]

    # Add a bias
    X_train_b = np.append(X_train, np.ones((len(X_train), 1)), axis=1)
    X_test_b = np.append(X_test, np.ones((len(X_test), 1)), axis=1)

    # Scratch check on a tiny hand-made dataset (left commented out):
    # my_x = np.array([[3, 4], [5, 6], [7, 8], [9, 10], [22, 22], [12, 23]])
    # my_y = np.array([0, 1, 2, 0, 2, 1]).reshape(6,)
    # my_x = np.append(my_x, np.ones((len(my_x), 1)), axis=1)
    # test_model = LogisticRegression(2, 3, 2, CONV_THRESHOLD)
    # test_model.train(my_x, my_y)

    ### Logistic Regression ###
    model = LogisticRegression(num_features, NUM_CLASSES, BATCH_SIZE, CONV_THRESHOLD)
    num_epochs = model.train(X_train_b, Y_train)
    acc = model.accuracy(X_test_b, Y_test) * 100
    print("Test Accuracy: {:.1f}%".format(acc))
    print("Number of Epochs: " + str(num_epochs))
    # Return the computed accuracy (the original reset acc to 0 before returning,
    # which discarded the result).
    return acc
def lg_k_folds(X_train, y_train, lr, b, epochs, lamda, bias, k=5, verbose=False):
    results = {'accuracy': [], 'recall': [], 'precision': []}
    metric_means = {}
    accuracy = Accuracy()
    recall = Recall()
    precision = Precision()
    chunk_size = int(len(X_train) / k)
    logistic_regression = LogisticRegression(bias)
    for i in range(0, len(X_train), chunk_size):
        end = i + chunk_size if i + chunk_size <= len(X_train) else len(X_train)
        new_X_valid = X_train[i:end]
        new_y_valid = y_train[i:end]
        new_X_train = np.concatenate([X_train[:i], X_train[end:]])
        new_y_train = np.concatenate([y_train[:i], y_train[end:]])
        logistic_regression.fit(new_X_train, new_y_train, lr, b, epochs, lamda, verbose=verbose)
        predictions = logistic_regression.predict(new_X_valid)
        results['accuracy'].append(accuracy(new_y_valid, predictions))
        results['recall'].append(recall(new_y_valid, predictions))
        results['precision'].append(precision(new_y_valid, predictions))
    metric_means['accuracy'] = np.mean(results['accuracy'])
    metric_means['recall'] = np.mean(results['recall'])
    metric_means['precision'] = np.mean(results['precision'])
    return metric_means
class LogisticRegressionExperiment(object):
    def __init__(self):
        self._data_set = get_pick_data("LogisticRegression")
        self._num_features = self._data_set.dynamic_features.shape[1]
        self._time_steps = 1
        self._n_output = 1
        self._model_format()
        self._check_path()

    def _model_format(self):
        learning_rate, max_loss, max_pace, ridge, batch_size, hidden_size, epoch, dropout = lr_setup.all
        self._model = LogisticRegression(num_features=self._num_features,
                                         time_steps=self._time_steps,
                                         n_output=self._n_output,
                                         batch_size=batch_size,
                                         epochs=epoch,
                                         output_n_epoch=ExperimentSetup.output_n_epochs,
                                         learning_rate=learning_rate,
                                         max_loss=max_loss,
                                         dropout=dropout,
                                         max_pace=max_pace,
                                         ridge=ridge)

    def _check_path(self):
        if not os.path.exists("result_9_16_0"):
            os.makedirs("result_9_16_0")
        self._filename = "result_9_16_0" + "/" + self._model.name + " " + \
            time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())

    def do_experiments(self):
        n_output = 1
        dynamic_features = self._data_set.dynamic_features
        labels = self._data_set.labels
        # tol_test_index = np.zeros(shape=0, dtype=np.int32)
        tol_pred = np.zeros(shape=(0, n_output))
        tol_label = np.zeros(shape=(0, n_output), dtype=np.int32)
        train_dynamic_features, test_dynamic_features, train_labels, test_labels = \
            split_logistic_data(dynamic_features, labels)
        for i in range(5):
            train_dynamic_res, train_labels_res = imbalance_preprocess(
                train_dynamic_features[i], train_labels[i], 'LogisticRegression')
            train_set = DataSet(train_dynamic_res, train_labels_res)
            test_set = DataSet(test_dynamic_features[i].reshape(-1, 92),
                               test_labels[i].reshape(-1, 1))
            self._model.fit(train_set, test_set)
            y_score = self._model.predict(test_set)
            tol_pred = np.vstack((tol_pred, y_score))
            tol_label = np.vstack((tol_label, test_labels[i]))
            print("Cross validation: {} of {}".format(i, 5),
                  time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
        tol_test_index = np.arange(labels.shape[0] * labels.shape[1])
        evaluate(tol_test_index, tol_label, tol_pred, self._filename)
        self._model.close()
def train_logistic_regression(self, distribution, fan_in, fan_out, learning_rate=0.013):
    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model.')

    X = tt.fmatrix('X')  # data, presented as rasterized images
    y = tt.fmatrix('y')  # labels, presented as a 1-hot matrix of labels

    # construct the logistic regression class
    # Each MNIST image has size 28*28
    classifier = LogisticRegression(X, distribution, fan_in, fan_out)

    # the cost we minimize during training
    cost = classifier.neg_log_like(y)

    # compute the gradient of cost with respect to theta = (W, b)
    grads = [tt.grad(cost=cost, wrt=param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, grads)]

    # compile a Theano function that returns the cost and, at the same time,
    # updates the parameters based on the rules defined in `updates`
    train = theano.function(inputs=[X, y], outputs=cost, updates=updates,
                            allow_input_downcast=True)

    # functions that compute the mistakes made by the model on a minibatch
    test = theano.function(inputs=[X, y], outputs=classifier.errors(y),
                           allow_input_downcast=True)
    validate = theano.function(inputs=[X, y], outputs=classifier.errors(y),
                               allow_input_downcast=True)
    predict = theano.function(inputs=[X], outputs=classifier.y_pred,
                              allow_input_downcast=True)

    self.early_stopping(classifier, train, test, validate, predict, learning_rate)
def main():
    Dataset = namedtuple('Dataset', ['inputs', 'labels'])

    # Reading in data. You do not need to touch this.
    with open("data/train-images-idx3-ubyte.gz", 'rb') as f1, \
            open("data/train-labels-idx1-ubyte.gz", 'rb') as f2:
        buf1 = gzip.GzipFile(fileobj=f1).read(16 + 60000 * 28 * 28)
        buf2 = gzip.GzipFile(fileobj=f2).read(8 + 60000)
        inputs = np.frombuffer(buf1, dtype='uint8', offset=16).reshape(60000, 28 * 28)
        inputs = np.where(inputs > 99, 1, 0)
        labels = np.frombuffer(buf2, dtype='uint8', offset=8)
        data_train = Dataset(inputs, labels)

    with open("data/t10k-images-idx3-ubyte.gz", 'rb') as f1, \
            open("data/t10k-labels-idx1-ubyte.gz", 'rb') as f2:
        buf1 = gzip.GzipFile(fileobj=f1).read(16 + 10000 * 28 * 28)
        buf2 = gzip.GzipFile(fileobj=f2).read(8 + 10000)
        inputs = np.frombuffer(buf1, dtype='uint8', offset=16).reshape(10000, 28 * 28)
        inputs = np.where(inputs > 99, 1, 0)
        labels = np.frombuffer(buf2, dtype='uint8', offset=8)
        data_test = Dataset(inputs, labels)

    # run naive bayes
    model = NaiveBayes(10)
    model.train(data_train)
    print("{:.1f}%".format(model.accuracy(data_test) * 100))

    # run logistic regression
    model = LogisticRegression(784, 10)
    model.train(data_train)
    print("{:.1f}%".format(model.accuracy(data_test) * 100))
def test_LogisticRegression(dim):
    model_name = "LogisticRegression"
    x, y = make_classification(n_samples=1000, n_features=dim)
    model = LogisticRegression(dim)
    check_model(model, model_name, x, y, category="binary_classification")
def train(args):
    """
    This function trains the models
    :param args: the command line arguments defining the desired actions
    """
    # load data
    train_data_all, dev_data_all, _ = load(args.data_dir, cachedir=args.cachedir,
                                           override_cache=args.override_cache,
                                           text_only=(args.model.lower() in ["bi-lstm", "bert"]),
                                           include_tfidf=args.include_tfidf,
                                           balanced=args.balanced)
    train_data, train_labels = train_data_all.X, train_data_all.y
    dev_data, dev_labels = dev_data_all.X, dev_data_all.y

    # Build model
    apx = get_appendix(args.include_tfidf, args.balanced)
    if args.model.lower() == "simple-ff":
        model = FeedForward(args.ff_hunits, train_data.shape[1])
        train_pytorch(args, model, train_data, train_labels, dev_data, dev_labels,
                      save_model_path=f"models/simple-ff{apx}.torch")
    elif args.model.lower() == "bi-lstm":
        model = BiLSTM(epochs=args.num_epochs, batch_size=args.batch_size,
                       max_seq_len=args.max_seq_len)
        model.train(train_data, train_labels, dev_data, dev_labels)
    elif args.model.lower() == "logreg":
        model = LogisticRegression()
        model.train(train_data, train_labels, dev_data, dev_labels,
                    save_model_path=f"models/logreg{apx}.pkl")
    elif args.model.lower() == "majority-vote":
        model = MajorityVote()
        model.train(train_labels, dev_labels)
    elif args.model.lower() == "bert":
        model = Bert(epochs=args.num_epochs, batch_size=args.batch_size,
                     max_seq_len=args.max_seq_len, learning_rate=args.learning_rate)
        model.train(train_data, train_labels, dev_data, dev_labels,
                    save_model_path=f"models/bert.pkl")
    elif args.model.lower() == "svm":
        model = SVM()
        model.train(train_data, train_labels, save_model_path=f"models/svm{apx}.sav")
    else:
        raise Exception("Unknown model type passed in!")
def classifier_log_reg(features, config, train_mode, **kwargs):
    if train_mode:
        features_train, features_valid = features
        if config.random_search.log_reg.n_runs:
            transformer = RandomSearchOptimizer(
                LogisticRegression, config.log_reg,
                train_input_keys=[],
                valid_input_keys=['X_valid', 'y_valid'],
                score_func=roc_auc_score,
                maximize=True,
                n_runs=config.random_search.log_reg.n_runs,
                callbacks=[
                    NeptuneMonitor(**config.random_search.log_reg.callbacks.neptune_monitor),
                    SaveResults(**config.random_search.log_reg.callbacks.save_results),
                ])
        else:
            transformer = LogisticRegression(**config.log_reg)

        log_reg = Step(name='log_reg',
                       transformer=transformer,
                       input_data=['input'],
                       input_steps=[features_train, features_valid],
                       adapter={
                           'X': ([(features_train.name, 'X')]),
                           'y': ([('input', 'y')], to_numpy_label_inputs),
                           'X_valid': ([(features_valid.name, 'X')]),
                           'y_valid': ([('input', 'y_valid')], to_numpy_label_inputs),
                       },
                       cache_dirpath=config.env.cache_dirpath,
                       **kwargs)
    else:
        log_reg = Step(name='log_reg',
                       transformer=LogisticRegression(**config.log_reg),
                       input_steps=[features],
                       adapter={
                           'X': ([(features.name, 'features')]),
                       },
                       cache_dirpath=config.env.cache_dirpath,
                       **kwargs)
    return log_reg
def test_logreg():
    X_train, Y_train, X_test, Y_test = import_census(CENSUS_FILE_PATH)
    num_features = X_train.shape[1]

    # Add a bias
    X_train_b = np.append(X_train, np.ones((len(X_train), 1)), axis=1)
    X_test_b = np.append(X_test, np.ones((len(X_test), 1)), axis=1)

    ### Logistic Regression ###
    model = LogisticRegression(num_features, NUM_CLASSES, BATCH_SIZE, CONV_THRESHOLD)
    num_epochs = model.train(X_train_b, Y_train)
    acc = model.accuracy(X_test_b, Y_test) * 100
    print("Test Accuracy: {:.1f}%".format(acc))
    print("Number of Epochs: " + str(num_epochs))
    return acc
def test_logreg():
    X_train, Y_train, X_test, Y_test = import_mnist(MNIST_TRAIN_INPUTS_PATH,
                                                    MNIST_TRAIN_LABELS_PATH,
                                                    MNIST_TEST_INPUTS_PATH,
                                                    MNIST_TEST_LABELS_PATH)
    num_features = X_train.shape[1]

    # Add a bias
    X_train_b = np.append(X_train, np.ones((len(X_train), 1)), axis=1)
    X_test_b = np.append(X_test, np.ones((len(X_test), 1)), axis=1)

    ### Logistic Regression ###
    print('--------- LOGISTIC REGRESSION w/ SGD ---------')
    model = LogisticRegression(num_features, MNIST_CLASSES)
    model.train(X_train_b, Y_train)
    print("Test Accuracy: {:.1f}%".format(model.accuracy(X_test_b, Y_test) * 100))
def _model_format(self):
    if self._event_type == "qx":
        learning_rate, max_loss, max_pace, lasso, ridge = lr_qx_setup.all
    elif self._event_type == "cx":
        learning_rate, max_loss, max_pace, lasso, ridge = lr_cx_setup.all
    else:
        learning_rate, max_loss, max_pace, lasso, ridge = lr_xycj_setup.all
    self._model = LogisticRegression(num_features=self._num_features,
                                     time_steps=self._time_steps,
                                     n_output=self._n_output,
                                     batch_size=ExperimentSetup.batch_size,
                                     epochs=ExperimentSetup.epochs,
                                     output_n_epoch=ExperimentSetup.output_n_epochs,
                                     learning_rate=learning_rate,
                                     max_loss=max_loss,
                                     max_pace=max_pace,
                                     lasso=lasso,
                                     ridge=ridge)
def run_with_model(dataset, args):
    """
    Running a particular choice of model, saves to /results folder.
    """
    cut_data, all_data = cut_dataset(dataset, args.cens_time)
    if args.model == "xlearner":
        model = RFXLearner()
        model.train(cut_data["X"], cut_data["w"], cut_data["y"], cut_data["ipcw"])
        pred_rr = model.predict(all_data["X"], all_data["w"], False, True)
    elif args.model == "cox":
        model = CoxAIC()
        model.train(all_data["X"], all_data["w"], all_data["y"], all_data["t"])
        pred_rr = model.predict(args.cens_time, all_data["X"])
    elif args.model == "survrf":
        model = SurvRF()
        model.train(all_data["X"], all_data["w"], all_data["y"], all_data["t"])
        pred_rr = model.predict(args.cens_time)
    elif args.model == "causalforest":
        model = CausalForest()
        model.train(cut_data["X"], cut_data["w"], cut_data["y"])
        pred_rr = np.r_[model.predict(),
                        model.predict(all_data["X"][all_data["cens"] == 1])]
    elif args.model == "logreg":
        model = LogisticRegression()
        model.train(cut_data["X"], cut_data["w"], cut_data["y"], cut_data["ipcw"])
        pred_rr = model.predict(all_data["X"])
    elif args.model == "linearxlearner":
        model = LinearXLearner()
        model.train(cut_data["X"], cut_data["w"], cut_data["y"], cut_data["ipcw"])
        pred_rr = model.predict(all_data["X"], all_data["w"], False)
    else:
        raise ValueError("Not a supported model.")
    return {
        "pred_rr": pred_rr,
        "X": all_data["X"],
        "w": all_data["w"],
        "y": all_data["y"],
        "t": all_data["t"],
        "y_cut": all_data["y_cut"],
        "cens": all_data["cens"],
    }
def test(args):
    """
    This function tests our models
    :param args: the command line arguments with the desired actions
    """
    _, _, test_data_all = load(args.data_dir, cachedir=args.cachedir,
                               override_cache=args.override_cache,
                               text_only=(args.model.lower() in ["bi-lstm", "bert"]),
                               include_tfidf=args.include_tfidf,
                               balanced=args.balanced)
    test_data, test_labels = test_data_all.X, test_data_all.y
    apx = get_appendix(args.include_tfidf, args.balanced)

    if args.model.lower() == "simple-ff":
        preds = test_pytorch(test_data, test_labels,
                             load_model_path=f"models/simple-ff{apx}.torch",
                             predictions_file=f"preds/simple-ff-preds{apx}.txt")
    elif args.model.lower() == "bi-lstm":
        model = BiLSTM(load_model_path="models/bilstm.keras",
                       tokenizer_path='models/bilstm-tokenizer.json')
        preds = model.test(test_data, y_test=test_labels)
    elif args.model.lower() == "logreg":
        model = LogisticRegression(load_model_path=f"models/logreg{apx}.pkl")
        preds = model.test(test_data, test_labels,
                           save_predictions_path=f"preds/logreg-preds{apx}.txt")
    elif args.model.lower() == "majority-vote":
        model = MajorityVote(load_model_path="models/majority-class.txt")
        preds = model.test(test_labels)
    elif args.model.lower() == "bert":
        model = Bert(load_model_path="models/bert.pkl")
        preds = model.test(test_data, test_labels,
                           save_predictions_path="preds/bert-preds.txt")
    elif args.model.lower() == "svm":
        model = SVM(load_model_path=f"models/svm{apx}.sav")
        preds = model.test(test_data, save_predictions_path=f"preds/svm-preds{apx}.txt")
    else:
        raise Exception("Unknown model type passed in!")

    metrics = classification_report(test_labels, preds, output_dict=True)
    pprint(metrics)
    with open(f"scores/{args.model.lower()}{apx}.json", "w") as fout:
        json.dump(metrics, fout, indent=4)
def run_for_optimism(original_dataset, bootstrap_dataset, args):
    """
    Calculates difference between performance on a bootstrapped dataset (upon
    which the model is trained) and the original dataset. Optimism is defined
    as the mean difference over many bootstrap datasets.
    """
    cut_data, all_data = cut_dataset(bootstrap_dataset, args.cens_time)
    cut_data_orig, all_data_orig = cut_dataset(original_dataset, args.cens_time)
    if args.model == "cox":
        model = CoxAIC()
        model.train(all_data["X"], all_data["w"], all_data["y"], all_data["t"])
        pred_rr = model.predict(args.cens_time, all_data["X"])
        pred_rr_orig = model.predict(args.cens_time, all_data_orig["X"])
    elif args.model == "logreg":
        model = LogisticRegression()
        model.train(cut_data["X"], cut_data["w"], cut_data["y"], cut_data["ipcw"])
        pred_rr = model.predict(all_data["X"])
        pred_rr_orig = model.predict(all_data_orig["X"])
    elif args.model == "linearxlearner":
        model = LinearXLearner()
        model.train(cut_data["X"], cut_data["w"], cut_data["y"], cut_data["ipcw"])
        pred_rr = model.predict(all_data["X"], all_data["w"], False)
        pred_rr_orig = model.predict(all_data_orig["X"], all_data_orig["w"], False)
    else:
        raise ValueError("Not a supported model.")
    c_stat_bootstrap = c_statistic(pred_rr[all_data["cens"] == 0],
                                   cut_data["y"], cut_data["w"])
    c_stat_original = c_statistic(pred_rr_orig[all_data_orig["cens"] == 0],
                                  cut_data_orig["y"], cut_data_orig["w"])
    rmst_bootstrap = decision_value_rmst(pred_rr, all_data["y"], all_data["w"],
                                         all_data["t"], args.cens_time)
    rmst_original = decision_value_rmst(pred_rr_orig, all_data_orig["y"],
                                        all_data_orig["w"], all_data_orig["t"],
                                        args.cens_time)
    return {
        "c_stat_diff": c_stat_bootstrap - c_stat_original,
        "decision_value_rmst_diff": rmst_bootstrap - rmst_original,
    }
def run_for_optimism(original_dataset, bootstrap_dataset, args):
    """
    Calculate optimism for a particular bootstrap run.
    """
    cut_data, all_data = cut_dataset_at_cens_time(bootstrap_dataset, args.cens_time)
    cut_data_orig, all_data_orig = cut_dataset_at_cens_time(original_dataset, args.cens_time)
    if args.model == "cox":
        model = CoxAIC()
        model.train(all_data["X"], all_data["w"], all_data["y"], all_data["t"])
        pred_rr = model.predict(args.cens_time, all_data["X"])
        pred_rr_orig = model.predict(args.cens_time, all_data_orig["X"])
    elif args.model == "logreg":
        model = LogisticRegression()
        model.train(cut_data["X"], cut_data["w"], cut_data["y"], cut_data["ipcw"])
        pred_rr = model.predict(all_data["X"])
        pred_rr_orig = model.predict(all_data_orig["X"])
    elif args.model == "linearxlearner":
        model = LinearXLearner()
        model.train(cut_data["X"], cut_data["w"], cut_data["y"], cut_data["ipcw"])
        pred_rr = model.predict(all_data["X"], all_data["w"], False)
        pred_rr_orig = model.predict(all_data_orig["X"], all_data_orig["w"], False)
    else:
        raise ValueError("Not a supported model.")
    c_stat_bootstrap = c_statistic(pred_rr[all_data["cens"] == 0],
                                   cut_data["y"], cut_data["w"])
    c_stat_original = c_statistic(pred_rr_orig[all_data_orig["cens"] == 0],
                                  cut_data_orig["y"], cut_data_orig["w"])
    rmst_bootstrap = decision_value_rmst(pred_rr, all_data["y"], all_data["w"],
                                         all_data["t"], args.cens_time)
    rmst_original = decision_value_rmst(pred_rr_orig, all_data_orig["y"],
                                        all_data_orig["w"], all_data_orig["t"],
                                        args.cens_time)
    return {
        "c_stat_diff": c_stat_bootstrap - c_stat_original,
        "decision_value_rmst_diff": rmst_bootstrap - rmst_original,
    }
def load_model(json_file):
    model_data = utils.load_json_data(json_file)
    if model_data['model_type'] == 'logistic_regression':
        model = LogisticRegression(model_data)
    elif model_data['model_type'] == 'decision_tree':
        model = DecisionTree(model_data)
    elif model_data['model_type'] == 'scoring':
        model = ScoringModel(model_data)
    elif model_data['model_type'] == 'nomogram':
        model = NomogramModel(model_data)
    elif model_data['model_type'] == 'NOCOS':
        model = NOCOS(model_data)
    else:
        raise Exception('model type [{0}] not recognised'.format(model_data['model_type']))
    logging.info('{0} loaded as a {1} model'.format(json_file, model.model_type))
    return model
def train_lr(self, cid):
    params = {
        "offline_model_dir": PROJECT_ROOT + "/ltr/weights/lr",
    }
    params.update(self.params_common)
    X_train, X_valid = self.load_data_by_id("train", cid), self.load_data_by_id("vali", cid)
    model = LogisticRegression("ranking", params, self.logger)
    model.fit(X_train, validation_data=X_valid)
    model.save_session()
def _initialize_models(self, data_generator):
    """Initializes models prior to training."""
    models = {
        "Linear Regression": LinearRegression(),
        "Logistic Regression": LogisticRegression(),
        "Quadratic Regression": QuadraticRegression(),
        "Naive Bayes'": NaiveBayes(std_X=data_generator.std_X,
                                   m0=data_generator.m0s,
                                   m1=data_generator.m1s),
        "kNN CV": kNNCV(n_folds=self.n_folds),
    }
    return models
def confusion_matrix_plot(outputdir, X_train, Y_train, cv=5):
    f, ax = plt.subplots(2, 3, figsize=(12, 10))

    y_pred = cross_val_predict(svm.SVC(kernel='linear'), X_train, Y_train, cv=cv)
    sns.heatmap(confusion_matrix(Y_train, y_pred), ax=ax[0, 0], annot=True, fmt='2.0f')
    ax[0, 0].set_title('Matrix for Linear-SVM')

    y_pred = cross_val_predict(rfgd.best_estimator_, X_train, Y_train, cv=cv)
    sns.heatmap(confusion_matrix(Y_train, y_pred), ax=ax[0, 1], annot=True, fmt='2.0f')
    ax[0, 1].set_title('Matrix for Random-Forests')

    y_pred = cross_val_predict(LogisticRegression(), X_train, Y_train, cv=cv)
    sns.heatmap(confusion_matrix(Y_train, y_pred), ax=ax[0, 2], annot=True, fmt='2.0f')
    ax[0, 2].set_title('Matrix for Logistic Regression')

    y_pred = cross_val_predict(gd.best_estimator_, X_train, Y_train, cv=cv)
    sns.heatmap(confusion_matrix(Y_train, y_pred), ax=ax[1, 0], annot=True, fmt='2.0f')
    ax[1, 0].set_title('Matrix for Ada Boosting')

    y_pred = cross_val_predict(ensemble_lin_rbf, X_train, Y_train, cv=cv)
    sns.heatmap(confusion_matrix(Y_train, y_pred), ax=ax[1, 1], annot=True, fmt='2.0f')
    ax[1, 1].set_title('Matrix for Ensemble-classifier')

    plt.subplots_adjust(hspace=0.2, wspace=0.2)
    f.savefig(outputdir + '/confusion_plot.png')  # save the figure to file
    plt.close(f)
    print('confusion matrix plot is saved to {}'.format(outputdir + '/confusion_plot.png'))
def main(grid):
    # Get Clean Data
    X, Y = read_clean_data()

    # Linear Regression
    try:
        LinearRegression(X, Y, grid)
    except Exception as e:
        print(e)

    # Binarize Y
    Y_binary = BinaryY(Y)

    # Logistic Regression
    try:
        LogisticRegression(X, Y_binary, grid)
    except Exception as e:
        print(e)

    # Decision Tree
    try:
        DecisionTree(X, Y_binary, grid)
    except Exception as e:
        print(e)

    # Support Vector Machine
    try:
        SVM(X, Y_binary, grid)
    except Exception as e:
        print(e)

    # Random Forest
    try:
        RandomForest(X, Y_binary, grid)
    except Exception as e:
        print(e)

    # Bagging Classifier
    try:
        Bagging(X, Y_binary, grid)
    except Exception as e:
        print(e)

    # Neural Network
    try:
        NeuralNet(X, Y_binary, grid)
    except Exception as e:
        print(e)
def get_model(model_type: str, model_config: Dict, w2v: torch.Tensor,
              vocab_list: List, model_name: str) -> nn.Module:
    # Instantiate model and configuration
    train_config = {
        "num_epochs": 30,
        "lr_rate": 2e-5,
        "log_step": 100,
        "l2norm": False,
        "l2factor": 3.,
        "lambda": 0.01,
    }
    if model_type == "nb":
        model = NaiveBayes(model_config)
    elif model_type == "lr":
        model = LogisticRegression(model_config)
        train_config["lr_rate"] = 2e-3
    elif model_type == "ff":
        model = feedforwardNN(model_config, w2v)
        train_config["num_epochs"] = 50
        train_config["lr_rate"] = 2e-4
    elif model_type == "cnn":
        model = convolutionalNN(model_config, w2v)
        train_config["num_epochs"] = 30
        train_config["lr_rate"] = 2e-4
        train_config["l2norm"] = False
    elif model_type == "bertff":
        model = BERTfeedforward(model_config, vocab_list)
        train_config["num_epochs"] = 30
        train_config["lr_rate"] = 1e-5
    else:
        raise ValueError("Model type is not supported.")

    # Load model (compare strings with !=, not `is not`)
    if model_name != "":
        model = torch.load("./models/" + model_name)

    return model, train_config
)

dataset = getattr(__import__('datasets'), args.dataset.capitalize() + 'Dataset')
train_dataset = dataset('train', args)
val_dataset = dataset('validation', args)

train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=args.batch_size, shuffle=True
)
val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=args.batch_size, shuffle=False
)

autoencoder = Autoencoder(args.encoder_layers, args.decoder_layers)
classifier = LogisticRegression(args.encoder_layers[-1])
oracle = DL2_Oracle(
    learning_rate=args.dl2_lr, net=autoencoder,
    use_cuda=torch.cuda.is_available(),
    constraint=ConstraintBuilder.build(autoencoder, train_dataset, args.constraint)
)

binary_cross_entropy = nn.BCEWithLogitsLoss(
    pos_weight=train_dataset.pos_weight('train') if args.balanced else None
)
optimizer = torch.optim.Adam(
    list(autoencoder.parameters()) + list(classifier.parameters()),
    lr=args.learning_rate, weight_decay=args.weight_decay
def logistic_test():
    n_samples = 100
    np.random.seed(0)
    X_train = np.random.normal(size=n_samples)
    y_train = (X_train > 0).astype(float)
    X_train[X_train > 0] *= 4
    X_train += 0.3 * np.random.normal(size=n_samples)
    X_train = X_train[:, np.newaxis]

    X, y = make_classification(
        n_features=1,
        n_classes=2,
        n_redundant=0,
        n_informative=1,
        n_clusters_per_class=1,
        class_sep=0.75,
        shuffle=True,
        random_state=0,
    )
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0)
    df_test = pd.DataFrame(data=[X_test.flatten(), y_test]).T
    df_test.columns = ["X", "y"]

    lr = LogisticRegression()
    lr.fit(X_train, y_train)
    y_pred = lr.predict(X_test)
    score = [1 if yi == yi_pred else 0 for yi, yi_pred in zip(y_test, y_pred)]
    print(np.sum(score) / len(score))

    # and plot the result
    plt.figure(1, figsize=(4, 3))
    plt.clf()
    plt.scatter(X_train.ravel(), y_train, color="black", zorder=20)
    df_test["loss"] = expit(X_test * lr.theta + lr.bias).ravel()
    df_test = df_test.sort_values("X")
    plt.plot(df_test["X"], df_test["loss"], color="red", linewidth=3)

    ols = LinearRegression()
    ols.fit(X_train, y_train)
    plt.plot(X_test, ols.theta * X_test + ols.bias, linewidth=1)
    plt.axhline(0.5, color=".5")

    plt.ylabel("y")
    plt.xlabel("X")
    plt.xticks(range(-5, 10))
    plt.yticks([0, 0.5, 1])
    plt.ylim(-0.25, 1.25)
    plt.xlim(-2, 2)
    plt.legend(
        ("Logistic Regression Model", "Linear Regression Model"),
        loc="lower right",
        fontsize="small",
    )
    plt.tight_layout()
    plt.show()
def _convolutional_mlp(new_digit, nkerns=[20, 50], batch_size=1):
    rng = numpy.random.RandomState(23455)

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized images of shape (batch_size, 28 * 28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (28, 28) is the size of MNIST images.
    # layer0_input = x.reshape((batch_size, 1, 28, 28))
    layer0_input = new_digit.reshape((1, 1, 28, 28))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1, 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 28, 28),
        filter_shape=(nkerns[0], 1, 5, 5),
        poolsize=(2, 2)
    )

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 12, 12),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 4 * 4,
        n_out=500,
        activation=T.tanh
    )

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    predict = theano.function(inputs=[], outputs=layer3.y_pred)

    print 'Loading the model ...'
    f = file('trained_models/models/convolutional_mlp.mnist.trained.pickle', 'rb')
    # model_data = {layer0: {W: ..., b: ...}, layer1: ..., layer2: ..., layer3: ...}
    model_data = cPickle.load(f)
    f.close()
    print 'Loaded the model.'

    layers = [layer0, layer1, layer2, layer3]
    for i in range(4):
        layers[i].W.set_value(model_data['layer%s' % i]['W'])
        layers[i].b.set_value(model_data['layer%s' % i]['b'])
    print 'Restored model parameters.'

    print 'Predicting'
    return predict()[0]
def __init__(self, **kwargs):
    self.n_in_src = kwargs.pop('nembed_src')
    self.n_in_trg = kwargs.pop('nembed_trg')
    self.n_hids_src = kwargs.pop('nhids_src')
    self.n_hids_trg = kwargs.pop('nhids_trg')
    self.src_vocab_size = kwargs.pop('src_vocab_size')
    self.trg_vocab_size = kwargs.pop('trg_vocab_size')
    self.method = kwargs.pop('method')
    self.dropout = kwargs.pop('dropout')
    self.maxout_part = kwargs.pop('maxout_part')
    self.path = kwargs.pop('saveto')
    self.clip_c = kwargs.pop('clip_c')
    self.mkl = kwargs.pop('mkl')

    self.with_attention = kwargs.pop('with_attention')

    self.with_coverage = kwargs.pop('with_coverage')
    self.coverage_dim = kwargs.pop('coverage_dim')
    self.coverage_type = kwargs.pop('coverage_type')
    self.max_fertility = kwargs.pop('max_fertility')
    if self.coverage_type == 'linguistic':  # compare strings with ==, not `is`
        # make sure the dimension of linguistic coverage is always 1
        self.coverage_dim = 1

    self.with_context_gate = kwargs.pop('with_context_gate')

    self.params = []
    self.layers = []

    self.table_src = LookupTable(self.src_vocab_size, self.n_in_src, name='table_src')
    self.layers.append(self.table_src)

    self.encoder = BidirectionalEncoder(self.n_in_src, self.n_hids_src, self.table_src,
                                        self.mkl, name='birnn_encoder')
    self.layers.append(self.encoder)

    self.table_trg = LookupTable(self.trg_vocab_size, self.n_in_trg, name='table_trg')
    self.layers.append(self.table_trg)

    self.decoder = Decoder(self.mkl, self.n_in_trg, self.n_hids_trg, 2 * self.n_hids_src,
                           with_attention=self.with_attention,
                           with_coverage=self.with_coverage,
                           coverage_dim=self.coverage_dim,
                           coverage_type=self.coverage_type,
                           max_fertility=self.max_fertility,
                           with_context_gate=self.with_context_gate,
                           maxout_part=self.maxout_part,
                           name='rnn_decoder')
    self.layers.append(self.decoder)

    self.logistic_layer = LogisticRegression(self.n_in_trg, self.trg_vocab_size)
    self.layers.append(self.logistic_layer)

    # for reconstruction
    self.with_reconstruction = kwargs.pop('with_reconstruction')
    self.reconstruction_weight = kwargs.pop('reconstruction_weight')
    if self.with_reconstruction:
        # note the source and target sides are reversed
        self.inverse_decoder = InverseDecoder(self.n_in_src, 2 * self.n_hids_src,
                                              self.n_hids_trg,
                                              with_attention=self.with_attention,
                                              maxout_part=self.maxout_part,
                                              name='rnn_inverse_decoder')
        self.layers.append(self.inverse_decoder)

        self.inverse_logistic_layer = LogisticRegression(self.n_in_src, self.src_vocab_size,
                                                         name='inverse_LR')
        self.layers.append(self.inverse_logistic_layer)

    for layer in self.layers:
        self.params.extend(layer.params)
                                           transform=transform)
batch_numbers = len(train_set) // BATCH_SIZE + 1

train_loader = torch.utils.data.DataLoader(train_set, **kwargs)
valid_loader = torch.utils.data.DataLoader(val_set, **kwargs)
test_loader = torch.utils.data.DataLoader(test_dataset, **kwargs)

" MODEL SETTINGS "
if model_name == 'MLP':
    model = MLP(dropout=True).to(device)
elif model_name == 'ResNet':
    # model = torch.hub.load('pytorch/vision:v0.6.0', 'resnet18', pretrained=True).to(device)
    model = MnistResNet().to(device)
elif model_name == 'logistic':
    model = LogisticRegression().to(device)

if include_temperature_scaling:
    model = ModelWithTemperature(model, valid_loader)

optimizer = torch.optim.Adam(model.parameters(), lr=lr)  # 1e-3
# resnet18 = models.resnet18()
loss_fn = nn.CrossEntropyLoss().to(device)
# nll_criterion = nn.CrossEntropyLoss().cuda()

mean_train_losses = []
mean_test_losses = []
valid_acc_list = []
                                          batch_size=batch_size,
                                          shuffle=False)

input_size = train_dataset.get_len_cvec()
print('input_size: ', input_size)

# set hyper parameters
hidden_size = 10000
num_class = 2
learning_rate = 0.0001
num_epoch = 5

# device configuration
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

model = LogisticRegression(input_size, hidden_size, num_class).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# train model
total_step = len(train_loader)
for epoch in range(num_epoch):
    print('epoch start')
    for i, (data, label) in enumerate(train_loader):
        label = label.squeeze().to(device)
        data = data.to(device)

        outputs = model(data)
        loss = criterion(outputs, label)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
class EncoderDecoder(object):
    def __init__(self, rng, **kwargs):
        self.n_in_src = kwargs.pop('nembed_src')
        self.n_in_trg = kwargs.pop('nembed_trg')
        self.n_hids_src = kwargs.pop('nhids_src')
        self.n_hids_trg = kwargs.pop('nhids_trg')
        self.src_vocab_size = kwargs.pop('src_vocab_size')
        self.trg_vocab_size = kwargs.pop('trg_vocab_size')
        self.method = kwargs.pop('method')
        self.dropout = kwargs.pop('dropout')
        self.maxout_part = kwargs.pop('maxout_part')
        self.path = kwargs.pop('saveto')
        self.clip_c = kwargs.pop('clip_c')

        self.rng = rng
        self.trng = RandomStreams(rng.randint(1e5))

        # added by Zhaopeng Tu, 2016-06-09
        self.with_attention = kwargs.pop('with_attention')

        # added by Zhaopeng Tu, 2016-04-29
        self.with_coverage = kwargs.pop('with_coverage')
        self.coverage_dim = kwargs.pop('coverage_dim')
        self.coverage_type = kwargs.pop('coverage_type')
        self.max_fertility = kwargs.pop('max_fertility')
        if self.coverage_type == 'linguistic':  # compare strings with ==, not `is`
            # make sure the dimension of linguistic coverage is always 1
            self.coverage_dim = 1

        # added by Zhaopeng Tu, 2016-05-30
        self.with_context_gate = kwargs.pop('with_context_gate')

        self.params = []
        self.layers = []

        self.table_src = LookupTable(self.rng, self.src_vocab_size, self.n_in_src,
                                     name='table_src')
        self.layers.append(self.table_src)

        self.encoder = BidirectionalEncoder(self.rng, self.n_in_src, self.n_hids_src,
                                            self.table_src, name='birnn_encoder')
        self.layers.append(self.encoder)

        # added by Longyue
        self.encoder_hist_1 = Encoder(self.rng, self.n_in_src, self.n_hids_src,
                                      self.table_src, name='rnn_encoder_hist_1')
        self.layers.append(self.encoder_hist_1)
        self.encoder_hist_2 = Encoder(self.rng, self.n_hids_src, self.n_hids_src,
                                      self.table_src, name='rnn_encoder_hist_2')
        self.layers.append(self.encoder_hist_2)

        self.table_trg = LookupTable(self.rng, self.trg_vocab_size, self.n_in_trg,
                                     name='table_trg')
        self.layers.append(self.table_trg)

        self.decoder = Decoder(self.rng, self.n_in_trg, self.n_hids_trg,
                               2 * self.n_hids_src, self.n_hids_src,
                               # added by Zhaopeng Tu, 2016-06-09
                               with_attention=self.with_attention,
                               # added by Zhaopeng Tu, 2016-04-29
                               with_coverage=self.with_coverage,
                               coverage_dim=self.coverage_dim,
                               coverage_type=self.coverage_type,
                               max_fertility=self.max_fertility,
                               # added by Zhaopeng Tu, 2016-05-30
                               with_context_gate=self.with_context_gate,
                               maxout_part=self.maxout_part,
                               name='rnn_decoder')
        self.layers.append(self.decoder)

        self.logistic_layer = LogisticRegression(self.rng, self.n_in_trg,
                                                 self.trg_vocab_size)
        self.layers.append(self.logistic_layer)

        # added by Zhaopeng Tu, 2016-07-12
        # for reconstruction
        self.with_reconstruction = kwargs.pop('with_reconstruction')
        if self.with_reconstruction:
            # added by Zhaopeng Tu, 2016-07-27
            self.reconstruction_weight = kwargs.pop('reconstruction_weight')
            # note the source and target sides are reversed
            self.inverse_decoder = InverseDecoder(self.rng, self.n_in_src,
                                                  2 * self.n_hids_src, self.n_hids_trg,
                                                  # added by Zhaopeng Tu, 2016-06-09
                                                  with_attention=self.with_attention,
                                                  maxout_part=self.maxout_part,
                                                  name='rnn_inverse_decoder')
            self.layers.append(self.inverse_decoder)

            self.srng = RandomStreams(rng.randint(1e5))
            self.inverse_logistic_layer = LogisticRegression(self.rng, self.n_in_src,
                                                             self.src_vocab_size,
                                                             name='inverse_LR')
            self.layers.append(self.inverse_logistic_layer)

        for layer in self.layers:
            self.params.extend(layer.params)

    def build_trainer(self, src, src_mask, src_hist, src_hist_mask, trg, trg_mask, ite):
        # added by Longyue
        # checked by Zhaopeng: sentence dim = n_steps, hist_len, batch_size (4, 3, 25)
        # hist = (bath_size, sent_num, sent_len) --.T-->
        # hist = (sent_len, sent_num, bath_size) --lookup table-->
        # (sent_len, sent_num, bath_size, word_emb) --reshape-->
        # (sent_len, sent_num*bath_size, word_emb) --word-level rnn-->
        # (sent_len, sent_num*bath_size, hidden_size) --reshape-->
        # (sent_len, sent_num, bath_size, hidden_size) --[-1]-->
        # (sent_num, bath_size, hidden_size) --sent-level rnn-->
        # (sent_num, bath_size, hidden_size) --[-1]-->
        # (bath_size, hidden_size) = cross-sent context vector
        annotations_1 = self.encoder_hist_1.apply_1(src_hist, src_hist_mask)
        annotations_1 = annotations_1[-1]  # get last hidden states
        annotations_2 = self.encoder_hist_2.apply_2(annotations_1)
        annotations_3 = annotations_2[-1]  # get last hidden states

        # modified by Longyue
        annotations = self.encoder.apply(src, src_mask, annotations_3)
        # init_context = annotations[0, :, -self.n_hids_src:]
        # modification #1
        # mean pooling
        init_context = (annotations * src_mask[:, :, None]).sum(0) / src_mask.sum(0)[:, None]

        # added by Longyue
        init_context = concatenate([init_context, annotations_3],
                                   axis=annotations_3.ndim - 1)

        trg_emb = self.table_trg.apply(trg)
        trg_emb_shifted = T.zeros_like(trg_emb)
        trg_emb_shifted = T.set_subtensor(trg_emb_shifted[1:], trg_emb[:-1])

        # modified by Longyue
        hiddens, readout, alignment = self.decoder.run_pipeline(
            state_below=trg_emb_shifted,
            mask_below=trg_mask,
            init_context=init_context,
            c=annotations,
            c_mask=src_mask,
            hist=annotations_3)

        # apply dropout
        if self.dropout < 1.0:
            logger.info('Apply dropout with p = {}'.format(self.dropout))
            readout = Dropout(self.trng, readout, 1, self.dropout)

        p_y_given_x = self.logistic_layer.get_probs(readout)

        self.cost = self.logistic_layer.cost(p_y_given_x, trg, trg_mask) / trg.shape[1]
        # self.cost = theano.printing.Print('likilihood cost:')(self.cost)

        # added by Zhaopeng Tu, 2016-07-12
        # for reconstruction
        if self.with_reconstruction:
            # now hiddens is the annotations
            inverse_init_context = (hiddens * trg_mask[:, :, None]).sum(0) / trg_mask.sum(0)[:, None]

            src_emb = self.table_src.apply(src)
            src_emb_shifted = T.zeros_like(src_emb)
            src_emb_shifted = T.set_subtensor(src_emb_shifted[1:], src_emb[:-1])
            inverse_hiddens, inverse_readout, inverse_alignment = self.inverse_decoder.run_pipeline(
                state_below=src_emb_shifted,
                mask_below=src_mask,
                init_context=inverse_init_context,
                c=hiddens,
                c_mask=trg_mask)

            # apply dropout
            if self.dropout < 1.0:
                # logger.info('Apply dropout with p = {}'.format(self.dropout))
                inverse_readout = Dropout(self.srng, inverse_readout, 1, self.dropout)

            p_x_given_y = self.inverse_logistic_layer.get_probs(inverse_readout)

            self.reconstruction_cost = self.inverse_logistic_layer.cost(
                p_x_given_y, src, src_mask) / src.shape[1]
            # self.reconstruction_cost = theano.printing.Print('reconstructed cost:')(self.reconstruction_cost)

            self.cost += self.reconstruction_cost * self.reconstruction_weight

        self.L1 = sum(T.sum(abs(param)) for param in self.params)
        self.L2 = sum(T.sum(param ** 2) for param in self.params)

        params_regular = self.L1 * 1e-6 + self.L2 * 1e-6
        # params_regular = theano.printing.Print('params_regular:')(params_regular)

        # train cost
        train_cost = self.cost + params_regular

        # gradients
        grads = T.grad(train_cost, self.params)

        # apply gradient clipping here
        grads = grad_clip(grads, self.clip_c)

        # updates
        updates = adadelta(self.params, grads)

        # train function
        # modified by Longyue
        inps = [src, src_mask, src_hist, src_hist_mask, trg, trg_mask]

        self.train_fn = theano.function(inps, [train_cost], updates=updates,
                                        name='train_function')
        # self.train_fn = theano.function(inps, [train_cost], updates=updates,
        #                                 name='train_function',
        #                                 mode=NanGuardMode(nan_is_error=True,
        #                                                   inf_is_error=True,
        #                                                   big_is_error=True))

    def build_sampler(self):
        # added by Longyue
        x_hist = T.ltensor3()
        x_hist_mask = T.tensor3()
        annotations_1 = self.encoder_hist_1.apply_1(x_hist, x_hist_mask)
        annotations_1 = annotations_1[-1]
        annotations_2 = self.encoder_hist_2.apply_2(annotations_1)
        annotations_3 = annotations_2[-1]

        x = T.lmatrix()

        # Build Networks
        # src_mask is None
        c = self.encoder.apply(x, None, annotations_3)
        # init_context = ctx[0, :, -self.n_hids_src:]
        # mean pooling
        init_context = c.mean(0)

        # added by Longyue
        init_context = concatenate([init_context, annotations_3],
                                   axis=annotations_3.ndim - 1)

        init_state = self.decoder.create_init_state(init_context)

        outs = [init_state, c, annotations_3]
        if not self.with_attention:
            outs.append(init_context)

        # compile function
        print 'Building compile_init_state_and_context function ...'
        self.compile_init_and_context = theano.function([x, x_hist, x_hist_mask], outs,
                                                        name='compile_init_and_context')
        print 'Done'

        y = T.lvector()
        cur_state = T.matrix()
        # if it is the first word, emb should be all zero, and it is indicated by -1
        trg_emb = T.switch(y[:, None] < 0,
                           T.alloc(0., 1, self.n_in_trg),
                           self.table_trg.apply(y))

        # added by Zhaopeng Tu, 2016-06-09
        # for with_attention=False
        if self.with_attention and self.with_coverage:
            cov_before = T.tensor3()
            if self.coverage_type == 'linguistic':
                print 'Building compile_fertility ...'
                fertility = self.decoder._get_fertility(c)
                fertility = T.addbroadcast(fertility, 1)
                self.compile_fertility = theano.function([c], [fertility],
                                                         name='compile_fertility')
                print 'Done'
            else:
                fertility = None
        else:
            cov_before = None
            fertility = None

        # apply one step
        # modified by Zhaopeng Tu, 2016-04-29
        # [next_state, ctxs] = self.decoder.apply(state_below=trg_emb,
        results = self.decoder.apply(state_below=trg_emb,
                                     init_state=cur_state,
                                     # added by Zhaopeng Tu, 2016-06-09
                                     init_context=None if self.with_attention else init_context,
                                     c=c if self.with_attention else None,
                                     hist=annotations_3,  # added by Longyue
                                     one_step=True,
                                     # added by Zhaopeng Tu, 2016-04-27
                                     cov_before=cov_before,
                                     fertility=fertility)
        next_state = results[0]
        if self.with_attention:
            ctxs, alignment = results[1], results[2]
            if self.with_coverage:
                cov = results[3]
        else:
            # if with_attention=False, we always use init_context as the source representation
            ctxs = init_context

        readout = self.decoder.readout(next_state, ctxs, trg_emb)

        # maxout
        if self.maxout_part > 1:
            readout = self.decoder.one_step_maxout(readout)

        # apply dropout
        if self.dropout < 1.0:
            readout = Dropout(self.trng, readout, 0, self.dropout)

        # compute the softmax probability
        next_probs = self.logistic_layer.get_probs(readout)

        # sample from softmax distribution to get the sample
        next_sample = self.trng.multinomial(pvals=next_probs).argmax(1)

        # compile function
        print 'Building compile_next_state_and_probs function ...'
        inps = [y, cur_state]
        if self.with_attention:
            inps.append(c)
        else:
            inps.append(init_context)

        # added by Longyue
        inps.append(annotations_3)

        outs = [next_probs, next_state, next_sample]
        # added by Zhaopeng Tu, 2016-06-09
        if self.with_attention:
            outs.append(alignment)
            # added by Zhaopeng Tu, 2016-04-29
            if self.with_coverage:
                inps.append(cov_before)
                if self.coverage_type == 'linguistic':
                    inps.append(fertility)
                outs.append(cov)

        self.compile_next_state_and_probs = theano.function(
            inps, outs, name='compile_next_state_and_probs')
        print 'Done'

        # added by Zhaopeng Tu, 2016-07-18
        # for reconstruction
        if self.with_reconstruction:
            # Build Networks
            # trg_mask is None
            inverse_c = T.tensor3()
            # mean pooling
            inverse_init_context = inverse_c.mean(0)

            inverse_init_state = self.inverse_decoder.create_init_state(inverse_init_context)

            outs = [inverse_init_state]
            if not self.with_attention:
                outs.append(inverse_init_context)

            # compile function
            print 'Building compile_inverse_init_state_and_context function ...'
            self.compile_inverse_init_and_context = theano.function(
                [inverse_c], outs, name='compile_inverse_init_and_context')
            print 'Done'

            src = T.lvector()
            inverse_cur_state = T.matrix()
            trg_mask = T.matrix()
            # if it is the first word, emb should be all zero, and it is indicated by -1
            src_emb = T.switch(src[:, None] < 0,
                               T.alloc(0., 1, self.n_in_src),
                               self.table_src.apply(src))

            # apply one step
            # modified by Zhaopeng Tu, 2016-04-29
            inverse_results = self.inverse_decoder.apply(
                state_below=src_emb,
                init_state=inverse_cur_state,
                # added by Zhaopeng Tu, 2016-06-09
                init_context=None if self.with_attention else inverse_init_context,
                c=inverse_c if self.with_attention else None,
                c_mask=trg_mask,
                one_step=True)
            inverse_next_state = inverse_results[0]
            if self.with_attention:
                inverse_ctxs, inverse_alignment = inverse_results[1], inverse_results[2]
            else:
                # if with_attention=False, we always use init_context as the source representation
                inverse_ctxs = init_context

            inverse_readout = self.inverse_decoder.readout(inverse_next_state,
                                                           inverse_ctxs, src_emb)

            # maxout
            if self.maxout_part > 1:
                inverse_readout = self.inverse_decoder.one_step_maxout(inverse_readout)

            # apply dropout
            if self.dropout < 1.0:
                inverse_readout = Dropout(self.srng, inverse_readout, 0, self.dropout)

            # compute the softmax probability
            inverse_next_probs = self.inverse_logistic_layer.get_probs(inverse_readout)

            # sample from softmax distribution to get the sample
            inverse_next_sample = self.srng.multinomial(pvals=inverse_next_probs).argmax(1)

            # compile function
            print 'Building compile_inverse_next_state_and_probs function ...'
            inps = [src, trg_mask, inverse_cur_state]
            if self.with_attention:
                inps.append(inverse_c)
            else:
                inps.append(inverse_init_context)
            outs = [inverse_next_probs, inverse_next_state, inverse_next_sample]
            # added by Zhaopeng Tu, 2016-06-09
            if self.with_attention:
                outs.append(inverse_alignment)

            self.compile_inverse_next_state_and_probs = theano.function(
                inps, outs, name='compile_inverse_next_state_and_probs')
            print 'Done'

    def save(self, path=None):
        if path is None:
            path = self.path
        filenpz = open(path, "w")
        val = dict([(value.name, value.get_value())
                    for index, value in enumerate(self.params)])
        logger.info("save the model {}".format(path))
        numpy.savez(path, **val)
        filenpz.close()

    def load(self, path=None):
        if path is None:
            path = self.path
        if os.path.isfile(path):
            logger.info("load params {}".format(path))
            val = numpy.load(path)
            for index, param in enumerate(self.params):
                logger.info('Loading {} with shape {}'.format(
                    param.name, param.get_value(borrow=True).shape))
                if param.name not in val.keys():
                    logger.info('Adding new param {} with shape {}'.format(
                        param.name, param.get_value(borrow=True).shape))
                    continue
                if param.get_value().shape != val[param.name].shape:
                    logger.info("Error: model param != load param shape {} != {}".format(
                        param.get_value().shape, val[param.name].shape))
                    raise Exception("loading params shape mismatch")
                else:
                    param.set_value(val[param.name], borrow=True)
        else:
            logger.error("file {} does not exist".format(path))
            self.save()