Example #1
 def __init__(self):
     bank_full = pd.read_csv('data/bank_full_w_dummy_vars.csv')
     # .ix was removed from pandas; .iloc selects the same columns by position
     X = bank_full.iloc[:, 18:37].values   # feature columns 18-36 (dummy variables)
     y = bank_full.iloc[:, 17].values      # target column 17
     LogReg = LogisticRegression()
     LogReg.fit(X, y)
     self.model = LogReg
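For reference, a minimal self-contained sketch of the same flow with scikit-learn; the CSV path and column positions are carried over from the example above, and the prediction calls are illustrative rather than part of the original class:

import pandas as pd
from sklearn.linear_model import LogisticRegression

bank_full = pd.read_csv('data/bank_full_w_dummy_vars.csv')  # path assumed from the example
X = bank_full.iloc[:, 18:37].values   # dummy-variable feature columns 18-36
y = bank_full.iloc[:, 17].values      # binary target in column 17

model = LogisticRegression(max_iter=1000).fit(X, y)
print(model.predict(X[:5]))    # predicted labels for the first five rows
print(model.score(X, y))       # accuracy on the training data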
Example #2
def test_logreg():
    X_train, Y_train, X_test, Y_test = import_census(CENSUS_FILE_PATH)
    num_features = X_train.shape[1]

    # Add a bias
    X_train_b = np.append(X_train, np.ones((len(X_train), 1)), axis=1)
    X_test_b = np.append(X_test, np.ones((len(X_test), 1)), axis=1)

    ### Logistic Regression ###
    model = LogisticRegression(num_features, NUM_CLASSES, BATCH_SIZE,
                               CONV_THRESHOLD)

    num_epochs = model.train(X_train_b, Y_train)
    acc = model.accuracy(X_test_b, Y_test) * 100
    print("Test Accuracy: {:.1f}%".format(acc))
    print("Number of Epochs: " + str(num_epochs))

    return acc
Example #3
File: kfolds.py Project: mluzu/iia
def lg_k_folds(X_train, y_train, lr, b, epochs, lamda, bias, k=5, verbose=False):
    results = {
        'accuracy': [],
        'recall': [],
        'precision': []
    }
    metric_means = {}
    accuracy = Accuracy()
    recall = Recall()
    precision = Precision()
    chunk_size = int(len(X_train) / k)

    logistic_regression = LogisticRegression(bias)

    for i in range(0, len(X_train), chunk_size):
        end = i + chunk_size if i + chunk_size <= len(X_train) else len(X_train)
        new_X_valid = X_train[i: end]
        new_y_valid = y_train[i: end]
        new_X_train = np.concatenate([X_train[: i], X_train[end:]])
        new_y_train = np.concatenate([y_train[: i], y_train[end:]])
        logistic_regression.fit(new_X_train, new_y_train,  lr, b, epochs, lamda, verbose=verbose)
        predictions = logistic_regression.predict(new_X_valid)

        results['accuracy'].append(accuracy(new_y_valid, predictions))
        results['recall'].append(recall(new_y_valid, predictions))
        results['precision'].append(precision(new_y_valid, predictions))

    metric_means['accuracy'] = np.mean(results['accuracy'])
    metric_means['recall'] = np.mean(results['recall'])
    metric_means['precision'] = np.mean(results['precision'])

    return metric_means
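A hedged usage sketch of lg_k_folds; the arrays and hyperparameter values below are placeholders, not taken from the mluzu/iia project:

import numpy as np

# Hypothetical data; in the project X_train / y_train come from its own loaders.
X_train = np.random.rand(500, 10)
y_train = (np.random.rand(500) > 0.5).astype(int)

means = lg_k_folds(X_train, y_train, lr=0.01, b=16, epochs=100,
                   lamda=0.0, bias=True, k=5, verbose=False)
print(means)  # {'accuracy': ..., 'recall': ..., 'precision': ...}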
Example #4
class LogisticRegressionExperiment(object):
    def __init__(self):
        self._data_set = get_pick_data("LogisticRegression")
        self._num_features = self._data_set.dynamic_features.shape[1]
        self._time_steps = 1
        self._n_output = 1
        self._model_format()
        self._check_path()

    def _model_format(self):
        learning_rate, max_loss, max_pace, ridge, batch_size, hidden_size, epoch, dropout = lr_setup.all
        self._model = LogisticRegression(
            num_features=self._num_features,
            time_steps=self._time_steps,
            n_output=self._n_output,
            batch_size=batch_size,
            epochs=epoch,
            output_n_epoch=ExperimentSetup.output_n_epochs,
            learning_rate=learning_rate,
            max_loss=max_loss,
            dropout=dropout,
            max_pace=max_pace,
            ridge=ridge)

    def _check_path(self):
        if not os.path.exists("result_9_16_0"):
            os.makedirs("result_9_16_0")
        self._filename = "result_9_16_0" + "/" + self._model.name + " " + \
                         time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())

    def do_experiments(self):
        n_output = 1
        dynamic_features = self._data_set.dynamic_features
        labels = self._data_set.labels
        # tol_test_index = np.zeros(shape=0, dtype=np.int32)
        tol_pred = np.zeros(shape=(0, n_output))
        tol_label = np.zeros(shape=(0, n_output), dtype=np.int32)
        train_dynamic_features, test_dynamic_features, train_labels, test_labels = \
            split_logistic_data(dynamic_features, labels)
        for i in range(5):
            train_dynamic_res, train_labels_res = imbalance_preprocess(
                train_dynamic_features[i], train_labels[i],
                'LogisticRegression')
            train_set = DataSet(train_dynamic_res, train_labels_res)
            test_set = DataSet(test_dynamic_features[i].reshape(-1, 92),
                               test_labels[i].reshape(-1, 1))
            self._model.fit(train_set, test_set)
            y_score = self._model.predict(test_set)
            tol_pred = np.vstack((tol_pred, y_score))
            tol_label = np.vstack((tol_label, test_labels[i]))
            print("Cross validation: {} of {}".format(i, 5),
                  time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))

        tol_test_index = np.arange(labels.shape[0] * labels.shape[1])
        evaluate(tol_test_index, tol_label, tol_pred, self._filename)
        self._model.close()
Example #5
File: trainer.py Project: kirk86/theano
    def train_logistic_regression(self,
                                  distribution,
                                  fan_in,
                                  fan_out,
                                  learning_rate=0.013):
        ######################
        # BUILD ACTUAL MODEL #
        ######################
        print('... building the model.')

        X = tt.fmatrix('X')  # data, presented as rasterized images
        y = tt.fmatrix('y')  # labels, presented as 1-hot matrix of labels

        # construct the logistic regression class
        # Each MNIST image has size 28*28
        classifier = LogisticRegression(X, distribution, fan_in, fan_out)

        # the cost we minimize during training
        cost = classifier.neg_log_like(y)

        # compute the gradient of cost with respect to theta = (W,b)
        grads = [tt.grad(cost=cost, wrt=param) for param in classifier.params]

        # specify how to update the parameters of the model as a list of
        # (variable, update expression) pairs.
        updates = [(param, param - learning_rate * gparam)
                   for param, gparam in zip(classifier.params, grads)]

        # compile a Theano function `train` that returns the cost and, at the
        # same time, updates the parameters according to the rules defined in
        # `updates`
        train = theano.function(inputs=[X, y],
                                outputs=cost,
                                updates=updates,
                                allow_input_downcast=True)

        # function that computes the mistakes that are made by
        # the model on a minibatch
        test = theano.function(inputs=[X, y],
                               outputs=classifier.errors(y),
                               allow_input_downcast=True)

        validate = theano.function(inputs=[X, y],
                                   outputs=classifier.errors(y),
                                   allow_input_downcast=True)

        predict = theano.function(inputs=[X],
                                  outputs=classifier.y_pred,
                                  allow_input_downcast=True)

        self.early_stopping(classifier, train, test, validate, predict,
                            learning_rate)
Example #6
 def _model_format(self):
     learning_rate, max_loss, max_pace, ridge, batch_size, hidden_size, epoch, dropout = lr_setup.all
     self._model = LogisticRegression(
         num_features=self._num_features,
         time_steps=self._time_steps,
         n_output=self._n_output,
         batch_size=batch_size,
         epochs=epoch,
         output_n_epoch=ExperimentSetup.output_n_epochs,
         learning_rate=learning_rate,
         max_loss=max_loss,
         dropout=dropout,
         max_pace=max_pace,
         ridge=ridge)
Example #7
File: main.py Project: VkaZas/CS1420
def main():
    Dataset = namedtuple('Dataset', ['inputs', 'labels'])

    # Reading in data. You do not need to touch this.
    with open("data/train-images-idx3-ubyte.gz",
              'rb') as f1, open("data/train-labels-idx1-ubyte.gz", 'rb') as f2:
        buf1 = gzip.GzipFile(fileobj=f1).read(16 + 60000 * 28 * 28)
        buf2 = gzip.GzipFile(fileobj=f2).read(8 + 60000)
        inputs = np.frombuffer(buf1, dtype='uint8',
                               offset=16).reshape(60000, 28 * 28)
        inputs = np.where(inputs > 99, 1, 0)
        labels = np.frombuffer(buf2, dtype='uint8', offset=8)
        data_train = Dataset(inputs, labels)
    with open("data/t10k-images-idx3-ubyte.gz",
              'rb') as f1, open("data/t10k-labels-idx1-ubyte.gz", 'rb') as f2:
        buf1 = gzip.GzipFile(fileobj=f1).read(16 + 10000 * 28 * 28)
        buf2 = gzip.GzipFile(fileobj=f2).read(8 + 10000)
        inputs = np.frombuffer(buf1, dtype='uint8',
                               offset=16).reshape(10000, 28 * 28)
        inputs = np.where(inputs > 99, 1, 0)
        labels = np.frombuffer(buf2, dtype='uint8', offset=8)
        data_test = Dataset(inputs, labels)

    # run naive bayes
    model = NaiveBayes(10)
    model.train(data_train)
    print("{:.1f}%".format(model.accuracy(data_test) * 100))

    # run logistic regression
    model = LogisticRegression(784, 10)
    model.train(data_train)
    print("{:.1f}%".format(model.accuracy(data_test) * 100))
def test_LogisticRegression(dim):
    model_name = "LogisticRegression"

    x, y = make_classification(n_samples=1000, n_features=dim)

    model = LogisticRegression(dim)
    check_model(model, model_name, x, y, category="binary_classification")
Example #9
def train(args):
    """
    This function trains the models
    :param args: the command line arguments defining the desired actions
    """

    # load data
    train_data_all, dev_data_all, _ = load(args.data_dir,
                                           cachedir=args.cachedir,
                                           override_cache=args.override_cache,
                                           text_only=(args.model.lower()
                                                      in ["bi-lstm", "bert"]),
                                           include_tfidf=args.include_tfidf,
                                           balanced=args.balanced)
    train_data, train_labels = train_data_all.X, train_data_all.y
    dev_data, dev_labels = dev_data_all.X, dev_data_all.y

    # Build model
    apx = get_appendix(args.include_tfidf, args.balanced)
    if args.model.lower() == "simple-ff":
        model = FeedForward(args.ff_hunits, train_data.shape[1])
        train_pytorch(args,
                      model,
                      train_data,
                      train_labels,
                      dev_data,
                      dev_labels,
                      save_model_path=f"models/simple-ff{apx}.torch")
    elif args.model.lower() == "bi-lstm":
        model = BiLSTM(epochs=args.num_epochs,
                       batch_size=args.batch_size,
                       max_seq_len=args.max_seq_len)
        model.train(train_data, train_labels, dev_data, dev_labels)
    elif args.model.lower() == "logreg":
        model = LogisticRegression()
        model.train(train_data,
                    train_labels,
                    dev_data,
                    dev_labels,
                    save_model_path=f"models/logreg{apx}.pkl")
    elif args.model.lower() == "majority-vote":
        model = MajorityVote()
        model.train(train_labels, dev_labels)
    elif args.model.lower() == "bert":
        model = Bert(epochs=args.num_epochs,
                     batch_size=args.batch_size,
                     max_seq_len=args.max_seq_len,
                     learning_rate=args.learning_rate)
        model.train(train_data,
                    train_labels,
                    dev_data,
                    dev_labels,
                    save_model_path=f"models/bert.pkl")
    elif args.model.lower() == "svm":
        model = SVM()
        model.train(train_data,
                    train_labels,
                    save_model_path=f"models/svm{apx}.sav")
    else:
        raise Exception("Unknown model type passed in!")
Example #10
def classifier_log_reg(features, config, train_mode, **kwargs):
    if train_mode:
        features_train, features_valid = features

        if config.random_search.log_reg.n_runs:
            transformer = RandomSearchOptimizer(
                LogisticRegression,
                config.log_reg,
                train_input_keys=[],
                valid_input_keys=['X_valid', 'y_valid'],
                score_func=roc_auc_score,
                maximize=True,
                n_runs=config.random_search.log_reg.n_runs,
                callbacks=[
                    NeptuneMonitor(**config.random_search.log_reg.callbacks.
                                   neptune_monitor),
                    SaveResults(
                        **config.random_search.log_reg.callbacks.save_results),
                ])
        else:
            transformer = LogisticRegression(**config.log_reg)

        log_reg = Step(name='log_reg',
                       transformer=transformer,
                       input_data=['input'],
                       input_steps=[features_train, features_valid],
                       adapter={
                           'X': ([(features_train.name, 'X')]),
                           'y': ([('input', 'y')], to_numpy_label_inputs),
                           'X_valid': ([(features_valid.name, 'X')]),
                           'y_valid':
                           ([('input', 'y_valid')], to_numpy_label_inputs),
                       },
                       cache_dirpath=config.env.cache_dirpath,
                       **kwargs)
    else:

        log_reg = Step(name='log_reg',
                       transformer=LogisticRegression(**config.log_reg),
                       input_steps=[features],
                       adapter={
                           'X': ([(features.name, 'features')]),
                       },
                       cache_dirpath=config.env.cache_dirpath,
                       **kwargs)
    return log_reg
Example #11
def test_logreg():
    X_train, Y_train, X_test, Y_test = import_census(CENSUS_FILE_PATH)
    num_features = X_train.shape[1]

    # Add a bias
    X_train_b = np.append(X_train, np.ones((len(X_train), 1)), axis=1)
    X_test_b = np.append(X_test, np.ones((len(X_test), 1)), axis=1)

    ### Logistic Regression ###
    model = LogisticRegression(num_features, NUM_CLASSES, BATCH_SIZE,
                               CONV_THRESHOLD)
    num_epochs = model.train(X_train_b, Y_train)
    acc = model.accuracy(X_test_b, Y_test) * 100
    print("Test Accuracy: {:.1f}%".format(acc))
    print("Number of Epochs: " + str(num_epochs))

    return acc
Example #12
def test_logreg():

    X_train, Y_train, X_test, Y_test = import_mnist(MNIST_TRAIN_INPUTS_PATH,
                                                    MNIST_TRAIN_LABELS_PATH,
                                                    MNIST_TEST_INPUTS_PATH,
                                                    MNIST_TEST_LABELS_PATH)
    num_features = X_train.shape[1]

    # Add a bias
    X_train_b = np.append(X_train, np.ones((len(X_train), 1)), axis=1)
    X_test_b = np.append(X_test, np.ones((len(X_test), 1)), axis=1)

    ### Logistic Regression ###
    print('--------- LOGISTIC REGRESSION w/ SGD ---------')
    model = LogisticRegression(num_features, MNIST_CLASSES)
    model.train(X_train_b, Y_train)
    print("Test Accuracy: {:.1f}%".format(
        model.accuracy(X_test_b, Y_test) * 100))
Example #13
 def _model_format(self):
     if self._event_type == "qx":
         learning_rate, max_loss, max_pace, lasso, ridge = lr_qx_setup.all
     elif self._event_type == "cx":
         learning_rate, max_loss, max_pace, lasso, ridge = lr_cx_setup.all
     else:
         learning_rate, max_loss, max_pace, lasso, ridge = lr_xycj_setup.all
     self._model = LogisticRegression(
         num_features=self._num_features,
         time_steps=self._time_steps,
         n_output=self._n_output,
         batch_size=ExperimentSetup.batch_size,
         epochs=ExperimentSetup.epochs,
         output_n_epoch=ExperimentSetup.output_n_epochs,
         learning_rate=learning_rate,
         max_loss=max_loss,
         max_pace=max_pace,
         lasso=lasso,
         ridge=ridge)
Example #14
def run_with_model(dataset, args):
    """
    Running a particular choice of model, saves to /results folder.
    """
    cut_data, all_data = cut_dataset(dataset, args.cens_time)

    if args.model == "xlearner":
        model = RFXLearner()
        model.train(cut_data["X"], cut_data["w"], cut_data["y"],
                    cut_data["ipcw"])
        pred_rr = model.predict(all_data["X"], all_data["w"], False, True)

    elif args.model == "cox":
        model = CoxAIC()
        model.train(all_data["X"], all_data["w"], all_data["y"], all_data["t"])
        pred_rr = model.predict(args.cens_time, all_data["X"])

    elif args.model == "survrf":
        model = SurvRF()
        model.train(all_data["X"], all_data["w"], all_data["y"], all_data["t"])
        pred_rr = model.predict(args.cens_time)

    elif args.model == "causalforest":
        model = CausalForest()
        model.train(cut_data["X"], cut_data["w"], cut_data["y"])
        pred_rr = np.r_[model.predict(),
                        model.predict(all_data["X"][all_data["cens"] == 1])]

    elif args.model == "logreg":
        model = LogisticRegression()
        model.train(cut_data["X"], cut_data["w"], cut_data["y"],
                    cut_data["ipcw"])
        pred_rr = model.predict(all_data["X"])

    elif args.model == "linearxlearner":
        model = LinearXLearner()
        model.train(cut_data["X"], cut_data["w"], cut_data["y"],
                    cut_data["ipcw"])
        pred_rr = model.predict(all_data["X"], all_data["w"], False)

    else:
        raise ValueError("Not a supported model.")

    return {
        "pred_rr": pred_rr,
        "X": all_data["X"],
        "w": all_data["w"],
        "y": all_data["y"],
        "t": all_data["t"],
        "y_cut": all_data["y_cut"],
        "cens": all_data["cens"],
    }
Example #15
def test(args):
    """
    This function tests our models
    :param args: the command line arguments with the desired actions
    """
    _, _, test_data_all = load(args.data_dir,
                               cachedir=args.cachedir,
                               override_cache=args.override_cache,
                               text_only=(args.model.lower()
                                          in ["bi-lstm", "bert"]),
                               include_tfidf=args.include_tfidf,
                               balanced=args.balanced)
    test_data, test_labels = test_data_all.X, test_data_all.y

    apx = get_appendix(args.include_tfidf, args.balanced)
    if args.model.lower() == "simple-ff":
        preds = test_pytorch(
            test_data,
            test_labels,
            load_model_path=f"models/simple-ff{apx}.torch",
            predictions_file=f"preds/simple-ff-preds{apx}.txt")
    elif args.model.lower() == "bi-lstm":
        model = BiLSTM(load_model_path="models/bilstm.keras",
                       tokenizer_path='models/bilstm-tokenizer.json')
        preds = model.test(test_data, y_test=test_labels)
    elif args.model.lower() == "logreg":
        model = LogisticRegression(load_model_path=f"models/logreg{apx}.pkl")
        preds = model.test(
            test_data,
            test_labels,
            save_predictions_path=f"preds/logreg-preds{apx}.txt")
    elif args.model.lower() == "majority-vote":
        model = MajorityVote(load_model_path="models/majority-class.txt")
        preds = model.test(test_labels)
    elif args.model.lower() == "bert":
        model = Bert(load_model_path="models/bert.pkl")
        preds = model.test(test_data,
                           test_labels,
                           save_predictions_path="preds/bert-preds.txt")
    elif args.model.lower() == "svm":
        model = SVM(load_model_path=f"models/svm{apx}.sav")
        preds = model.test(test_data,
                           save_predictions_path=f"preds/svm-preds{apx}.txt")
    else:
        raise Exception("Unknown model type passed in!")

    metrics = classification_report(test_labels, preds, output_dict=True)
    pprint(metrics)
    with open(f"scores/{args.model.lower()}{apx}.json", "w") as fout:
        json.dump(metrics, fout, indent=4)
Example #16
def run_for_optimism(original_dataset, bootstrap_dataset, args):
    """
    Calculates difference between performance on a bootstrapped dataset (upon
    which the model is trained) and the original dataset. Optimism is defined
    as the mean difference over many bootstrap datasets.
    """
    cut_data, all_data = cut_dataset(bootstrap_dataset, args.cens_time)
    cut_data_orig, all_data_orig = cut_dataset(original_dataset,
                                               args.cens_time)

    if args.model == "cox":
        model = CoxAIC()
        model.train(all_data["X"], all_data["w"], all_data["y"], all_data["t"])
        pred_rr = model.predict(args.cens_time, all_data["X"])
        pred_rr_orig = model.predict(args.cens_time, all_data_orig["X"])

    elif args.model == "logreg":
        model = LogisticRegression()
        model.train(cut_data["X"], cut_data["w"], cut_data["y"],
                    cut_data["ipcw"])
        pred_rr = model.predict(all_data["X"])
        pred_rr_orig = model.predict(all_data_orig["X"])

    elif args.model == "linearxlearner":
        model = LinearXLearner()
        model.train(cut_data["X"], cut_data["w"], cut_data["y"],
                    cut_data["ipcw"])
        pred_rr = model.predict(all_data["X"], all_data["w"], False)
        pred_rr_orig = model.predict(all_data_orig["X"], all_data_orig["w"],
                                     False)

    else:
        raise ValueError("Not a supported model.")

    c_stat_bootstrap = c_statistic(pred_rr[all_data["cens"] == 0],
                                   cut_data["y"], cut_data["w"])
    c_stat_original = c_statistic(pred_rr_orig[all_data_orig["cens"] == 0],
                                  cut_data_orig["y"], cut_data_orig["w"])

    rmst_bootstrap = decision_value_rmst(pred_rr, all_data["y"], all_data["w"],
                                         all_data["t"], args.cens_time)
    rmst_original = decision_value_rmst(pred_rr_orig, all_data_orig["y"],
                                        all_data_orig["w"], all_data_orig["t"],
                                        args.cens_time)

    return {
        "c_stat_diff": c_stat_bootstrap - c_stat_original,
        "decision_value_rmst_diff": rmst_bootstrap - rmst_original
    }
Example #17
def run_for_optimism(original_dataset, bootstrap_dataset, args):
    """
    Calculate optimism for a particular bootstrap run.
    """
    cut_data, all_data = cut_dataset_at_cens_time(bootstrap_dataset,
                                                  args.cens_time)
    cut_data_orig, all_data_orig = cut_dataset_at_cens_time(
        original_dataset, args.cens_time)

    if args.model == "cox":
        model = CoxAIC()
        model.train(all_data["X"], all_data["w"], all_data["y"], all_data["t"])
        pred_rr = model.predict(args.cens_time, all_data["X"])
        pred_rr_orig = model.predict(args.cens_time, all_data_orig["X"])

    elif args.model == "logreg":
        model = LogisticRegression()
        model.train(cut_data["X"], cut_data["w"], cut_data["y"],
                    cut_data["ipcw"])
        pred_rr = model.predict(all_data["X"])
        pred_rr_orig = model.predict(all_data_orig["X"])

    elif args.model == "linearxlearner":
        model = LinearXLearner()
        model.train(cut_data["X"], cut_data["w"], cut_data["y"],
                    cut_data["ipcw"])
        pred_rr = model.predict(all_data["X"], all_data["w"], False)
        pred_rr_orig = model.predict(all_data_orig["X"], all_data_orig["w"],
                                     False)

    else:
        raise ValueError("Not a supported model.")

    c_stat_bootstrap = c_statistic(pred_rr[all_data["cens"] == 0],
                                   cut_data["y"], cut_data["w"])
    c_stat_original = c_statistic(pred_rr_orig[all_data_orig["cens"] == 0],
                                  cut_data_orig["y"], cut_data_orig["w"])

    rmst_bootstrap = decision_value_rmst(pred_rr, all_data["y"], all_data["w"],
                                         all_data["t"], args.cens_time)
    rmst_original = decision_value_rmst(pred_rr_orig, all_data_orig["y"],
                                        all_data_orig["w"], all_data_orig["t"],
                                        args.cens_time)

    return {
        "c_stat_diff": c_stat_bootstrap - c_stat_original,
        "decision_value_rmst_diff": rmst_bootstrap - rmst_original
    }
Example #18
def load_model(json_file):
    model_data = utils.load_json_data(json_file)
    if model_data['model_type'] == 'logistic_regression':
        model = LogisticRegression(model_data)
    elif model_data['model_type'] == 'decision_tree':
        model = DecisionTree(model_data)
    elif model_data['model_type'] == 'scoring':
        model = ScoringModel(model_data)
    elif model_data['model_type'] == 'nomogram':
        model = NomogramModel(model_data)
    elif model_data['model_type'] == 'NOCOS':
        model = NOCOS(model_data)
    else:
        raise Exception('model type [{0}] not recognised'.format(model_data['model_type']))
    logging.info('{0} loaded as a {1} model'.format(json_file, model.model_type))
    return model
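A hedged usage sketch of load_model; the JSON path is a placeholder, and the only field the loader is known to require is 'model_type':

# Hypothetical call; 'models/my_model.json' stands in for a real model file.
model = load_model('models/my_model.json')
# With model_type == 'logistic_regression' this returns a LogisticRegression
# instance built from the remaining fields of the JSON document.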
Example #19
    def train_lr(self, cid):
        params = {
            "offline_model_dir": PROJECT_ROOT+"/ltr/weights/lr",
        }
        params.update(self.params_common)

        X_train, X_valid = self.load_data_by_id("train", cid), self.load_data_by_id("vali", cid)

        model = LogisticRegression("ranking", params, self.logger)
        model.fit(X_train, validation_data=X_valid)
        model.save_session()
Example #20
 def _initialize_models(self, data_generator):
     """Initializes models prior to training."""
     models = {
         "Linear Regression":
         LinearRegression(),
         "Logistic Regression":
         LogisticRegression(),
         "Quadratic Regression":
         QuadraticRegression(),
         "Naive Bayes'":
         NaiveBayes(std_X=data_generator.std_X,
                    m0=data_generator.m0s,
                    m1=data_generator.m1s),
         "kNN CV":
         kNNCV(n_folds=self.n_folds)
     }
     return models
Example #21
def confusion_matrix_plot(outputdir, X_train, Y_train, cv=5):
    f, ax = plt.subplots(2, 3, figsize=(12, 10))
    y_pred = cross_val_predict(svm.SVC(kernel='linear'),
                               X_train,
                               Y_train,
                               cv=cv)
    sns.heatmap(confusion_matrix(Y_train, y_pred),
                ax=ax[0, 0],
                annot=True,
                fmt='2.0f')
    ax[0, 0].set_title('Matrix for Linear-SVM')
    y_pred = cross_val_predict(rfgd.best_estimator_, X_train, Y_train, cv=cv)
    sns.heatmap(confusion_matrix(Y_train, y_pred),
                ax=ax[0, 1],
                annot=True,
                fmt='2.0f')
    ax[0, 1].set_title('Matrix for Random-Forests')
    y_pred = cross_val_predict(LogisticRegression(), X_train, Y_train, cv=cv)
    sns.heatmap(confusion_matrix(Y_train, y_pred),
                ax=ax[0, 2],
                annot=True,
                fmt='2.0f')
    ax[0, 2].set_title('Matrix for Logistic Regression')
    y_pred = cross_val_predict(gd.best_estimator_, X_train, Y_train, cv=cv)
    sns.heatmap(confusion_matrix(Y_train, y_pred),
                ax=ax[1, 0],
                annot=True,
                fmt='2.0f')
    ax[1, 0].set_title('Matrix for Ada Boosting')
    y_pred = cross_val_predict(ensemble_lin_rbf, X_train, Y_train, cv=cv)
    sns.heatmap(confusion_matrix(Y_train, y_pred),
                ax=ax[1, 1],
                annot=True,
                fmt='2.0f')
    ax[1, 1].set_title('Matrix for Ensemble-classifier')
    plt.subplots_adjust(hspace=0.2, wspace=0.2)
    f.savefig(outputdir + '/confusion_plot.png')  # save the figure to file
    plt.close(f)
    print('confusion matrix plot is saved to {}'.format(outputdir +
                                                        '/confusion_plot.png'))
Example #22
def main(grid):
	# Get Clean Data
	X, Y = read_clean_data()
	# Linear Regression
	try:
		LinearRegression(X, Y, grid)
	except Exception as e:
		print(e)
	# Binarize Y
	Y_binary = BinaryY(Y)
	# Logistic Regression
	try:
		LogisticRegression(X, Y_binary, grid)
	except Exception as e:
		print(e)
	# Decision Tree
	try:
		DecisionTree(X, Y_binary, grid)
	except Exception as e:
		print(e)
	# Support Vector Machine
	try:
		SVM(X, Y_binary, grid)
	except Exception as e:
		print(e)
	# Random Forest
	try:
		RandomForest(X, Y_binary, grid)
	except Exception as e:
		print(e)
	# Bagging Classifier
	try:
		Bagging(X, Y_binary, grid)
	except Exception as e:
		print(e)
	# Neural Network
	try:
		NeuralNet(X, Y_binary, grid)
	except Exception as e:
		print(e)
Example #23
def get_model(model_type: str, model_config: Dict, w2v: torch.Tensor, vocab_list: List, model_name: str) -> nn.Module:
    # Instantiate model and configuration
    train_config = {
                    "num_epochs": 30,
                    "lr_rate": 2e-5,
                    "log_step": 100,
                    "l2norm": False,
                    "l2factor": 3.,
                    "lambda": 0.01,
                   }

    if model_type == "nb":
        model = NaiveBayes(model_config)
    elif model_type == "lr":
        model = LogisticRegression(model_config)
        train_config["lr_rate"] = 2e-3
    elif model_type == "ff":
        model = feedforwardNN(model_config, w2v)
        train_config["num_epochs"] = 50
        train_config["lr_rate"] = 2e-4
    elif model_type ==  "cnn":
        model = convolutionalNN(model_config, w2v)
        train_config["num_epochs"] = 30
        train_config["lr_rate"] = 2e-4
        train_config["l2norm"] = False
    elif model_type ==  "bertff":
        model = BERTfeedforward(model_config, vocab_list)
        train_config["num_epochs"] = 30
        train_config["lr_rate"] = 1e-5
    else:
        raise ValueError("Model type is not supported.")

    # Load model
    if model_name != "":
        model = torch.load("./models/"+model_name)

    return model, train_config
Example #24
)

dataset = getattr(
    __import__('datasets'), args.dataset.capitalize() + 'Dataset'
)
train_dataset = dataset('train', args)
val_dataset = dataset('validation', args)
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=args.batch_size, shuffle=True
)
val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=args.batch_size, shuffle=False
)

autoencoder = Autoencoder(args.encoder_layers, args.decoder_layers)
classifier = LogisticRegression(args.encoder_layers[-1])

oracle = DL2_Oracle(
    learning_rate=args.dl2_lr, net=autoencoder,
    use_cuda=torch.cuda.is_available(),
    constraint=ConstraintBuilder.build(
        autoencoder, train_dataset, args.constraint
    )
)

binary_cross_entropy = nn.BCEWithLogitsLoss(
    pos_weight=train_dataset.pos_weight('train') if args.balanced else None
)
optimizer = torch.optim.Adam(
    list(autoencoder.parameters()) + list(classifier.parameters()),
    lr=args.learning_rate, weight_decay=args.weight_decay
Example #25
def logistic_test():
    n_samples = 100
    np.random.seed(0)
    X_train = np.random.normal(size=n_samples)
    y_train = (X_train > 0).astype(float)
    X_train[X_train > 0] *= 4
    X_train += 0.3 * np.random.normal(size=n_samples)

    X_train = X_train[:, np.newaxis]

    X, y = make_classification(
        n_features=1,
        n_classes=2,
        n_redundant=0,
        n_informative=1,
        n_clusters_per_class=1,
        class_sep=0.75,
        shuffle=True,
        random_state=0,
    )
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=0)

    df_test = pd.DataFrame(data=[X_test.flatten(), y_test]).T
    df_test.columns = ["X", "y"]

    lr = LogisticRegression()
    lr.fit(X_train, y_train)
    y_pred = lr.predict(X_test)

    score = [1 if yi == yi_pred else 0 for yi, yi_pred in zip(y_test, y_pred)]
    print(np.sum(score) / len(score))

    # and plot the result
    plt.figure(1, figsize=(4, 3))
    plt.clf()
    plt.scatter(X_train.ravel(), y_train, color="black", zorder=20)

    df_test["loss"] = expit(X_test * lr.theta + lr.bias).ravel()
    df_test = df_test.sort_values("X")
    plt.plot(df_test["X"], df_test["loss"], color="red", linewidth=3)

    ols = LinearRegression()
    ols.fit(X_train, y_train)
    plt.plot(X_test, ols.theta * X_test + ols.bias, linewidth=1)
    plt.axhline(0.5, color=".5")

    plt.ylabel("y")
    plt.xlabel("X")
    plt.xticks(range(-5, 10))
    plt.yticks([0, 0.5, 1])
    plt.ylim(-0.25, 1.25)
    plt.xlim(-2, 2)
    plt.legend(
        ("Logistic Regression Model", "Linear Regression Model"),
        loc="lower right",
        fontsize="small",
    )
    plt.tight_layout()
    plt.show()
Example #26
def _convolutional_mlp(new_digit, nkerns=[20, 50], batch_size=1):
    rng = numpy.random.RandomState(23455)

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized images of shape (batch_size, 28 * 28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (28, 28) is the size of MNIST images.
    #layer0_input = x.reshape((batch_size, 1, 28, 28))
    layer0_input = new_digit.reshape((1, 1, 28, 28))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 28, 28),
        filter_shape=(nkerns[0], 1, 5, 5),
        poolsize=(2, 2)
    )

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 12, 12),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 4 * 4,
        n_out=500,
        activation=T.tanh
    )

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    predict = theano.function(inputs=[], outputs=layer3.y_pred)

    print 'Loading the model ...'
    f = file('trained_models/models/convolutional_mlp.mnist.trained.pickle', 'rb')
    # model_data = {layer0: {W: ..., b: ...}, layer1: ..., layer2: ..., layer3: ...}
    model_data = cPickle.load(f)
    f.close()
    print 'Loaded the model.'
    layers = [layer0, layer1, layer2, layer3]
    for i in range(4):
        layers[i].W.set_value(model_data['layer%s' % i]['W'])
        layers[i].b.set_value(model_data['layer%s' % i]['b'])
    print 'Restored model parameters.'
    print 'Predicting'
    return predict()[0]
Example #27
    def __init__(self, **kwargs):
        self.n_in_src = kwargs.pop('nembed_src')
        self.n_in_trg = kwargs.pop('nembed_trg')
        self.n_hids_src = kwargs.pop('nhids_src')
        self.n_hids_trg = kwargs.pop('nhids_trg')
        self.src_vocab_size = kwargs.pop('src_vocab_size')
        self.trg_vocab_size = kwargs.pop('trg_vocab_size')
        self.method = kwargs.pop('method')
        self.dropout = kwargs.pop('dropout')
        self.maxout_part = kwargs.pop('maxout_part')
        self.path = kwargs.pop('saveto')
        self.clip_c = kwargs.pop('clip_c')
        self.mkl = kwargs.pop('mkl')
        self.with_attention = kwargs.pop('with_attention')

        self.with_coverage = kwargs.pop('with_coverage')
        self.coverage_dim = kwargs.pop('coverage_dim')
        self.coverage_type = kwargs.pop('coverage_type')
        self.max_fertility = kwargs.pop('max_fertility')
        if self.coverage_type == 'linguistic':
            # make sure the dimension of linguistic coverage is always 1
            self.coverage_dim = 1

        self.with_context_gate = kwargs.pop('with_context_gate')

        self.params = []
        self.layers = []

        self.table_src = LookupTable(self.src_vocab_size, self.n_in_src, name='table_src')
        self.layers.append(self.table_src)

        self.encoder = BidirectionalEncoder(self.n_in_src, self.n_hids_src, self.table_src, self.mkl, name='birnn_encoder')
        self.layers.append(self.encoder)

        self.table_trg = LookupTable(self.trg_vocab_size, self.n_in_trg, name='table_trg')
        self.layers.append(self.table_trg)

        self.decoder = Decoder(self.mkl,
                               self.n_in_trg,
                               self.n_hids_trg,
                               2 * self.n_hids_src,
                               with_attention=self.with_attention,
                               with_coverage=self.with_coverage,
                               coverage_dim=self.coverage_dim,
                               coverage_type=self.coverage_type,
                               max_fertility=self.max_fertility,
                               with_context_gate=self.with_context_gate,
                               maxout_part=self.maxout_part,
                               name='rnn_decoder')

        self.layers.append(self.decoder)
        self.logistic_layer = LogisticRegression(self.n_in_trg, self.trg_vocab_size)
        self.layers.append(self.logistic_layer)

        # for reconstruction
        self.with_reconstruction = kwargs.pop('with_reconstruction')

        self.reconstruction_weight = kwargs.pop('reconstruction_weight')

        if self.with_reconstruction:
            # note the source and target sides are reversed
            self.inverse_decoder = InverseDecoder(self.n_in_src, 2 * self.n_hids_src, self.n_hids_trg,
                                                  with_attention=self.with_attention,
                                                  maxout_part=self.maxout_part, name='rnn_inverse_decoder')

            self.layers.append(self.inverse_decoder)

            self.inverse_logistic_layer = LogisticRegression(self.n_in_src, self.src_vocab_size, name='inverse_LR')
            self.layers.append(self.inverse_logistic_layer)

        for layer in self.layers:
            self.params.extend(layer.params)
Example #28
                                      transform=transform)
        batch_numbers = len(train_set) // BATCH_SIZE + 1

        train_loader = torch.utils.data.DataLoader(train_set, **kwargs)
        valid_loader = torch.utils.data.DataLoader(val_set, **kwargs)
        test_loader = torch.utils.data.DataLoader(test_dataset, **kwargs)

        " MODEL SETTINGS "

        if model_name == 'MLP':
            model = MLP(dropout=True).to(device)
        elif model_name == 'ResNet':
            # model = torch.hub.load('pytorch/vision:v0.6.0', 'resnet18', pretrained=True).to(device)
            model = MnistResNet().to(device)
        elif model_name == 'logistic':
            model = LogisticRegression().to(device)

        if include_temperature_scaling:
            model = ModelWithTemperature(model, valid_loader)

        optimizer = torch.optim.Adam(model.parameters(), lr=lr)  # 1e-3

        # resnet18 = models.resnet18()

        loss_fn = nn.CrossEntropyLoss().to(device)
        # nll_criterion = nn.CrossEntropyLoss().cuda()

        mean_train_losses = []
        mean_test_losses = []

        valid_acc_list = []
Example #29
                         batch_size=batch_size,
                         shuffle=False)

input_size = train_dataset.get_len_cvec()
print('input_size: ', input_size)

# set hyperparameters
hidden_size = 10000
num_class = 2

learning_rate = 0.0001
num_epoch = 5
# device configuration
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

model = LogisticRegression(input_size, hidden_size, num_class).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# train model
total_step = len(train_loader)
for epoch in range(num_epoch):
    print('epoch start')
    for i, (data, label) in enumerate(train_loader):
        label = label.squeeze().to(device)
        data = data.to(device)
        outputs = model(data)
        loss = criterion(outputs, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
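A hedged sketch of the matching evaluation pass; it assumes a test_loader built the same way as train_loader above and is not part of the original snippet:

# evaluate model (test_loader is an assumed counterpart of train_loader)
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for data, label in test_loader:
        data, label = data.to(device), label.squeeze().to(device)
        outputs = model(data)
        predicted = outputs.argmax(dim=1)       # index of the highest logit
        total += label.size(0)
        correct += (predicted == label).sum().item()
print('test accuracy: {:.2f}%'.format(100 * correct / total))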
Example #30
    def __init__(self, rng, **kwargs):
        self.n_in_src = kwargs.pop('nembed_src')
        self.n_in_trg = kwargs.pop('nembed_trg')
        self.n_hids_src = kwargs.pop('nhids_src')
        self.n_hids_trg = kwargs.pop('nhids_trg')
        self.src_vocab_size = kwargs.pop('src_vocab_size')
        self.trg_vocab_size = kwargs.pop('trg_vocab_size')
        self.method = kwargs.pop('method')
        self.dropout = kwargs.pop('dropout')
        self.maxout_part = kwargs.pop('maxout_part')
        self.path = kwargs.pop('saveto')
        self.clip_c = kwargs.pop('clip_c')
        self.rng = rng
        self.trng = RandomStreams(rng.randint(1e5))

        # added by  Zhaopeng  Tu, 2016-06-09
        self.with_attention = kwargs.pop('with_attention')

        # added by Zhaopeng Tu, 2016-04-29
        self.with_coverage = kwargs.pop('with_coverage')
        self.coverage_dim = kwargs.pop('coverage_dim')
        self.coverage_type = kwargs.pop('coverage_type')
        self.max_fertility = kwargs.pop('max_fertility')
        if self.coverage_type == 'linguistic':
            # make sure the dimension of linguistic coverage is always 1
            self.coverage_dim = 1

        # added by Zhaopeng Tu, 2016-05-30
        self.with_context_gate = kwargs.pop('with_context_gate')

        self.params = []
        self.layers = []

        self.table_src = LookupTable(self.rng,
                                     self.src_vocab_size,
                                     self.n_in_src,
                                     name='table_src')
        self.layers.append(self.table_src)

        self.encoder = BidirectionalEncoder(self.rng,
                                            self.n_in_src,
                                            self.n_hids_src,
                                            self.table_src,
                                            name='birnn_encoder')
        self.layers.append(self.encoder)

        # added by Longyue
        self.encoder_hist_1 = Encoder(self.rng,
                                      self.n_in_src,
                                      self.n_hids_src,
                                      self.table_src,
                                      name='rnn_encoder_hist_1')
        self.layers.append(self.encoder_hist_1)
        self.encoder_hist_2 = Encoder(self.rng,
                                      self.n_hids_src,
                                      self.n_hids_src,
                                      self.table_src,
                                      name='rnn_encoder_hist_2')
        self.layers.append(self.encoder_hist_2)

        self.table_trg = LookupTable(self.rng,
                                     self.trg_vocab_size,
                                     self.n_in_trg,
                                     name='table_trg')
        self.layers.append(self.table_trg)

        self.decoder = Decoder(self.rng, self.n_in_trg, self.n_hids_trg,
                               2 * self.n_hids_src, self.n_hids_src,
                               # added by Zhaopeng Tu, 2016-06-09
                               with_attention=self.with_attention,
                               # added by Zhaopeng Tu, 2016-04-29
                               with_coverage=self.with_coverage,
                               coverage_dim=self.coverage_dim,
                               coverage_type=self.coverage_type,
                               max_fertility=self.max_fertility,
                               # added by Zhaopeng Tu, 2016-05-30
                               with_context_gate=self.with_context_gate,
                               maxout_part=self.maxout_part, name='rnn_decoder')
        self.layers.append(self.decoder)
        self.logistic_layer = LogisticRegression(self.rng, self.n_in_trg,
                                                 self.trg_vocab_size)
        self.layers.append(self.logistic_layer)

        # added by Zhaopeng Tu, 2016-07-12
        # for reconstruction
        self.with_reconstruction = kwargs.pop('with_reconstruction')
        if self.with_reconstruction:
            # added by Zhaopeng Tu, 2016-07-27
            self.reconstruction_weight = kwargs.pop('reconstruction_weight')
            # note the source and target sides are reversed
            self.inverse_decoder = InverseDecoder(self.rng, self.n_in_src,
                                                  2 * self.n_hids_src, self.n_hids_trg,
                                                  # added by Zhaopeng Tu, 2016-06-09
                                                  with_attention=self.with_attention,
                                                  maxout_part=self.maxout_part,
                                                  name='rnn_inverse_decoder')
            self.layers.append(self.inverse_decoder)

            self.srng = RandomStreams(rng.randint(1e5))
            self.inverse_logistic_layer = LogisticRegression(
                self.rng,
                self.n_in_src,
                self.src_vocab_size,
                name='inverse_LR')
            self.layers.append(self.inverse_logistic_layer)

        for layer in self.layers:
            self.params.extend(layer.params)
Example #31
class EncoderDecoder(object):
    def __init__(self, rng, **kwargs):
        self.n_in_src = kwargs.pop('nembed_src')
        self.n_in_trg = kwargs.pop('nembed_trg')
        self.n_hids_src = kwargs.pop('nhids_src')
        self.n_hids_trg = kwargs.pop('nhids_trg')
        self.src_vocab_size = kwargs.pop('src_vocab_size')
        self.trg_vocab_size = kwargs.pop('trg_vocab_size')
        self.method = kwargs.pop('method')
        self.dropout = kwargs.pop('dropout')
        self.maxout_part = kwargs.pop('maxout_part')
        self.path = kwargs.pop('saveto')
        self.clip_c = kwargs.pop('clip_c')
        self.rng = rng
        self.trng = RandomStreams(rng.randint(1e5))

        # added by  Zhaopeng  Tu, 2016-06-09
        self.with_attention = kwargs.pop('with_attention')

        # added by Zhaopeng Tu, 2016-04-29
        self.with_coverage = kwargs.pop('with_coverage')
        self.coverage_dim = kwargs.pop('coverage_dim')
        self.coverage_type = kwargs.pop('coverage_type')
        self.max_fertility = kwargs.pop('max_fertility')
        if self.coverage_type == 'linguistic':
            # make sure the dimension of linguistic coverage is always 1
            self.coverage_dim = 1

        # added by Zhaopeng Tu, 2016-05-30
        self.with_context_gate = kwargs.pop('with_context_gate')

        self.params = []
        self.layers = []

        self.table_src = LookupTable(self.rng,
                                     self.src_vocab_size,
                                     self.n_in_src,
                                     name='table_src')
        self.layers.append(self.table_src)

        self.encoder = BidirectionalEncoder(self.rng,
                                            self.n_in_src,
                                            self.n_hids_src,
                                            self.table_src,
                                            name='birnn_encoder')
        self.layers.append(self.encoder)

        # added by Longyue
        self.encoder_hist_1 = Encoder(self.rng,
                                      self.n_in_src,
                                      self.n_hids_src,
                                      self.table_src,
                                      name='rnn_encoder_hist_1')
        self.layers.append(self.encoder_hist_1)
        self.encoder_hist_2 = Encoder(self.rng,
                                      self.n_hids_src,
                                      self.n_hids_src,
                                      self.table_src,
                                      name='rnn_encoder_hist_2')
        self.layers.append(self.encoder_hist_2)

        self.table_trg = LookupTable(self.rng,
                                     self.trg_vocab_size,
                                     self.n_in_trg,
                                     name='table_trg')
        self.layers.append(self.table_trg)

        self.decoder = Decoder(self.rng, self.n_in_trg, self.n_hids_trg,
                               2 * self.n_hids_src, self.n_hids_src,
                               # added by Zhaopeng Tu, 2016-06-09
                               with_attention=self.with_attention,
                               # added by Zhaopeng Tu, 2016-04-29
                               with_coverage=self.with_coverage,
                               coverage_dim=self.coverage_dim,
                               coverage_type=self.coverage_type,
                               max_fertility=self.max_fertility,
                               # added by Zhaopeng Tu, 2016-05-30
                               with_context_gate=self.with_context_gate,
                               maxout_part=self.maxout_part, name='rnn_decoder')
        self.layers.append(self.decoder)
        self.logistic_layer = LogisticRegression(self.rng, self.n_in_trg,
                                                 self.trg_vocab_size)
        self.layers.append(self.logistic_layer)

        # added by Zhaopeng Tu, 2016-07-12
        # for reconstruction
        self.with_reconstruction = kwargs.pop('with_reconstruction')
        if self.with_reconstruction:
            # added by Zhaopeng Tu, 2016-07-27
            self.reconstruction_weight = kwargs.pop('reconstruction_weight')
            # note the source and target sides are reversed
            self.inverse_decoder = InverseDecoder(self.rng, self.n_in_src,
                                                  2 * self.n_hids_src, self.n_hids_trg,
                                                  # added by Zhaopeng Tu, 2016-06-09
                                                  with_attention=self.with_attention,
                                                  maxout_part=self.maxout_part,
                                                  name='rnn_inverse_decoder')
            self.layers.append(self.inverse_decoder)

            self.srng = RandomStreams(rng.randint(1e5))
            self.inverse_logistic_layer = LogisticRegression(
                self.rng,
                self.n_in_src,
                self.src_vocab_size,
                name='inverse_LR')
            self.layers.append(self.inverse_logistic_layer)

        for layer in self.layers:
            self.params.extend(layer.params)

    def build_trainer(self, src, src_mask, src_hist, src_hist_mask, trg,
                      trg_mask, ite):

        # added by Longyue
        # checked by Zhaopeng: sentence dim = n_steps, hist_len, batch_size (4, 3, 25)
        # hist = (batch_size, sent_num, sent_len) --.T-->
        # hist = (sent_len, sent_num, batch_size) --lookup table-->
        # (sent_len, sent_num, batch_size, word_emb) --reshape-->
        # (sent_len, sent_num*batch_size, word_emb) --word-level rnn-->
        # (sent_len, sent_num*batch_size, hidden_size) --reshape-->
        # (sent_len, sent_num, batch_size, hidden_size) --[-1]-->
        # (sent_num, batch_size, hidden_size) --sent-level rnn-->
        # (sent_num, batch_size, hidden_size) --[-1]-->
        # (batch_size, hidden_size) = cross-sent context vector

        annotations_1 = self.encoder_hist_1.apply_1(src_hist, src_hist_mask)
        annotations_1 = annotations_1[-1]  # get last hidden states
        annotations_2 = self.encoder_hist_2.apply_2(annotations_1)
        annotations_3 = annotations_2[-1]  # get last hidden states

        #modified by Longyue
        annotations = self.encoder.apply(src, src_mask, annotations_3)
        # init_context = annotations[0, :, -self.n_hids_src:]
        # modification #1
        # mean pooling
        init_context = (annotations *
                        src_mask[:, :, None]).sum(0) / src_mask.sum(0)[:, None]

        #added by Longyue
        init_context = concatenate([init_context, annotations_3],
                                   axis=annotations_3.ndim - 1)

        trg_emb = self.table_trg.apply(trg)
        trg_emb_shifted = T.zeros_like(trg_emb)
        trg_emb_shifted = T.set_subtensor(trg_emb_shifted[1:], trg_emb[:-1])
        # modified by Longyue
        hiddens, readout, alignment = self.decoder.run_pipeline(
            state_below=trg_emb_shifted,
            mask_below=trg_mask,
            init_context=init_context,
            c=annotations,
            c_mask=src_mask,
            hist=annotations_3)

        # apply dropout
        if self.dropout < 1.0:
            logger.info('Apply dropout with p = {}'.format(self.dropout))
            readout = Dropout(self.trng, readout, 1, self.dropout)

        p_y_given_x = self.logistic_layer.get_probs(readout)

        self.cost = self.logistic_layer.cost(p_y_given_x, trg,
                                             trg_mask) / trg.shape[1]

        # self.cost = theano.printing.Print('likilihood cost:')(self.cost)

        # added by Zhaopeng Tu, 2016-07-12
        # for reconstruction
        if self.with_reconstruction:
            # now hiddens is the annotations
            inverse_init_context = (hiddens * trg_mask[:, :, None]
                                    ).sum(0) / trg_mask.sum(0)[:, None]

            src_emb = self.table_src.apply(src)
            src_emb_shifted = T.zeros_like(src_emb)
            src_emb_shifted = T.set_subtensor(src_emb_shifted[1:],
                                              src_emb[:-1])
            inverse_hiddens, inverse_readout, inverse_alignment = self.inverse_decoder.run_pipeline(
                state_below=src_emb_shifted,
                mask_below=src_mask,
                init_context=inverse_init_context,
                c=hiddens,
                c_mask=trg_mask)

            # apply dropout
            if self.dropout < 1.0:
                # logger.info('Apply dropout with p = {}'.format(self.dropout))
                inverse_readout = Dropout(self.srng, inverse_readout, 1,
                                          self.dropout)

            p_x_given_y = self.inverse_logistic_layer.get_probs(
                inverse_readout)

            self.reconstruction_cost = self.inverse_logistic_layer.cost(
                p_x_given_y, src, src_mask) / src.shape[1]

            # self.reconstruction_cost = theano.printing.Print('reconstructed cost:')(self.reconstruction_cost)
            self.cost += self.reconstruction_cost * self.reconstruction_weight

        self.L1 = sum(T.sum(abs(param)) for param in self.params)
        self.L2 = sum(T.sum(param**2) for param in self.params)

        params_regular = self.L1 * 1e-6 + self.L2 * 1e-6
        # params_regular = theano.printing.Print('params_regular:')(params_regular)

        # train cost
        train_cost = self.cost + params_regular

        # gradients
        grads = T.grad(train_cost, self.params)

        # apply gradient clipping here
        grads = grad_clip(grads, self.clip_c)

        # updates
        updates = adadelta(self.params, grads)

        # train function
        # modified by Longyue
        inps = [src, src_mask, src_hist, src_hist_mask, trg, trg_mask]

        self.train_fn = theano.function(inps, [train_cost],
                                        updates=updates,
                                        name='train_function')
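        # usage sketch (shapes assumed, not defined in this file): inputs are
        # time-major arrays, e.g. src/src_mask of shape (src_len, batch),
        # trg/trg_mask of shape (trg_len, batch), and src_hist/src_hist_mask
        # covering the previous sentences; one update step would look like
        #   cost, = self.train_fn(src, src_mask, src_hist, src_hist_mask, trg, trg_mask)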
        # self.train_fn = theano.function(inps, [train_cost], updates=updates, name='train_function', mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True))

    def build_sampler(self):

        # added by Longyue
        x_hist = T.ltensor3()
        x_hist_mask = T.tensor3()
        annotations_1 = self.encoder_hist_1.apply_1(x_hist, x_hist_mask)
        annotations_1 = annotations_1[-1]
        annotations_2 = self.encoder_hist_2.apply_2(annotations_1)
        annotations_3 = annotations_2[-1]
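        # hierarchical history encoding: a word-level pass over the previous
        # sentences (last hidden states), then a sentence-level pass whose final
        # state (annotations_3) summarises the whole cross-sentence context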

        x = T.lmatrix()

        # Build Networks
        # src_mask is None
        c = self.encoder.apply(x, None, annotations_3)
        #init_context = ctx[0, :, -self.n_hids_src:]
        # mean pooling
        init_context = c.mean(0)

        # added by Longyue
        init_context = concatenate([init_context, annotations_3],
                                   axis=annotations_3.ndim - 1)

        init_state = self.decoder.create_init_state(init_context)

        outs = [init_state, c, annotations_3]
        if not self.with_attention:
            outs.append(init_context)

        # compile function
        print('Building compile_init_and_context function ...')
        self.compile_init_and_context = theano.function(
            [x, x_hist, x_hist_mask], outs, name='compile_init_and_context')
        print('Done')

        y = T.lvector()
        cur_state = T.matrix()
        # for the first word (indicated by -1) the embedding should be all zeros
        trg_emb = T.switch(y[:, None] < 0, T.alloc(0., 1, self.n_in_trg),
                           self.table_trg.apply(y))

        # added by Zhaopeng Tu, 2016-06-09
        # for with_attention=False
        if self.with_attention and self.with_coverage:
            cov_before = T.tensor3()
            if self.coverage_type == 'linguistic':
                print('Building compile_fertility ...')
                fertility = self.decoder._get_fertility(c)
                fertility = T.addbroadcast(fertility, 1)
                self.compile_fertility = theano.function(
                    [c], [fertility], name='compile_fertility')
                print('Done')
            else:
                fertility = None
        else:
            cov_before = None
            fertility = None

        # apply one step
        # modified by Zhaopeng Tu, 2016-04-29
        # [next_state, ctxs] = self.decoder.apply(state_below=trg_emb,
        results = self.decoder.apply(
            state_below=trg_emb,
            init_state=cur_state,
            # added by Zhaopeng Tu, 2016-06-09
            init_context=None if self.with_attention else init_context,
            c=c if self.with_attention else None,
            hist=annotations_3,  # added by Longyue
            one_step=True,
            # added by Zhaopeng Tu, 2016-04-27
            cov_before=cov_before,
            fertility=fertility)
        next_state = results[0]
        if self.with_attention:
            ctxs, alignment = results[1], results[2]
            if self.with_coverage:
                cov = results[3]
        else:
            # if with_attention=False, we always use init_context as the source representation
            ctxs = init_context

        readout = self.decoder.readout(next_state, ctxs, trg_emb)

        # maxout
        if self.maxout_part > 1:
            readout = self.decoder.one_step_maxout(readout)

        # apply dropout
        if self.dropout < 1.0:
            readout = Dropout(self.trng, readout, 0, self.dropout)

        # compute the softmax probability
        next_probs = self.logistic_layer.get_probs(readout)

        # sample from softmax distribution to get the sample
        next_sample = self.trng.multinomial(pvals=next_probs).argmax(1)
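        # draw one target word per hypothesis from the softmax distribution
        # (argmax over a single multinomial draw)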

        # compile function
        print('Building compile_next_state_and_probs function ...')
        inps = [y, cur_state]
        if self.with_attention:
            inps.append(c)
        else:
            inps.append(init_context)

        # added by Longyue
        inps.append(annotations_3)

        outs = [next_probs, next_state, next_sample]
        # added by Zhaopeng Tu, 2016-06-09
        if self.with_attention:
            outs.append(alignment)
            # added by Zhaopeng Tu, 2016-04-29
            if self.with_coverage:
                inps.append(cov_before)
                if self.coverage_type == 'linguistic':
                    inps.append(fertility)
                outs.append(cov)

        self.compile_next_state_and_probs = theano.function(
            inps, outs, name='compile_next_state_and_probs')
        print('Done')
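        # usage sketch (assumed decoding driver, not part of this file), for the
        # common case with_attention=True and with_coverage=False:
        #   init_state, c, hist = self.compile_init_and_context(x, x_hist, x_hist_mask)
        #   y = numpy.full((n_samples,), -1, dtype='int64')  # -1 marks the first step
        #   probs, state, sample, align = self.compile_next_state_and_probs(y, init_state, c, hist)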

        # added by Zhaopeng Tu, 2016-07-18
        # for reconstruction
        if self.with_reconstruction:
            # Build Networks
            # trg_mask is None
            inverse_c = T.tensor3()
            # mean pooling
            inverse_init_context = inverse_c.mean(0)

            inverse_init_state = self.inverse_decoder.create_init_state(
                inverse_init_context)

            outs = [inverse_init_state]
            if not self.with_attention:
                outs.append(inverse_init_context)

            # compile function
            print('Building compile_inverse_init_and_context function ...')
            self.compile_inverse_init_and_context = theano.function(
                [inverse_c], outs, name='compile_inverse_init_and_context')
            print('Done')

            src = T.lvector()
            inverse_cur_state = T.matrix()
            trg_mask = T.matrix()
            # for the first word (indicated by -1) the embedding should be all zeros
            src_emb = T.switch(src[:, None] < 0, T.alloc(0., 1, self.n_in_src),
                               self.table_src.apply(src))

            # apply one step
            # modified by Zhaopeng Tu, 2016-04-29
            inverse_results = self.inverse_decoder.apply(
                state_below=src_emb,
                init_state=inverse_cur_state,
                # added by Zhaopeng Tu, 2016-06-09
                init_context=None
                if self.with_attention else inverse_init_context,
                c=inverse_c if self.with_attention else None,
                c_mask=trg_mask,
                one_step=True)
            inverse_next_state = inverse_results[0]
            if self.with_attention:
                inverse_ctxs, inverse_alignment = inverse_results[
                    1], inverse_results[2]
            else:
                # if with_attention=False, we always use inverse_init_context as the source representation
                inverse_ctxs = inverse_init_context

            inverse_readout = self.inverse_decoder.readout(
                inverse_next_state, inverse_ctxs, src_emb)

            # maxout
            if self.maxout_part > 1:
                inverse_readout = self.inverse_decoder.one_step_maxout(
                    inverse_readout)

            # apply dropout
            if self.dropout < 1.0:
                inverse_readout = Dropout(self.srng, inverse_readout, 0,
                                          self.dropout)

            # compute the softmax probability
            inverse_next_probs = self.inverse_logistic_layer.get_probs(
                inverse_readout)

            # sample from softmax distribution to get the sample
            inverse_next_sample = self.srng.multinomial(
                pvals=inverse_next_probs).argmax(1)

            # compile function
            print('Building compile_inverse_next_state_and_probs function ...')
            inps = [src, trg_mask, inverse_cur_state]
            if self.with_attention:
                inps.append(inverse_c)
            else:
                inps.append(inverse_init_context)
            outs = [
                inverse_next_probs, inverse_next_state, inverse_next_sample
            ]
            # added by Zhaopeng Tu, 2016-06-09
            if self.with_attention:
                outs.append(inverse_alignment)

            self.compile_inverse_next_state_and_probs = theano.function(
                inps, outs, name='compile_inverse_next_state_and_probs')
            print('Done')

    def save(self, path=None):
        if path is None:
            path = self.path
        val = dict((param.name, param.get_value()) for param in self.params)
        logger.info("save the model {}".format(path))
        # pass an open binary file object so numpy.savez writes to exactly `path`
        # (with a bare string, savez appends '.npz' when the extension is missing)
        with open(path, "wb") as filenpz:
            numpy.savez(filenpz, **val)

    def load(self, path=None):
        if path is None:
            path = self.path
        if os.path.isfile(path):
            logger.info("load params {}".format(path))
            val = numpy.load(path)
            for index, param in enumerate(self.params):
                logger.info('Loading {} with shape {}'.format(
                    param.name,
                    param.get_value(borrow=True).shape))
                if param.name not in val.keys():
                    logger.info('Adding new param {} with shape {}'.format(
                        param.name,
                        param.get_value(borrow=True).shape))
                    continue
                if param.get_value().shape != val[param.name].shape:
                    logger.error(
                        "model param shape {} != loaded param shape {}".format(
                            param.get_value().shape, val[param.name].shape))
                    raise Exception("loading params shape mismatch")
                param.set_value(val[param.name], borrow=True)
        else:
            logger.error("file {} does not exist".format(path))
            self.save()