Code Example #1
def main(train_path, eval_path, pred_path):
    """Problem 1(e): Gaussian discriminant analysis (GDA)

    Args:
        train_path: Path to CSV file containing dataset for training.
        eval_path: Path to CSV file containing dataset for evaluation.
        pred_path: Path to save predictions.
    """
    # Load dataset
    x_train, y_train = util.load_dataset(train_path, add_intercept=False)

    # *** START CODE HERE ***

    # Train GDA
    model = GDA()
    model.fit(x_train, y_train)

    # Plot data and decision boundary
    util.plot(x_train, y_train, model.theta,
              'output/p01e_{}.png'.format(pred_path[-5]))

    # Save predictions
    x_eval, y_eval = util.load_dataset(eval_path, add_intercept=True)
    y_pred = model.predict(x_eval)
    np.savetxt(pred_path, y_pred > 0.5, fmt='%d')
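Note: every snippet in this listing calls a project-local util.plot rather than matplotlib directly, and its signature varies from repository to repository. As a reference, a minimal sketch of the CS229-style variant assumed by the example above (two features, binary labels, theta including the intercept) might look like this:

import matplotlib.pyplot as plt
import numpy as np


def plot(x, y, theta, save_path, correction=1.0):
    """Plot a two-feature binary dataset and the boundary theta^T x = 0."""
    plt.figure()
    # Scatter the two classes; the last two columns are the features,
    # so an intercept column, if present, is skipped
    plt.plot(x[y == 1, -2], x[y == 1, -1], 'bx', linewidth=2)
    plt.plot(x[y == 0, -2], x[y == 0, -1], 'go', linewidth=2)
    # Decision boundary: solve theta0 + theta1*x1 + theta2*x2 = 0 for x2,
    # with an optional correction factor for shifted class priors
    x1 = np.arange(x[:, -2].min(), x[:, -2].max(), 0.01)
    x2 = -(theta[0] / theta[2] + theta[1] / theta[2] * x1
           + np.log((2 - correction) / correction) / theta[2])
    plt.plot(x1, x2, c='red', linewidth=2)
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.savefig(save_path)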
Code Example #2
def main(train_path, valid_path, save_path):
    """Problem: Gaussian discriminant analysis (GDA)

    Args:
        train_path: Path to CSV file containing dataset for training.
        valid_path: Path to CSV file containing dataset for validation.
        save_path: Path to save predicted probabilities using np.savetxt().
    """
    # Load dataset
    x_train, y_train = util.load_dataset(train_path, add_intercept=False)

    # *** START CODE HERE ***
    # Train a GDA classifier
    model = GDA()
    model.fit(x_train, y_train)
    # Plot decision boundary on validation set
    x_val, y_val = util.load_dataset(valid_path, add_intercept=False)
    image_path = save_path[:-3] + "png"
    theta = np.concatenate(model.theta)
    assert theta.shape == (x_val.shape[1]+1, 1)
    util.plot(
        x=x_val, y=y_val,
        theta=theta,
        save_path=image_path
    )
    # Use np.savetxt to save outputs from validation set to save_path
    prob_val = model.predict(x_val)
    np.savetxt(save_path, prob_val)
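Note: the GDA class itself never appears in these snippets, and the examples disagree on whether predict receives an intercept column. A minimal sketch consistent with the usage above (fit on raw features, theta holding the intercept in position 0, predict on raw features returning probabilities; the class and method names come from the snippets, the body is an assumption) could be:

import numpy as np


class GDA:
    """Sketch of Gaussian discriminant analysis with shared covariance."""

    def __init__(self):
        self.theta = None

    def fit(self, x, y):
        m, n = x.shape
        # Maximum likelihood estimates of prior, class means, covariance
        phi = np.mean(y)
        mu_0 = x[y == 0].mean(axis=0)
        mu_1 = x[y == 1].mean(axis=0)
        mu_yi = np.where(y[:, None] == 1, mu_1, mu_0)
        sigma = (x - mu_yi).T.dot(x - mu_yi) / m
        sigma_inv = np.linalg.inv(sigma)
        # Collapse the two Gaussians into logistic-style parameters
        theta = sigma_inv.dot(mu_1 - mu_0)
        theta_0 = (0.5 * (mu_0.dot(sigma_inv).dot(mu_0)
                          - mu_1.dot(sigma_inv).dot(mu_1))
                   + np.log(phi / (1 - phi)))
        self.theta = np.concatenate([[theta_0], theta])
        return self.theta

    def predict(self, x):
        # p(y = 1 | x) for raw inputs without an intercept column
        return 1 / (1 + np.exp(-(x.dot(self.theta[1:]) + self.theta[0])))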
Code Example #3
    def fit(self, x, y):
        """Run Newton's Method to minimize J(theta) for logistic regression.

        Args:
            x: Training example inputs. Shape (m, n).
            y: Training example labels. Shape (m,).

        Returns:
            theta: Logistic regression model parameters, including intercept.
        """
        # *** START CODE HERE ***
        m, n = x.shape
        theta = self.theta
        if theta is None:
            theta = np.zeros(n)

        while True:
            loss = self.loss(x, y, theta, m, n)
            theta_new = self.update(x, y, theta, m, n)
            if self.verbose:
                print("Loss: ", loss, " 1-norm: ",
                      np.linalg.norm(theta_new - theta, ord=1))
            if np.linalg.norm(theta_new - theta, ord=1) < self.eps:
                # Converged: keep the final update and stop iterating
                theta = theta_new
                break
            theta = theta_new

        self.theta = theta
        util.plot(x, y, self.theta, 'output/p01b')
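Note: the loss and update helpers called above are not shown. For logistic regression with average loss J(theta) = -(1/m) sum_i [y_i log h_i + (1 - y_i) log(1 - h_i)], h = sigmoid(x.theta), one Newton update consistent with that loop might be (written as free functions here; in the class they would be methods taking self):

import numpy as np


def sigmoid(z):
    return 1 / (1 + np.exp(-z))


def loss(x, y, theta, m, n):
    """Average logistic loss J(theta). n is unused, kept to match the call."""
    h = sigmoid(x.dot(theta))
    return -np.mean(y * np.log(h) + (1 - y) * np.log(1 - h))


def update(x, y, theta, m, n):
    """One Newton step: theta - H^{-1} grad J(theta)."""
    h = sigmoid(x.dot(theta))
    grad = -x.T.dot(y - h) / m                      # gradient of J
    hess = x.T.dot(x * (h * (1 - h))[:, None]) / m  # Hessian of J
    return theta - np.linalg.solve(hess, grad)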
Code Example #4
def logistic_regression(X, Y):
    """Train a logistic regression model"""
    m, n = X.shape
    theta = np.zeros(n)
    learning_rate = 1

    i = 0
    while True:
        i += 1
        prev_theta = theta
        grad = calc_grad(X, Y, theta)
        theta = theta - learning_rate * grad
        if i % 10000 == 0:
            DEBUG = False
            if DEBUG:
                from util import plot
                from matplotlib import pyplot as plt
                plot(X, (Y == 1), theta, 'output/{}.png'.format(i))
            print('Finished %d iterations' % i)
            print(np.linalg.norm(prev_theta - theta))
            print(theta)
        if np.linalg.norm(prev_theta - theta) < 1e-15:
            print('Converged in %d iterations' % i)
            break
    return
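Note: calc_grad is defined elsewhere in that file. The (Y == 1) conversion in the plotting call suggests labels in {-1, 1}; for the average logistic loss with such labels the gradient is typically computed as below (a sketch, not necessarily the repository's exact code):

import numpy as np


def calc_grad(X, Y, theta):
    """Gradient of the average empirical logistic loss, labels in {-1, 1}."""
    m, _ = X.shape
    margins = Y * X.dot(theta)
    probs = 1. / (1 + np.exp(margins))
    return -(1. / m) * (X.T.dot(probs * Y))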
Code Example #5
File: logreg.py  Project: pmh9960/cs229
def main(train_path, valid_path, save_path, plot_path):
    """Problem: Logistic regression with Newton's Method.

    Args:
        train_path: Path to CSV file containing dataset for training.
        valid_path: Path to CSV file containing dataset for validation.
        save_path: Path to save predicted probabilities using np.savetxt().
        plot_path: Path to save the decision boundary plots.
    """
    x_train, y_train = util.load_dataset(train_path, add_intercept=True)

    # *** START CODE HERE ***
    # Train a logistic regression classifier
    # Plot decision boundary on top of validation set
    # Use np.savetxt to save predictions on eval set to save_path
    clf = LogisticRegression()
    clf.fit(x_train, y_train)

    x_valid, y_valid = util.load_dataset(valid_path, add_intercept=True)
    pred = clf.predict(x_valid)
    np.savetxt(save_path, pred)
    print(f"ACC_valid : {np.sum((pred > 0.5) == y_valid) / len(y_valid)}")
    util.plot(
        x_train,
        y_train,
        clf.theta,
        plot_path.replace(".png", "_train.png"),
        correction=1.0,
    )
    util.plot(
        x_valid,
        y_valid,
        clf.theta,
        plot_path.replace(".png", "_valid.png"),
        correction=1.0,
    )
Code Example #6
def main(train_path, eval_path, pred_path):
    """Problem 1(e): Gaussian discriminant analysis (GDA)

    Args:
        train_path: Path to CSV file containing dataset for training.
        eval_path: Path to CSV file containing dataset for evaluation.
        pred_path: Path to save predictions.
    """
    # Load dataset
    x_train, y_train = util.load_dataset(train_path, add_intercept=False)

    # get the model
    model = GDA()
    model.fit(x_train, y_train)

    # predict using the trained model
    x_eval, y_eval = util.load_dataset(eval_path, add_intercept=False)
    y_pred = model.predict(x_eval)

    # Plot decision boundary on top of the evaluation set
    theta = list(model.theta)
    theta_0 = [model.theta_0]
    util.plot(x_eval, y_eval, theta_0 + theta,
              'output/{ds}_GDA.pdf'.format(ds=eval_path.split('/')[-1]))

    # Use np.savetxt to save predictions on eval set to pred_path
    np.savetxt(pred_path, y_pred)
Code Example #7
def main(lr, train_path, eval_path, save_path):
    """Problem: Poisson regression with gradient ascent.

    Args:
        lr: Learning rate for gradient ascent.
        train_path: Path to CSV file containing dataset for training.
        eval_path: Path to CSV file containing dataset for evaluation.
        save_path: Path to save predictions.
    """
    # Load training set
    x_train, y_train = util.load_dataset(train_path, add_intercept=True)

    # *** START CODE HERE ***
    # Fit a Poisson Regression model
    # Run on the validation set, and use np.savetxt to save outputs to save_path

    model = PoissonRegression(lr)
    model.fit(x_train, y_train)

    x_val, y_val = util.load_dataset(eval_path, add_intercept=True)
    util.plot(x_val,
              y_val,
              model.theta,
              save_path=save_path.replace(".txt", ".png"))

    np.savetxt(save_path, model.predict(x_val))
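Note: for reference, a PoissonRegression class consistent with this usage (constructor taking the learning rate, fit by batch gradient ascent on the Poisson log-likelihood) might be sketched as follows; max_iter and eps defaults are assumptions:

import numpy as np


class PoissonRegression:
    """Sketch of Poisson regression trained by batch gradient ascent."""

    def __init__(self, step_size, max_iter=10000, eps=1e-5):
        self.step_size = step_size
        self.max_iter = max_iter
        self.eps = eps
        self.theta = None

    def fit(self, x, y):
        m, n = x.shape
        self.theta = np.zeros(n)
        for _ in range(self.max_iter):
            # Canonical Poisson GLM response: E[y | x] = exp(theta^T x)
            step = self.step_size * x.T.dot(y - np.exp(x.dot(self.theta))) / m
            self.theta += step
            if np.linalg.norm(step, ord=1) < self.eps:
                break

    def predict(self, x):
        return np.exp(x.dot(self.theta))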
Code Example #8
def build_c0():
    # data
    tok = Tokenizer()
    tok.fit_on_texts(pnames)
    pnames_vec = tok.texts_to_sequences(pnames)
    maxlen_pd = max(map(len, pnames_vec))  # length of the longest sentence
    pnames_train_data = pad_sequences(pnames_vec, maxlen_pd)  # the finished training data
    print(pnames_train_data[:10])
    pnames_vocab_size = len(tok.word_index) + 1
    # model
    pnames_in = Input(shape=(maxlen_pd,), dtype='int32')
    ecoded = Embedding(pnames_vocab_size, 50)(pnames_in)
    ecoded = LSTM(100)(ecoded)
    out = Dense(c0_labels_len, activation='softmax')(ecoded)
    model = Model([pnames_in], out)
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['acc'])

    history = model.fit(pnames_train_data,
                        c0_labels,
                        validation_split=0.05,
                        batch_size=32,
                        epochs=5,  # 20
                        verbose=2)

    model.save_weights(directory + "c0.h5")

    # --------------
    #  Model performance output
    # --------------
    u.plot(history.history, ('acc', 'val_acc'),
           ' training&validation acc', ('Epoch', 'Acc'))
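Note: u.plot here graphs Keras training history rather than a dataset. A hypothetical helper matching the call (history dict, keys to draw, title, axis labels) could be:

import matplotlib.pyplot as plt


def plot(history, keys, title, axis_labels):
    """Plot the given history keys, e.g. training vs. validation accuracy."""
    for key in keys:
        plt.plot(history[key], label=key)
    plt.title(title)
    plt.xlabel(axis_labels[0])
    plt.ylabel(axis_labels[1])
    plt.legend()
    plt.show()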
Code Example #9
File: gda.py  Project: Aitous/CS229
def main(train_path, valid_path, save_path):
    """Problem: Gaussian discriminant analysis (GDA)
    
    Args:
        train_path: Path to CSV file containing dataset for training.
        valid_path: Path to CSV file containing dataset for validation.
        save_path: Path to save predicted probabilities using np.savetxt().
    """
    # Load dataset
    x_train, y_train = util.load_dataset(train_path, add_intercept=False)

    # *** START CODE HERE ***
    # Train a GDA classifier
    # Plot decision boundary on validation set
    # Use np.savetxt to save outputs from validation set to save_path
    x_val, y_val = util.load_dataset(valid_path, add_intercept=False)
        
    # Uncomment to normalize the training and validation sets, which can
    # improve GDA performance:
    # x_train = (x_train - np.mean(x_train, axis=0)) / np.std(x_train, axis=0)
    # x_val = (x_val - np.mean(x_val, axis=0)) / np.std(x_val, axis=0)

    # x_train = (x_train - np.min(x_train, axis=0)) / (np.max(x_train, axis=0) - np.min(x_train, axis=0))
    # x_val = (x_val - np.min(x_val, axis=0)) / (np.max(x_val, axis=0) - np.min(x_val, axis=0))

    clf = GDA()
    clf.fit(x_train, y_train)
    y_predict = clf.predict(x_val)

    np.savetxt(save_path, y_predict)
    util.plot(x_val, (y_predict >= 0.5), clf.theta, save_path[:-4] + "validation_expected")
    # Plot the true label distribution for comparison
    util.plot(x_val, y_val, clf.theta, save_path[:-4] + "validation_real")
Code Example #10
File: logreg.py  Project: hardianlawi/stanford-cs229
def main(train_path, valid_path, save_path):
    """Problem: Logistic regression with Newton's Method.

    Args:
        train_path: Path to CSV file containing dataset for training.
        valid_path: Path to CSV file containing dataset for validation.
        save_path: Path to save predicted probabilities using np.savetxt().
    """
    x_train, y_train = util.load_dataset(train_path, add_intercept=True)

    # *** START CODE HERE ***
    # Train a logistic regression classifier
    # Plot decision boundary on top of validation set
    # Use np.savetxt to save predictions on eval set to save_path

    model = LogisticRegression()
    model.fit(x_train, y_train)

    x_val, y_val = util.load_dataset(valid_path, add_intercept=True)
    util.plot(x_val,
              y_val,
              model.theta,
              save_path=save_path.replace(".txt", ".jpg"))

    yhat = model.predict(x_val)
    np.savetxt(save_path, yhat)

    print(f"LogReg acc: {util.compute_accuracy(y_val, yhat)}")
    print(f"LogReg log loss: {util.compute_log_loss(y_val, yhat)}")
Code Example #11
File: p01b_logreg.py  Project: laksh9950/cs229-2
def main(train_path, eval_path, pred_path):
    """Problem 1(b): Logistic regression with Newton's Method.

    Args:
        train_path: Path to CSV file containing dataset for training.
        eval_path: Path to CSV file containing dataset for evaluation.
        pred_path: Path to save predictions.
    """
    x_train, y_train = util.load_dataset(train_path, add_intercept=True)

    # initial guess of parameters
    theta_0 = np.zeros(shape=(3, ))

    # get the model
    model = LogisticRegression(theta_0=theta_0)
    model.fit(x_train, y_train)

    # predict using the trained model
    x_eval, y_eval = util.load_dataset(eval_path, add_intercept=True)
    y_pred = model.predict(x_eval)

    # Plot decision boundary on top of the evaluation set
    util.plot(x_eval, y_eval, model.theta, 'output/{ds}_log_reg.pdf'.format(ds=eval_path.split('/')[-1]))

    # Use np.savetxt to save predictions on eval set to pred_path
    np.savetxt(pred_path, y_pred)
Code Example #12
    def train_exploiting_greedy(self,
                                episodes=1000,
                                max_steps=1000,
                                plot_rewards=True):
        scores = []
        e = 0
        for _ in range(episodes):
            trace = []
            greedy_reversal_sort(self.env.observation_space.sample(), trace)
            for __ in range(3):
                for permutation in trace[::-1]:
                    score = self.run_episode(max_steps, forced=permutation)
                    scores.append(score)
                    print("Episode:", e, "  score:", score, "  epsilon:",
                          self.epsilon)
                    e += 1
                print()
            print()

        self.model.save_weights(FINAL_WEIGHTS_PATH)

        scores = np.array(scores)
        if plot_rewards:
            plot(scores)
            plot_running_avg(scores)
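Note: plot and plot_running_avg in the reinforcement learning snippets take a score series rather than a dataset. A plausible sketch of the running-average variant (the window size is an assumption):

import matplotlib.pyplot as plt
import numpy as np


def plot_running_avg(scores, window=100):
    """Plot the trailing-window mean of episode scores."""
    scores = np.asarray(scores, dtype=float)
    running = np.array([scores[max(0, t - window + 1):t + 1].mean()
                        for t in range(len(scores))])
    plt.plot(running)
    plt.title("Running Average of Scores")
    plt.show()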
Code Example #13
def extract_stats(filepaths, image_size, square_function):
    print("Calculating mean, std and var of all images")

    # Running totals for Welford's online mean/variance computation
    count_so_far = 0
    mean = np.zeros((image_size, image_size))
    M2 = np.zeros((image_size, image_size))

    n = len(filepaths)

    for i, filepath in enumerate(filepaths):

        image = misc.imread(filepath, flatten=1)

        image = process(image, square_function, image_size)
        # Online statistics
        count_so_far = count_so_far + 1
        delta = image - mean
        mean = mean + delta / count_so_far
        M2 = M2 + delta * (image - mean)

        if i % 50 == 0:
            util.update_progress(i / n)

    util.update_progress(1.0)

    mean_image = mean
    variance_image = M2 / (n - 1)
    std_image = np.sqrt(variance_image)

    print "Plotting mean image (only shows afterwards)"
    util.plot(mean_image, invert=True)

    return mean_image, variance_image, std_image
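Note: the update inside the loop is Welford's online algorithm, which in formulas is

\mu_k = \mu_{k-1} + \frac{x_k - \mu_{k-1}}{k}, \qquad
M_{2,k} = M_{2,k-1} + (x_k - \mu_{k-1})(x_k - \mu_k), \qquad
s^2 = \frac{M_{2,n}}{n - 1}

so the per-pixel mean and variance of all images are accumulated while holding only one image in memory at a time.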
Code Example #14
def main():
    args = setup_argparser().parse_args()

    filepath = args.file
    num_clusters = args.num_clusters

    data, truth_clusters = import_file(filepath, correct_clusters=True)
    points = [Point(x) for x in data]

    aggclustering = AgglomerativeClustering(num_clusters=num_clusters)
    labels = aggclustering.fit(points)
    logging.info("Labels: {}".format(labels))
    logging.info("Rand score: {}".format(rand_score(truth_clusters, labels)))
    logging.info("Jaccard coefficient: {}".format(
        jaccard_coeff(truth_clusters, labels)))

    # We apply PCA dimensionality reduction so the data (and any centroids)
    # can be plotted in 2-D
    plot(reduce_dimensionality(data),
         truth_clusters,
         None,
         suffix="hierarchical_truth")
    plot(reduce_dimensionality(data),
         labels,
         None,
         suffix="hierarchical_computed")
    return
Code Example #15
File: main.py  Project: MostafaDehghani/ToMGAN
def run_training(model, sess_context_manager, summary_writer):
    """Repeatedly runs training iterations, logging loss to screen and writing summaries"""
    tf.logging.info("starting run_training")
    with sess_context_manager as sess:
        tf.train.start_queue_runners(sess=sess)
        if FLAGS.debug:  # start the tensorflow debugger
            sess = tf_debug.LocalCLIDebugWrapperSession(sess)
            sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        num_batch = 0
        while True:  # repeats until interrupted
            if num_batch % FLAGS.logging_step == 0:
                tf.logging.info('------ number of batches: ' +
                                str(num_batch) + ' ------')
                t0 = time.time()
                model.run_train_step(sess, summary_writer, logging=True)
                t1 = time.time()
                tf.logging.info('seconds for training step: %.3f', t1 - t0)
                tf.logging.info("sampling from the generator")
                sampling_result = model.sample_generator(sess)
                if FLAGS.dataset_id == "mnist":
                    util.plot(sampling_result['g_sample'], num_batch, 1)
                elif FLAGS.dataset_id == "cifar":
                    util.plot(sampling_result['g_sample'], num_batch, 3)
                print(model.run_eval_step(sess))
            else:  # no logging
                model.run_train_step(sess, summary_writer, logging=False)

            num_batch += 1
Code Example #16
def main():
    args = setup_argparser().parse_args()
    filepath = args.file
    min_pts = args.min_points
    eps = args.eps

    logging.info(args)

    data, truth_clusters = import_file(filepath, correct_clusters=False)

    db = DBSCAN(eps=eps, min_points=min_pts)
    db.dbscan(data)
    logging.info("Rand Index: {}".format(rand_score(truth_clusters,
                                                    db.labels)))
    logging.info("Jaccard Coefficient: {}".format(
        jaccard_coeff(truth_clusters, db.labels)))
    # There's barely any difference between our labels and what sklearn
    # produces, so this looks correct.
    # We apply PCA dimensionality reduction so the data (and any centroids)
    # can be plotted in 2-D
    plot(reduce_dimensionality(data),
         truth_clusters,
         None,
         suffix="dbscan_truth")
    plot(reduce_dimensionality(data),
         db.labels,
         None,
         suffix="dbscan_computed")
    return
Code Example #17
    def fit(self, x, y):
        """Run Newton's Method to minimize J(theta) for logistic regression.

        Args:
            x: Training example inputs. Shape (m, n).
            y: Training example labels. Shape (m,).
        """
        # *** START CODE HERE ***
        self.theta = np.zeros(x.shape[1])
        alpha = self.step_size
        N = self.max_iter
        epsilon = self.eps
        theta_i = self.theta

        # First Newton step, scaled by the step size
        temp = theta_i
        theta_i = temp - alpha * (np.linalg.inv(self.hessian(temp, x, y)).dot(
            self.gradient(temp, x, y)))
        count = 1
        # Keep taking full Newton steps until the parameter change is
        # below epsilon or max_iter is reached
        while count < N and np.linalg.norm(theta_i - temp) >= epsilon:
            temp = theta_i
            theta_i = temp - np.linalg.inv(self.hessian(temp, x, y)).dot(
                self.gradient(temp, x, y))
            count += 1

        util.plot(x, y, theta_i, correction=1.0)
        self.theta = theta_i
        # *** END CODE HERE ***
Code Example #18
    def train(self, episodes=1000, max_steps=1000, plot_rewards=True):
        # Initialize target network weights
        scores, steps = np.empty(episodes), np.empty(episodes)
        start = time.time()
        break_flag = 0
        for e in range(episodes):
            score, step, loss = self.run_episode(max_steps)
            scores[e], steps[e] = score, step
            print("Episode:", e, "  steps:", step, "  score:", score,
                  "  loss:", loss, "  time:",
                  time.time() - start)
            #break_flag = break_flag+1 if step == max_steps else 0
            #if break_flag > 60: break
        saver = tf.train.Saver()
        saver.save(self.session, self.train_path)

        if plot_rewards:
            t_time = time.time() - start
            print("Mean step:", np.mean(steps), " Total steps:", np.sum(steps),
                  " total time:", t_time)
            np.save(
                "./train_data/ddpg_enc_actions" + str(self.state_size) + '_' +
                str(self.n_neighbors) + "_scores", scores)
            np.save(
                "./train_data/ddpg_enc_actions" + str(self.state_size) + '_' +
                str(self.n_neighbors) + "_time", t_time)
            np.save(
                "./train_data/ddpg_enc_actions" + str(self.state_size) + '_' +
                str(self.n_neighbors) + "_steps", steps)
            plot(steps)
            plot_running_avg(steps)
Code Example #19
    def plot(self, images):
        perrow = 5

        num, c, w, h = images.size()

        rows = int(math.ceil(num / perrow))

        means, sigmas, values, _ = self.hyper(images)

        images = images.data

        plt.figure(figsize=(perrow * 3, rows * 3))

        for i in range(num):

            ax = plt.subplot(rows, perrow, i + 1)

            im = np.transpose(images[i, :, :, :].cpu().numpy(), (1, 2, 0))
            im = np.squeeze(im)

            ax.imshow(im,
                      interpolation='nearest',
                      extent=(-0.5, w - 0.5, -0.5, h - 0.5),
                      cmap='gray_r')

            util.plot(means[i, :, 1:].unsqueeze(0),
                      sigmas[i, :, 1:].unsqueeze(0),
                      values[i, :].unsqueeze(0),
                      axes=ax,
                      flip_y=h,
                      alpha_global=0.2)

        plt.gcf()
Code Example #20
def committe(solver, solver_name, intervals, reps):
    np.random.seed()
    X, y = util.basic_data()
    polls = util.add_noise(y)
    curr_labels = np.random.choice(range(len(X)), size=4, replace=False)
    X_train = X[curr_labels]
    square_errors = np.zeros([2, len(intervals)])
    for i in range(len(intervals)):
        print("interval: ", intervals[i])
        for j in range(reps):
            while len(curr_labels) <= intervals[i]:
                next_points = next_countys(solver, curr_labels, X, polls)
                curr_labels = np.append(curr_labels, next_points)
            curr_labels = curr_labels[:intervals[i]]
            preds = solver(X, X[curr_labels], polls[curr_labels])
            square_errors[:, i] += util.square_error(y, preds)
        square_errors[:, i] /= reps
    square_errors = np.vstack(
        (square_errors.mean(axis=0), util.performance(solver, intervals,
                                                      reps).mean(axis=0)))
    util.plot("committe",
              intervals / len(X),
              square_errors,
              legend=[solver_name, "random"],
              x_label="% counties",
              y_label="MSE",
              title="Committe")
Code Example #21
File: gda.py  Project: hardianlawi/stanford-cs229
def main(train_path, valid_path, save_path):
    """Problem: Gaussian discriminant analysis (GDA)

    Args:
        train_path: Path to CSV file containing dataset for training.
        valid_path: Path to CSV file containing dataset for validation.
        save_path: Path to save predicted probabilities using np.savetxt().
    """
    # Load dataset
    x_train, y_train = util.load_dataset(train_path, add_intercept=False)

    # *** START CODE HERE ***
    # Train a GDA classifier
    # Plot decision boundary on validation set
    # Use np.savetxt to save outputs from validation set to save_path

    model = GDA()
    model.fit(x_train, y_train)

    x_val, y_val = util.load_dataset(valid_path, add_intercept=False)
    util.plot(x_val,
              y_val,
              model.theta,
              save_path=save_path.replace(".txt", ".jpg"))

    yhat = model.predict(x_val)
    np.savetxt(save_path, yhat)

    print(f"GDA acc: {util.compute_accuracy(y_val, yhat)}")
    print(f"GDA log loss: {util.compute_log_loss(y_val, yhat)}")
Code Example #22
    def train(self, episodes=1000, max_steps=800, plot_rewards=True):
        # Initialize target network weights
        self.actor.update_target_model(copy=True)
        self.critic.update_target_model(copy=True)
        scores, steps = np.empty(episodes), np.empty(episodes)
        start = time.time()
        for e in range(episodes):
            score, step = self.run_episode(max_steps)
            scores[e], steps[e] = score, step
            print("Episode:", e, "  steps:", step, "  score:", score,
                  "  time:",
                  time.time() - start)

        ensure_saved_models_dir()

        if plot_rewards:
            t_time = time.time() - start
            print("Mean score:", np.mean(scores), " Total steps:",
                  np.sum(steps), " total time:", t_time)
            plot(scores)
            plot_running_avg(scores)
            np.save(
                "./train_data/ddpg_enc_actions" + str(self.state_size) +
                str(self.n_neighbors) + "_scores", scores)
            np.save(
                "./train_data/ddpg_enc_actions" + str(self.state_size) +
                str(self.n_neighbors) + "_time", t_time)
            np.save(
                "./train_data/ddpg_enc_actions" + str(self.state_size) +
                str(self.n_neighbors) + "_steps", steps)
Code Example #23
def main(train_path, eval_path, pred_path):
    """Problem 1(b): Logistic regression with Newton's Method.

    Args:
        train_path: Path to CSV file containing dataset for training.
        eval_path: Path to CSV file containing dataset for evaluation.
        pred_path: Path to save predictions.
    """
    x_train, y_train = util.load_dataset(train_path, add_intercept=True)
    x_eval, y_eval = util.load_dataset(eval_path, add_intercept=True)

    # *** START CODE HERE ***
    # Train a logistic regression classifier
    # Plot decision boundary on top of the evaluation set
    # Use np.savetxt to save predictions on eval set to pred_path

    initial_theta = np.zeros(x_train.shape[1])
    log_reg = LogisticRegression(step_size=0.2,
                                 max_iter=100,
                                 eps=1e-5,
                                 theta_0=initial_theta,
                                 verbose=True)
    log_reg.fit(x_train, y_train)

    prediction = log_reg.predict(x_eval)

    plot_path = pred_path + ".plot.png"
    util.plot(x_eval, y_eval, log_reg.theta, plot_path, correction=1.0)

    np.savetxt(pred_path, prediction)
Code Example #24
File: gda.py  Project: pablo-solis/basic_ml
def main(train_path, valid_path, save_path):
    """Problem 1(e): Gaussian discriminant analysis (GDA)

    Args:
        train_path: Path to CSV file containing dataset for training.
        valid_path: Path to CSV file containing dataset for validation.
        save_path: Path to save predictions using np.savetxt().
    """
    # Load dataset
    x_train, y_train = util.load_dataset(train_path, add_intercept=False)

    # *** START CODE HERE ***
    clf = GDA()
    clf.fit(x_train, y_train)
    #check values...
    #Plot decision boundary on validation set
    x_valid, y_valid = util.load_dataset(valid_path, add_intercept=True)
    util.plot(x_valid, y_valid, clf.theta, save_path[:-4], correction=1)

    # Use np.savetxt to save predictions on the training set (x_train is
    # reloaded with an intercept column to match clf.theta)
    x_train, y_train = util.load_dataset(train_path, add_intercept=True)
    np.savetxt(save_path, clf.predict(x_train))
    # Use np.savetxt to save outputs from validation set to save_path
    # (note: this overwrites the training predictions saved just above)
    np.savetxt(save_path, clf.predict(x_valid))
Code Example #25
def main(train_path, eval_path, pred_path, k=0):
    """Problem 1(b): Logistic regression with Newton's Method.

    Args:
        train_path: Path to CSV file containing dataset for training.
        eval_path: Path to CSV file containing dataset for evaluation.
        pred_path: Path to save predictions.
    """
    
    x_train, y_train = util.load_dataset(train_path, add_intercept=True)
    x_eval, y_eval = util.load_dataset(eval_path, add_intercept=True)
    # *** START CODE HERE ***
    # Train a logistic regression classifier
    # Plot decision boundary on top of validation set
    # Use np.savetxt to save predictions on eval set to pred_path
    clf = LogisticRegression()
    theta = clf.fit(x_train, y_train)
    p = clf.predict(x_eval)
    if k == 0:
        np.savetxt(pred_path, p, delimiter=',')
        sp = 'output/p01b_plot'
        util.plot(x_eval, y_eval, theta, sp)
    elif k == 1:
        # Threshold probabilities into hard 0/1 labels
        p[p < 0.5] = 0
        p[p >= 0.5] = 1
        return theta, p
Code Example #26
    def train(self, episodes=1000, max_steps=1000, plot_rewards=True):
        scores, steps = np.empty(episodes), np.empty(episodes)
        start = time.time()
        for e in range(episodes):
            score, step = self.run_episode(max_steps)
            scores[e], steps[e] = score, step
            print("Episode:", e, "  steps:", step, "  score:", score,
                  "  epsilon:", self.epsilon, "  time:",
                  time.time() - start)
            # if e % 100 == 0:
            #     ensure_saved_models_dir()
            #     self.model.save_weights(FINAL_WEIGHTS_PATH)
            #     print("Weights Saved")
        ensure_saved_models_dir()
        self.model.save_weights(FINAL_WEIGHTS_PATH)

        if plot_rewards:
            t_time = time.time() - start
            print("Mean score:", np.mean(scores), " Total steps:",
                  np.sum(steps), " total time:", t_time)
            plot(scores)
            plot_running_avg(scores)
            np.save("./train_data/ddqn_" + str(self.state_size) + "_scores",
                    scores)
            np.save("./train_data/ddqn_" + str(self.state_size) + "_time",
                    t_time)
            np.save("./train_data/ddqn_" + str(self.state_size) + "_steps",
                    steps)
Code Example #27
def main(train_path, valid_path, save_path):
    """Problem: Gaussian discriminant analysis (GDA)

    Args:
        train_path: Path to CSV file containing dataset for training.
        valid_path: Path to CSV file containing dataset for validation.
        save_path: Path to save predicted probabilities using np.savetxt().
    """
    # Load dataset
    x_train, y_train = util.load_dataset(train_path, add_intercept=False)

    # *** START CODE HERE ***
    # Train a GDA classifier
    model = GDA()

    # Fit model to the training data. Define theta
    model.fit(x_train, y_train)

    # Read validation set
    x_val, y_val = util.load_dataset(valid_path, add_intercept=True)

    # Save predictions to save path
    np.savetxt(save_path, model.predict(x_val))

    # Plot boundaries
    util.plot(x_val, y_val, model.theta, save_path[:-4])
Code Example #28
def main(train_path, eval_path, pred_path):
    """Problem 1(e): Gaussian discriminant analysis (GDA)

    Args:
        train_path: Path to CSV file containing dataset for training.
        eval_path: Path to CSV file containing dataset for evaluation.
        pred_path: Path to save predictions.
    """
    # Load dataset
    x_train, y_train = util.load_dataset(train_path, add_intercept=False)
    x_eval, y_eval = util.load_dataset(eval_path, add_intercept=True)

    # *** START CODE HERE ***
    # Train a GDA classifier
    # Plot decision boundary on validation set
    # Use np.savetxt to save outputs from validation set to pred_path
    gda = GDA(verbose=True)
    gda.fit(x_train, y_train)

    prediction = gda.predict(x_eval)

    plot_path = pred_path + ".plot.png"
    util.plot(x_eval, y_eval, gda.theta, plot_path, correction=1.0)

    np.savetxt(pred_path, prediction)
Code Example #29
File: train.py  Project: franli/glow2d
def main(args):
    start_epoch = 0
    if args.resume:
        print('Resuming from checkpoint at ckpts/flow.pth.tar...')
        checkpoint = torch.load('ckpts/flow.pth.tar')
        flow.load_state_dict(checkpoint['flow'])
        start_epoch = checkpoint['epoch'] + 1
    for epoch in range(start_epoch, start_epoch + args.epoch):
        for i, x in enumerate(dataloader):
            x = x.to(device)

            optim_flow.zero_grad()
            loss_flow = -flow.log_prob(x).mean()
            loss_flow.backward()
            optim_flow.step()

            print("[Epoch %d/%d] [Batch %d/%d] [Loss: %f]" %
                  (epoch, start_epoch + args.epoch, i, len(dataloader),
                   loss_flow.item()))

        print('Saving flow model to ckpts/flow.pth.tar...')
        state = {
            'flow': flow.state_dict(),
            'value': loss_flow,
            'epoch': epoch,
        }
        os.makedirs('ckpts', exist_ok=True)
        torch.save(state, 'ckpts/flow.pth.tar')

        # visualization
        util.plot(dataset, flow, epoch, device)
Code Example #30
def main(train_path, valid_path, save_path):
    """Problem: Gaussian discriminant analysis (GDA)

    Args:
        train_path: Path to CSV file containing dataset for training.
        valid_path: Path to CSV file containing dataset for validation.
        save_path: Path to save predicted probabilities using np.savetxt().
    """
    # Load dataset
    x_train, y_train = util.load_dataset(train_path, add_intercept=False)
    x_eval, y_eval = util.load_dataset(valid_path, add_intercept=False)

    # *** START CODE HERE ***
    # Train a GDA classifier
    clf = GDA()
    clf.fit(x_train, y_train)
    preds = clf.predict(x_eval)

    # Plot decision boundary on validation set
    theta_ = np.insert(clf.theta, 0, clf.theta_zero)
    save_path_ = save_path[:-4]  # drop ".txt" (str.strip removes characters, not a suffix)
    util.plot(x_eval, y_eval, theta_, save_path_)

    # Use np.savetxt to save outputs from validation set to save_path
    np.savetxt(save_path, preds)
Code Example #31
def analyze_test_info(data: list, title: str, graph_name: str):
    data_str = ' '.join(data)
    total_count = 20
    glossary = GlossaryScraper()

    freq_words = [(w, len(re.findall(w, data_str, re.IGNORECASE))) for w in glossary.glossaries]
    sorted_freq_words = sorted(freq_words, key=itemgetter(1), reverse=True)

    fig1 = plot(sorted_freq_words[0: total_count],
                '{} (Test Topics)'.format(title), 'Word Counts')
    fig1.savefig('{}_glossaries.png'.format(graph_name))

    fig2 = plot(get_word_frequencies(data, total_count),
                '{} (Word Frequencies)'.format(title), 'Word Counts')
    fig2.savefig('{}_word_frequencies.png'.format(graph_name))
Code Example #32
def main(): 
    iterations = []
    inv_iterations = []
    dets = []
    traces = []
    total_fails = 0

    for _ in range(200):
        i = 0
        vec = None
        inv_vec = None
        tries = -1
        while vec is None or inv_vec is None:
            i = 0
            a, a_inv = get_matrix()
            while vec is None and i < len(VECTORS):
                val, vec, n_iter = power(a, VECTORS[i], VECTORS[i], 0.00005, 100)
                i += 1
            i = 0
            while inv_vec is None and i < len(VECTORS):
                inv_val, inv_vec, inv_n_iter = power(a_inv, VECTORS[i], VECTORS[i], 0.00005, 100)
                i += 1
            tries += 1
        total_fails += tries
        iterations.append(n_iter / 100.)
        inv_iterations.append(inv_n_iter / 100.)
        traces.append(trace(a))
        dets.append(determinant_2_2(a))
    util.plot([min(dets) - .1, max(dets) + .1, min(traces) - .1, max(traces) + .1],
              dets,
              traces,
              iterations,
              "Determinant",
              "Trace",
              "Determinant vs Trace by A Power Iteration",
              "writing/3/writing_3_a")
    util.plot([min(dets) - .1, max(dets) + .1, min(traces) - .1, max(traces) + .1],
              dets,
              traces,
              inv_iterations,
              "Determinant",
              "Trace",
              "Determinant vs Trace by A Inverse Power Iteration",
              "writing/3/writing_3_a_inv")
    print("Number of rejected matrices: ", total_fails)
Code Example #33
File: expe_2.py  Project: phalves/SequenceAlignment
            y = "".join([random.choice(z) for _ in range(size)])

            # Sequence Alignment
            v, cost = linear_sequence_alignment(x, y, g, a)
            out_x, out_y = get_sequence_linear(x, y, g, a)

            elapsed = (time.clock() - start_time)/60
            used_men = memory_usage() - start_mem
            
            # Saves chain size
            result[0].append(size)
            # Saves time
            result[1].append(elapsed)
            # Saves consumed memory
            result[2].append(used_men)
            
            print "i = %d, j = %d\nElapsed Time: %.3f mins\nUsed memory: %.3f MB" % (i, j, elapsed, used_men)

            # Releasing unused memory
            del x, y, v, out_x, out_y
            gc.collect()
            if elapsed > 15:
                return result
    return result


if __name__ == '__main__':
    result = task2_linear()
    plot(result)