Example 1
def train_network(step, hyperparameters):
    # hyperparameters['base_dir'] = get_base_dir(step)
    # hyperparameters['train_steps'] = 6
    # accuracy = targeted_generate.start_training(**hyperparameters)
    K.clear_session()

    predictor_model, _ = models.make_predictor_model(**hyperparameters)
    train_grids, train_curves = data.get_all_data(matching='none')
    # Define our loss function and compile our model
    loss_func = hyperparameters.get('loss_func', 'kullback_leibler_divergence')
    models.unfreeze(predictor_model)
    learning_rate = 10**-3
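    # clipnorm=1.0 clips gradients by norm to keep fine-tuning updates stable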
    optimizer = Adam(learning_rate, clipnorm=1.0)
    predictor_model.compile(optimizer,
                            loss=loss_func,
                            metrics=['mae', models.worst_abs_loss])
    # Fit our model to the dataset
    predictor_batch_size = hyperparameters.get('predictor_batch_size', 64)
    predictor_epochs = 15
    h = predictor_model.fit(x=train_grids,
                            y=train_curves,
                            batch_size=predictor_batch_size,
                            epochs=predictor_epochs,
                            validation_split=0.1)
    # Average the validation MAE over the last three epochs
    mae = sum(h.history['val_mae'][-3:]) / 3
    return mae
Example 2
def get_cluster_quality():
    """Returns cluster quality.
    """

    print('Getting vocabulary ...')
    data_file = os.path.join(args.data_path, 'min_df_{}'.format(args.min_df))
    vocab, cluster_valid = data.get_all_data(data_file, temporal=True)
    vocab_size = len(vocab)
    topics_distributions = []

    # get data
    print('Getting full data ...')
    tokens = cluster_valid['tokens']
    counts = cluster_valid['counts']
    times = cluster_valid['times']
    num_times = len(np.unique(times))
    num_docs = len(tokens)
    rnn_inp = data.get_rnn_input(tokens, counts, times, num_times, vocab_size, num_docs)
    model.eval()
    with torch.no_grad():
        indices = torch.split(torch.tensor(range(num_docs)), args.eval_batch_size)

        eta = get_eta(rnn_inp)
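        # eta (above) holds one latent vector per time slice; it is indexed by each batch's time stamps below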

        acc_loss = 0
        cnt = 0
        for idx, ind in enumerate(indices):
            data_batch, times_batch = data.get_batch(
                tokens, counts, ind, vocab_size, args.emb_size, temporal=True, times=times)
            sums = data_batch.sum(1).unsqueeze(1)
            if args.bow_norm:
                normalized_data_batch = data_batch / sums
            else:
                normalized_data_batch = data_batch

            eta_td = eta[times_batch.type('torch.LongTensor')]
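            # theta: per-document topic proportions for this batch, conditioned on each document's time slice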
            theta = get_theta(eta_td, normalized_data_batch)


        print('\n')
        print('Get topic coherence...')
        print('tokens: ', tokens[0])
        TC_all = []
        cnt_all = []
        for tt in range(args.num_times):
            tc, cnt = get_topic_coherence(beta[:, tt, :].detach().numpy(), tokens, vocab)
            TC_all.append(tc)
            cnt_all.append(cnt)
        print('TC_all: ', TC_all)
        TC_all = torch.tensor(TC_all)
        print('TC_all: ', TC_all.size())
        print('\n')
        print('Get topic quality...')
        quality = tc * diversity  # 'diversity' is expected from a topic-diversity computation not shown in this excerpt
        print('Topic Quality is: {}'.format(quality))
        print('#'*100)
Example 3
def quality_histogram():
    all_data = data.get_all_data('winequality-white.csv')
    y = [int(each['quality']) for each in all_data]

    # Fraction of wines at each quality score (the white-wine set has 4898 samples)
    for score in range(3, 10):
        print(f"{score} ", round(y.count(score) / 4898, 3))

    plt.hist(y, 7)
    plt.title("Quality Score Distribution")
    plt.show()
Example 4
import numpy as np
import time
import tensorflow as tf

from data import get_all_data
from model import Model
from environment import sample, evaluate, sample_and_evaluate
from utils import save, load, info

try:
    records = load("records")
    info("load saved records")
except Exception:
    records = get_all_data()
    info("no saved records")
    save(records, "records")

from search import search

with tf.device("/gpu:0"):
    search(records[15])

raise SystemExit  # early exit; the code below is never reached

with tf.device("/gpu:0"):
    model = Model(records[0]["op_table"])

    try:
        model.load_weights('weights')
        info("load saved weight")
    except Exception:
Example 5
def data():

    return get_all_data('2019_tripdata')
Example 6
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.experimental import enable_halving_search_cv  # noqa
from sklearn.model_selection import HalvingGridSearchCV
import pickle

from data import get_all_data, score_metric, normalize_review_weight

if __name__ == '__main__':
    X_train, y_train, X_test, y_test = get_all_data()
    train_weights = [normalize_review_weight(w) for w in X_train['helpful']]

    tfidf_grid = {
        'vectorizer__lowercase': [True, False],
        'vectorizer__ngram_range': [(1, 3), (1, 4), (2, 4)],
        'vectorizer__max_df': [1.0, 0.95, 0.9, 0.85, 0.8],
        'vectorizer__min_df': [25, 50, 100, 200, 0.01, 0.05],
    }

    svm = Pipeline([('vectorizer', TfidfVectorizer()),
                    ('classifier', LinearSVC(class_weight='balanced'))])
    grid_search = HalvingGridSearchCV(svm,
                                      tfidf_grid,
                                      random_state=42,
                                      verbose=10,
                                      n_jobs=12)
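    # the classifier__ prefix routes sample_weight to the LinearSVC step of the pipeline during fit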
    grid_search.fit(X_train['reviewText'],
                    y_train,
                    classifier__sample_weight=train_weights)
Example 7
import sys
import numpy as np
from copy import copy

# assumed to come from the same project as Example 4 (data.py / environment.py)
from data import get_all_data
from environment import evaluate

def info(*args):
    print(*args, file=sys.stdout, flush=True)

def neighbour(strategy):
    new_strategy = copy(strategy)
    # mutate one random position to a random (nccl, index) pair; nccl ranges over {0, 1} as in the grid search below
    new_strategy[np.random.choice(len(strategy))] = np.random.choice(2), np.random.choice(8)
    return new_strategy

def P(loss_old, loss_new, T):
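    # Simulated-annealing-style acceptance: always accept an improvement,
    # otherwise accept with a temperature-dependent probability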
    if loss_new <= loss_old:
        return 1
    else:
        return np.exp(1 - 1 / T)

record = get_all_data()[2]

# decisions = [ [1, 7] for _ in range(len(record["cgroups"])) ]
# evaluate(record, decisions)
# sys.exit(0)

s, baseline = None, 9999
for nccl in range(2):
    for i in range(8):
        decisions = [ [nccl, i] for _ in range(len(record["cgroups"])) ]
        loss = evaluate(record, decisions)
        info(decisions, loss)
        if loss < baseline:
            s, baseline = decisions, loss

loss = 1
Example 8
"""
Planar Monocular SLAM -- Helpful Tool Functions Setup
"""

# Import libraries
from data import get_all_data

import numpy as np
import os
import math

# Set working directory
directory = os.getcwd()
# Set directory with dataset
dataset_dir = os.path.join(directory, "dataset")

_, _, camera_data = get_all_data(dataset_dir)

# Get useful info about camera
cam_matrix = camera_data[0][1]
cam_transform = camera_data[1][1]

# Dimensions
projection_dim = 2
pose_dim = 6
landmark_dim = 3

# Get initial locations of Principal Point Offset, focal length, and z far/near
u_0 = cam_matrix[0, 2]
v_0 = cam_matrix[1, 2]
f = cam_matrix[0, 0]
z_near = camera_data[2][1]
Example 9
WITH_NOISE = bool(args.with_noise)

if WITH_NOISE:
    GAMMA = 2.6
else:
    GAMMA = 1.36e-2
print('Plotting predictions with gamma = %.2e' % GAMMA)

N = 6400
LAYERS = [2, 20, 20, 1]
X_train, u_train, lb, ub = get_data(N, WITH_NOISE)
X_star, u_star, x, t, Exact, X, T = get_all_data(WITH_NOISE)

model = PhysicsInformedNN(X_train, u_train, LAYERS, lb, ub, GAMMA)
model.train(0)
u_pred, f_pred = model.predict(X_star)
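# Relative L2 error of the predicted solution against the reference u_star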
error_u = np.linalg.norm(u_star - u_pred, 2) / np.linalg.norm(u_star, 2)
U_pred = interpolate.griddata(X_star, u_pred.flatten(), (X, T), method='cubic')
lambda_value = model.get_pde_params()[0]
error_lambda_ = np.abs(lambda_value - 1.0) * 100

print('Error u: %e' % (error_u))
print('Error l1: %.2f%%' % (error_lambda_))

# -----------------------------------------------------------------------------
# Plot predictions.
fig, axes = plt.subplots(1, 2, figsize=(4, 2.47), sharey=True)
Example 10
# plt.ion()

# Import libraries
import os
import numpy as np

# Import files
from data import get_all_data, get_new_seqdata
from prediction import pose_model
from error_ellipse import error_ellipse
from correction import correction
from newlandmark import newlandmark
from associatelandmark import associateLandmarkIDs
from landmarks_model import landmark_model
from funcTools import *

directory = os.getcwd()  # Set working directory
dataset_dir = os.path.join(directory, "dataset")  # Set directory with dataset

world_data, trajectory_data, camera_data = get_all_data(
    dataset_dir)  # Get the data info

# Initialize variables
rob_poseH = np.zeros([4, 4, 1])
rob_poseH_gt = np.zeros([4, 4, 1])
rob_update = np.zeros([4, 4, 1])  # updated pose after correction

id_to_state_map = np.ones((1000, 14), dtype='float64') * -1
state_to_id_map = np.ones((1000, 1), dtype='int32') * -1
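# -1 marks map entries that have not been assigned yet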
# will retain the pose of the robot for each time sequence
robot_pose_map = np.zeros((336, 3))
robot_gt_pose_map = np.zeros((336, 3))

land_triang = np.ones([3, 1]) * -1
land_triang_gt = np.ones([3, 1]) * -1
Land_TriangPrev = np.ones([3, 1]) * -1
Example 11
    sample_range_y = np.round(grid_y, 1)
    plt.xticks(pixel_range, sample_range_x)
    plt.yticks(pixel_range, sample_range_y)
    plt.xlabel('z[0]')
    plt.ylabel('z[1]')
    plt.imshow(figure, cmap='Greys_r', vmin=0.0, vmax=1.0)
    plt.title('Grids Over Latent Distribution')
    if save:
        plt.savefig(filename)
    plt.show()


if __name__ == '__main__':
    encoder = load_model(model_name + '/encoder.tf')
    decoder = load_model(model_name + '/decoder.tf')
    x_test, y_test = data.get_all_data(matching='../generative_model_3')
    # x_test, y_test = data.get_all_data(matching='../generative_model_2')

    # p = np.random.permutation(len(x_test))
    # x_test  = x_test[p]
    # y_test  = y_test[p]

    # x_test = x_test[:100]
    x_test = np.reshape(x_test, [-1, GRID_SIZE, GRID_SIZE, 1])
    # y_test = y_test[:100]
    plot_latent((encoder, decoder), (x_test, y_test),
                use_curve=True,
                save=True)

    # show_grids('vae_conditional')
Example 12
import numpy as np
from tensorflow.keras import backend as K  # assumed backend import; the original project may import K differently

import data
from constants import *

import pdb

def sampling(args):
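    # Reparameterization trick: z = z_mean + exp(0.5 * z_log_var) * epsilon keeps sampling differentiable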
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    # by default, random_normal has mean = 0 and std = 1.0
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon

# x_train, y_train = data.get_all_data(matching='vae_cnn_data')
x_train, y_train = data.get_all_data(matching='../generative_model_2')

num_test = 100
x_test = x_train[:num_test]
y_test = y_train[:num_test]
x_train = x_train[num_test:]
y_train = y_train[num_test:]

x_train = np.reshape(x_train, [-1, GRID_SIZE, GRID_SIZE, 1])
x_test = np.reshape(x_test, [-1, GRID_SIZE, GRID_SIZE, 1])

# network parameters
original_dim = GRID_SIZE * GRID_SIZE
input_shape = (GRID_SIZE, GRID_SIZE, 1)
batch_size = 128
kernel_size = 3
Example 13
                    default=0,
                    help='whether to compute tc or not')

args = parser.parse_args()

pca = PCA(n_components=2)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## set seed
np.random.seed(args.seed)
torch.backends.cudnn.deterministic = True
torch.manual_seed(args.seed)

print('Getting vocabulary ...')
data_file = os.path.join(args.data_path, 'min_df_{}'.format(args.min_df))
vocab, cluster_valid = data.get_all_data(data_file, temporal=True)
vocab_size = len(vocab)

# get data
print('Getting full data ...')
tokens = cluster_valid['tokens']
counts = cluster_valid['counts']
times = cluster_valid['times']
num_times = len(np.unique(times))
num_docs = len(tokens)

## get embeddings
print('Getting embeddings ...')
emb_path = args.emb_path
vect_path = os.path.join(args.data_path.split('/')[0], 'embeddings.pkl')
vectors = {}
Example 14
def train_step(step, predictor_model, lc_model, generator_model, **kwargs):
    # Setup our directory
    # -------------------
    base_dir = kwargs.get('base_dir', 'generative_model_default')
    step_dir = os.path.join(base_dir, 'step_{}'.format(step))
    grids_dir = os.path.join(step_dir, 'grids')
    densities_dir = os.path.join(step_dir, 'results')
    target_densities_dir = os.path.join(step_dir, 'target_densities')
    model_save_dir = os.path.join(base_dir, 'model_saves')
    predictor_model_logs = os.path.join(step_dir, 'predictor_model_logs')
    generator_model_logs = os.path.join(step_dir, 'generator_model_logs')
    make_dirs(step_dir, grids_dir, densities_dir, target_densities_dir,
              model_save_dir, predictor_model_logs, generator_model_logs)

    predictor_save_file = os.path.join(model_save_dir,
                                       'predictor_step_{}.hdf5'.format(step))
    generator_save_file = os.path.join(model_save_dir,
                                       'generator_step_{}.hdf5'.format(step))

    # Train predictor on dataset
    # --------------------------
    # Get our training data
    train_grids, train_curves = data.get_all_data(matching=base_dir,
                                                  augment_factor=20)
    # Define our loss function and compile our model
    predictor_loss_func = kwargs.get('predictor_loss_func',
                                     'binary_crossentropy')
    models.unfreeze(predictor_model)
    learning_rate = 10**-2
    optimizer = SGD(learning_rate, clipnorm=1.0)
    predictor_model.compile(optimizer,
                            loss=predictor_loss_func,
                            metrics=['mae', models.worst_abs_loss])
    # Fit our model to the dataset
    predictor_batch_size = kwargs.get('predictor_batch_size', 64)
    predictor_epochs = kwargs.get('predictor_epochs', 6)
    if step == 0:
        predictor_epochs += kwargs.get('predictor_first_step_epoch_boost',
                                       10)  # train more to start off
    lr_patience = max(int(round(predictor_epochs * 0.2)), 3)  # at least 3
    es_patience = max(int(round(predictor_epochs * 0.8)), 4)  # at least 4

    predictor_model.fit(x=train_grids,
                        y=train_curves,
                        batch_size=predictor_batch_size,
                        epochs=predictor_epochs,
                        validation_split=0.1,
                        callbacks=[
                            ReduceLROnPlateau(patience=lr_patience,
                                              factor=0.1),
                            EarlyStopping(patience=es_patience,
                                          restore_best_weights=True),
                            TensorBoard(log_dir=predictor_model_logs,
                                        histogram_freq=1,
                                        write_graph=False,
                                        write_images=False)
                        ])
    # Save our model
    print('Saving model', end='... ', flush=True)
    predictor_model.save(predictor_save_file, include_optimizer=False)
    print('done')

    # Train generator on predictor
    # ----------------------------
    # Get our training data
    print('Picking random curves ', end='... ', flush=True)
    num_curves = 10000
    boost_dim = kwargs.get('boost_dim', 5)
    random_curves = data.make_generator_input(num_curves,
                                              boost_dim,
                                              allow_squeeze=True,
                                              as_generator=False)
    print('Done')

    # Create the training model
    models.freeze(predictor_model)
    lc_inp = Input(shape=(boost_dim, ), name='latent_code')
    curve_inp = Input(shape=(N_ADSORP, ), name='target_curve')
    generator_out = generator_model([curve_inp, lc_inp])
    predictor_out = predictor_model(generator_out)
    lc_out = lc_model(generator_out)
    training_model = Model(inputs=[curve_inp, lc_inp],
                           outputs=[predictor_out, lc_out])
    # Define our loss function and compile our model
    generator_loss_func = kwargs.get('generator_loss_func',
                                     'binary_crossentropy')
    loss_weights = kwargs.get('loss_weights', [1.0, 0.6])
    learning_rate = 10**-2
    optimizer = Adam(learning_rate)
    training_model.compile(optimizer,
                           loss=[generator_loss_func, 'mse'],
                           metrics={
                               'predictor_model':
                               ['mae', models.worst_abs_loss],
                               'latent_code_model':
                               ['mae', models.worst_abs_loss]
                           },
                           loss_weights=loss_weights)
    # Fit our model to the curves
    generator_batch_size = kwargs.get('generator_batch_size', 64)
    generator_epochs = kwargs.get('generator_epochs', 3)
    if step == 0:
        generator_epochs += kwargs.get('generator_first_step_epoch_boost',
                                       20)  # train more to start off
    lr_patience = max(int(round(generator_epochs * 0.1)), 3)  # at least 3
    es_patience = max(int(round(generator_epochs * 0.8)), 4)  # at least 4
    training_model.fit(x=random_curves,
                       y=random_curves,
                       batch_size=generator_batch_size,
                       epochs=generator_epochs,
                       validation_split=0.1,
                       callbacks=[
                           ReduceLROnPlateau(patience=lr_patience, factor=0.1),
                           EarlyStopping(patience=es_patience),
                           TensorBoard(log_dir=generator_model_logs,
                                       histogram_freq=1,
                                       write_graph=False,
                                       write_images=False)
                       ])
    # Save our model
    generator_model.save(generator_save_file, include_optimizer=False)

    # Generate new data
    # -----------------
    num_new_grids = kwargs.get('num_new_grids', 100)
    data_upscale_factor = kwargs.get('data_upscale_factor', 1.5)
    artificial_curves, latent_codes = data.make_generator_input(
        int(num_new_grids * data_upscale_factor),
        boost_dim,
        as_generator=False)
    generated_grids = generator_model.predict(
        [artificial_curves, latent_codes])
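    # Cast the generator's real-valued output to integer grids before writing them to CSV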
    saved_grids = generated_grids.astype('int')
    for i, grid in enumerate(saved_grids):
        path = os.path.join(grids_dir, 'grid_%04d.csv' % i)
        np.savetxt(path, grid, fmt='%i', delimiter=',')

    print('Evaluating candidate grids')
    os.system('./fast_dft {}'.format(step_dir))

    for i, artificial_curve in enumerate(artificial_curves):
        path = os.path.join(target_densities_dir,
                            'artificial_curve_%04d.csv' % i)
        np.savetxt(path, artificial_curve, fmt='%f', delimiter=',')

    # Prune data
    # ----------
    # Get the actual, target, and predicted curves
    density_files = glob.glob(os.path.join(densities_dir, 'density_*.csv'))
    density_files.sort()
    actual_densities = [
        np.append(
            np.genfromtxt(density_file,
                          delimiter=',',
                          skip_header=1,
                          max_rows=N_ADSORP)[:, 1], 1)
        for density_file in density_files
    ]
    target_densities = [
        np.cumsum(np.insert(curve_diffs, 0, 0))
        for curve_diffs in artificial_curves
    ]
    predicted_densities = [
        np.cumsum(np.insert(curve_diffs, 0, 0))
        for curve_diffs in predictor_model.predict(generated_grids)
    ]
    generated_grids = list(generated_grids)
    new_data = list(
        zip(actual_densities, target_densities, predicted_densities,
            generated_grids))

    # Sort the grids by some metric
    # Sample k curves from our dataset to see how close we are to our dataset
    def generator_err(x):
        actual_curve, target_curve, predicted_curve, _ = x
        delta_prime_err = np.sum(np.abs(actual_curve - target_curve))
        return delta_prime_err

    def predictor_err(x):
        actual_curve, target_curve, predicted_curve, _ = x
        gamma_err = np.sum(np.abs(actual_curve - predicted_curve))
        return gamma_err

    def cross_err(x):
        actual_curve, target_curve, predicted_curve, _ = x
        delta_err = np.sum(np.abs(target_curve - predicted_curve))
        return delta_err

    # Evaluate our accuracies
    generator_error = np.array(list(map(generator_err,
                                        new_data))) / (N_ADSORP + 1)
    predictor_error = np.array(list(map(predictor_err,
                                        new_data))) / (N_ADSORP + 1)
    cross_error = np.array(list(map(cross_err, new_data))) / (N_ADSORP + 1)
    print('Generated data error metric: {:.3f} ± {:.3f}'.format(
        generator_error.mean(), generator_error.std()))
    print('Predictor error metric: {:.3f} ± {:.3f}'.format(
        predictor_error.mean(), predictor_error.std()))
    print('Cross error metric: {:.3f} ± {:.3f}'.format(cross_error.mean(),
                                                       cross_error.std()))

    # Remove the grids that are already good
    print('Finding most dissimilar grids')
    divergences = np.fromiter(map(lambda x: divergence(x[0]), new_data),
                              dtype=float)
    divergences = divergences**1.2
    divergences /= np.sum(divergences)
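    # Sample num_new_grids grids without replacement, with probability proportional to divergence**1.2,
    # so the most dissimilar grids are kept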
    new_data_inds = np.random.choice(len(new_data),
                                     num_new_grids,
                                     replace=False,
                                     p=divergences)
    new_data = [new_data[i] for i in new_data_inds]

    # Add data back to dataset
    # ------------------------
    # Remove our tmp data
    shutil.rmtree(grids_dir)
    shutil.rmtree(densities_dir)
    shutil.rmtree(target_densities_dir)
    make_dirs(grids_dir, densities_dir, target_densities_dir)

    # Save new data
    print('Saving new grids')
    for i, (density, target_density, _, grid) in enumerate(new_data):
        grid_path = os.path.join(grids_dir, 'grid_%04d.csv' % i)
        density_path = os.path.join(densities_dir, 'density_%04d.csv' % i)
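        # note: density_path is not written here; actual densities are recomputed by the fast_dft call below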
        target_density_path = os.path.join(target_densities_dir,
                                           'artificial_curve_%04d.csv' % i)
        np.savetxt(grid_path, grid, fmt='%i', delimiter=',')
        np.savetxt(target_density_path,
                   np.diff(target_density),
                   fmt='%f',
                   delimiter=',')

    print('Evaluating new grids')
    os.system('./fast_dft {}'.format(step_dir))

    return generator_error, predictor_error, cross_error
Example 15
    def predict(self, image):
        if self._model is not None:
            resized = cv2.resize(image, (28, 28))
            reshaped = resized.reshape(1, 1, 28, 28)
            prediction = self._model.predict_classes(reshaped, verbose=0)
            return prediction[0]
        else:
            raise Exception("Model does not exist. Please load a model first.")


if __name__ == '__main__':
    import data
    model_dir = "models"
    x_train, y_train, x_test, y_test = data.get_all_data()
    #x_train, y_train, x_test, y_test = get_all_data()
    print(F"train/test shape: {x_train.shape}/{x_test.shape}")
    classifier = DigitClassifier()

    train = True
    if train:
        classifier.train(x_train, y_train, x_test, y_test)
        classifier.save(model_dir)
    else:
        classifier.load(model_dir)
        for idx in range(10):
            image = x_test[idx][0]
            result = classifier.predict(image)
            print(F"Prediction is: {result}")
            cv2.imshow("test image", image)