def train_network(step, hyperparameters):
    # hyperparameters['base_dir'] = get_base_dir(step)
    # hyperparameters['train_steps'] = 6
    # accuracy = targeted_generate.start_training(**hyperparameters)
    K.clear_session()
    predictor_model, _ = models.make_predictor_model(**hyperparameters)
    train_grids, train_curves = data.get_all_data(matching='none')

    # Define our loss function and compile our model
    loss_func = hyperparameters.get('loss_func', 'kullback_leibler_divergence')
    models.unfreeze(predictor_model)
    learning_rate = 10**-3
    optimizer = Adam(learning_rate, clipnorm=1.0)
    predictor_model.compile(optimizer,
                            loss=loss_func,
                            metrics=['mae', models.worst_abs_loss])

    # Fit our model to the dataset
    predictor_batch_size = hyperparameters.get('predictor_batch_size', 64)
    predictor_epochs = 15
    h = predictor_model.fit(x=train_grids,
                            y=train_curves,
                            batch_size=predictor_batch_size,
                            epochs=predictor_epochs,
                            validation_split=0.1)

    # Average the validation MAE of the last three epochs
    mae = (h.history['val_mae'][-1] + h.history['val_mae'][-2]
           + h.history['val_mae'][-3]) / 3
    return mae
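# Minimal usage sketch for train_network. Which hyperparameter keys are valid
# depends on models.make_predictor_model, so the dict below is an illustrative
# assumption rather than the original configuration.
if __name__ == '__main__':
    hyperparameters = {
        'loss_func': 'kullback_leibler_divergence',
        'predictor_batch_size': 64,
    }
    val_mae = train_network(step=0, hyperparameters=hyperparameters)
    print('Mean validation MAE over the last 3 epochs: {:.4f}'.format(val_mae))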
def get_cluster_quality():
    """Returns cluster quality."""
    print('Getting vocabulary ...')
    data_file = os.path.join(args.data_path, 'min_df_{}'.format(args.min_df))
    vocab, cluster_valid = data.get_all_data(data_file, temporal=True)
    vocab_size = len(vocab)
    topics_distributions = []

    # get data
    print('Getting full data ...')
    tokens = cluster_valid['tokens']
    counts = cluster_valid['counts']
    times = cluster_valid['times']
    num_times = len(np.unique(times))
    num_docs = len(tokens)
    rnn_inp = data.get_rnn_input(tokens, counts, times, num_times, vocab_size,
                                 num_docs)

    model.eval()
    with torch.no_grad():
        indices = torch.split(torch.tensor(range(num_docs)),
                              args.eval_batch_size)
        eta = get_eta(rnn_inp)
        acc_loss = 0
        cnt = 0
        for idx, ind in enumerate(indices):
            data_batch, times_batch = data.get_batch(tokens,
                                                     counts,
                                                     ind,
                                                     vocab_size,
                                                     args.emb_size,
                                                     temporal=True,
                                                     times=times)
            sums = data_batch.sum(1).unsqueeze(1)
            if args.bow_norm:
                normalized_data_batch = data_batch / sums
            else:
                normalized_data_batch = data_batch
            eta_td = eta[times_batch.type('torch.LongTensor')]
            theta = get_theta(eta_td, normalized_data_batch)

        print('\n')
        print('Getting topic coherence ...')
        print('tokens[0]: ', tokens[0])
        # `beta` is assumed to be the per-time-step topic-word distribution
        # produced by the model (num_topics x num_times x vocab_size).
        TC_all = []
        cnt_all = []
        for tt in range(args.num_times):
            tc, cnt = get_topic_coherence(beta[:, tt, :].detach().numpy(),
                                          tokens, vocab)
            TC_all.append(tc)
            cnt_all.append(cnt)
        print('TC_all: ', TC_all)
        TC_all = torch.tensor(TC_all)
        print('TC_all size: ', TC_all.size())

        print('\n')
        print('Getting topic quality ...')
        # Topic quality = coherence * diversity; `diversity` is assumed to be
        # computed separately (see the sketch below).
        quality = tc * diversity
        print('Topic Quality is: {}'.format(quality))
        print('#' * 100)
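# The `diversity` term used above is not computed in this excerpt. Below is a
# minimal sketch of the standard topic-diversity measure (fraction of unique
# words among the top-k words of every topic at one time step); the function
# name and the choice of topk=25 are assumptions, not part of the original code.
def get_topic_diversity(beta_t, topk=25):
    """beta_t: (num_topics, vocab_size) topic-word matrix for one time step."""
    num_topics = beta_t.shape[0]
    top_words = np.argsort(beta_t, axis=1)[:, -topk:]  # top-k word ids per topic
    unique_words = np.unique(top_words)
    return len(unique_words) / (topk * num_topics)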
def quality_histogram():
    all_data = data.get_all_data('winequality-white.csv')
    y = [int(each['quality']) for each in all_data]

    # Print the fraction of wines at each quality score (3 through 9)
    for score in range(3, 10):
        print(score, '', round(y.count(score) / len(y), 3))

    plt.hist(y, 7)
    plt.title("Quality Score Distribution")
    plt.show()
import numpy as np
import time
import tensorflow as tf

from data import get_all_data
from model import Model
from environment import sample, evaluate, sample_and_evaluate
from utils import save, load, info

try:
    records = load("records")
    info("load saved records")
except:
    records = get_all_data()
    info("no saved records")
    save(records, "records")

from search import search
with tf.device("/gpu:0"):
    search(records[15])
raise SystemExit

# NOTE: the code below is unreachable while the SystemExit above is in place.
with tf.device("/gpu:0"):
    model = Model(records[0]["op_table"])
    try:
        model.load_weights('weights')
        info("load saved weight")
    except:
        pass  # no saved weights; assumed to fall back to training from scratch
def data():
    return get_all_data('2019_tripdata')
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.experimental import enable_halving_search_cv  # noqa
from sklearn.model_selection import HalvingGridSearchCV
import pickle

from data import get_all_data, score_metric, normalize_review_weight

if __name__ == '__main__':
    X_train, y_train, X_test, y_test = get_all_data()
    train_weights = [normalize_review_weight(w) for w in X_train['helpful']]

    tfidf_grid = {
        'vectorizer__lowercase': [True, False],
        'vectorizer__ngram_range': [(1, 3), (1, 4), (2, 4)],
        'vectorizer__max_df': [1.0, 0.95, 0.9, 0.85, 0.8],
        'vectorizer__min_df': [25, 50, 100, 200, 0.01, 0.05],
    }

    svm = Pipeline([('vectorizer', TfidfVectorizer()),
                    ('classifier', LinearSVC(class_weight='balanced'))])

    grid_search = HalvingGridSearchCV(svm,
                                      tfidf_grid,
                                      random_state=42,
                                      verbose=10,
                                      n_jobs=12)
    grid_search.fit(X_train['reviewText'],
                    y_train,
                    classifier__sample_weight=train_weights)
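    # `pickle` is imported above but unused in this excerpt. A minimal sketch
    # of how the fitted search might be inspected and persisted follows; the
    # output filename is an assumption.
    print('Best parameters:', grid_search.best_params_)
    print('Best CV score:', grid_search.best_score_)
    with open('svm_tfidf_best.pkl', 'wb') as f:
        pickle.dump(grid_search.best_estimator_, f)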
import sys
from copy import copy

import numpy as np

from data import get_all_data
from environment import evaluate


def info(*args):
    print(*args, file=sys.stdout, flush=True)


def neighbour(strategy):
    # Mutate one randomly chosen decision of the strategy;
    # each decision is (nccl flag in {0, 1}, option in 0..7)
    new_strategy = copy(strategy)
    new_strategy[np.random.choice(len(strategy))] = (np.random.choice(2),
                                                     np.random.choice(8))
    return new_strategy


def P(loss_old, loss_new, T):
    # Simulated-annealing acceptance probability
    if loss_new <= loss_old:
        return 1
    else:
        return np.exp(1 - 1 / T)


record = get_all_data()[2]

# decisions = [ [1, 7] for _ in range(len(record["cgroups"])) ]
# evaluate(record, decisions)
# sys.exit(0)

# Exhaustively try every uniform strategy and keep the best as a baseline
s, baseline = None, 9999
for nccl in range(2):
    for i in range(8):
        decisions = [[nccl, i] for _ in range(len(record["cgroups"]))]
        loss = evaluate(record, decisions)
        info(decisions, loss)
        if loss < baseline:
            s, baseline = decisions, loss

loss = 1
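# `neighbour` and `P` above suggest a simulated-annealing refinement of the
# best uniform strategy. A minimal sketch is given below; the iteration count
# and the linear cooling schedule are assumptions, not taken from the
# original code.
T0, n_iters = 1.0, 200
current, current_loss = s, baseline
for k in range(n_iters):
    T = T0 * (1 - k / n_iters)  # linear cooling, always > 0 for k < n_iters
    candidate = neighbour(current)
    candidate_loss = evaluate(record, candidate)
    if np.random.rand() < P(current_loss, candidate_loss, T):
        current, current_loss = candidate, candidate_loss
    info(k, current_loss)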
"""Planar Monocular SLAM -- Helpful Tool Functions Setup"""

# Import libraries
from data import get_all_data
import numpy as np
import os
import math

# Set working directory
directory = os.getcwd()

# Set directory with dataset
dataset_dir = os.path.join(directory, "dataset")

_, _, camera_data = get_all_data(dataset_dir)

# Get useful info about camera
cam_matrix = camera_data[0][1]
cam_transform = camera_data[1][1]

# Dimensions
projection_dim = 2
pose_dim = 6
landmark_dim = 3

# Get the principal point offset, focal length, and z far/near planes
u_0 = cam_matrix[0, 2]
v_0 = cam_matrix[1, 2]
f = cam_matrix[0, 0]
z_near = camera_data[2][1]
WITH_NOISE = bool(args.with_noise)

if WITH_NOISE:
    GAMMA = 2.6
else:
    GAMMA = 1.36e-2

print('Plotting predictions with gamma = %.2e' % GAMMA)

N = 6400
LAYERS = [2, 20, 20, 1]

X_train, u_train, lb, ub = get_data(N, WITH_NOISE)
X_star, u_star, x, t, Exact, X, T = get_all_data(WITH_NOISE)

model = PhysicsInformedNN(X_train, u_train, LAYERS, lb, ub, GAMMA)
model.train(0)

u_pred, f_pred = model.predict(X_star)
error_u = np.linalg.norm(u_star - u_pred, 2) / np.linalg.norm(u_star, 2)
U_pred = interpolate.griddata(X_star, u_pred.flatten(), (X, T), method='cubic')

lambda_value = model.get_pde_params()[0]
error_lambda_ = np.abs(lambda_value - 1.0) * 100

print('Error u: %e' % error_u)
print('Error l1: %.2f%%' % error_lambda_)

# -----------------------------------------------------------------------------
# Plot predictions.
fig, axes = plt.subplots(1, 2, figsize=(4, 2.47), sharey=True)
# plt.ion()

# Import files
from data import get_all_data, get_new_seqdata
from prediction import pose_model
from error_ellipse import error_ellipse
from correction import correction
from newlandmark import newlandmark
from associatelandmark import associateLandmarkIDs
from landmarks_model import landmark_model
from funcTools import *

directory = os.getcwd()  # Set working directory
dataset_dir = os.path.join(directory, "dataset")  # Set directory with dataset
world_data, trajectory_data, camera_data = get_all_data(dataset_dir)  # Get the data info

# Initialize variables
rob_poseH = np.zeros([4, 4, 1])
rob_poseH_gt = np.zeros([4, 4, 1])
rob_update = np.zeros([4, 4, 1])  # updated pose after correction

id_to_state_map = np.ones((1000, 14), dtype='float64') * -1
state_to_id_map = np.ones((1000, 1), dtype='int32') * -1

# will retain the pose of the robot for each time sequence
robot_pose_map = np.zeros((336, 3))
robot_gt_pose_map = np.zeros((336, 3))

land_triang = np.ones([3, 1]) * -1
land_triang_gt = np.ones([3, 1]) * -1
Land_TriangPrev = np.ones([3, 1]) * -1
    sample_range_y = np.round(grid_y, 1)
    plt.xticks(pixel_range, sample_range_x)
    plt.yticks(pixel_range, sample_range_y)
    plt.xlabel('z[0]')
    plt.ylabel('z[1]')
    plt.imshow(figure, cmap='Greys_r', vmin=0.0, vmax=1.0)
    plt.title('Grids Over Latent Distribution')
    if save:
        plt.savefig(filename)
    plt.show()


if __name__ == '__main__':
    encoder = load_model(model_name + '/encoder.tf')
    decoder = load_model(model_name + '/decoder.tf')

    x_test, y_test = data.get_all_data(matching='../generative_model_3')
    # x_test, y_test = data.get_all_data(matching='../generative_model_2')
    # p = np.random.permutation(len(x_test))
    # x_test = x_test[p]
    # y_test = y_test[p]
    # x_test = x_test[:100]
    x_test = np.reshape(x_test, [-1, GRID_SIZE, GRID_SIZE, 1])
    # y_test = y_test[:100]

    plot_latent((encoder, decoder), (x_test, y_test), use_curve=True, save=True)
    # show_grids('vae_conditional')
import data
from constants import *
import pdb


def sampling(args):
    """Reparameterization trick: sample z ~ N(z_mean, exp(z_log_var))."""
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    # by default, random_normal has mean = 0 and std = 1.0
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon


# x_train, y_train = data.get_all_data(matching='vae_cnn_data')
x_train, y_train = data.get_all_data(matching='../generative_model_2')

num_test = 100
x_test = x_train[:num_test]
y_test = y_train[:num_test]
x_train = x_train[num_test:]
y_train = y_train[num_test:]

x_train = np.reshape(x_train, [-1, GRID_SIZE, GRID_SIZE, 1])
x_test = np.reshape(x_test, [-1, GRID_SIZE, GRID_SIZE, 1])

# network parameters
original_dim = GRID_SIZE * GRID_SIZE
input_shape = (GRID_SIZE, GRID_SIZE, 1)
batch_size = 128
kernel_size = 3
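# A minimal sketch of how `sampling` is typically wired into an encoder with a
# Lambda layer (the reparameterization trick). The imports assume tf.keras, and
# `latent_dim` plus the dense layers shown here are illustrative assumptions,
# not the exact architecture of this model.
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input, Flatten, Dense, Lambda

latent_dim = 2
inputs = Input(shape=input_shape, name='encoder_input')
x = Flatten()(inputs)
x = Dense(128, activation='relu')(x)
z_mean = Dense(latent_dim, name='z_mean')(x)
z_log_var = Dense(latent_dim, name='z_log_var')(x)
z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])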
                    default=0,
                    help='whether to compute tc or not')
args = parser.parse_args()

pca = PCA(n_components=2)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## set seed
np.random.seed(args.seed)
torch.backends.cudnn.deterministic = True
torch.manual_seed(args.seed)

print('Getting vocabulary ...')
data_file = os.path.join(args.data_path, 'min_df_{}'.format(args.min_df))
vocab, cluster_valid = data.get_all_data(data_file, temporal=True)
vocab_size = len(vocab)

# get data
print('Getting full data ...')
tokens = cluster_valid['tokens']
counts = cluster_valid['counts']
times = cluster_valid['times']
num_times = len(np.unique(times))
num_docs = len(tokens)

## get embeddings
print('Getting embeddings ...')
emb_path = args.emb_path
vect_path = os.path.join(args.data_path.split('/')[0], 'embeddings.pkl')
vectors = {}
def train_step(step, predictor_model, lc_model, generator_model, **kwargs):
    # Setup our directory
    # -------------------
    base_dir = kwargs.get('base_dir', 'generative_model_default')
    step_dir = os.path.join(base_dir, 'step_{}'.format(step))
    grids_dir = os.path.join(step_dir, 'grids')
    densities_dir = os.path.join(step_dir, 'results')
    target_densities_dir = os.path.join(step_dir, 'target_densities')
    model_save_dir = os.path.join(base_dir, 'model_saves')
    predictor_model_logs = os.path.join(step_dir, 'predictor_model_logs')
    generator_model_logs = os.path.join(step_dir, 'generator_model_logs')
    make_dirs(step_dir, grids_dir, densities_dir, target_densities_dir,
              model_save_dir, predictor_model_logs, generator_model_logs)
    predictor_save_file = os.path.join(model_save_dir,
                                       'predictor_step_{}.hdf5'.format(step))
    generator_save_file = os.path.join(model_save_dir,
                                       'generator_step_{}.hdf5'.format(step))

    # Train predictor on dataset
    # --------------------------
    # Get our training data
    train_grids, train_curves = data.get_all_data(matching=base_dir,
                                                  augment_factor=20)

    # Define our loss function and compile our model
    predictor_loss_func = kwargs.get('predictor_loss_func',
                                     'binary_crossentropy')
    models.unfreeze(predictor_model)
    learning_rate = 10**-2
    optimizer = SGD(learning_rate, clipnorm=1.0)
    predictor_model.compile(optimizer,
                            loss=predictor_loss_func,
                            metrics=['mae', models.worst_abs_loss])

    # Fit our model to the dataset
    predictor_batch_size = kwargs.get('predictor_batch_size', 64)
    predictor_epochs = kwargs.get('predictor_epochs', 6)
    if step == 0:
        # train more to start off
        predictor_epochs += kwargs.get('predictor_first_step_epoch_boost', 10)
    lr_patience = max(int(round(predictor_epochs * 0.2)), 3)  # clip to at least 3
    es_patience = max(int(round(predictor_epochs * 0.8)), 4)  # clip to at least 4
    predictor_model.fit(x=train_grids,
                        y=train_curves,
                        batch_size=predictor_batch_size,
                        epochs=predictor_epochs,
                        validation_split=0.1,
                        callbacks=[
                            ReduceLROnPlateau(patience=lr_patience, factor=0.1),
                            EarlyStopping(patience=es_patience,
                                          restore_best_weights=True),
                            TensorBoard(log_dir=predictor_model_logs,
                                        histogram_freq=1,
                                        write_graph=False,
                                        write_images=False)
                        ])

    # Save our model
    print('Saving model', end='... ', flush=True)
    predictor_model.save(predictor_save_file, include_optimizer=False)
    print('done')

    # Train generator on predictor
    # ----------------------------
    # Get our training data
    print('Picking random curves', end='... ', flush=True)
    num_curves = 10000
    boost_dim = kwargs.get('boost_dim', 5)
    random_curves = data.make_generator_input(num_curves,
                                              boost_dim,
                                              allow_squeeze=True,
                                              as_generator=False)
    print('Done')

    # Create the training model
    models.freeze(predictor_model)
    lc_inp = Input(shape=(boost_dim, ), name='latent_code')
    curve_inp = Input(shape=(N_ADSORP, ), name='target_curve')
    generator_out = generator_model([curve_inp, lc_inp])
    predictor_out = predictor_model(generator_out)
    lc_out = lc_model(generator_out)
    training_model = Model(inputs=[curve_inp, lc_inp],
                           outputs=[predictor_out, lc_out])

    # Define our loss function and compile our model
    generator_loss_func = kwargs.get('generator_loss_func',
                                     'binary_crossentropy')
    loss_weights = kwargs.get('loss_weights', [1.0, 0.6])
    learning_rate = 10**-2
    optimizer = Adam(learning_rate)
    training_model.compile(optimizer,
                           loss=[generator_loss_func, 'mse'],
                           metrics={
                               'predictor_model': ['mae', models.worst_abs_loss],
                               'latent_code_model': ['mae', models.worst_abs_loss]
                           },
                           loss_weights=loss_weights)

    # Fit our model to the curves
    generator_batch_size = kwargs.get('generator_batch_size', 64)
    generator_epochs = kwargs.get('generator_epochs', 3)
    if step == 0:
        # train more to start off
        generator_epochs += kwargs.get('generator_first_step_epoch_boost', 20)
    lr_patience = max(int(round(generator_epochs * 0.1)), 3)  # clip to at least 3
    es_patience = max(int(round(generator_epochs * 0.8)), 4)  # clip to at least 4
    training_model.fit(x=random_curves,
                       y=random_curves,
                       batch_size=generator_batch_size,
                       epochs=generator_epochs,
                       validation_split=0.1,
                       callbacks=[
                           ReduceLROnPlateau(patience=lr_patience, factor=0.1),
                           EarlyStopping(patience=es_patience),
                           TensorBoard(log_dir=generator_model_logs,
                                       histogram_freq=1,
                                       write_graph=False,
                                       write_images=False)
                       ])

    # Save our model
    generator_model.save(generator_save_file, include_optimizer=False)

    # Generate new data
    # -----------------
    num_new_grids = kwargs.get('num_new_grids', 100)
    data_upscale_factor = kwargs.get('data_upscale_factor', 1.5)
    artificial_curves, latent_codes = data.make_generator_input(
        int(num_new_grids * data_upscale_factor), boost_dim, as_generator=False)
    generated_grids = generator_model.predict([artificial_curves, latent_codes])
    saved_grids = generated_grids.astype('int')
    for i, grid in enumerate(saved_grids):
        path = os.path.join(grids_dir, 'grid_%04d.csv' % i)
        np.savetxt(path, grid, fmt='%i', delimiter=',')

    print('Evaluating candidate grids')
    os.system('./fast_dft {}'.format(step_dir))

    for i, artificial_curve in enumerate(artificial_curves):
        path = os.path.join(target_densities_dir,
                            'artificial_curve_%04d.csv' % i)
        np.savetxt(path, artificial_curve, fmt='%f', delimiter=',')

    # Prune data
    # ----------
    # Get the actual, target, and predicted curves
    density_files = glob.glob(os.path.join(densities_dir, 'density_*.csv'))
    density_files.sort()
    actual_densities = [
        np.append(
            np.genfromtxt(density_file,
                          delimiter=',',
                          skip_header=1,
                          max_rows=N_ADSORP)[:, 1], 1)
        for density_file in density_files
    ]
    target_densities = [
        np.cumsum(np.insert(curve_diffs, 0, 0))
        for curve_diffs in artificial_curves
    ]
    predicted_densities = [
        np.cumsum(np.insert(curve_diffs, 0, 0))
        for curve_diffs in predictor_model.predict(generated_grids)
    ]
    generated_grids = list(generated_grids)
    new_data = list(
        zip(actual_densities, target_densities, predicted_densities,
            generated_grids))

    # Sort the grids by some metric
    # Sample k curves from our dataset to see how close we are to our dataset
    def generator_err(x):
        actual_curve, target_curve, predicted_curve, _ = x
        delta_prime_err = np.sum(np.abs(actual_curve - target_curve))
        return delta_prime_err

    def predictor_err(x):
        actual_curve, target_curve, predicted_curve, _ = x
        gamma_err = np.sum(np.abs(actual_curve - predicted_curve))
        return gamma_err

    def cross_err(x):
        actual_curve, target_curve, predicted_curve, _ = x
        delta_err = np.sum(np.abs(target_curve - predicted_curve))
        return delta_err

    # Evaluate our accuracies
    generator_error = np.array(list(map(generator_err, new_data))) / (N_ADSORP + 1)
    predictor_error = np.array(list(map(predictor_err, new_data))) / (N_ADSORP + 1)
    cross_error = np.array(list(map(cross_err, new_data))) / (N_ADSORP + 1)
    print('Generated data error metric: {:.3f} ± {:.3f}'.format(
        generator_error.mean(), generator_error.std()))
    print('Predictor error metric: {:.3f} ± {:.3f}'.format(
        predictor_error.mean(), predictor_error.std()))
    print('Cross error metric: {:.3f} ± {:.3f}'.format(cross_error.mean(),
                                                       cross_error.std()))

    # Remove the grids that are already good
    print('Finding most dissimilar grids')
    divergences = np.fromiter(map(lambda x: divergence(x[0]), new_data),
                              dtype=float)
    divergences = divergences**1.2
    divergences /= np.sum(divergences)
    new_data_inds = np.random.choice(len(new_data),
                                     num_new_grids,
                                     replace=False,
                                     p=divergences)
    new_data = [new_data[i] for i in new_data_inds]

    # Add data back to dataset
    # ------------------------
    # Remove our tmp data
    shutil.rmtree(grids_dir)
    shutil.rmtree(densities_dir)
    shutil.rmtree(target_densities_dir)
    make_dirs(grids_dir, densities_dir, target_densities_dir)

    # Save new data
    print('Saving new grids')
    for i, (density, target_density, _, grid) in enumerate(new_data):
        grid_path = os.path.join(grids_dir, 'grid_%04d.csv' % i)
        density_path = os.path.join(densities_dir, 'density_%04d.csv' % i)
        target_density_path = os.path.join(target_densities_dir,
                                           'artificial_curve_%04d.csv' % i)
        np.savetxt(grid_path, grid, fmt='%i', delimiter=',')
        np.savetxt(target_density_path,
                   np.diff(target_density),
                   fmt='%f',
                   delimiter=',')

    print('Evaluating new grids')
    os.system('./fast_dft {}'.format(step_dir))

    return generator_error, predictor_error, cross_error
    def predict(self, image):
        if self._model is not None:
            resized = cv2.resize(image, (28, 28))
            reshaped = resized.reshape(1, 1, 28, 28)
            prediction = self._model.predict_classes(reshaped, verbose=0)
            return prediction[0]
        else:
            raise Exception("Model does not exist. Please load a model first.")


if __name__ == '__main__':
    import data

    model_dir = "models"
    x_train, y_train, x_test, y_test = data.get_all_data()
    # x_train, y_train, x_test, y_test = get_all_data()
    print(F"train/test shape: {x_train.shape}/{x_test.shape}")

    classifier = DigitClassifier()
    train = True
    if train:
        classifier.train(x_train, y_train, x_test, y_test)
        classifier.save(model_dir)
    else:
        classifier.load(model_dir)

    for idx in range(10):
        image = x_test[idx][0]
        result = classifier.predict(image)
        print(F"Prediction is: {result}")
        cv2.imshow("test image", image)
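        # cv2.imshow only renders once GUI events are pumped via waitKey; the
        # excerpt is truncated here, so the pause below is an assumption.
        cv2.waitKey(0)
    cv2.destroyAllWindows()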