def create_counterbalance(self, n_rep, seeds_file=None):
    """ Generates seeds of all the trials """
    if seeds_file:
        self.seeds = load_data(seeds_file, 'seed')
        self.truth = load_data(seeds_file, 'answer')
    else:
        n_unique = self.n_trials // n_rep
        max_seed = np.iinfo(np.uint32).max
        self.seeds = np.tile(
            np.random.randint(0, high=max_seed, size=(n_unique, 1), dtype=np.uint32),
            (n_rep, 1))
        np.random.shuffle(self.seeds)
        self.truth = None
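# --- A stand-alone sketch (illustrative only, values chosen arbitrarily) of the counterbalancing
# --- step above: after tiling and shuffling, every unique seed should appear exactly n_rep times.
import numpy as np
from collections import Counter

n_trials, n_rep = 10, 2
n_unique = n_trials // n_rep
seeds = np.tile(
    np.random.randint(0, np.iinfo(np.uint32).max, size=(n_unique, 1), dtype=np.uint32),
    (n_rep, 1))
np.random.shuffle(seeds)
assert all(count == n_rep for count in Counter(seeds.ravel().tolist()).values())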
def main(arguments):
    if arguments.operation == 'train':
        # get the train data
        # features: train_data[0], labels: train_data[1]
        train_features, train_labels = data.load_data(dataset=arguments.train_dataset)

        # get the validation data
        # features: validation_data[0], labels: validation_data[1]
        validation_features, validation_labels = data.load_data(dataset=arguments.validation_dataset)

        # get the size of the dataset for slicing
        train_size = train_features.shape[0]
        validation_size = validation_features.shape[0]

        # slice the dataset to be exact as per the batch size
        # e.g. train_size = 1898322, batch_size = 256
        # [:1898322-(1898322%256)] = [:1898240]
        # 1898322 // 256 = 7415; 7415 * 256 = 1898240
        train_features = train_features[:train_size - (train_size % BATCH_SIZE)]
        train_labels = train_labels[:train_size - (train_size % BATCH_SIZE)]

        # modify the size of the dataset to be passed on model.train()
        train_size = train_features.shape[0]

        # slice the dataset to be exact as per the batch size
        validation_features = validation_features[:validation_size - (validation_size % BATCH_SIZE)]
        validation_labels = validation_labels[:validation_size - (validation_size % BATCH_SIZE)]

        # modify the size of the dataset to be passed on model.train()
        validation_size = validation_features.shape[0]

        model = GruSoftmax(alpha=LEARNING_RATE, batch_size=BATCH_SIZE, cell_size=CELL_SIZE,
                           dropout_rate=DROPOUT_P_KEEP, num_classes=N_CLASSES,
                           sequence_length=SEQUENCE_LENGTH)

        model.train(checkpoint_path=arguments.checkpoint_path, log_path=arguments.log_path,
                    model_name=arguments.model_name, epochs=HM_EPOCHS,
                    train_data=[train_features, train_labels], train_size=train_size,
                    validation_data=[validation_features, validation_labels],
                    validation_size=validation_size, result_path=arguments.result_path)
    elif arguments.operation == 'test':
        test_features, test_labels = data.load_data(dataset=arguments.validation_dataset)

        test_size = test_features.shape[0]

        test_features = test_features[:test_size - (test_size % BATCH_SIZE)]
        test_labels = test_labels[:test_size - (test_size % BATCH_SIZE)]
        test_size = test_features.shape[0]

        GruSoftmax.predict(batch_size=BATCH_SIZE, cell_size=CELL_SIZE,
                           dropout_rate=DROPOUT_P_KEEP, num_classes=N_CLASSES,
                           test_data=[test_features, test_labels], test_size=test_size,
                           checkpoint_path=arguments.checkpoint_path,
                           result_path=arguments.result_path)
def __init__(self, args: Namespace, logger: HtmlLogger):
    # init model
    model = self.buildModel(args)
    model = model.cuda()
    # create DataParallel model instance
    self.modelParallel = model
    # self.modelParallel = DataParallel(model, args.gpu)
    # assert (id(model) == id(self.modelParallel.module))

    self.args = args
    self.model = model
    self.logger = logger

    # load data
    self.train_queue, self.valid_queue, self.createSearchQueue = load_data(args)

    # init train folder path, where to save loggers, checkpoints, etc.
    self.trainFolderPath = '{}/{}'.format(args.save, args.trainFolder)

    # build statistics containers
    containers = self.buildStatsContainers()
    # build statistics rules
    rules = self.buildStatsRules()
    # init statistics instance
    self.statistics = Statistics(containers, rules, args.save)

    # log parameters
    logParameters(logger, args, model)
def testNormalize(self):
    df = dt.load_data(params.global_params['db_path'], 'C', index_col='date',
                      from_date=20100101, to_date=20100405, limit=30)
    df = df['close']

    look_back = 20
    look_ahead = 1
    coeff = 3.0

    print(df.tail())
    data = df.values
    print('data.shape', data.shape)

    _, y_data = dt.normalize(data, look_back=look_back, look_ahead=look_ahead, alpha=coeff)
    tmp = dt.denormalize(y_data, data, look_back, look_ahead, coeff)
    print('denorm.shape', tmp.shape)

    plt.plot(data[look_back:], label='actual')
    plt.plot(tmp, label='denorm')
    plt.legend(loc='upper left')
    plt.show()
def load_page():
    markets, exception = load_data()
    if exception:
        st.sidebar.text(str(exception))
        st.title("⭕️The data was not correctly loaded")
        return

    names = get_names()
    title = st.empty()
    st.sidebar.title("Crypto Explorer")

    # OHLC Visualisation
    st.sidebar.subheader('Choose your asset:')
    base = st.sidebar.selectbox('Select base', ['USDT', 'BTC', 'ETH'])
    keys = get_keys(markets, base=base)
    market = st.sidebar.selectbox('Select market', keys)
    resolution = st.sidebar.selectbox('Select resolution', ['1d', '1h', '1m'])

    code = market.split('/')[0]
    name = names[code] if code in names else code
    title.header(market + ' - ' + name)

    data = get_ohlcv(market, timeframe=resolution)
    range_ = st.sidebar.slider('Historical range',
                               min_value=min([30, data.shape[0]]),
                               max_value=min([2000, data.shape[0]]),
                               value=min([1000, int(data.shape[0] / 2)]),
                               step=10)
    plot_candlestick(data[-range_:])
def training_pipeline(args):
    seed(args.seed)
    set_random_seed(args.seed)

    x_train, x_test, y_test, x_val, y_val = load_data(args)
    G, D, GAN = load_model(args)

    pretrain(args, G, D, GAN, x_train, x_test, y_test, x_val, y_val)
    train(args, G, D, GAN, x_train, x_test, y_test, x_val, y_val)
def test_load_data(tmpdir):
    """test load data from csv"""
    save_data(tmpdir.join("test.csv"), [["Poprad", "Poprad", "A", "Adam", "Adam"]])
    df = load_data(tmpdir.join("test.csv"))
    assert df.shape == (1, 5), "loaded data has wrong size"
    assert df.iloc[0]["okres"] == "Poprad"
    assert df.iloc[0]["katastralne uzemie"] == "Poprad"
    assert df.iloc[0]["prve pismeno"] == "A"
    assert df.iloc[0]["priezvisko"] == "Adam"
    assert df.iloc[0]["vlastnik"] == "Adam"
def visualize(model_weights, model, dataset='val', batch_size=1, shuffle=True):
    DATADIR = 'datasets/citys'

    '''device'''
    no_cuda = False
    use_cuda = not no_cuda and torch.cuda.is_available()
    device = torch.device('cuda:0' if use_cuda else 'cpu')
    print('using device:', device)

    model = model.to(device)
    model.load_state_dict(model_weights['model_state_dict'])
    print('Finished loading model!')
    model.eval()

    data_generator = load_data(DATADIR, batch_size=batch_size, shuffle=shuffle)
    val_generator = data_generator[dataset]

    data = next(iter(val_generator))
    imgs, mask = data[0].to(device), data[1].to(device)
    with torch.no_grad():
        prediction = model(imgs)
    pred = torch.argmax(prediction, dim=1).cpu()

    mask = 255 * torch.squeeze(mask, dim=1)  # remove redundant channel
    imgs = imgs.permute(0, 2, 3, 1).cpu()

    # squeeze=False keeps a 2-D axes array even when batch_size == 1
    fig, ax = plt.subplots(nrows=batch_size, ncols=3, squeeze=False)
    for j in range(batch_size):
        pred_img = get_color_image(pred[j])
        mask_img = get_color_image(mask[j])
        ax[j, 0].imshow(imgs[j])
        ax[j, 1].imshow(pred_img)
        ax[j, 2].imshow(mask_img)

    np.vectorize(lambda ax: ax.axis('off'))(ax)  # disable axis
    cols = ['image', 'prediction', 'ground truth']  # titles
    for ax, col in zip(ax[0], cols):
        ax.set_title(col)  # set titles

    plt.tight_layout()
    plt.show()
    return
def prediction(groundTruthImgList, file, model, dataset='val', batch_size=1, shuffle=True):
    DATADIR = 'datasets/citys'

    '''device'''
    no_cuda = False
    use_cuda = not no_cuda and torch.cuda.is_available()
    device = torch.device('cuda:0' if use_cuda else 'cpu')
    print('using device:', device)

    model_weights = torch.load('weights/{}.pt'.format(file), map_location=device)
    model = model.to(device)
    model.load_state_dict(model_weights['model_state_dict'])
    print('Finished loading model!')
    model.eval()

    data_generator = load_data(DATADIR, batch_size=batch_size, shuffle=shuffle)
    val_generator = data_generator[dataset]

    for i, (X, y) in enumerate(val_generator):
        imgs = X.to(device)
        with torch.no_grad():
            prediction = model(imgs)
        pred = torch.argmax(prediction, dim=1).cpu()

        # convert to right format to save prediction image
        image_to_save = torch.squeeze(pred, dim=0).numpy()

        # get name of prediction image to save
        csFile = getCsFileInfo(groundTruthImgList[i])

        # save the prediction images in the 'results' folder
        filePattern = "results/{}/{}/{}_{}_{}_pred.png".format(
            dataset, file, csFile.city, csFile.sequenceNb, csFile.frameNb)

        # save prediction image
        cv2.imwrite(filePattern, image_to_save)

        # if i == 4:
        #     break

    print('Prediction images saved.')
    return
def main(arguments):
    model_choice = arguments.model
    model_path = arguments.model_path
    dataset_path = arguments.dataset

    assert (model_choice == 1 or model_choice == 2 or model_choice == 3), \
        "Invalid choice: Choose among 1, 2, and 3 only."
    assert os.path.exists(path=model_path), "{} does not exist!".format(model_path)
    assert os.path.exists(path=dataset_path), "{} does not exist!".format(dataset_path)

    dataset = np.load(dataset_path)
    features, labels = load_data(dataset=dataset)
    labels = one_hot_encode(labels=labels)

    dataset_size = features.shape[0]
    print(features.shape)

    if model_choice == 2:
        features = np.reshape(
            features,
            (features.shape[0],
             int(np.sqrt(features.shape[1])),
             int(np.sqrt(features.shape[1]))))
        predictions, accuracies = predict(dataset=[features, labels],
                                          model=model_choice,
                                          model_path=model_path,
                                          size=dataset_size,
                                          batch_size=256,
                                          cell_size=256)
    else:
        predictions, accuracies = predict(dataset=[features, labels],
                                          model=model_choice,
                                          model_path=model_path,
                                          size=dataset_size,
                                          batch_size=256)

    print("Predictions : {}".format(predictions))
    print("Accuracies : {}".format(accuracies))
    print("Average accuracy : {}".format(np.mean(accuracies)))
def main(config):
    """The main function."""
    # ----------------------------------------
    # Load pascal voc datasets
    print("\n--- Reading PASCAL {} data".format(config.year))
    dataset_train = load_data(config.data_dir, config.record_file, config.year, 'train')
    dataset_val = load_data(config.data_dir, config.record_file, config.year, 'val')

    # ----------------------------------------
    # Create the model
    yolo = Yolo(config, dataset_train, dataset_val, debug=config.debug)

    # ----------------------------------------
    # Start training
    try:
        yolo.train()
    except tf.errors.OutOfRangeError:
        pass

    return
def evaluate(model_weights, model, dataset='val', batch_size=1):
    DATADIR = 'datasets/citys'

    '''device'''
    no_cuda = False
    use_cuda = not no_cuda and torch.cuda.is_available()
    device = torch.device('cuda:0' if use_cuda else 'cpu')
    print('using device:', device)

    model = model.to(device)
    model.load_state_dict(model_weights['model_state_dict'])
    print('Finished loading model!')
    model.eval()

    data_generator = load_data(DATADIR, batch_size=batch_size)
    val_generator = data_generator[dataset]

    for i, (X, y) in enumerate(val_generator):
        imgs = X.to(device)
        mask = y.to(device)
        with torch.no_grad():
            prediction = model(imgs)
        pred = torch.argmax(prediction, dim=1).cpu()

        if i == 0:
            intersection = np.zeros(34, dtype=int)
            union = np.zeros(34, dtype=int)

        # calculate intersection and union per batch and add to previous batches
        intersection, union = calculate_I_and_U(mask, pred, intersection=intersection, union=union)

    # calculate IoU over full set
    IoU = calculate_IoU(intersection, union, n_classes=34)
    IoU_dict, IoU_average = calculate_average_IoU(IoU)

    print('IoU per class: ')
    for key, value in IoU_dict.items():
        print(key, ' : ', value)
    print('IoU average for 34 classes: ', IoU_average)

    IoU_19_average = calculate_IoU_train_classes(IoU)
    print('IoU average for 19 classes: ', IoU_19_average)
    return
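# --- A minimal sketch of what the per-class IoU helpers used above could look like; the actual
# --- calculate_I_and_U / calculate_IoU in this project may differ. Classes that never occur get
# --- NaN so they can be skipped with np.nanmean.
import numpy as np

def iou_counts(mask, pred, n_classes=34):
    """Per-class intersection and union pixel counts for one batch of integer class maps."""
    inter = np.zeros(n_classes, dtype=int)
    union = np.zeros(n_classes, dtype=int)
    for c in range(n_classes):
        m, p = (mask == c), (pred == c)
        inter[c] = np.logical_and(m, p).sum()
        union[c] = np.logical_or(m, p).sum()
    return inter, union

def iou_from_counts(inter, union):
    """IoU per class; NaN where a class appears in neither prediction nor ground truth."""
    return np.where(union > 0, inter / np.maximum(union, 1), np.nan)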
def count_classes(data_set='train', batch_size=100):
    '''device'''
    no_cuda = False
    use_cuda = not no_cuda and torch.cuda.is_available()
    device = torch.device('cuda:0' if use_cuda else 'cpu')
    print('using device:', device)

    data_generator = load_data('datasets/citys', batch_size=batch_size)[data_set]

    n_classes = 34
    class_amount = {entry: 0 for entry in range(n_classes)}

    for i, (_, y) in enumerate(data_generator):
        y = (255 * y).int().to(device)
        classes, counts = torch.unique(y, return_counts=True)
        for j in range(len(classes)):
            class_amount[classes[j].item()] += counts[j].item()

    print(class_amount)
    return class_amount
def main(arguments):
    model_choice = arguments.model
    model_path = arguments.model_path
    dataset_path = arguments.dataset

    assert os.path.exists(path=model_path), '{} does not exist!'.format(model_path)
    assert os.path.exists(path=dataset_path), '{} does not exist!'.format(dataset_path)

    dataset = np.load(dataset_path)
    features, labels = load_data(dataset=dataset)
    labels = one_hot_encode(labels=labels)

    dataset_size = features.shape[0]
    print(features.shape)

    if model_choice == 2:
        features = np.reshape(
            features,
            (features.shape[0],
             int(np.sqrt(features.shape[1])),
             int(np.sqrt(features.shape[1]))))
        predictions, accuracies = predict(dataset=[features, labels],
                                          model=model_choice,
                                          model_path=model_path,
                                          size=dataset_size,
                                          batch_size=256,
                                          cell_size=256)
    else:
        predictions, accuracies = predict(dataset=[features, labels],
                                          model=model_choice,
                                          model_path=model_path,
                                          size=dataset_size,
                                          batch_size=256)

    print('Predictions: {}'.format(predictions))
    print('Accuracies: {}'.format(accuracies))
    print('Average accuracy: {}'.format(np.mean(accuracies)))
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
import pandas as pd
import numpy as np

import params
import utils.data as dt

data = pd.DataFrame()
for s in params.symbols:
    symbol = s['symbol']
    df = dt.load_data(params.global_params['db_path'], symbol,
                      to_date=20161231, index_col='date')['close']
    data[symbol] = pd.Series(np.cumsum(df.pct_change()), index=df.index)

print(data.head())
sns.heatmap(data.corr(), annot=True)
plt.show()
# Set Summary writer for Tensorboard
tb_logdir = 'runs/' + ts
writer = SummaryWriter(log_dir=tb_logdir)

# Information to be logged
optim_name = re.split(' ', str(optim), maxsplit=1)[0]
scheduler_name = str(scheduler).split('.')[3].split(' ')[0]
init_lr = optim.param_groups[0]['lr']

# Load Dataset, Dataloaders, etc.
# Grab the stored Lesk scores
# Get and Store WordNet Synsets of each class
print(args)
print("Training ", model.name)
dataset, dataloader, dataset_sizes = load_data(Path(args.data_path), batch_size=batch_size)
lesk_scores = pd.read_csv(args.lesk_path) if Path(args.lesk_path).exists() \
    else exhuastive_lesk_simarity_metric(classes)  # fall back to computing scores if the CSV is missing
scene_synsets = get_scene_synset_dictionary(classes)

# Each epoch has a training and validation phase
phases = ['train', 'val']

for epoch in range(epochs):
    for phase in phases:
        if phase == 'train':
            scheduler.step()
            model.train()  # Set model to training mode
            model = model.to(device)
        else:
            model.eval()  # Set model to evaluate mode
            model = model.to(device)
import pandas as pd

from utils import data
from utils import metrics
from sklearn.naive_bayes import GaussianNB

# Dataset 1 (Latin letters)
# Training
trainX, trainY = data.load_data('train_1.csv')
clf = GaussianNB()
clf.fit(trainX, trainY)

# Testing
testX, testY = data.load_data('test_with_label_1.csv')
predictions = pd.DataFrame(clf.predict(testX))
data.generate_csv(predictions, 'GNB-DS1.csv')
metrics.compute(predictions, testY, 'GNB-DS1.csv')
data.generate_cm(predictions, testY, 'GNB-DS1.png')

# Dataset 2 (Greek letters)
# Training
trainX, trainY = data.load_data('train_2.csv')
clf = GaussianNB()
clf.fit(trainX, trainY)

# Testing
testX, testY = data.load_data('test_with_label_2.csv')
predictions = pd.DataFrame(clf.predict(testX))
data.generate_csv(predictions, 'GNB-DS2.csv')
metrics.compute(predictions, testY, 'GNB-DS2.csv')
data.generate_cm(predictions, testY, 'GNB-DS2.png')
import argparse

from utils.config import set_gpu
from utils.data import BalancedDataGenerator, load_data
from utils.model import load_model
from utils.plot import make_confusion_matrix
from utils.utils import get_acc

parser = argparse.ArgumentParser()
parser.add_argument('--dataset', type=str, default='chestx')
parser.add_argument('--model', type=str, default='inceptionv3')
parser.add_argument('--gpu', type=str, default='0')
args = parser.parse_args()

set_gpu(args.gpu)

X_train, X_test, y_train, y_test = load_data(dataset=args.dataset, normalize=True)

model = load_model(
    dataset=args.dataset,
    nb_class=y_train.shape[1],
    model_type=args.model,
    mode='train'
)


def step_decay(epoch):
    lr = 1e-3
    if epoch > 45:
        lr = 1e-5
    elif epoch > 40:
        lr = 1e-4
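# --- The step_decay function above is cut off before its return statement. A minimal sketch of a
# --- complete step schedule and how it is typically attached as a callback; the thresholds mirror
# --- the truncated code, while the import path and the fit call are assumptions.
from tensorflow.keras.callbacks import LearningRateScheduler

def step_decay_sketch(epoch):
    """Piecewise-constant learning rate: 1e-3, dropping to 1e-4 after epoch 40 and 1e-5 after 45."""
    lr = 1e-3
    if epoch > 45:
        lr = 1e-5
    elif epoch > 40:
        lr = 1e-4
    return lr

lr_callback = LearningRateScheduler(step_decay_sketch)
# model.fit(X_train, y_train, callbacks=[lr_callback], ...)  # hypothetical; the original fit call is not shown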
print("Saving: {}".format(path_embedding_latent)) np.savez(path_embedding_latent, **latent_list) if save_loss: loss_file = "loss_plots/{}_loss_iters_{}_step_{}_{}.npy".format( basename, str(ITERATIONS).zfill(6), str(SAVE_STEP).zfill(4), init) path_loss = os.path.join(SAVING_DIR, loss_file) print("Saving Loss: {}".format(path_loss)) np.save(path_loss, np.array(loss_list)) return loss_list # load images from directory data = u_data.load_data(PATH_DIR) # testing downsampling test_name = 'only_embed' options_lambdas = [0.001, 0.005, 0.01] # for i in range(len(data)): # for lambda_v in options_lambdas: # loss_list = run_optimization(data, id = i, # init = 'w_mean', # sub_fix=f"_{test_name}_lambda_{lambda_v}", # save_loss = True, # lambda_v=lambda_v) condition_function_options = { "colorization": mu.convert2grayscale,
def main(args):
    # Setting up an experiment
    config, params = setup(args)

    # Setting up logger
    logger = get_logger(config['model_name'], config['dirs']['logs_dir'])

    # Extracting configurations
    data_config = config['data']
    logs_config = config['logs']
    training_config = config['training']
    sampling_config = config['sampling']
    dirs_config = config['dirs']
    logger.info('[SETUP] Experiment configurations')
    logger.info(
        f'[SETUP] Experiment directory: {os.path.abspath(dirs_config["exp_dir"])}')

    # Loading the dataset
    (X_train, len_train), (X_valid, len_valid), (_, _) = load_data(
        data_config=data_config, step_size=training_config['num_pixels'])
    logger.info(f'[LOAD] Dataset (shape: {X_train[0].shape})')

    # Computing beat size in time steps
    beat_size = float(data_config['beat_resolution'] / training_config['num_pixels'])

    # Preparing inputs for sampling
    intro_songs, save_ids, song_labels = prepare_sampling_inputs(
        X_train, X_valid, sampling_config, beat_size)
    num_save_intro = len(save_ids) // sampling_config['num_save']
    logger.info('[SETUP] Inputs for sampling')

    # Creating the MultINN model
    tf.reset_default_graph()
    model = MultINN(config, params, mode=params['mode'], name=config['model_name'])
    logger.info('[BUILT] Model')

    # Building the sampler and evaluator
    sampler = model.sampler(num_beats=sampling_config['sample_beats'])
    logger.info('[BUILT] Sampler')
    evaluator = model.evaluator()
    logger.info('[BUILT] Evaluator')

    # Building optimizer and training ops
    if args.sgd:
        optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=training_config['learning_rate'])
    else:
        optimizer = tf.train.AdamOptimizer(
            learning_rate=training_config['learning_rate'], epsilon=1e-4)

    init_ops, update_ops, metrics, metrics_upd, summaries = model.train_generators(
        optimizer=optimizer, lr=training_config['learning_rate'])
    logger.info('[BUILT] Optimizer and update ops')

    # Extracting placeholders, metrics and summaries
    placeholders = model.placeholders
    x, lengths, is_train = placeholders['x'], placeholders['lengths'], placeholders['is_train']

    loss = metrics['batch/loss']
    loglik, global_loglik = metrics['log_likelihood'], metrics['global']['log_likelihood']

    weights_sum, metrics_sum, gradients_sum = summaries['weights'], summaries['metrics'], summaries['gradients']

    # TensorFlow Session set up
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True

    tf.set_random_seed(training_config['random_seed'])
    np.random.seed(training_config['random_seed'])

    with tf.Session(config=tf_config) as sess:
        logger.info('[START] TF Session')

        with tf.variable_scope('init_global'):
            init_global = tf.global_variables_initializer()
        with tf.variable_scope('init_local'):
            init_local = tf.local_variables_initializer()
        sess.run([init_global, init_local])

        stats = TrainingStats()

        # Loading the model's weights or using initial weights
        if not args.from_init:
            if args.from_last:
                if model.load(sess, dirs_config['model_last_dir']):
                    last_stats_file = os.path.join(dirs_config['model_last_dir'], 'steps')
                    if os.path.isfile(last_stats_file):
                        stats.load(last_stats_file)
                        logger.info('[LOAD] Training stats file')
                    logger.info(
                        f'[LOAD] Pre-trained weights (last, epoch={stats.epoch})')
                else:
                    logger.info('[LOAD] Initial weights')
            elif model.load(sess, dirs_config['model_dir']):
                if os.path.isfile(dirs_config['model_stats_file']):
                    stats.load(dirs_config['model_stats_file'])
                    logger.info('[LOAD] Training file')
                logger.info(
                    f'[LOAD] Pre-trained weights (best, epoch={stats.epoch})')
            else:
                logger.info('[LOAD] Initial weights')

            # run initialization update if exists
            if init_ops:
                sess.run(init_ops, feed_dict={x: X_train[:1600]})
                logger.info('[END] Run initialization ops')
        else:
            logger.info('[LOAD] Initial weights')

        if args.encoders and params['encoder']['type'] != 'Pass':
            encoder_dir = os.path.join(args.encoders, 'ckpt', 'encoders')
            if model.load_encoders(sess, os.path.join(encoder_dir)):
                logger.info('[LOAD] Encoders\' weights')
            else:
                logger.info('[WARN] Failed to load encoders\' weights')

        stats.new_run()

        # Preparing to the training
        graph = sess.graph if logs_config['save_graph'] else None
        writer_train = tf.summary.FileWriter(
            f'{dirs_config["logs_dir"]}/Graph/run_{stats.run}/train', graph)
        writer_valid = tf.summary.FileWriter(
            f'{dirs_config["logs_dir"]}/Graph/run_{stats.run}/valid')

        batch_size = training_config['batch_size']
        piece_size = int(training_config['piece_size'] * beat_size)

        logger.info(f'[START] Training, RUN={stats.run}')

        ids = np.arange(X_train.shape[0])

        # Logging initial weights
        if logs_config['log_weights_steps'] > 0:
            writer_train.add_summary(sess.run(weights_sum), stats.steps)
            logger.info('[LOG] Initial weights')

        loss_accum = LossAccumulator()

        # Training on all of the songs `num_epochs` times
        past_epochs = stats.epoch
        for epoch in range(past_epochs + 1, past_epochs + training_config['epochs'] + 1):
            stats.new_epoch()
            tf.set_random_seed(epoch)
            np.random.seed(epoch)

            start = time.time()
            np.random.shuffle(ids)
            loss_accum.clear()

            base_info = f'\r epoch: {epoch:3d} '

            for i in range(0, X_train.shape[0], batch_size):
                for j in range(0, X_train.shape[1], piece_size):
                    len_batch = len_train[ids[i:i + batch_size]] - j
                    non_empty = np.where(len_batch > 0)[0]

                    if len(non_empty) > 0:
                        len_batch = np.minimum(len_batch[non_empty], piece_size)
                        max_length = len_batch.max()
                        songs_batch = X_train[ids[i:i + batch_size], j:j + max_length, ...][non_empty]

                        if logs_config['log_weights_steps'] > 0 \
                                and (stats.steps + 1) % logs_config['log_weights_steps'] == 0 \
                                and j + piece_size >= X_train.shape[1]:
                            _, loss_i, summary = sess.run(
                                [update_ops, loss, weights_sum],
                                feed_dict={x: songs_batch, lengths: len_batch, is_train: True})
                            writer_train.add_summary(summary, stats.steps + 1)
                            del summary
                        else:
                            _, loss_i = sess.run(
                                [update_ops, loss],
                                feed_dict={x: songs_batch, lengths: len_batch, is_train: True})
                        del songs_batch

                        loss_accum.update(loss_i)

                stats.new_step()

                # Log the progress during training
                if logs_config['log_loss_steps'] > 0 and stats.steps % logs_config['log_loss_steps'] == 0:
                    info = (f' (steps: {stats.steps:5d}) time: {time_to_str(time.time() - start)}'
                            + str(loss_accum))
                    sys.stdout.write(base_info + info)
                    sys.stdout.flush()

            info = (f' (steps: {stats.steps:5d}) time: {time_to_str(time.time() - start)}\n'
                    + str(loss_accum))
            logger.info(base_info + info)
            logger.info(f'[END] Epoch training time {time_to_str(time.time() - start)}')

            # Evaluating the model on the training and validation data
            if logs_config['evaluate_epochs'] > 0 and epoch % logs_config['evaluate_epochs'] == 0:
                num_eval = X_valid.shape[0]
                collect_metrics(sess, metrics_upd,
                                data=X_train[:num_eval, ...],
                                data_lengths=len_train[:num_eval, ...],
                                placeholders=placeholders,
                                batch_size=batch_size * 2,
                                piece_size=piece_size)
                summary, loglik_val, gl_loglik_val = sess.run([metrics_sum, loglik, global_loglik])
                writer_train.add_summary(summary, epoch)
                del summary
                logger.info(
                    f'[EVAL] Training set log-likelihood: '
                    f'gen.={loglik_val:7.3f} enc.={gl_loglik_val:7.3f}')

                collect_metrics(sess, metrics_upd,
                                data=X_valid,
                                data_lengths=len_valid,
                                placeholders=placeholders,
                                batch_size=batch_size * 2,
                                piece_size=piece_size)
                summary, loglik_val, gl_loglik_val = sess.run([metrics_sum, loglik, global_loglik])
                writer_valid.add_summary(summary, epoch)
                del summary
                logger.info(
                    f'[EVAL] Validation set log-likelihood: '
                    f'gen.={loglik_val:7.3f} enc.={gl_loglik_val:7.3f}')

            # Sampling input using the model
            if logs_config['generate_epochs'] > 0 and epoch % logs_config['generate_epochs'] == 0:
                samples = generate_music(sess, sampler, intro_songs, placeholders,
                                         num_songs=sampling_config['num_songs'])
                logger.info('[EVAL] Generated music samples')

                summary_sample = sess.run(evaluator, feed_dict={x: samples, is_train: False})
                writer_train.add_summary(summary_sample, epoch)
                del summary_sample
                logger.info('[EVAL] Evaluated music samples')

                samples_to_save = samples[save_ids]
                del samples
                samples_to_save = pad_to_midi(samples_to_save, data_config)

                # Saving the music
                if logs_config['save_samples_epochs'] > 0 and epoch % logs_config['save_samples_epochs'] == 0:
                    save_music(samples_to_save,
                               num_intro=num_save_intro,
                               data_config=data_config,
                               base_path=f'{model.name}_e{epoch}',
                               save_dir=dirs_config['samples_dir'],
                               song_labels=song_labels)
                    logger.info('[SAVE] Saved music samples')

            # Saving the model if the monitored metric decreased
            if loglik_val < stats.metric_best:
                stats.update_metric_best(loglik_val)
                stats.reset_idle_epochs()

                if logs_config['generate_epochs'] > 0 and epoch % logs_config['generate_epochs'] == 0:
                    save_music(samples_to_save,
                               num_intro=num_save_intro,
                               data_config=data_config,
                               base_path=f'{model.name}_best',
                               save_dir=dirs_config['samples_dir'],
                               song_labels=song_labels)

                if logs_config['save_checkpoint_epochs'] > 0 and epoch % logs_config['save_checkpoint_epochs'] == 0:
                    model.save(sess, dirs_config['model_dir'], global_step=stats.steps)
                    stats.save(dirs_config['model_stats_file'])
                    logger.info(
                        f'[SAVE] Saved model after {epoch} epoch(-s) ({stats.steps} steps)')
            else:
                stats.new_idle_epoch()

                if stats.idle_epochs >= training_config['early_stopping']:
                    # Early stopping after no improvement
                    logger.info(
                        f'[WARN] No improvement after {training_config["early_stopping"]} epochs, quitting')
                    save_music(samples_to_save,
                               num_intro=num_save_intro,
                               data_config=data_config,
                               base_path=f'{model.name}_last',
                               save_dir=dirs_config['samples_dir'],
                               song_labels=song_labels)
                    break

            del samples_to_save
            logger.info(f'[END] Epoch time {time_to_str(time.time() - start)}')

        if not args.save_best_only:
            model.save(sess, dirs_config['model_last_dir'], global_step=stats.steps)
            stats.save(os.path.join(dirs_config['model_last_dir'], 'steps'))
            logger.info(
                f'[SAVE] Saved model after {epoch} epoch(-s) ({stats.steps} steps)')

        writer_train.close()
        writer_valid.close()
import numpy as np
from hmmlearn.hmm import GaussianHMM

import utils.data as dt
import params

from matplotlib import cm, pyplot as plt
from matplotlib.dates import YearLocator, MonthLocator

np.random.seed(123)

''' Input parameters '''
symbol = 'GOOG'
look_back = 15  # 15
look_ahead = 1

''' Loading data '''
df = dt.load_data(params.global_params['db_path'], symbol,
                  from_date=20100101, to_date=20110101, index_col='date')
df = df[['open', 'high', 'low', 'close', 'volume']]

dates = df.index.values
close_v = df['close'].values
volume = df['volume'].values

# _log_returns = np._log_returns(close_v)
_log_returns = np.diff(np.log(close_v))
dates = dates[1:]
close_v = close_v[1:]
volume = volume[1:]
#
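# --- The fragment above stops right after the log returns are computed. A minimal sketch of fitting
# --- a GaussianHMM on them; the number of hidden states and the choice of features are assumptions,
# --- not taken from the original script.
X = np.column_stack([_log_returns, volume])  # (n_samples, n_features) observations

hmm_model = GaussianHMM(n_components=4, covariance_type='diag', n_iter=1000)
hmm_model.fit(X)

hidden_states = hmm_model.predict(X)  # most likely regime per day, e.g. for colouring a price plot
print('transition matrix shape:', hmm_model.transmat_.shape)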
    elif current_argument in ("--adv"):
        print("training the shared model with adv loss function")
        ADV_LOSS = True
    elif current_argument in ("--firstshared"):
        ADV_LOSS = False
        SHARE_FIRST = True

if LABEL_OF_ALL_1 and NON_NEG_GRAD:
    raise Exception("ONLY ONE OF THE TWO CAN BE TRUE: LABEL_OF_ALL_1, NON_NEG_GRAD")

if __name__ == "__main__":
    # load data
    data = load_data(DATASET_NAME, N_CLUSTERS)
    complete_data = get_complete_data(DATASET_NAME, N_CLUSTERS, LEAVE_INTRA_CLUSTERS)

    # extract the information stored in data
    adjs, features_, tests, valids, clust_to_node, node_to_clust, com_idx_to_clust_idx = data

    # get the false edges and save them for each couple of clusters
    complete_train_matrix, _, complete_test_matrix, complete_valid_matrix = complete_data

    # turn the dictionary into a list of features
    features = [features_[i] for i in range(len(features_))]

    train_edges = get_edges_formatted(complete_train_matrix, clust_to_node, N_CLUSTERS)
    valid_edges = get_edges_formatted(complete_valid_matrix, clust_to_node, N_CLUSTERS)
    test_edges = get_edges_formatted(complete_test_matrix, clust_to_node, N_CLUSTERS)
def main(cfg: DictConfig):
    print('Nishika Second-hand Apartment Price Training')
    cur_dir = hydra.utils.get_original_cwd()
    os.chdir(cur_dir)

    data_dir = './input'
    seed_everything(cfg.data.seed)

    experiment = Experiment(api_key=cfg.exp.api_key,
                            project_name=cfg.exp.project_name,
                            auto_output_logging='simple',
                            auto_metric_logging=False)
    experiment.log_parameters(dict(cfg.data))

    # Config  ####################################################################################
    del_tar_col = ['取引時点']
    id_col = 'ID'
    tar_col = '取引価格(総額)_log'
    g_col = 'year'
    criterion = MAE
    cv = KFold(n_splits=cfg.data.n_splits, shuffle=True, random_state=cfg.data.seed)
    # cv = GroupKFold(n_splits=5)

    # Load Data  #################################################################################
    if cfg.exp.use_pickle:
        # load from pickle
        df = unpickle('./input/data.pkl')
    else:
        df = load_data(data_dir, sampling=cfg.data.sampling, seed=cfg.data.seed,
                       id_col=id_col, target_col=tar_col)

        # Preprocessing
        print('Preprocessing')
        df = preprocessing(df, cfg)

        # save in pickle format
        to_pickle('./input/data.pkl', df)
        try:
            experiment.log_asset(file_data='./input/data.pkl', file_name='data.pkl')
        except:
            pass

    features = [c for c in df.columns if c not in del_tar_col]

    # Model  #####################################################################################
    model = None
    if cfg.exp.model == 'lgb':
        model = LGBMModel(dict(cfg.lgb))
    elif cfg.exp.model == 'cat':
        model = CatBoostModel(dict(cfg.cat))

    # Train & Predict  ###########################################################################
    trainer = Trainer(model, id_col, tar_col, g_col, features, cv, criterion, experiment)
    trainer.fit(df)
    trainer.predict(df)
    trainer.get_feature_importance()
DATASET = "chipwhisperer" # ascad_fixed, ascad_variable, ches_ctf, chipwhisperer TARGET_BYTE = 0 NUM_OF_FEATURES = 100 FEATURE_SPACING = 1 GE_NUMBER_OF_EXPERIMENTS = 100 GE_NUMBER_OF_TRACES = 10 LEAKAGE_MODEL = LeakageModel.HW # intermediate, HW ######################################################################## ############################## Load data ############################### ######################################################################## train, test = load_data(DATA_ROOT / DATASET, TARGET_BYTE) (tracesTrain, ptTrain, keyTrain) = train (tracesTest, ptTest, keyTest) = test # X = (traces | plain_text) # y = key X_train = np.hstack((tracesTrain, ptTrain.reshape(-1, 1))) y_train = keyTrain X_test = np.hstack((tracesTest, ptTest.reshape(-1, 1))) y_test = keyTest ######################################################################## ############################## Profiling ############################### ########################################################################
                central),
            "ocean": determine_path("ocean", config, glacier_name, central) if config["ocean_PATH"] else None
        }
    except FileNotFoundError as e:
        if "data path not exists" in str(e):
            print(str(e))
            continue
        else:
            traceback.print_exc()
            sys.exit()

    try:
        x_all, y_all = load_data(glacier_name, logger=logger,
                                 use_summary=config["use_summary"],
                                 use_pca=config["use_pca"],
                                 n=config["n"],
                                 **path_dict)
        target_shape = 1

        if config["combine"]:
            test_size = int(len(y_all) / 3) % 7
            if first:
                (x_combine_train, x_combine_test,
                 y_combine_train, y_combine_test) = train_test_split(x_all, y_all, test_size=test_size)
                first = False
            else:
                (x_train, x_test,
                 y_train, y_test) = train_test_split(x_all, y_all, test_size=test_size)
def train_model(symbol='C', look_back=5, look_ahead=1, train_size=0.95, plot=True):
    np.random.seed(123)

    ''' Input parameters '''
    input_fields = [0, 1, 2, 3, 4]  # open, high, low, close, volume
    output_fields = [5]  # returns

    ''' Internal parameters '''
    saved_models_dir = params.global_params['models_dir']
    save_models = bool(params.global_params['save_models'])

    ''' Hyper parameters '''
    epochs = 100
    validation_split = 0.05  # part of the training set

    ''' Loading data '''
    df = dt.load_data(params.global_params['db_path'], symbol, index_col='date')
    df = df[['open', 'high', 'low', 'close', 'volume']]
    df = df.join(pd.Series(pd.Series(df['close'].diff(1), name='returns')))

    ''' Preparing data '''
    c_w = df['close'].rolling(center=False, window=look_back)
    c_mean = c_w.mean()
    c_std = c_w.std()
    df['close'] = (df['close'] - c_mean) / (2 * c_std)
    df['open'] = (df['open'] - c_mean) / (2 * c_std)
    df['high'] = (df['high'] - c_mean) / (2 * c_std)
    df['low'] = (df['low'] - c_mean) / (2 * c_std)

    df['returns'] = df['returns'].fillna(0)
    df['returns'] = np.where(df['returns'].values > 0, 0, 1)  # 0 upward, 1 downward
    df['returns'] = df['returns'].shift(-look_ahead)

    v_w = df['volume'].rolling(center=False, window=look_back)
    v_mean = v_w.mean()
    v_std = v_w.std()
    df['volume'] = (df['volume'] - v_mean) / (v_std)

    df = df[look_back:]

    ''' Inline data as input parameters '''
    data = df.values
    x_data = []
    y_data = []
    for index in range(data.shape[0] - look_back):
        x_data.append(
            np.reshape(data[index:index + look_back, input_fields],
                       (look_back * len(input_fields), 1)))
        y_data.append(
            np.reshape(data[index, output_fields], (len(output_fields), 1)))

    x_data = np.array(x_data)
    y_data = np.array(y_data)

    train_rows = int(round(x_data.shape[0] * train_size))
    x_close_train = x_data[:train_rows]
    y_train = y_data[:train_rows]
    x_close_test = x_data[train_rows:]
    y_test = y_data[train_rows:]

    y_train = y_train.astype(int)
    y_train = np.reshape(y_train, (y_train.shape[0]))
    y_train = dt.onehottify(y_train, dtype=float)

    y_test = y_test.astype(int)
    y_test = np.reshape(y_test, (y_test.shape[0]))
    y_test = dt.onehottify(y_test, dtype=float)

    x_close_train = np.reshape(x_close_train,
                               (x_close_train.shape[0], x_close_train.shape[1]))
    x_close_test = np.reshape(x_close_test,
                              (x_close_test.shape[0], x_close_test.shape[1]))

    ''' Build model '''
    model_file = saved_models_dir + 'model.' + symbol + '.json'
    if (save_models and os.path.exists(model_file)):
        json_file = open(model_file, 'r')
        loaded_model_json = json_file.read()
        json_file.close()
        model = model_from_json(loaded_model_json)
    else:
        model = Sequential()
        model.add(Dense(100, activation='relu', input_shape=(x_close_train.shape[1],)))
        model.add(Dropout(0.3))
        model.add(Dense(2, activation='softmax'))
        model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

        model_json = model.to_json()
        with open(model_file, "w") as json_file:
            json_file.write(model_json)
        json_file.close()

    ''' Train model '''
    model_weights = saved_models_dir + 'model.' + symbol + '.h5'
    history = None
    if (save_models and os.path.exists(model_weights)):
        model.load_weights(model_weights)
    else:
        history = model.fit(
            x_close_train, y_train,
            epochs=epochs,
            # callbacks=[utils.plot_learning.plot_learning],
            validation_split=validation_split
            # validation_data=(x_close_test, y_test)
        )

    if (save_models and not os.path.exists(model_weights)):
        model.save_weights(model_weights)
        print("Saved model to disk")

    ''' Predictions on test set (different from validation set) '''
    predictions = model.predict(x_close_test)
    tmp = predictions * y_test
    tmp = np.sum(tmp, axis=1)
    tmp = np.where(tmp > 0.5, 1, 0)
    accuracy = np.sum(tmp) / len(tmp)
    print(symbol, accuracy)

    ''' Print model output '''
    if (plot and history is not None):
        print(history.history.keys())
        plt.figure(1)
        plt.subplot(211)
        plt.plot(history.history['acc'])
        plt.plot(history.history['val_acc'])
        plt.title('model accuracy')
        plt.ylabel('accuracy')
        plt.xlabel('epoch')
        plt.legend(['train', 'test'], loc='upper left')
        # plt.show()

        # summarize history for loss
        plt.subplot(212)
        plt.plot(history.history['loss'])
        plt.plot(history.history['val_loss'])
        plt.title('model loss')
        plt.ylabel('loss')
        plt.xlabel('epoch')
        plt.legend(['train', 'test'], loc='upper left')
        plt.show()
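# --- dt.onehottify is not shown in this snippet; a minimal sketch of what such a helper typically
# --- does, via an identity-matrix lookup. The real implementation in utils.data may differ.
import numpy as np

def onehottify(labels, n_classes=None, dtype=float):
    """Map integer class labels to one-hot rows, e.g. [0, 1, 1] -> [[1, 0], [0, 1], [0, 1]]."""
    labels = np.asarray(labels, dtype=int)
    if n_classes is None:
        n_classes = labels.max() + 1
    return np.eye(n_classes, dtype=dtype)[labels]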
def join(path: str):
    """join all csv to one"""
    tables = [load_data(file)
              for file in os.listdir(path)
              if file.endswith(".csv") and file.startswith("cica-")]
    df = join_tables(tables)
    df.to_csv(f"result-cica-{datetime.now():%Y%m%d-%H%M}.csv", index=False, sep=";")
# set random behavior
rng = check_random_state(args.seed)

# load model configuration
model = select_model(args.model)

# prepare output directory
data_name = os.path.basename(os.path.normpath(args.dataset))
out_dir = os.path.join('params', 'membership', model.name + '_' + data_name + '_weak')
if not os.path.exists(out_dir):
    os.makedirs(out_dir)

# load data: playlists, splits, features and artist info
data = load_data(args.dataset, args.msd, model)
playlists_coo, split_weak, split_strong, features, song2artist = data

# playlists_coo are the playlists stored in coordinate format
playlists_idx, songs_idx, position, idx2song = playlists_coo

# split_strong defines a playlist-disjoint split
# this is just to validate the model, use any disjoint split
fold_strong = split_strong[0]
train_idx_dsj, test_idx_dsj = np.hstack(fold_strong[:2]), fold_strong[2]

# split_weak provides a query/continuation split
query_idx, cont_idx = np.hstack(split_weak[:2]), split_weak[2]

# define splits for this experiment
# train model on intersection of disjoint training split and queries
np.random.seed(123)

''' Input parameters '''
symbol = 'C'
look_back = 5
look_ahead = 1
train_size = 0.95
input_fields = [0, 1, 2, 3, 4, 6]  # open, high, low, close, volume
output_fields = [5]  # returns

''' Hyper parameters '''
epochs = 100
validation_split = 0.05  # part of the training set

''' Loading data '''
df = dt.load_data(params.global_params['db_path'], symbol, index_col='date')
df = df[['open', 'high', 'low', 'close', 'volume']]
print('df[high].at[0]', df['high'].iat[0])
df = ta.myRSI(df, 5)
df = df.join(pd.Series(df['close'].diff(1), name='returns'))

''' Preparing data '''
c_w = df['close'].rolling(center=False, window=look_back)
c_mean = c_w.mean()
c_std = c_w.std()
stds = 2.
df = df.join(pd.Series((df['close'] - c_mean) / (stds * c_std), name='bb'))
df['open'] = (df['open'] - df['close'].shift(1)) / c_std
from utils.model import load_model
from utils.plot import make_adv_img, make_confusion_matrix
from utils.utils import get_targeted_success_rate, set_art

parser = argparse.ArgumentParser()
parser.add_argument('--dataset', type=str, default='chestx')
parser.add_argument('--model', type=str, default='inceptionv3')
parser.add_argument('--norm', type=str, default='l2')
parser.add_argument('--eps', type=float, default=0.04)
parser.add_argument('--target', type=str, default='PNEUMONIA')
parser.add_argument('--gpu', type=str, default='0')
args = parser.parse_args()

set_gpu(args.gpu)

X_train, X_test, y_train, y_test, mean_l2_train, mean_linf_train = load_data(
    dataset=args.dataset, normalize=True, norm=True)

model = load_model(dataset=args.dataset,
                   nb_class=y_train.shape[1],
                   model_type=args.model,
                   mode='inference')

# Generate adversarial examples
classifier, norm, eps = set_art(model, args.norm, args.eps, mean_l2_train, mean_linf_train)
adv_crafter = TargetedUniversalPerturbation(classifier,
                                            attacker='fgsm',
                                            delta=0.000001,
                                            attacker_params={