def main():
    """Train on the '0_dropui' split, then run inductive evaluation.

    The model is trained on the dataset whose path ends in '0_dropui'.
    Afterwards the dataset path is shortened by seven characters, the
    dataset is reloaded, the model's features are regenerated for it, and
    ``trainer.inductive_eval`` is called with the user/item counts of the
    training-time dataset.
    """
    # Drop the last three characters of the script path (presumably '.py').
    log_path = __file__[:-3]
    init_run(log_path, 2021)

    device = torch.device('cuda')
    config = get_gowalla_config(device)
    dataset_config, model_config, trainer_config = config[6]
    dataset_config['path'] = dataset_config['path'][:-4] + '0_dropui'

    writer = SummaryWriter(log_path)
    try:
        dataset = get_dataset(dataset_config)
        model = get_model(model_config, dataset)
        trainer = get_trainer(trainer_config, dataset, model)
        trainer.train(verbose=True, writer=writer)
    finally:
        # Always release the writer, even when loading or training fails.
        writer.close()

    # Strip the 7-character '_dropui' suffix to address the other split.
    dataset_config['path'] = dataset_config['path'][:-7]
    new_dataset = get_dataset(dataset_config)

    # Re-point the trained model at the new dataset and rebuild its features.
    model.config['dataset'] = new_dataset
    model.n_users, model.n_items = new_dataset.n_users, new_dataset.n_items
    model.feat_mat, _, _, model.row_sum = model.generate_feat(
        new_dataset, is_updating=True)
    model.update_feat_mat()

    trainer = get_trainer(trainer_config, new_dataset, model)
    trainer.inductive_eval(dataset.n_users, dataset.n_items)
def train():
    """Fit the model selected on the command line and save it to 'my_model'.

    The optional positional argument ``m`` (default 0) picks the builder:
    ``get_model4`` when it equals 4, ``get_model`` otherwise.  Both the
    model and the train/validation datasets are built with the same
    walk/feature settings.
    """
    cli = argparse.ArgumentParser(description='Which model to run')
    cli.add_argument('m', default=0, type=int, nargs='?', help='model number')
    args = cli.parse_args()
    print(f'running model {args.m}')

    # Settings shared by the model builder and both dataset splits.
    walk_kwargs = dict(walk_length=20, nr_walks=20, feature_length=12)

    build_model = get_model4 if args.m == 4 else get_model
    model = build_model(**walk_kwargs)

    ds_train = get_dataset(path=savedir, train=True, **walk_kwargs)
    ds_val = get_dataset(path=savedir, train=False, **walk_kwargs)

    model.compile(
        loss=[tf.keras.losses.MeanSquaredError(name='mse')],
        optimizer='adam',
        metrics=['accuracy', 'mse'],
    )
    model.fit(x=ds_train, epochs=100, validation_data=ds_val, verbose=2)
    model.save("my_model")
def train(config):
    """Train ``Model`` on the training set with periodic dev evaluation.

    Every ``args.report`` batches (and when the batch counter reaches the
    estimated batch total) the running loss is printed, the model is scored
    on the dev set, and a checkpoint is written: 'best_checkpoint.pt' when
    the score improves on the best seen so far, 'checkpoint.pt' otherwise.

    Args:
        config: object exposing ``args``, ``train_path``, ``dev_path`` and
            ``w2i_vocabs``.

    Returns:
        The trained model.
    """
    args = config.args
    train_set = get_dataset(config.train_path, config.w2i_vocabs, config,
                            is_train=True)
    dev_set = get_dataset(config.dev_path, config.w2i_vocabs, config,
                          is_train=False)
    train_batch = get_dataloader(train_set, args.batch_size, is_train=True)

    model = Model(
        n_emb=args.n_emb,
        n_hidden=args.n_hidden,
        vocab_size=args.vocab_size,
        dropout=args.dropout,
        d_ff=args.d_ff,
        n_head=args.n_head,
        n_block=args.n_block,
    )
    if args.restore != '':
        # Resume from a previously saved state dict.
        model.load_state_dict(torch.load(args.restore))
    model.to(device)

    # Only parameters that require gradients are handed to the optimizer.
    trainable = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adam(trainable, lr=args.lr)

    best_score = -1000000
    for epoch_idx in range(args.epoch):
        model.train()
        report_loss = 0
        start_time = time.time()
        n_samples = 0
        count = 0
        total = len(train_set) // args.batch_size + 1

        for Y, T in train_batch:
            Y = Y.to(device)
            T = T.to(device)

            optimizer.zero_grad()
            loss = model(Y, T)
            loss.backward()
            optimizer.step()

            report_loss += loss.item()
            n_samples += len(Y.data)
            count += 1

            if not (count % args.report == 0 or count == total):
                continue

            print('%d/%d, epoch: %d, report_loss: %.3f, time: %.2f' %
                  (count, total, epoch_idx + 1, report_loss / n_samples,
                   time.time() - start_time))

            score = eval(model, dev_set, args.batch_size)
            # Switch back to training mode after evaluation.
            model.train()

            if score > best_score:
                best_score = score
                checkpoint_name = 'best_checkpoint.pt'
            else:
                checkpoint_name = 'checkpoint.pt'
            save_model(os.path.join(args.dir, checkpoint_name), model)

            # Restart the running statistics for the next report window.
            report_loss, start_time, n_samples = 0, time.time(), 0

    return model
def main():
    """Evaluate a model before and after swapping in new interactions.

    The model is built from the '0_dropit' split.  The test metric is then
    printed twice on the dataset reached by trimming seven characters off
    the path: once with the model's original ``data_mat`` and once after
    ``data_mat`` has been rebuilt from that dataset's ``train_array``.
    """
    log_path = __file__[:-3]
    init_run(log_path, 2021)

    device = torch.device('cuda')
    dataset_config, model_config, trainer_config = get_gowalla_config(device)[3]

    dataset_config['path'] = dataset_config['path'][:-4] + '0_dropit'
    dataset = get_dataset(dataset_config)
    model = get_model(model_config, dataset)

    # Dropping the 7-character '_dropit' suffix selects the other split.
    dataset_config['path'] = dataset_config['path'][:-7]
    new_dataset = get_dataset(dataset_config)
    model.config['dataset'] = new_dataset
    trainer = get_trainer(trainer_config, new_dataset, model)

    results, _ = trainer.eval('test')
    print('Previous interactions test result. {:s}'.format(results))

    # One unit entry per (row, column) pair listed in train_array.
    pairs = np.array(new_dataset.train_array).T
    ones = np.ones((len(new_dataset.train_array), ))
    matrix_shape = (new_dataset.n_users, new_dataset.n_items)
    model.data_mat = sp.coo_matrix(
        (ones, pairs), shape=matrix_shape, dtype=np.float32).tocsr()

    results, _ = trainer.eval('test')
    print('Updated interactions test result. {:s}'.format(results))
def get_data(dataset_index, t0=-150, t1=100, debug=False,
             Nsmooth=2, smoothing=None):
    """Load one recording, smooth it and re-centre its time and space axes.

    Args:
        dataset_index: index into the sequence returned by ``get_dataset()``.
        t0, t1: open bounds of the time window that is kept.
        debug: forwarded to ``get_time_max`` and ``get_stim_center``.
        Nsmooth: side length of the default box kernel.
        smoothing: optional 2-D kernel; when ``None`` a normalised
            ``Nsmooth`` x ``Nsmooth`` box kernel is used.

    Returns:
        ``(time - tmax, space - x_center, data)`` where ``data`` is the
        smoothed signal restricted to the selected time window.
    """
    print(get_dataset()[dataset_index])
    delay = 0  # a per-dataset 'delay' entry was used here at some point

    mat = loadmat(get_dataset()[dataset_index]['filename'])
    record = mat['matNL'][0]
    data = 1e3 * record['stim1'][0]
    # Replace NaN samples by zero.
    nan_mask = np.isnan(data)
    data[nan_mask] = 0
    time = record['time'][0].flatten()
    space = record['space'][0].flatten()

    if smoothing is None:
        smoothing = np.ones((Nsmooth, Nsmooth)) / Nsmooth**2
    smooth_data = convolve2d(data, smoothing, mode='same')

    # Keep only the samples strictly inside the requested window.
    in_window = (time > t0 - delay) & (time < t1 - delay)
    new_time = np.array(time[in_window])
    new_data = np.array(smooth_data[:, in_window])

    # Reference points used to shift both axes.
    tmax = get_time_max(new_time, new_data, debug=debug)
    x_center = get_stim_center(new_time, space, new_data,
                               debug=debug, tmax=tmax)
    return new_time - tmax, space - x_center, new_data
def main():
    """Train on the '0_dropit' split, then test before/after a data refresh.

    After training, the dataset path is shortened by seven characters and
    the dataset reloaded.  The test metric is printed once with the model
    as trained and once after ``model.normalized_data_mat`` has been
    recomputed from the reloaded dataset.
    """
    # Drop the last three characters of the script path (presumably '.py').
    log_path = __file__[:-3]
    init_run(log_path, 2021)

    device = torch.device('cuda')
    config = get_gowalla_config(device)
    dataset_config, model_config, trainer_config = config[5]
    dataset_config['path'] = dataset_config['path'][:-4] + '0_dropit'

    writer = SummaryWriter(log_path)
    try:
        dataset = get_dataset(dataset_config)
        model = get_model(model_config, dataset)
        trainer = get_trainer(trainer_config, dataset, model)
        trainer.train(verbose=True, writer=writer)
    finally:
        # Always release the writer, even when loading or training fails.
        writer.close()

    # Strip the 7-character '_dropit' suffix to address the other split.
    dataset_config['path'] = dataset_config['path'][:-7]
    new_dataset = get_dataset(dataset_config)
    model.config['dataset'] = new_dataset
    trainer = get_trainer(trainer_config, new_dataset, model)

    results, _ = trainer.eval('test')
    print('Previous interactions test result. {:s}'.format(results))

    model.normalized_data_mat = model.get_data_mat(new_dataset)
    results, _ = trainer.eval('test')
    print('Updated interactions test result. {:s}'.format(results))
def get_data_loaders_new(args, tokenizer):
    """Build the training and validation ``DataLoader`` objects.

    Args:
        args: namespace providing ``train_path``, ``valid_path``,
            ``fea_path``, ``max_history``, ``distributed``,
            ``train_batch_size`` and ``valid_batch_size``.
        tokenizer: tokenizer handed to the dataset; its ``pad_token_id`` is
            read when a batch is collated.

    Returns:
        ``(train_loader, valid_loader)``.
    """
    train_data = get_dataset(tokenizer, args.train_path, args.fea_path,
                             n_history=args.max_history)
    valid_data = get_dataset(tokenizer, args.valid_path, args.fea_path,
                             n_history=args.max_history)

    # Each dataset receives both feature sets, its own one first.
    train_dataset = AVSDDataSet(train_data[0], tokenizer,
                                (train_data[1], valid_data[1]),
                                drop_rate=0, train=True)
    valid_dataset = AVSDDataSet(valid_data[0], tokenizer,
                                (valid_data[1], train_data[1]),
                                drop_rate=0, train=False)

    def pad_batch(batch):
        # pad_token_id is looked up at call time, as the lambdas did.
        return collate_fn(batch, tokenizer.pad_token_id, features=True)

    train_loader = DataLoader(
        train_dataset,
        batch_size=args.train_batch_size,
        shuffle=(not args.distributed),
        num_workers=4,
        collate_fn=pad_batch,
    )
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=args.valid_batch_size,
        shuffle=False,
        num_workers=4,
        collate_fn=pad_batch,
    )
    return train_loader, valid_loader
def test_ClfDset():
    """Smoke test: datasets build from the arcface + mnist test configs.

    ``get_dataset`` is called once in "train" mode and twice in "valid"
    mode; the test passes as long as none of the calls raises.
    """
    model_cfg = read_yaml("./tests/config/arcface.yaml")
    data_cfg = read_yaml("./tests/config/mnist.yaml")
    for mode in ("train", "valid", "valid"):
        dataset = get_dataset(model_cfg, data_cfg, mode=mode)
def test_MnistDset():
    """Smoke test: datasets build from the clf + mnist configs.

    ``get_dataset`` is called once in "train" mode and twice in "valid"
    mode; the test passes as long as none of the calls raises.
    """
    model_cfg = read_yaml("./config/clf.yaml")
    data_cfg = read_yaml("./config/mnist.yaml")
    for mode in ("train", "valid", "valid"):
        dataset = get_dataset(model_cfg, data_cfg, mode=mode)
def main(data_path, model_path, idtable_path, step, split):
    """Load saved MBNet weights and run ``valid`` on the chosen split.

    Args:
        data_path: directory handed to ``get_dataset``.
        model_path: path of the saved state dict to load.
        idtable_path: forwarded to ``get_dataset`` as ``idtable``.
        step: forwarded unchanged to ``valid``.
        split: either ``'Valid'`` or ``'Test'``.

    Raises:
        ValueError: if ``split`` is neither ``'Valid'`` nor ``'Test'``.
            Previously this surfaced later as an ``UnboundLocalError``.
    """
    csv_by_split = {'Valid': "valid_data.csv", 'Test': "testing_data.csv"}
    if split not in csv_by_split:
        raise ValueError(
            "split must be 'Valid' or 'Test', got {!r}".format(split))

    dataset = get_dataset(data_path, csv_by_split[split], vcc18=True,
                          valid=True, idtable=idtable_path)
    dataloader = get_dataloader(dataset, batch_size=20, num_workers=1,
                                shuffle=False)

    model = MBNet(num_judges=5000).to(device)
    model.load_state_dict(torch.load(model_path))

    lamb = 4
    valid(model, dataloader, step, split, lamb)
def main():
    """Inductively evaluate a model built on the '0_dropui' split.

    The model is created from the '0_dropui' dataset, then re-pointed at
    the dataset reached by trimming seven characters off the path: its
    user/item counts and ``data_mat`` are rebuilt and ``sim_mat`` is
    re-created with shape ``(n_items, n_items)`` of the new dataset.
    ``trainer.inductive_eval`` is finally called with the user/item counts
    of the first dataset.
    """
    log_path = __file__[:-3]
    init_run(log_path, 2021)

    device = torch.device('cuda')
    dataset_config, model_config, trainer_config = get_gowalla_config(device)[3]

    dataset_config['path'] = dataset_config['path'][:-4] + '0_dropui'
    dataset = get_dataset(dataset_config)
    model = get_model(model_config, dataset)

    # Dropping the 7-character '_dropui' suffix selects the other split.
    dataset_config['path'] = dataset_config['path'][:-7]
    new_dataset = get_dataset(dataset_config)
    model.config['dataset'] = new_dataset
    model.n_users, model.n_items = new_dataset.n_users, new_dataset.n_items

    # One unit entry per (row, column) pair listed in train_array.
    pairs = np.array(new_dataset.train_array).T
    ones = np.ones((len(new_dataset.train_array), ))
    model.data_mat = sp.coo_matrix(
        (ones, pairs),
        shape=(new_dataset.n_users, new_dataset.n_items),
        dtype=np.float32,
    ).tocsr()

    # Same similarity entries, new shape.
    old_sim = model.sim_mat.tocoo()
    resized_sim = sp.coo_matrix(
        (old_sim.data, (old_sim.row, old_sim.col)),
        shape=(new_dataset.n_items, new_dataset.n_items),
    )
    model.sim_mat = resized_sim.tocsr()

    trainer = get_trainer(trainer_config, new_dataset, model)
    trainer.inductive_eval(dataset.n_users, dataset.n_items)
def __init__(self, config):
    """Set up model, optimizer, checkpointing and data from ``config``.

    Args:
        config: mapping read with ``config[...]``.  Required keys are
            'model', 'input_dims', 'z_dims', 'max_iter', 'mse_weight',
            'lr', 'beta1', 'beta2', 'ckpt_dir', 'ckpt_name',
            'save_output', 'output_dir', 'cont', 'gather_step',
            'display_step', 'save_step', 'dset_dir', 'dataset',
            'data_type' and 'batch_size'.  'implicit' and 'image_size'
            are optional.
    """
    super(Trainer, self).__init__()
    self.use_cuda = torch.cuda.is_available()
    if self.use_cuda:
        self.device = 'cuda'
    else:
        self.device = 'cpu'

    # --- model -----------------------------------------------------------
    self.modef = config['model']
    self.model = get_model(config)
    self.input_dims = config['input_dims']
    self.z_dims = config['z_dims']
    # Zero-mean, identity-covariance normal over the z_dims-sized space.
    prior_mean = torch.zeros(self.z_dims)
    prior_cov = torch.eye(self.z_dims)
    self.prior = distributions.MultivariateNormal(prior_mean, prior_cov)

    # --- optimisation ----------------------------------------------------
    self.max_iter = config['max_iter']
    self.global_iter = 1
    self.mseWeight = config['mse_weight']
    self.lr = config['lr']
    self.beta1 = config['beta1']
    self.beta2 = config['beta2']
    self.optim = optim.Adam(self.model.parameters(),
                            lr=self.lr,
                            betas=(self.beta1, self.beta2))
    self.implicit = 'implicit' in config and config['implicit']
    if self.implicit:
        # Use the implicit variant as the per-instance training routine.
        self.train_inst = self.implicit_inst

    # --- output locations ------------------------------------------------
    self.ckpt_dir = config['ckpt_dir']
    os.makedirs(self.ckpt_dir, exist_ok=True)
    self.ckpt_name = config['ckpt_name']
    self.save_output = config['save_output']
    self.output_dir = config['output_dir']
    os.makedirs(self.output_dir, exist_ok=True)

    # --- optional resume -------------------------------------------------
    if config['cont'] and self.ckpt_name is not None:
        self.load_checkpoint(self.ckpt_name)

    self.meta = defaultdict(list)
    self.gather_step = config['gather_step']
    self.display_step = config['display_step']
    self.save_step = config['save_step']

    # --- data ------------------------------------------------------------
    self.dset_dir = config['dset_dir']
    self.dataset = config['dataset']
    self.data_type = config['data_type']
    if self.data_type == 'linear':
        # Route the drawing/traversal hooks to their 'linear' versions.
        self.draw_reconstruction = self.linear_reconstruction
        self.draw_generated = self.linear_generated
        self.visualize_traverse = self.linear_traverse
        self.traversal = self.linear_traversal
    self.batch_size = config['batch_size']
    if 'image_size' in config:
        self.img_size = config['image_size']
    else:
        self.img_size = 32
    self.data_loader = get_dataset(config)
    self.val_loader = get_dataset(config, train=False)
def get_dataset(params):
    """Call ``dataset.get_dataset`` with the arguments found in ``params``.

    The optional ``params['parameter']`` entry decides the call form:

    * missing or ``None`` -> no arguments
    * ``list``            -> unpacked as positional arguments
    * ``dict``            -> unpacked as keyword arguments
    * anything else       -> passed as the single positional argument
    """
    parameter = params.get('parameter', None)
    if parameter is None:
        return dataset.get_dataset()
    if isinstance(parameter, list):
        return dataset.get_dataset(*parameter)
    if isinstance(parameter, dict):
        return dataset.get_dataset(**parameter)
    return dataset.get_dataset(parameter)
def test_build_triplet():
    """Smoke test: datasets, model and trainer build from the triplet config.

    Nothing is asserted; the test passes when every factory call returns
    without raising.
    """
    model_cfg = read_yaml("./tests/config/triplet.yaml")
    data_cfg = read_yaml("./tests/config/mnist.yaml")

    dset = get_dataset(model_cfg, data_cfg, mode="train")
    valid_dset = get_dataset(model_cfg, data_cfg, mode="valid")

    model = get_model(model_cfg, data_cfg)
    trainer = get_trainer(model_cfg, data_cfg)
def test_build_arcface():
    """Smoke test: build everything from the arcface config and train.

    Nothing is asserted; the test passes when the factories and
    ``trainer.train`` run without raising.
    """
    model_cfg = read_yaml("./tests/config/arcface.yaml")
    data_cfg = read_yaml("./tests/config/mnist.yaml")

    dset = get_dataset(model_cfg, data_cfg, mode="train")
    valid_dset = get_dataset(model_cfg, data_cfg, mode="valid")

    model = get_model(model_cfg, data_cfg)
    trainer = get_trainer(model_cfg, data_cfg)
    trainer.train(dataset=dset, valid_dataset=valid_dset, model=model)
def _make_input(self):
    """Create the input datasets, their iterators and the int32 inputs.

    Three datasets (train/valid/test) are built from their TFRecord files
    and each gets an initializable iterator.  A handle-based iterator,
    driven by ``self.data_handle`` and typed after the training dataset,
    yields the batch dict from which every model input is cast to int32
    and stored as an attribute of the same name.

    Fix: the ``answer`` tensor used to be created with ``name="query"``,
    which duplicated the query op name; it is now named ``"answer"``.
    """
    word2idx = self.vocab.word2idx
    # NOTE: 'shuffle_bufer' (sic) is the keyword this code has always
    # passed to get_dataset; it is left untouched.
    train_dataset = get_dataset(word2idx, self.train_tfrecord_file,
                                self.train_size,
                                repeat_num=self.num_epochs,
                                shuffle_bufer=1000, prefetch=1000)
    valid_dataset = get_dataset(word2idx, self.valid_tfrecord_file,
                                self.valid_size,
                                repeat_num=-1, shuffle_bufer=1000)
    test_dataset = get_dataset(word2idx, self.test_tfrecord_file,
                               self.test_size,
                               repeat_num=1, shuffle_bufer=1000)

    self.train_iterator = train_dataset.make_initializable_iterator()
    self.valid_iterator = valid_dataset.make_initializable_iterator()
    self.test_iterator = test_dataset.make_initializable_iterator()

    # Feeding a different string handle switches the split being read.
    data_iter = tf.data.Iterator.from_string_handle(
        self.data_handle, train_dataset.output_types,
        train_dataset.output_shapes)
    batch_data = data_iter.get_next()

    def as_int32(key):
        # Cast one batch entry to int32 and name the op after its key.
        return tf.cast(batch_data[key], tf.int32, name=key)

    self.passage = as_int32("passage")
    self.query = as_int32("query")
    self.answer = as_int32("answer")
    self.passage_len = as_int32("passage_len")
    self.query_len = as_int32("query_len")
    self.answer_len = as_int32("answer_len")
    self.query_id = as_int32("query_id")

    # Alternative candidates and their lengths.
    self.alter0 = as_int32("alter0")
    self.alter1 = as_int32("alter1")
    self.alter2 = as_int32("alter2")
    self.alter0_len = as_int32("alter0_len")
    self.alter1_len = as_int32("alter1_len")
    self.alter2_len = as_int32("alter2_len")
def run_trial(self, trial, data_dir, num_val_batches, objective, *args, **kwargs):
    """Build, train and report a single tuner trial.

    Args:
        trial: trial object; its ``hyperparameters`` build the model and
            its ``trial_id`` identifies the report and the saved model.
        data_dir: directory handed to ``get_dataset_filenames_split``.
        num_val_batches: number of batches reserved for validation.
        objective: name of the metric reported to the oracle.
        **kwargs: ``num_epochs`` is read from here (``None`` if absent).
    """
    hp = trial.hyperparameters
    model = self.hypermodel.build(hp)
    num_epochs = kwargs.get('num_epochs')
    batch_size, seq_len = model.batch_size, model.seq_len
    overlap = model.big_frame_size
    quantization = {'q_type': 'mu-law', 'q_levels': 256}

    train_split, val_split = get_dataset_filenames_split(
        data_dir, num_val_batches * model.batch_size)

    train_dataset = get_dataset(train_split, num_epochs, batch_size, seq_len,
                                overlap, drop_remainder=True, **quantization)
    val_dataset = get_dataset(val_split, 1, batch_size, seq_len, overlap,
                              shuffle=False, drop_remainder=True,
                              **quantization)

    # Whole sub-sequences of length seq_len in the first training file.
    first_file_samples, _ = librosa.load(train_split[0], sr=None, mono=True)
    steps_per_batch = int(np.floor(len(first_file_samples) / float(seq_len)))
    steps_per_epoch = len(train_split) // batch_size * steps_per_batch

    history = model.fit(
        train_dataset,
        epochs=num_epochs,
        steps_per_epoch=steps_per_epoch,
        shuffle=False,
        validation_data=val_dataset,
    )

    def best_of(epoch_values):
        # The oracle's objective direction is applied to every metric.
        if self.oracle.objective.direction == 'min':
            return np.min(epoch_values)
        return np.max(epoch_values)

    # See https://github.com/keras-team/keras-tuner/blob/master/kerastuner/engine/multi_execution_tuner.py#L95
    metrics = {
        metric: best_of(epoch_values)
        for metric, epoch_values in history.history.items()
    }
    oracle_metrics_dict = {objective: metrics[objective]}

    # A fully overridden run_trial has to report to the oracle itself.
    # See https://keras-team.github.io/keras-tuner/documentation/tuners/#run_trial-method_1
    self.oracle.update_trial(trial.trial_id, oracle_metrics_dict)
    self.save_model(trial.trial_id, model)
def plot_response(args):
    """Draw a filled-contour map of one recording over time and space.

    Args:
        args: namespace providing ``data_index``, ``tshift``, ``Nsmooth``,
            ``t0``, ``t1``, ``Nlevels``, ``vsd_ticks``,
            ``with_onset_propag``, ``signal_criteria``, ``amp_criteria``
            and ``SAVE``.

    The figure is written to a fixed SVG path when ``args.SAVE`` is true
    and shown otherwise.
    """
    # Look the dataset record up once instead of on every use.
    record = get_dataset()[args.data_index]

    fig, ax = plt.subplots(1, figsize=(4.7, 3))
    fig.suptitle(record['filename'])
    plt.subplots_adjust(bottom=.23, top=.9, right=.84, left=.25)
    print(record)

    f = loadmat(record['filename'])
    data = 1e3 * f['matNL'][0]['stim1'][0]
    time = f['matNL'][0]['time'][0].flatten() + args.tshift
    print(time[-1] - time[0])
    space = f['matNL'][0]['space'][0].flatten()

    if args.Nsmooth > 0:
        # Normalised Nsmooth x Nsmooth box kernel.
        smoothing = np.ones((args.Nsmooth, args.Nsmooth)) / args.Nsmooth**2
        smooth_data = convolve2d(data, smoothing, mode='same')
    else:
        smooth_data = data

    cond = (time > args.t0) & (time < args.t1)
    levels = np.linspace(smooth_data.min(), smooth_data.max(), args.Nlevels)
    c = ax.contourf(time[cond], space, smooth_data[:, cond], levels,
                    cmap=cm.viridis)
    # Raw string: '\p' is not a valid escape in a normal string literal.
    plt.colorbar(c, label=r'VSD signal ($\perthousand$)',
                 ticks=args.vsd_ticks)

    # Scale bars anchored at the lower-left corner of the axes.
    x1, x2 = ax.get_xlim()
    ax.plot([x1, x1], [0, 2], '-', color='gray', lw=4)
    ax.annotate('2mm', (x1, 2), rotation=90, fontsize=14)
    y1, y2 = ax.get_ylim()
    ax.plot([x1, x1 + 50], [y1, y1], '-', color='gray', lw=4)
    ax.annotate('50ms', (x1 + 20, y1 + .5), fontsize=14)

    if args.with_onset_propag:
        # NOTE(review): `time` is passed unmasked while the data is masked
        # by `cond`, and tshift is added to both `time` and `tt` - confirm
        # this matches what find_latencies_over_space_simple expects.
        tt, xx = find_latencies_over_space_simple(
            time, space, smooth_data[:, cond],
            signal_criteria=args.signal_criteria,
            amp_criteria=args.amp_criteria)
        plt.plot(tt + args.tshift, xx, 'o', lw=0, ms=1, color='k')

    set_plot(ax, xlabel='time (ms)', ylabel='space (mm)')
    if args.SAVE:
        fig.savefig('/Users/yzerlaut/Desktop/temp.svg')
    else:
        show()
def pp_trans_d_for_model(name):
    """Load a dataset and split it randomly into train/valid/test arrays.

    Rows are shuffled with ``random.shuffle``; the first 60% become the
    training set, the next 20% the validation set and the remainder the
    test set.  The first column of X is dropped and X is cast to float.
    Y is converted to floats and passed through ``uniform_distribution``.

    Args:
        name: dataset name handed to ``ds.get_dataset``.

    Returns:
        dict with keys 'train_X', 'train_Y', 'valid_X', 'valid_Y',
        'test_X' and 'test_Y'.
    """
    dataset = ds.get_dataset(ds.TO_TRANS_D.get('type'), name)
    size = dataset.datasize()

    # Build the row index and shuffle it.
    index_list = list(range(size))
    random.shuffle(index_list)
    train_size = int(size * 0.6)
    valid_size = int(size * 0.2)

    # `.values` replaces `.as_matrix()`, which pandas removed in 1.0.
    # Assumes dataset.X / dataset.Y are pandas objects - TODO confirm.
    X = dataset.X.values
    Y = dataset.Y.values

    # Drop the date field (first column), then change the dtype.
    X = X[:, 1:]
    X = X.astype('float')

    # An earlier version turned Y into int class labels and added 10
    # because TensorFlow needed labels greater than zero; Y is now kept as
    # floats and handed to uniform_distribution.
    Y = np.asarray(list(map(float, Y)))
    Y = uniform_distribution(Y, 21, 'n')
    print("Y shape is %s" % str(Y.shape))

    train_idx = index_list[:train_size]
    valid_idx = index_list[train_size:train_size + valid_size]
    test_idx = index_list[train_size + valid_size:]

    # Training set
    train_X = X[train_idx]
    train_Y = Y[train_idx]
    # Validation set
    valid_X = X[valid_idx]
    valid_Y = Y[valid_idx]
    # Test set
    test_X = X[test_idx]
    test_Y = Y[test_idx]

    print("orig X shape %s" % (str(X.shape)))
    print("orig Y shape %s" % (str(Y.shape)))
    print("train X shape %s" % (str(train_X.shape)))
    print("train Y shape %s" % (str(train_Y.shape)))
    print("valid X shape %s" % (str(valid_X.shape)))
    print("valid Y shape %s" % (str(valid_Y.shape)))
    print("test X shape %s" % (str(test_X.shape)))
    print("test Y shape %s" % (str(test_Y.shape)))

    return {
        "train_X": train_X,
        "train_Y": train_Y,
        "valid_X": valid_X,
        "valid_Y": valid_Y,
        "test_X": test_X,
        "test_Y": test_Y
    }
def predict_svd_90():
    """Predict ratings from an SVD truncated to 90% of its energy.

    The ratings matrix is decomposed with ``svd``.  Energy is the sum of
    the squared diagonal entries of S; the decomposition is cut at the
    first index where the cumulative energy exceeds 90% of the total, and
    the truncated factors are handed to ``predict_svd``.

    Returns:
        pred: per the original documentation, a list of
            ``[userid, prediction, itemid]`` lists.
        y: per the original documentation, the actual test-set ratings.
    """
    ratings = get_dataset()
    U, S, Vt = svd(ratings, 1)
    print(U.shape, S.shape, Vt.shape)

    # Squared diagonal entries, in order.
    energies = [S[k, k] * S[k, k] for k in range(S.shape[0])]
    total = sum(energies)

    # First index whose cumulative energy passes the 90% mark.
    ind = 0
    cumulative = 0
    for k, energy in enumerate(energies):
        cumulative += energy
        if cumulative / total > 0.9:
            ind = k
            break

    rank = ind + 1
    U = U[:, :rank]
    Vt = Vt[:rank]
    S = S[:rank][:, :rank]
    print(U.shape, S.shape, Vt.shape)

    pred, y = predict_svd(U, S, Vt)
    return pred, y
def train_validate(model, hp, args):
    """Train ``model`` with per-group learning rates and return its F1.

    Args:
        model: network exposing ``get_params(group_name)``.
        hp: mapping whose entries 'lmbda0'..'lmbda5' expose ``.value``;
            each value is used as a base-10 exponent.
        args: namespace providing ``device``, ``dataroot``, ``batch_size``
            and ``steps``.

    Returns:
        The F1 value returned by ``evaluate`` on the validation set.
    """
    # Frozen pretrained model, needed only to compute the layer vectors.
    pretrained = get_model_pretrained()
    for param in pretrained.parameters():
        param.requires_grad = False
    ptp = layer_vectors(pretrained, args.device, True)
    del pretrained

    trainset, validset, _subjects, class_weights = get_dataset(args.dataroot)
    class_weights = class_weights.to(args.device)

    train_loader = DataLoader(trainset, batch_size=args.batch_size,
                              num_workers=6, shuffle=True, drop_last=True)
    valid_loader = DataLoader(validset, batch_size=args.batch_size,
                              num_workers=6, shuffle=False)

    # Largest value first, so 'layer0' gets the largest rate.
    hp_keys = ['lmbda0', 'lmbda1', 'lmbda2', 'lmbda3', 'lmbda4', 'lmbda5']
    lmbdas = sorted([10**hp[key].value for key in hp_keys], reverse=True)

    group_names = ['layer0', 'layer1', 'layer2', 'layer3', 'layer4', 'cls']
    opt = torch.optim.Adam([
        {'params': model.get_params(group), 'lr': rate}
        for group, rate in zip(group_names, lmbdas)
    ])

    train(model, opt, args.steps, train_loader, class_weights, ptp, lmbdas,
          args.device)

    scores = evaluate(model, valid_loader, class_weights, args.device)
    valid_loss, cm, auc, prec, rec, f1 = scores
    return f1
def train(self, stop_width, save_folder, tf_folder, start_width, num_samples):
    """Grow the model one resolution at a time, training where requested.

    Resolutions run over powers of two from 4 up to ``stop_width``.  A
    resolution is added at every step; training only happens once
    ``self.current_resolution`` has reached ``log2(start_width)`` and is
    greater than 2.

    Args:
        stop_width: final image width.
        save_folder: forwarded to ``train_resolution``.
        tf_folder: not used in this method.
        start_width: width from which training starts.
        num_samples: number of latent samples generated per step.
    """
    n_replicas_msg = 'Number of devices: {}'.format(
        self.strategy.num_replicas_in_sync)
    print(n_replicas_msg, flush=True)

    start_res = math.log(start_width, 2)
    stop_res = math.log(stop_width, 2)
    resolutions = [2**exponent for exponent in np.arange(2, stop_res + 1)]

    for step, resolution in enumerate(resolutions):
        print('Processing step {}: resolution {} with max resolution {}'.
              format(step, resolution, resolutions[-1]),
              flush=True)
        self.add_resolution()

        batch_size = self.get_batchsize()
        global_batch_size = batch_size * self.strategy.num_replicas_in_sync
        epochs = self.get_epochs()

        latents = self.generator.generate_latents(num_samples=num_samples)
        distributed = self.strategy.experimental_distribute_dataset(
            dataset.get_dataset(latents, global_batch_size))
        print('**** Batch size : {} | **** Epochs : {}'.format(
            batch_size, epochs))

        should_train = (self.current_resolution >= start_res
                        and self.current_resolution > 2)
        if not should_train:
            continue
        self.train_resolution(distributed, global_batch_size, epochs,
                              save_folder, num_samples)
def get_beer_dataset(data_dir, max_seq_length, word_threshold, balance=False):
    """Load the beer-review examples and build the datasets from them.

    Label totals are printed for both splits.  When ``balance == True``
    the larger training class is down-sampled (fixed random seed) to the
    size of the smaller one.

    Args:
        data_dir: directory handed to ``BeerProcessor``.
        max_seq_length: forwarded to ``get_dataset``.
        word_threshold: forwarded to ``get_dataset``.
        balance: whether to class-balance the training examples.

    Returns:
        Whatever ``get_dataset`` returns for the train and dev examples.
    """
    def label_totals(examples):
        # Element-wise sum of the two-entry label vectors.
        totals = np.array([0., 0.])
        for example in examples:
            totals += example["label"]
        return totals

    processor = BeerProcessor()
    train_examples = processor.get_train_examples(data_dir)
    dev_examples = processor.get_dev_examples(data_dir)

    print("Dataset: Beer Review")
    print("Training samples %d, Validation sampels %d" %
          (len(train_examples), len(dev_examples)))

    # Index 0 counts negatives, index 1 positives.
    train_labels = label_totals(train_examples)
    print("Training data: %d positive examples, %d negative examples." %
          (train_labels[1], train_labels[0]))

    dev_labels = label_totals(dev_examples)
    print("Dev data: %d positive examples, %d negative examples." %
          (dev_labels[1], dev_labels[0]))

    if balance == True:
        random.seed(12252018)
        print("Make the Training dataset class balanced.")

        min_examples = int(min(train_labels[0], train_labels[1]))
        neg_examples = [ex for ex in train_examples if ex["label"][0] == 1]
        pos_examples = [
            ex for ex in train_examples if not ex["label"][0] == 1
        ]
        assert (len(neg_examples) == train_labels[0])
        assert (len(pos_examples) == train_labels[1])

        # Down-sample whichever class is larger (negatives on a tie).
        if train_labels[0] >= train_labels[1]:
            neg_examples = random.sample(neg_examples, min_examples)
        else:
            pos_examples = random.sample(pos_examples, min_examples)
        assert (len(pos_examples) == len(neg_examples))

        train_examples = pos_examples + neg_examples
        print(
            "After balance training data: %d positive examples, %d negative examples."
            % (len(pos_examples), len(neg_examples)))

    return get_dataset(train_examples, dev_examples, max_seq_length,
                       word_threshold)
def main(args):
    """Evaluate the saved model on 'dev-clean', optionally quantized.

    Args:
        args: namespace providing ``n_mfcc``, ``n_context``, ``quantize``,
            ``datadir`` and ``batch_size``; it is also passed to
            ``prepare_transformations``.
    """
    alphabet = alphabet_factory()
    device = torch.device('cpu')

    checkpoint = torch.load('model_best.pth', map_location=device)
    n_inputs = args.n_mfcc * (2 * args.n_context + 1)
    model = build_deepspeech(in_features=n_inputs,
                             num_classes=len(alphabet))
    model.load_state_dict(checkpoint['state_dict'])
    print_size_of_model(model)

    decoder = GreedyDecoder()

    if args.quantize:
        # Dynamic int8 quantization of the RNN and Linear modules.
        quantized_types = {nn.RNN, nn.Linear}
        model = torch.quantization.quantize_dynamic(model, quantized_types,
                                                    dtype=torch.qint8)
        logging.info('quantized model')
        print_size_of_model(model)

    transform = prepare_transformations(args)
    raw_dataset = get_dataset(args.datadir, "dev-clean")
    dataset = ProcessedDataset(raw_dataset, transform, alphabet)
    collate_fn = collate_factory(model_length_function)
    blank_index = alphabet.mapping[alphabet.char_blank]
    criterion = nn.CTCLoss(blank=blank_index)

    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=0,
        collate_fn=collate_fn,
        drop_last=False,
    )
    test_loop_fn(dataloader, model, criterion, device, 1, decoder, alphabet)
def test():
    """Measure the accuracy of a frozen graph on the test split.

    The graph at ``FLAGS.frozen_pb`` is imported, its output is passed
    through a sigmoid and rounded, and element-wise agreement with the
    labels is averaged per batch.  The mean over all batches is printed.
    """
    sess = tf.InteractiveSession()

    # Test data: only the third element returned by get_dataset is used.
    _, _, ds_test = dataset.get_dataset()
    ds_test_iterator = ds_test.make_initializable_iterator()
    next_test_images, next_test_labels = ds_test_iterator.get_next()
    ds_test_iterator.initializer.run()

    # Restore the frozen graph; the file is closed as soon as it is read.
    with open(FLAGS.frozen_pb, 'rb') as frozen_file:
        gd = tf.GraphDef.FromString(frozen_file.read())
    images, logits = tf.import_graph_def(
        gd, return_elements=['images:0', FLAGS.output_node + ':0'])

    labels = tf.placeholder(tf.float32, [BATCH_SIZE, NUM_CLASSES],
                            name='labels')
    correct_pred = tf.equal(labels, tf.round(tf.sigmoid(logits)))
    acc_op = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # One evaluation per batch-sized step through the test set.
    total_test_acc = []
    for _ in range(0, TEST_SIZE, BATCH_SIZE):
        images_batch, labels_batch = sess.run(
            [next_test_images, next_test_labels])
        test_acc = acc_op.eval(feed_dict={
            images: images_batch,
            labels: labels_batch
        })
        total_test_acc.append(test_acc)
    print('total_test_acc', np.mean(total_test_acc))
def main():
    """Tag every test batch and write the result to 'res_atis.txt'.

    For each item of the test iterator the restored tagger decodes a tag
    per token.  Tokens and tags are mapped back to strings through the
    field vocabularies and written as "token tag" lines, with two blank
    lines after each item.
    """
    sentence, char_sentence, tags, _, _, test_iter = get_dataset(
        BASE_PATH, "atis", BATCH_SIZE, is_inference=True)
    tagger = restore_model("models/ner_cnn-bilstm-crf_*",
                           restore=RESTORED_MODEL)

    result_format = "{} {}\n"
    chunks = []
    for item in test_iter:
        words, sent_len = item.sentence[0], item.sentence[1]
        char_rep = item.char_sentence[0]

        decoded = tagger.decode(char_rep, words, sent_len.numpy())
        result = torch.tensor(decoded, dtype=torch.int32)

        word_ids = words.squeeze(0).numpy().tolist()
        tag_ids = result.squeeze(-1).numpy().tolist()

        lines = [
            result_format.format(sentence.vocab.itos[word_id],
                                 tags.vocab.itos[tag_id])
            for word_id, tag_id in zip(word_ids, tag_ids)
        ]
        # Two blank lines separate consecutive items.
        chunks.append("".join(lines) + "\n\n")

    final_result = "".join(chunks)
    with open(BASE_PATH + "res_atis.txt", "w") as text_file:
        text_file.write(final_result)
def main():
    """Run training and/or testing according to the command-line mode.

    Modes (``args.mode``):
        'all':   train from scratch, then test the resulting model.
        'train': train, starting from the loaded model if one was given.
        'test':  test the loaded model, logging to "log/test.log".
    Any other mode prints "Wrong arguments!".
    """
    args = parse_args()

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    data = get_dataset(shrink=200)

    if args.model_fn is not None:
        model = torch.load(args.model_fn)
    else:
        model = None

    if args.mode == 'all':
        model = train(args.epoch, data)
        test(model, data)
    elif args.mode == 'train':
        train(args.epoch, data, model)
    elif args.mode == 'test':
        # The log file is closed once testing finishes, even on error.
        with open("log/test.log", "w") as f_log:
            test(model, data, ep=0, iter=0, test_num=10, test_size=50,
                 f_log=f_log)
    else:
        print("Wrong arguments!")
def full_analysis(args):
    """Run ``analyze_scan`` once for every entry of the dataset.

    ``args.data_index`` is overwritten with each index in turn, so it
    holds the last index when the function returns.
    """
    n_cells = len(get_dataset())
    for cell_index in range(n_cells):
        print('analyzing cell ', cell_index, ' [...]')
        args.data_index = cell_index
        analyze_scan(args)
def fitness(lr, l2_reg, dropout):
    """Train a MultiVAE on Gowalla with the given hyper-parameters.

    Args:
        lr: learning rate placed in the trainer configuration.
        l2_reg: L2 regularisation value placed in the trainer configuration.
        dropout: dropout value placed in the model configuration.

    Returns:
        Whatever ``trainer.train(verbose=True)`` returns.
    """
    set_seed(2021)
    device = torch.device('cuda')

    dataset_config = dict(
        name='ProcessedDataset',
        path='data/Gowalla/time',
        device=device,
    )
    model_config = dict(
        name='MultiVAE',
        layer_sizes=[64, 32],
        device=device,
        dropout=dropout,
    )
    trainer_config = dict(
        name='MLTrainer',
        optimizer='Adam',
        lr=lr,
        l2_reg=l2_reg,
        kl_reg=0.2,
        device=device,
        n_epochs=1000,
        batch_size=512,
        dataloader_num_workers=6,
        test_batch_size=512,
        topks=[20],
    )

    dataset = get_dataset(dataset_config)
    model = get_model(model_config, dataset)
    trainer = get_trainer(trainer_config, dataset, model)
    return trainer.train(verbose=True)
def eval(path, sheet, epoch):
    """Evaluate a saved CR/SR network pair on the 'MPEG' dataset.

    Each sample is passed through the CR network, clamped, encoded with
    ``encode(ds, 25)``, passed through the SR network and compared with
    the input.  Per-sample bpp and PSNR are printed, followed by their
    averages over the dataset.

    Args:
        path: checkpoint file holding the 'epoch', 'cr' and 'sr' entries.
        sheet: not used in this function.
        epoch: not used in this function.
    """
    dset = dataset.get_dataset(1, 'MPEG', False)
    cr = CNNCRluma().cuda()
    sr = CNNSRluma().cuda()

    # Read the checkpoint once; it was previously loaded three times.
    checkpoint = torch.load(path)
    print(checkpoint['epoch'])
    cr.load_state_dict(checkpoint['cr'])
    sr.load_state_dict(checkpoint['sr'])
    cr.eval()
    sr.eval()

    total, rate = 0.0, 0.0
    for iteration, data in enumerate(dset, 1):
        image, name = data[0].cuda(), data[1]
        with torch.no_grad():
            # Down-sample; clamp because interpolation can leave [0, 1].
            ds, _ = cr(image)
            ds = ds.clamp(0, 1)
            # Code the down-sampled image.
            coded, bpp = encode(ds, 25)
            # Up-sample the decoded image.
            us = sr(coded).clamp(0, 1)

        out = transforms.ToPILImage(mode='L')(us[0][0].cpu())
        gt = transforms.ToPILImage(mode='L')(image[0][0].cpu())
        psnr = get_metrics(gt, out, False)[0]
        total += psnr
        rate += bpp
        print('Bpp: {} --- PSNR: {}'.format(bpp, psnr))
        torch.cuda.empty_cache()

    print(rate/len(dset))
    print(total/len(dset))
def _read_config(self):
    """Parse ``self.config_file`` line by line into attributes.

    Lines starting with '#' are skipped.  Recognised line prefixes:

    * ``[PARAS] alpha gamma max_iters`` -> float, float, int.
    * ``[DATASET] name v1 v2 ...`` -> loads the data through
      ``ds.get_dataset`` and derives ``n_views`` and ``dims``.
    * ``[GROUPS] a b; c d; ...`` -> tuples of view indices appended to
      ``self.groups``; ``group_size[view]`` is the size of its group.
    * ``[INTERACTS] a b; c d; ...`` -> tuples appended to
      ``self.interacts``.
    """
    with open(self.config_file, 'r') as config:
        for line in config:
            if line.startswith('#'):
                continue

            if line.startswith('[PARAS]'):
                fields = line.strip()[len('[PARAS]'):].strip().split()
                self.alpha, self.gamma, self.max_iters = fields
                self.alpha = float(self.alpha)
                self.gamma = float(self.gamma)
                self.max_iters = int(self.max_iters)

            elif line.startswith('[DATASET]'):
                fields = line.strip()[len('[DATASET]'):].strip().split()
                data_name, view_tokens = fields[0], fields[1:]
                data_views = [int(token) for token in view_tokens]
                loaded = ds.get_dataset(name=data_name, views=data_views)
                self.X, self.truth, self.n_clusters, self.n_samples = loaded
                # One entry of X per view; dims holds each view's
                # second-axis size.
                self.n_views = len(self.X)
                self.dims = [view.shape[1] for view in self.X]

            elif line.startswith('[GROUPS]'):
                self.group_size = [0] * self.n_views
                for chunk in line.strip()[len('[GROUPS]'):].split(';'):
                    members = tuple(int(t) for t in chunk.strip().split())
                    self.groups.append(members)
                    for view in members:
                        self.group_size[view] = len(members)
                self.n_groups = len(self.groups)

            elif line.startswith('[INTERACTS]'):
                for chunk in line.strip()[len('[INTERACTS]'):].split(';'):
                    pair = tuple(int(t) for t in chunk.strip().split())
                    self.interacts.append(pair)
                # NOTE(review): assumed to run once per [INTERACTS] line;
                # confirm against the original layout.
                self._collect_interacts_items()