def train():
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(max_to_keep=max_to_keep)
    utils.load_ckpt(sess, ckpt_dir, saver)

    best_loss = 1e6
    start_epoch = 0
    if history_file.exists():
        # Resume best loss and epoch counter from the training history CSV
        df = pd.read_csv(history_file)
        best_loss = df['best_loss'].min()
        start_epoch = int(df.iloc[-1]['epoch']) + 1

    print('Training ...')
    start_time = time.time()
    for epoch in range(start_epoch, num_epochs):
        train_loss, train_lr = train_one_epoch(sess, epoch, saver)
        val_loss, best_loss = val_one_epoch(sess, epoch, saver, best_loss)

        csv_header = ['epoch', 'lr', 'train_loss', 'val_loss', 'best_loss']
        csv_values = [epoch, train_lr, train_loss, val_loss, best_loss]
        utils.log_csv(history_file, csv_values,
                      header=csv_header if epoch == 0 else None)
        print(f'[{opt.area}-{opt.mode}] Epoch {epoch} '
              f'loss: {train_loss:.6f}, val loss: {val_loss:.6f}, '
              f'duration: {time.time() - start_time:.3f}s')
    print('Training completed...')
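# A minimal sketch of the utils.log_csv helper assumed above: append one row
# to the history CSV, writing the header first when one is given. The exact
# signature is an assumption inferred from the call site.
import csv

def log_csv(path, values, header=None):
    with open(path, 'a', newline='') as f:
        w = csv.writer(f)
        if header is not None:
            w.writerow(header)
        w.writerow(values)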
def test(model, data_loader, cfg, test_args):
    if test_args['load_ckpt'] is not None:
        load_ckpt(test_args, model)
    model.eval()

    error_total = {'err_absRel': 0.0, 'err_squaRel': 0.0, 'err_rms': 0.0,
                   'err_silog': 0.0, 'err_logRms': 0.0, 'err_silog2': 0.0,
                   'err_delta1': 0.0, 'err_delta2': 0.0, 'err_delta3': 0.0,
                   'err_log10': 0.0, 'err_whdr': 0.0}
    n_pxl_total = 0
    eval_num_total = 0

    for i, data in enumerate(tqdm(data_loader)):
        output = model.inference(data)
        pred_depth = torch.squeeze(output['b_fake'])
        img_path = data['A_paths']
        invalid_side = data['invalid_side'][0]
        # Crop the invalid border rows, undo the resize ratio, then bring the
        # prediction back to the raw ground-truth resolution
        pred_depth = pred_depth[invalid_side[0]:pred_depth.size(0) - invalid_side[1], :]
        pred_depth = pred_depth / data['ratio'].to(cfg['device'])  # scale the depth
        pred_depth = resize_image(pred_depth, torch.squeeze(data['B_raw']).shape)

        if i % 10 == 0:
            Img = vutils.make_grid(data['A'].data.cpu(), normalize=True, scale_each=True)
            GT_depth = vutils.make_grid(data['B_raw'].data.cpu(), normalize=True, scale_each=True)
            Estimated_depth = vutils.make_grid(torch.from_numpy(pred_depth), normalize=True, scale_each=True)
            Edge = vutils.make_grid(data['E'].unsqueeze(1).repeat(1, 3, 1, 1).data.cpu(),
                                    normalize=True, scale_each=True)
            writer.add_image('RGB', Img, i)
            writer.add_image('GT_Depth', GT_depth, i)
            writer.add_image('Predicted_Depth', Estimated_depth, i)
            writer.add_image('Edge', Edge, i)

        error_batch, n_pxl, eval_num = evaluate_err(pred_depth, data['B_raw'],
                                                    mask=(45, 471, 41, 601), scale=10.)
        for k in error_total:
            error_total[k] += error_batch[k]
        n_pxl_total += n_pxl
        eval_num_total += eval_num

    error = calculate_average_error(error_total, n_pxl_total, eval_num_total)
    print('----------------------------------------------------------')
    print('absREL: %f' % error['err_absRel'])
    # silog is reported as sqrt(E[d^2] - (E[d])^2)
    print('silog: %f' % np.sqrt(error['err_silog2'] - (error['err_silog']) ** 2))
    print('log10: %f' % error['err_log10'])
    print('RMS: %f' % error['err_rms'])
    print('delta1: %f' % error['err_delta1'])
    print('delta2: %f' % error['err_delta2'])
    print('delta3: %f' % error['err_delta3'])
    print('squaRel: %f' % error['err_squaRel'])
    print('logRms: %f' % error['err_logRms'])
    print('----------------------------------------------------------')

    del error, output, pred_depth, img_path, invalid_side
    model.train()
def main():
    args = parser.parse_args()
    with open(args.config) as f:
        config = yaml.safe_load(f)
    for k, v in config['common'].items():
        setattr(args, k, v)

    if not (args.model_path and os.path.isfile(args.model_path)):
        print("=> no checkpoint found at '{}'".format(args.model_path))
        return

    # create model
    print("=> creating model '{}'".format(args.arch))
    model = model_zoo[args.arch](num_classes=args.num_classes)
    model = torch.nn.DataParallel(model).cuda()
    load_ckpt(args.model_path, model)

    torch_in = torch.rand(1, 3, args.input_size, args.input_size).cuda()
    model = model.module  # unwrap DataParallel before export
    model.eval()

    print("Exporting onnx model to {}".format(args.onnx_path))
    torch.onnx.export(model, torch_in, args.onnx_path, verbose=True, opset_version=7)

    transforms = [
        {'resize': args.image_size},
        {'center_crop': args.input_size},
        {'to_tensor': None},
        {'normalize': {'mean': [0.485, 0.456, 0.406],
                       'std': [0.229, 0.224, 0.225]}},
    ]
    config = {'transforms': transforms}
    onnx_config_path = args.onnx_path.replace('.onnx', '-cfg.json')
    print("Exporting onnx model config to {}".format(onnx_config_path))
    with open(onnx_config_path, 'w') as f:
        json.dump(config, f)
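# Optional sanity check (not in the original): run the exported graph with
# onnxruntime and compare against the PyTorch output. onnxruntime must be
# installed; the tolerances are a judgment call, since opset-7 kernels may
# differ slightly from PyTorch's.
import numpy as np
import onnxruntime as ort

def check_onnx(onnx_path, model, torch_in):
    sess = ort.InferenceSession(onnx_path, providers=['CPUExecutionProvider'])
    input_name = sess.get_inputs()[0].name
    onnx_out = sess.run(None, {input_name: torch_in.cpu().numpy()})[0]
    torch_out = model(torch_in).detach().cpu().numpy()
    np.testing.assert_allclose(torch_out, onnx_out, rtol=1e-3, atol=1e-5)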
def __init__(self, model, batcher, src_vocab, tgt_vocab, ckpt_id, output_beams=False):
    self._model = model
    self._model()  # build the graph
    self._batcher = batcher
    self._src_vocab = src_vocab
    self._tgt_vocab = tgt_vocab
    self.output_beams = output_beams
    self._saver = tf.train.Saver()
    self._sess = tf.Session(config=utils.get_config())
    ckpt_path = utils.load_ckpt(self._saver, self._sess, "train", ckpt_id)
    ckpt_name = "ckpt-" + ckpt_path.split('-')[-1]

    self._decode_dir = os.path.join(model.hps.model_path,
                                    get_decode_dir_name(ckpt_name, model.hps))
    if os.path.exists(self._decode_dir):
        raise Exception("single_pass decode directory %s should "
                        "not already exist" % self._decode_dir)
    os.mkdir(self._decode_dir)

    self._ref_dir = os.path.join(self._decode_dir, "reference")
    if not os.path.exists(self._ref_dir):
        os.mkdir(self._ref_dir)
    self._dec_dir = os.path.join(self._decode_dir, "decoded")
    if not os.path.exists(self._dec_dir):
        os.mkdir(self._dec_dir)
    self._summary_path = os.path.join(self._decode_dir, "summary.txt")
def bitcoin(args):
    A, X = utils.load_XA(args.dataset, datadir="../Generate_XA_Data/XAL")
    L = utils.load_labels(args.dataset, datadir="../Generate_XA_Data/XAL")
    num_classes = max(L) + 1
    input_dim = X.shape[1]
    num_nodes = X.shape[0]
    ckpt = utils.load_ckpt(args)
    print("input dim: ", input_dim, "; num classes: ", num_classes)

    model = models.GcnEncoderNode(
        input_dim=input_dim,
        hidden_dim=args.hidden_dim,
        embedding_dim=args.output_dim,
        label_dim=num_classes,
        num_layers=args.num_gc_layers,
        bn=args.bn,
        args=args,
    )
    model.load_state_dict(ckpt["model_state"])
    pred = ckpt["save_data"]["pred"]

    explainer = pe.Node_Explainer(model, A, X, pred, args.num_gc_layers)
    # Explain every node with degree > 2
    node_to_explain = [i for [i] in np.argwhere(np.sum(A, axis=0) > 2)]
    explanations = explainer.explain_range(node_to_explain,
                                           num_samples=args.num_perturb_samples,
                                           top_node=args.top_node)
    print(explanations)

    savename = utils.gen_filesave(args)
    np.save(savename, explanations)
def main():
    global args
    args = parser.parse_args()
    assert args.output_path.endswith('.npy')

    print("=> creating model '{}'".format(args.arch))
    model = models.__dict__[args.arch](feature_dim=args.feature_dim)
    model = IdentityMapping(model)
    model = torch.nn.DataParallel(model).cuda()
    if args.load_path:
        # Skip the classification head: only the backbone weights are needed
        classifier_keys = ['module.logits.weight', 'module.logits.bias']
        load_ckpt(args.load_path, model, ignores=classifier_keys, strict=True)
    cudnn.benchmark = True

    normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.25, 0.25, 0.25])
    test_loader = DataLoader(
        BinDataset(args.bin_file,
                   transforms.Compose([
                       transforms.Resize(args.input_size),
                       transforms.ToTensor(),
                       normalize,
                   ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    features = extract(test_loader, model)
    assert features.shape[1] == args.feature_dim

    print('saving extracted features to {}'.format(args.output_path))
    folder = os.path.dirname(args.output_path)
    if folder != '' and not os.path.exists(folder):
        os.makedirs(folder)
    np.save(args.output_path, features)
def proc_func(infile, outfile, csv_path, csv_index):
    img_path = img_folder + infile.split('.')[0] + '/'
    video2frames(src_folder + infile, img_path)
    feature = OpticalFlowAnalyzer(img_path).analyze()
    np.savez(dst_folder + outfile, feature)

    # Advance the checkpoint index only if we are still on the same CSV chunk
    [csv_old, index_old] = load_ckpt(ckpt_path).split('#')
    ckpt_index = str(max(csv_index, int(index_old)) if csv_path == csv_old else csv_index)
    ckpt_info = csv_path + '#' + ckpt_index
    save_ckpt(ckpt_info, ckpt_path)
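# A minimal sketch of the save_ckpt / load_ckpt pair these preprocessing
# scripts rely on: the checkpoint is a one-line text file holding
# "<csv_chunk>#<row_index>". The file format is an assumption inferred from
# the call sites.
import os

def load_ckpt(path):
    if not os.path.exists(path):
        return None
    with open(path) as f:
        return f.read().strip()

def save_ckpt(info, path):
    with open(path, 'w') as f:
        f.write(info)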
def evaluate_bitcoin_explanation(explanations, args):
    # Get predictions
    ckpt = utils.load_ckpt(prog_args)
    pred = ckpt["save_data"]["pred"]
    pred_label = [np.argmax(p) for p in pred[0]]

    # Get ground truth
    filename_pos = os.path.join(
        '../Generate_XA_Data/ground_truth_explanation/' + prog_args.dataset,
        prog_args.dataset + '_pos.csv')
    filename_neg = os.path.join(
        '../Generate_XA_Data/ground_truth_explanation/' + prog_args.dataset,
        prog_args.dataset + '_neg.csv')
    df_pos = pd.read_csv(filename_pos, header=None, index_col=0, squeeze=True).to_dict()
    df_neg = pd.read_csv(filename_neg, header=None, index_col=0, squeeze=True).to_dict()

    # Evaluate
    pred_pos = 0
    true_pos = 0
    for node in explanations:
        # Pick the ground-truth list that matches the predicted class
        buff_str = df_neg[node] if pred_label[node] == 0 else df_pos[node]
        buff_str = buff_str.replace('[', '').replace(']', '')
        gt = [int(s) for s in buff_str.split(',')]
        for e in explanations[node]:
            pred_pos += 1
            if e in gt:
                true_pos += 1
    precision = true_pos / pred_pos
    print("Explainer's precision is ", precision)

    savedir = 'result/'
    if args.top_node is None:
        top = "no_top"
    else:
        top = "top_" + str(args.top_node)
    report_file = os.path.join(savedir, 'report_' + args.dataset + ".txt")
    with open(report_file, "a") as text_file:
        text_file.write(args.dataset + ", " + str(args.num_perturb_samples)
                        + " samples, " + top + " | Precision: " + str(precision) + "\n")
        text_file.write("\n")
def run_eval(model, batcher, ckpt_id):
    model()  # build the graph
    saver = tf.train.Saver(max_to_keep=3)
    sess = tf.Session(config=utils.get_config())
    eval_dir = os.path.join(args.model_path, "eval")
    bestmodel_save_path = os.path.join(eval_dir, 'bestmodel')
    summary_writer = tf.summary.FileWriter(eval_dir)
    running_avg_loss = 0
    best_loss = None
    batch_cnt = 0

    while True:
        # When evaluating a fixed checkpoint, stop after 100 batches
        if ckpt_id != -1 and batch_cnt > 100:
            break
        batch_cnt += 1
        _ = utils.load_ckpt(args, saver, sess, "train", ckpt_id)
        batch = batcher.next_batch()

        # run eval on the batch
        t0 = time.time()
        results = model.run_step(sess, batch)
        t1 = time.time()
        tf.logging.info('seconds for batch: %.2f', t1 - t0)

        # print the loss to screen
        loss = results['loss']
        tf.logging.info('batch_id: %d (100)\tloss: %f', batch_cnt, loss)

        # add summaries
        summaries = results['summaries']
        train_step = results['global_step']
        summary_writer.add_summary(summaries, train_step)

        # calculate running avg loss
        running_avg_loss = calc_running_avg_loss(np.asscalar(loss),
                                                 running_avg_loss,
                                                 summary_writer, train_step)

        # If running_avg_loss is best so far, save this checkpoint (early stopping).
        # These checkpoints will appear as bestmodel-<iteration_number> in the eval dir
        if best_loss is None or running_avg_loss < best_loss:
            tf.logging.info('Found new best model with %.3f running_avg_loss. Saving to %s',
                            running_avg_loss, bestmodel_save_path)
            saver.save(sess, bestmodel_save_path, global_step=train_step,
                       latest_filename='checkpoint_best')
            best_loss = running_avg_loss

        # flush the summary writer every so often
        if train_step % 100 == 0:
            summary_writer.flush()
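# A sketch of calc_running_avg_loss in the style of the pointer-generator
# codebase this loop resembles: an exponential moving average of the loss,
# clipped for stability and logged to TensorBoard. The decay value and the
# clip threshold are assumptions.
def calc_running_avg_loss(loss, running_avg_loss, summary_writer, step, decay=0.99):
    if running_avg_loss == 0:  # on the first iteration just take the loss
        running_avg_loss = loss
    else:
        running_avg_loss = running_avg_loss * decay + (1 - decay) * loss
    running_avg_loss = min(running_avg_loss, 12)  # clip
    loss_sum = tf.Summary()
    loss_sum.value.add(tag='running_avg_loss/decay=%f' % decay,
                       simple_value=running_avg_loss)
    summary_writer.add_summary(loss_sum, step)
    return running_avg_loss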
def train(model, data_loader_train, data_loader_test, optimizer,
          criterion_1, criterion_2, cfg, train_args, test_args):
    if train_args['load_ckpt'] is not None:
        load_ckpt(train_args, model)
    model.train()
    lr = train_args['lr']

    for epoch in range(train_args['epoch']):
        print('epoch #: %d' % epoch)
        for i, data in tqdm(enumerate(data_loader_train)):
            output, pred_depth = model.train_nyuv2(data)
            output_softmax = output['b_fake_softmax']
            output_logit = output['b_fake_logit'].cpu()

            # Classification loss on depth bins plus gradient loss on edges
            loss_1 = criterion_1(output_logit, data['B_bins'].squeeze().long())
            loss_2 = criterion_2(imgrad_yx(pred_depth.cpu().clone()), data['E'].cpu())
            loss = loss_1 + loss_2

            # zero_grad must come before backward; in the original order the
            # fresh gradients were wiped before the optimizer step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            lr = poly_lr_scheduler(optimizer, train_args['lr'],
                                   i + epoch * len(data_loader_train))

            if i % 10 == 0:
                step = i + epoch * len(data_loader_train)
                Img = vutils.make_grid(data['A'].data.cpu(), normalize=True, scale_each=True)
                GT_depth = vutils.make_grid(data['B'].data.cpu(), normalize=True, scale_each=True)
                Estimated_depth = vutils.make_grid(pred_depth.data.cpu(), normalize=True, scale_each=True)
                Edge = vutils.make_grid(data['E'].unsqueeze(1).repeat(1, 3, 1, 1).data.cpu(),
                                        normalize=True, scale_each=True)
                inputs = vutils.make_grid((data['A'] * data['E'].unsqueeze(1).repeat(1, 3, 1, 1)).data.cpu(),
                                          normalize=True, scale_each=True)
                writer.add_image('RGB', Img, step)
                writer.add_image('GT_Depth', GT_depth, step)
                writer.add_image('Predicted_Depth', Estimated_depth, step)
                writer.add_image('Edge', Edge, step)
                writer.add_image('inputs', inputs, step)

            del output['b_fake_softmax'], output_softmax, \
                output['b_fake_logit'], output_logit, pred_depth, loss

        print(lr)
        test(model, data_loader_test, cfg, test_args)
        save_ckpt(train_args['batchsize'], save_dir='.',
                  step=i + epoch * len(data_loader_train),
                  epoch=epoch, model=model, optimizer=optimizer)
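# A minimal sketch of poly_lr_scheduler under the usual polynomial-decay
# schedule lr = init_lr * (1 - step / max_step) ** power. The max_step and
# power defaults are assumptions, since the call site above passes only three
# arguments.
def poly_lr_scheduler(optimizer, init_lr, step, max_step=100000, power=0.9):
    lr = init_lr * (1 - min(step, max_step) / max_step) ** power
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr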
def proc_func(infile, outfile, csv_path, csv_index):
    try:
        audio_analyzer = AudioAnalyzer(src_folder + infile)
        audio_analyzer.compute_features()
        feature = audio_analyzer.analyze()
        np.savez(dst_folder + outfile, **feature)
    except Exception:
        # Skip unreadable or corrupt audio files
        pass

    [csv_old, index_old] = load_ckpt(ckpt_path).split('#')
    ckpt_index = str(max(csv_index, int(index_old)) if csv_path == csv_old else csv_index)
    ckpt_info = csv_path + '#' + ckpt_index
    save_ckpt(ckpt_info, ckpt_path)
def task_syn(args):
    A, X = utils.load_XA(args.dataset, datadir="../Generate_XA_Data/XAL")
    L = utils.load_labels(args.dataset, datadir="../Generate_XA_Data/XAL")
    num_classes = max(L) + 1
    input_dim = X.shape[1]
    ckpt = utils.load_ckpt(args)
    print("input dim: ", input_dim, "; num classes: ", num_classes)

    model = models.GcnEncoderNode(
        input_dim=input_dim,
        hidden_dim=args.hidden_dim,
        embedding_dim=args.output_dim,
        label_dim=num_classes,
        num_layers=args.num_gc_layers,
        bn=args.bn,
        args=args,
    )
    model.load_state_dict(ckpt["model_state"])
    pred = ckpt["save_data"]["pred"]

    explainer = pe.Node_Explainer(model, A, X, pred, args.num_gc_layers)
    explanations = {}
    if args.explain_node is None:
        # Each synthetic dataset has its own range of motif nodes to explain
        if args.dataset == 'syn1':
            explanations = explainer.explain_range(
                list(range(300, 700)),
                num_samples=args.num_perturb_samples, top_node=args.top_node)
        elif args.dataset == 'syn2':
            explanations = explainer.explain_range(
                list(range(300, 700)) + list(range(1000, 1400)),
                num_samples=args.num_perturb_samples, top_node=args.top_node,
                pred_threshold=0.1)
        elif args.dataset == 'syn3':
            explanations = explainer.explain_range(
                list(range(300, 1020)),
                num_samples=args.num_perturb_samples, top_node=args.top_node,
                pred_threshold=0.05)
        elif args.dataset == 'syn4':
            explanations = explainer.explain_range(
                list(range(511, 871)),
                num_samples=args.num_perturb_samples, top_node=args.top_node,
                pred_threshold=0.1)
        elif args.dataset == 'syn5':
            explanations = explainer.explain_range(
                list(range(511, 1231)),
                num_samples=args.num_perturb_samples, top_node=args.top_node,
                pred_threshold=0.05)
        elif args.dataset == 'syn6':
            explanations = explainer.explain_range(
                list(range(300, 700)),
                num_samples=args.num_perturb_samples, top_node=args.top_node)
    else:
        explanation = explainer.explain(args.explain_node,
                                        num_samples=args.num_perturb_samples,
                                        top_node=args.top_node)
        print(explanation)
        explanations[args.explain_node] = explanation

    print(explanations)
    savename = utils.gen_filesave(args)
    np.save(savename, explanations)
def __init__(self, model, batcher, vocab, ckpt_id=None, fw_sess=None,
             bw_model=None, bw_sess=None, bidi_ckpt_path=None):
    self.model = model
    self.batcher = batcher
    self.vocab = vocab
    self.sess = tf.Session(config=utils.gpu_config()) if fw_sess is None else fw_sess
    self.sess2 = bw_sess
    self.bw_model = bw_model

    if bw_model is None:
        ckpt_path = utils.load_ckpt(self.model.hps, self.model.saver, self.sess)
        print('Checkpoint path name: {}'.format(ckpt_path))
        ckpt_name = 'ckpt-' + ckpt_path.split('-')[-1]
    else:
        ckpt_name = 'ckpt-' + bidi_ckpt_path.split('-')[-1]

    self.decode_dir = os.path.join(model.hps.model_path,
                                   make_decode_dir_name(ckpt_name, model.hps))
    if not os.path.exists(self.decode_dir):
        os.makedirs(self.decode_dir)
def convert_to_coverage_model():
    """Load non-coverage checkpoint, add initialized extra variables for
    coverage, and save as new checkpoint"""
    print("converting non-coverage model to coverage model..")

    # initialize an entire coverage model from scratch
    sess = tf.Session(config=utils.get_config())
    print("initializing everything...")
    sess.run(tf.global_variables_initializer())

    # load all non-coverage weights from checkpoint
    saver = tf.train.Saver([v for v in tf.global_variables()
                            if "coverage" not in v.name and "Adagrad" not in v.name])
    print("restoring non-coverage variables...")
    curr_ckpt = utils.load_ckpt(saver, sess)
    print("restored.")

    # save this model and quit
    new_fname = curr_ckpt + '_cov_init'
    print("saving model to %s..." % new_fname)
    new_saver = tf.train.Saver()  # this one will save all variables that now exist
    new_saver.save(sess, new_fname)
    print("saved.")
    exit()
def main(): print("Hello!") voca = Vocab(args.vocab_fname) model = Model(args, voca) batcher = Batcher(voca, args) with tf.Session(config=GPU_config()) as sess: model.build_graph() if args.mode == 'train': sess.run(tf.global_variables_initializer()) if not os.path.exists(args.train_logdir): os.makedirs(args.train_logdir) if not os.path.exists(args.valid_logdir): os.makedirs(args.valid_logdir) train_writer, valid_writer = tf.summary.FileWriter( args.train_logdir, sess.graph), tf.summary.FileWriter(args.valid_logdir, sess.graph) t = trange(args.max_step, leave=True) for i in t: sample, label = batcher.next_data() _, loss, step, summaries = model.run_train_step(sample, sess) t.set_description('Train loss: {}'.format(round(loss, 3))) train_writer.add_summary(summaries, step) if step % 5e3 == 0: model.saver.save(sess, args.model_path, step) if step % 5 == 0: valid_sample, valid_label = batcher.next_data( is_valid=True) loss, step, summaries = model.run_eval_step( valid_sample, sess) valid_writer.add_summary(summaries, step) t.set_description('Valid loss: {}'.format(round(loss, 3))) if step % 100 == 0: near_ids, near_words = model.get_nearest_words( sess, args.near_K) pprint(near_words) score = coherence_score(args.test_bin_fname, voca, near_ids) summary = tf.Summary() summary.value.add(tag='coherence_score_{}k'.format( args.near_K), simple_value=score) valid_writer.add_summary(summary, step) else: load_ckpt(args.model_path, sess, model.saver) near_words_dict = {i: [] for i in range(args.aspect_num)} for k in range(5, 50, 5): near_ids, near_words = model.get_nearest_words(sess, k) score = coherence_score(args.test_bin_fname, voca, near_ids) print(k, score) for asp_idx in near_words: for word in near_words[asp_idx]: if word not in near_words_dict[asp_idx]: near_words_dict[asp_idx].append(word) with open(args.nearword_fname, 'w') as f: for idx in range(len(list(near_words_dict.keys()))): print(near_words_dict[idx]) f.write(str(idx) + ' ') f.write(' '.join(near_words_dict[idx][:5])) f.write('\n')
def main():
    start_time = time()
    last_step = get_last_ckpt_step()
    assert last_step >= 0
    my_log(f'Checkpoint found: {last_step}\n')
    print_args()

    net_init, net_apply, net_init_cache, net_apply_fast = get_net()
    params = load_ckpt(last_step)
    in_shape = (args.batch_size, args.L, args.L, 1)
    _, cache_init = net_init_cache(params, jnp.zeros(in_shape), (-1, -1))
    # sample_raw_fun = get_sample_fun(net_apply, None)
    sample_raw_fun = get_sample_fun(net_apply_fast, cache_init)
    # sample_k_fun = get_sample_k_fun(net_apply, None)
    sample_k_fun = get_sample_k_fun(net_apply_fast, net_init_cache)
    log_q_fun = get_log_q_fun(net_apply)

    @jit
    def update(spins_old, log_q_old, energy_old, step, accept_count,
               energy_mean, energy_var_sum, rng):
        rng, rng_k, rng_sample, rng_accept = jrand.split(rng, 4)
        k = get_k(rng_k)
        spins = sample_k_fun(k, params, spins_old, rng_sample)
        log_q = log_q_fun(params, spins)
        energy = energy_fun(spins)

        # Metropolis-Hastings acceptance with the network as the proposal
        log_uniform = jnp.log(jrand.uniform(rng_accept, (args.batch_size, )))
        accept = log_uniform < (log_q_old - log_q +
                                args.beta * (energy_old - energy))
        spins = jnp.where(jnp.expand_dims(accept, axis=(1, 2, 3)), spins, spins_old)
        log_q = jnp.where(accept, log_q, log_q_old)
        energy = jnp.where(accept, energy, energy_old)
        mag = spins.mean(axis=(1, 2, 3))

        step += 1
        accept_count += accept.sum()
        energy_per_spin = energy / args.L**2
        energy_mean, energy_var_sum = welford_update(energy_per_spin.mean(),
                                                     step, energy_mean,
                                                     energy_var_sum)
        return (spins, log_q, energy, mag, accept, k, step, accept_count,
                energy_mean, energy_var_sum, rng)

    rng, rng_init = jrand.split(jrand.PRNGKey(args.seed))
    # Sample initial configurations from the network
    spins = sample_raw_fun(args.batch_size, params, rng_init)
    log_q = log_q_fun(params, spins)
    energy = energy_fun(spins)
    step = 0
    accept_count = 0
    energy_mean = 0
    energy_var_sum = 0

    data_filename = args.log_filename.replace('.log', '.hdf5')
    writer_proto = [
        # Uncomment to save all the sampled spins
        # ('spins', bool, (args.batch_size, args.L, args.L)),
        ('log_q', np.float32, (args.batch_size, )),
        ('energy', np.int32, (args.batch_size, )),
        ('mag', np.float32, (args.batch_size, )),
        ('accept', bool, (args.batch_size, )),
        ('k', np.int32, None),
    ]
    ensure_dir(data_filename)
    with ChunkedDataWriter(data_filename, writer_proto, args.save_step) as writer:
        my_log('Sampling...')
        while step < args.max_step:
            (spins, log_q, energy, mag, accept, k, step, accept_count,
             energy_mean, energy_var_sum, rng) = update(
                 spins, log_q, energy, step, accept_count, energy_mean,
                 energy_var_sum, rng)
            # Uncomment to save all the sampled spins
            # writer.write(spins[:, :, :, 0] > 0, log_q, energy, mag, accept, k)
            writer.write(log_q, energy, mag, accept, k)

            if args.print_step and step % args.print_step == 0:
                accept_rate = accept_count / (step * args.batch_size)
                energy_std = jnp.sqrt(energy_var_sum / step)
                my_log(', '.join([
                    f'step = {step}',
                    f'P = {accept_rate:.8g}',
                    f'E = {energy_mean:.8g}',
                    f'E_std = {energy_std:.8g}',
                    f'time = {time() - start_time:.3f}',
                ]))
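# A minimal sketch of the welford_update helper used by the sampling scripts:
# Welford's online algorithm, keeping a running mean and the running sum of
# squared deviations (so that var = var_sum / step). Inferred from the call
# sites; it works on both Python floats and JAX scalars.
def welford_update(x, step, mean, var_sum):
    delta = x - mean
    mean += delta / step
    var_sum += delta * (x - mean)
    return mean, var_sum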
import configs
import utils

prog_args = configs.arg_parse()
ckpt = utils.load_ckpt(prog_args)
save_data = ckpt["save_data"]  # get save data
print(ckpt["epoch"])
def main():
    utils.print_config(args)
    if 'train' not in args.mode:
        args.keep_rate = 1.0
    args.use_pretrain = args.use_pretrain == 'True'
    args.use_aux_task = args.use_aux_task == 'True'

    if args.mode == 'lm_train':
        args.model = 'lm'
        args.data_path = "./data/wikitext/wikitext-103/processed_wiki_train.bin"
        args.use_pretrain = False

    # model_path default: "data/log/{}"
    args.model_path = os.path.join(args.model_path, args.exp_name).format(args.model)
    if not os.path.exists(args.model_path):
        if 'train' not in args.mode:
            print(args.model_path)
            raise ValueError
        os.makedirs(args.model_path)
    with open(os.path.join(args.model_path, 'config.json'), 'w', encoding='utf8') as f:
        json.dump(vars(args), f)
    print("Default models path: {}".format(args.model_path))

    print('code start/ {} mode / {} models'.format(args.mode, args.model))
    utils.assign_specific_gpu(args.gpu_nums)

    vocab = utils.Vocab()
    vardicts = utils.get_pretrain_weights(args.true_pretrain_ckpt_path) \
        if args.use_pretrain and args.mode == 'train' else None

    if args.mode == 'decode':
        if args.model == 'mmi_bidi':
            args.beam_size = args.mmi_bsize
        args.batch_size = args.beam_size

    modelhps = deepcopy(args)
    if modelhps.mode == 'decode':
        modelhps.max_dec_len = 1

    if args.model == 'vanilla':
        model = BaseModel(vocab, modelhps)
    elif args.model == 'mmi_bidi':
        if args.mode == 'decode':
            # The bidirectional MMI decoder needs both a backward and a
            # forward model, each with its own graph, session and checkpoint
            bw_graph = tf.Graph()
            with bw_graph.as_default():
                bw_model = BaseModel(vocab, args)
                bw_sess = tf.Session(graph=bw_graph, config=utils.gpu_config())
            with bw_sess.as_default():
                with bw_graph.as_default():
                    bidi_ckpt_path = utils.load_ckpt(bw_model.hps, bw_model.saver, bw_sess)

            fw_graph = tf.Graph()
            with fw_graph.as_default():
                modelhps.model_path = modelhps.model_path.replace('mmi_bidi', 'vanilla')
                modelhps.model = 'vanilla'
                fw_model = BaseModel(vocab, modelhps)
                fw_sess = tf.Session(graph=fw_graph)
            with fw_sess.as_default():
                with fw_graph.as_default():
                    ckpt_path = utils.load_ckpt(fw_model.hps, fw_model.saver, fw_sess)
        else:
            model = BaseModel(vocab, modelhps)
    elif args.model == 'lm':
        model = LMModel(vocab, modelhps)
    elif args.model == 'embmin':
        model = DiverEmbMin(vocab, modelhps)
    else:
        raise ValueError
    print('models load end')

    if args.mode in ['train', 'lm_train']:
        train(model, vocab, vardicts)
    elif args.mode == 'decode':
        import time
        if args.model == 'mmi_bidi':
            batcher = Batcher(vocab, bw_model.hps.data_path.replace('train_', 'test_'), args)
            decoder = BeamsearchDecoder(fw_model, batcher, vocab,
                                        fw_sess=fw_sess, bw_model=bw_model,
                                        bw_sess=bw_sess,
                                        bidi_ckpt_path=bidi_ckpt_path)
        else:
            batcher = Batcher(vocab, model.hps.data_path.replace('train_', 'test_'), args)
            decoder = BeamsearchDecoder(model, batcher, vocab)
        decoder.decode()
    elif args.mode == 'eval':
        pass
                            n_views=args.n_views,
                            depth_interval=args.depth_interval,
                            img_wh=tuple(args.img_wh))
    if args.scan:
        scans = [args.scan]
    else:  # evaluate on all scans in dataset
        scans = dataset.scans

    # Step 1. Create depth estimation and probability for each scan
    model = CascadeMVSNet(n_depths=args.n_depths,
                          interval_ratios=args.interval_ratios,
                          num_groups=args.num_groups,
                          norm_act=ABN)
    device = 'cpu' if args.cpu else 'cuda:0'
    model.to(device)
    load_ckpt(model, args.ckpt_path)
    model.eval()

    depth_dir = f'results/{args.dataset_name}/depth'
    print('Creating depth and confidence predictions...')
    if args.scan:
        # TODO: adapt scan specification to tanks and blendedmvs
        data_range = [i for i, x in enumerate(dataset.metas) if x[0] == args.scan]
    else:
        data_range = range(len(dataset))
    for i in tqdm(data_range):
        imgs, proj_mats, init_depth_min, depth_interval, \
            scan, vid = decode_batch(dataset[i])
        os.makedirs(os.path.join(depth_dir, scan), exist_ok=True)
        with torch.no_grad():
if __name__ == "__main__": args = get_opts() w, h = args.img_wh kwargs = {'root_dir': args.root_dir, 'img_wh': tuple(args.img_wh)} if args.dataset_name == 'llff': kwargs['spheric_poses'] = args.spheric_poses dataset = dataset_dict[args.dataset_name](split='test', **kwargs) embedding_xyz = Embedding(3, 10) embedding_dir = Embedding(3, 4) nerf_coarse = NeRF() nerf_fine = NeRF() load_ckpt(nerf_coarse, args.ckpt_path, model_name='nerf_coarse') load_ckpt(nerf_fine, args.ckpt_path, model_name='nerf_fine') nerf_coarse.cuda().eval() nerf_fine.cuda().eval() models = [nerf_coarse, nerf_fine] embeddings = [embedding_xyz, embedding_dir] imgs = [] psnrs = [] dir_name = f'results/{args.dataset_name}/{args.scene_name}' os.makedirs(dir_name, exist_ok=True) for i in tqdm(range(len(dataset))): sample = dataset[i] rays = sample['rays'].cuda()
def main():
    global args, best_prec1
    args = parser.parse_args()
    with open(args.config) as f:
        config = yaml.safe_load(f)
    for k, v in config['common'].items():
        setattr(args, k, v)
    print(args.eval_list)

    if not (args.model_path and os.path.isfile(args.model_path)):
        print("=> no checkpoint found at '{}'".format(args.model_path))
        return

    gpu_num = torch.cuda.device_count()
    if args.distributed:
        args.rank, args.size = init_processes(args.dist_addr, args.dist_port,
                                              gpu_num, args.dist_backend)
        print("=> using {} GPUS for distributed training".format(args.size))
    else:
        args.rank = 0
        print("=> using {} GPUS for training".format(gpu_num))

    # create model
    print("=> creating model '{}'".format(args.arch))
    model = model_zoo[args.arch](num_classes=args.num_classes)
    if not args.distributed:
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()
    else:
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model, [args.rank])
        print('create DistributedDataParallel model successfully', args.rank)

    if args.rank == 0:
        mkdir_if_no_exist(args.save_path, subdirs=['events/', 'logs/', 'checkpoints/'])
        logger = create_logger('global_logger', '{}/logs/log.txt'.format(args.save_path))
        logger.debug(args)  # log args only to file
    else:
        logger = None

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    load_ckpt(args.model_path, model)
    cudnn.benchmark = True

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    val_loader = torch.utils.data.DataLoader(
        FileListDatasetName(
            args.eval_list, args.eval_root,
            transforms.Compose([
                transforms.Resize(args.image_size),
                transforms.CenterCrop(args.input_size),
                transforms.ToTensor(),
                normalize,
            ])),
        batch_size=args.test_batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    validate(val_loader, model, criterion, logger, args.print_freq, args.rank)
if __name__ == "__main__":
    check_path(ckpt_path, binary=True)
    ckpt_info = load_ckpt(ckpt_path)
    if ckpt_info is None or '#' not in ckpt_info:
        # Fresh start: checkpoint at the first chunk, before the first row
        ckpt_chunk = csv_paths[0]
        ckpt_index = -1
        save_ckpt(ckpt_chunk + '#' + str(ckpt_index), ckpt_path)
    else:
        ckpt_chunk = ckpt_info.split('#')[0]
        ckpt_index = int(ckpt_info.split('#')[1])
        print('continue from checkpoint ' + ckpt_chunk + ' ' + str(ckpt_index))

    for csv_path in csv_paths:
        print(csv_path + ' has begun ...')
        csv_file = csv.reader(open(csv_folder + csv_path + '.csv'))
        _ = next(csv_file)  # skip the header row
        rows = [row for row in csv_file]
def main(args):
    args.color_t = torch.rand(700, 3)
    if not os.path.exists(args.ckpt_dir):
        os.mkdir(args.ckpt_dir)
    if not os.path.exists(args.summary_dir):
        os.mkdir(args.summary_dir)
    device = torch.device("cuda" if not args.nocuda and torch.cuda.is_available() else "cpu")

    train_data = TrainStation(args=args, train=True)
    train_loader = DataLoader(train_data, batch_size=args.batch_size,
                              shuffle=True, num_workers=args.workers,
                              drop_last=True)
    num_train = len(train_data)

    model = SCALOR(args)
    model.to(device)
    model.train()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=args.lr)

    global_step = 0
    if args.last_ckpt:
        global_step, args.start_epoch = \
            load_ckpt(model, optimizer, args.last_ckpt, device)

    writer = SummaryWriter(args.summary_dir)
    args.global_step = global_step
    # Anneal the temperature tau exponentially towards tau_end
    log_tau_gamma = np.log(args.tau_end) / args.tau_ep

    for epoch in range(int(args.start_epoch), args.epochs):
        local_count = 0
        last_count = 0
        end_time = time.time()
        for batch_idx, (sample, counting_gt) in enumerate(train_loader):
            tau = np.exp(global_step * log_tau_gamma)
            tau = max(tau, args.tau_end)
            args.tau = tau

            global_step += 1
            log_phase = global_step % args.print_freq == 0 or global_step == 1
            args.global_step = global_step
            args.log_phase = log_phase

            imgs = sample.to(device)
            y_seq, log_like, kl_z_what, kl_z_where, kl_z_depth, \
                kl_z_pres, kl_z_bg, log_imp, counting, \
                log_disc_list, log_prop_list, scalor_log_list = model(imgs)

            log_like = log_like.mean(dim=0)
            kl_z_what = kl_z_what.mean(dim=0)
            kl_z_where = kl_z_where.mean(dim=0)
            kl_z_depth = kl_z_depth.mean(dim=0)
            kl_z_pres = kl_z_pres.mean(dim=0)
            kl_z_bg = kl_z_bg.mean(0)

            # Negative ELBO: reconstruction log-likelihood minus the KL terms
            total_loss = -(log_like - kl_z_what - kl_z_where -
                           kl_z_depth - kl_z_pres - kl_z_bg)

            optimizer.zero_grad()
            total_loss.backward()
            clip_grad_norm_(model.parameters(), args.cp)
            optimizer.step()

            local_count += imgs.data.shape[0]

            if log_phase:
                time_inter = time.time() - end_time
                end_time = time.time()
                count_inter = local_count - last_count
                print_scalor(global_step, epoch, local_count, count_inter,
                             num_train, total_loss, log_like, kl_z_what,
                             kl_z_where, kl_z_pres, kl_z_depth, time_inter)

                writer.add_scalar('train/total_loss', total_loss.item(), global_step=global_step)
                writer.add_scalar('train/log_like', log_like.item(), global_step=global_step)
                writer.add_scalar('train/What_KL', kl_z_what.item(), global_step=global_step)
                writer.add_scalar('train/Where_KL', kl_z_where.item(), global_step=global_step)
                writer.add_scalar('train/Pres_KL', kl_z_pres.item(), global_step=global_step)
                writer.add_scalar('train/Depth_KL', kl_z_depth.item(), global_step=global_step)
                writer.add_scalar('train/Bg_KL', kl_z_bg.item(), global_step=global_step)
                writer.add_scalar('train/tau', tau, global_step=global_step)

                log_summary(args, writer, imgs, y_seq, global_step,
                            log_disc_list, log_prop_list, scalor_log_list,
                            prefix='train')
                last_count = local_count

            if global_step % args.generate_freq == 0:
                # do generation
                model.eval()
                with torch.no_grad():
                    args.phase_generate = True
                    y_seq, log_like, kl_z_what, kl_z_where, kl_z_depth, \
                        kl_z_pres, kl_z_bg, log_imp, counting, \
                        log_disc_list, log_prop_list, scalor_log_list = model(imgs)
                    args.phase_generate = False
                    log_summary(args, writer, imgs, y_seq, global_step,
                                log_disc_list, log_prop_list, scalor_log_list,
                                prefix='generate')
                model.train()
                # end generation

            if global_step % args.save_epoch_freq == 0 or global_step == 1:
                save_ckpt(args.ckpt_dir, model, optimizer, global_step, epoch,
                          local_count, args.batch_size, num_train)
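# A minimal sketch of the save_ckpt / load_ckpt pair assumed by the training
# loop above: a single torch.save dict holding model, optimizer, and counters.
# The checkpoint filename scheme is an assumption.
import os
import torch

def save_ckpt(ckpt_dir, model, optimizer, global_step, epoch,
              local_count, batch_size, num_train):
    state = {
        'global_step': global_step,
        'epoch': epoch,
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
    }
    torch.save(state, os.path.join(ckpt_dir, f'ckpt_{global_step:08d}.pth'))

def load_ckpt(model, optimizer, ckpt_path, device):
    ckpt = torch.load(ckpt_path, map_location=device)
    model.load_state_dict(ckpt['state_dict'])
    optimizer.load_state_dict(ckpt['optimizer'])
    return ckpt['global_step'], ckpt['epoch']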
def main(**kwargs):
    # parse parameters
    param = default_config()
    param.update({
        "mode": "sds",
        "top_k": 10,
        "ckpt": "ckpt/gnn.pt",
        "use_gpu": False
    })
    param.update(kwargs)

    # read maps
    symp2id, id2symp = read_symp2id()
    dise2id, id2dise = read_dise2id()

    # read data
    datapath = os.path.join("dataset/EHR/test/data.txt")
    fin = open(datapath, "r", encoding="utf-8")
    lines = fin.readlines()
    data_model = ehr.EHR("dataset/EHR", "train")

    # init retrieval system
    ehr_ret = EHR_retrieval(mode=param["mode"])

    # init and load model
    data_model_param = parse_data_model(data_model)
    param.update(data_model_param)
    param = parse_kwargs(param, kwargs)
    gnn = HGNN(**param)
    if param["use_gpu"]:
        gnn.cuda()

    ckpt_path = param.get("ckpt")
    if ckpt_path is None:
        print("[Warning] ckpt path not set, loading from the default path.")
        load_ckpt("ckpt/checkpoint.pt", gnn, param["use_gpu"])
    else:
        load_ckpt(ckpt_path, gnn, param["use_gpu"])

    dsd_sampler = DSD_sampler("dataset/EHR")
    usu_sampler = USU_sampler("dataset/EHR")
    gnn.eval()
    emb_dise = gnn.gen_all_dise_emb(dsd_sampler)

    # init result lists
    before_list = []
    after_list = []
    real_dise_list = []
    init_symp_list = []
    after_symp_list = []
    result_map_bfo = defaultdict(list)
    result_map_aft = defaultdict(list)

    # top_k values for evaluation: p@N, Rec@N, ...
    top_k_list = [1, 5]

    for i, line in enumerate(lines):
        line_data = line.strip().split()
        uid = line_data[0]
        did = line_data[1]
        real_dise_list.append(did)
        symps = line_data[2:]

        # select the first symptom and do inference
        init_symp = symps[0]
        init_symp_list.append(id2symp[init_symp])
        symp_ar = np.array([[init_symp]])
        pred_rank = gnn.rank_query(symp_ar, emb_dise, usu_sampler, top_k=5)

        # calculate statistics before symptom expansion
        for top_k in top_k_list:
            pred_top_k = pred_rank[0][:top_k]
            calculate_rec_ndcg(pred_top_k, int(did), top_k, result_map_bfo)
        before_list.append(pred_rank[0])

        # expand the query with retrieved symptoms, then rank again
        rank_symp = ehr_ret(symp_idx=init_symp, top_k=param["top_k"])
        after_symp_list.append([id2symp[str(t)] for t in rank_symp])
        symp_ar = [np.concatenate([[init_symp], rank_symp], 0)]
        pred_rank = gnn.rank_query(symp_ar, emb_dise, usu_sampler, top_k=5)
        for top_k in top_k_list:
            pred_top_k = pred_rank[0][:top_k]
            calculate_rec_ndcg(pred_top_k, int(did), top_k, result_map_aft)
        after_list.append(pred_rank[0])

        ret_symps = ehr_ret(init_symp, param["top_k"])
        ret_symp_list = []
        for sid in ret_symps:
            ret_symp_list.append(id2symp[str(sid)])

        if i % 100 == 0:
            print("[line]:", i)

    # summary
    bf_log = build_result_log(result_map_bfo, top_k_list)
    af_log = build_result_log(result_map_aft, top_k_list)
    print("[before]: {}".format(bf_log))
    print("[after]: {}".format(af_log))

    # write result table
    fout = open("retrieval_result_{}.txt".format(param["mode"]), "w", encoding="utf-8")
    fout.write("did\tbefore_pred\tafter_pred\tinit_symp\taftersymp\n")
    for i in range(len(init_symp_list)):
        wrtline = (id2dise[int(real_dise_list[i])] + "\t"
                   + id2dise[int(before_list[i][0])] + "\t"
                   + id2dise[int(after_list[i][0])] + "\t"
                   + init_symp_list[i] + "\t"
                   + "#".join(after_symp_list[i]) + "\n")
        fout.write(wrtline)
    fin.close()
    fout.close()

    df_res = pd.read_table("retrieval_result_{}.txt".format(param["mode"]))
    df_res.to_excel("retrieval_result_{}.xlsx".format(param["mode"]), encoding="utf-8")
    print("Done")
print('===> prepare model ...')
unet = models.PConvUNet()
unet = unet.cuda()
discriminator = models.Discriminator(in_channels=3)
discriminator = discriminator.cuda()

if args.finetune:
    unet.freeze_enc_bn = True  # freeze bn layer for fine tuning
    optimizer = torch.optim.Adam(unet.parameters(), lr=args.lr_finetune)
else:
    optimizer = torch.optim.Adam(unet.parameters(), lr=args.lr)

if args.resume:
    utils.load_ckpt(args.resume_folder, [('model', unet)])

print('===> prepare loss function ...')
criterion = loss.InpaintingLoss(models.VGG16FeatureExtractor()).cuda()

print('===> prepare lambda ...')
LAMBDA_DICT = {
    'valid': 1.0, 'hole': 6.0, 'tv': 0.1, 'prc': 0.05, 'style': 120.0
}

print('===> start training ...')
def main():
    start_time = time()

    init_out_dir()
    last_step = get_last_ckpt_step()
    if last_step >= 0:
        my_log(f'\nCheckpoint found: {last_step}\n')
    else:
        clear_log()
    print_args()

    net_init, net_apply, net_init_cache, net_apply_fast = get_net()

    rng, rng_net = jrand.split(jrand.PRNGKey(args.seed))
    in_shape = (args.batch_size, args.L, args.L, 1)
    out_shape, params_init = net_init(rng_net, in_shape)
    _, cache_init = net_init_cache(params_init, jnp.zeros(in_shape), (-1, -1))
    # sample_fun = get_sample_fun(net_apply, None)
    sample_fun = get_sample_fun(net_apply_fast, cache_init)
    log_q_fun = get_log_q_fun(net_apply)

    need_beta_anneal = args.beta_anneal_step > 0

    opt_init, opt_update, get_params = optimizers.adam(args.lr)

    @jit
    def update(step, opt_state, rng):
        params = get_params(opt_state)
        rng, rng_sample = jrand.split(rng)
        spins = sample_fun(args.batch_size, params, rng_sample)
        log_q = log_q_fun(params, spins) / args.L**2
        energy = energy_fun(spins) / args.L**2

        def neg_log_Z_fun(params, spins):
            log_q = log_q_fun(params, spins) / args.L**2
            energy = energy_fun(spins) / args.L**2
            beta = args.beta
            if need_beta_anneal:
                # Linearly ramp beta up to its final value
                beta *= jnp.minimum(step / args.beta_anneal_step, 1)
            neg_log_Z = log_q + beta * energy
            return neg_log_Z

        loss_fun = partial(expect, log_q_fun, neg_log_Z_fun,
                           mean_grad_expected_is_zero=True)
        grads = grad(loss_fun)(params, spins, spins)
        opt_state = opt_update(step, grads, opt_state)

        return spins, log_q, energy, opt_state, rng

    if last_step >= 0:
        params_init = load_ckpt(last_step)
    opt_state = opt_init(params_init)

    my_log('Training...')
    for step in range(last_step + 1, args.max_step + 1):
        spins, log_q, energy, opt_state, rng = update(step, opt_state, rng)

        if args.print_step and step % args.print_step == 0:
            # Use the final beta, not the annealed beta
            free_energy = log_q / args.beta + energy
            my_log(', '.join([
                f'step = {step}',
                f'F = {free_energy.mean():.8g}',
                f'F_std = {free_energy.std():.8g}',
                f'S = {-log_q.mean():.8g}',
                f'E = {energy.mean():.8g}',
                f'time = {time() - start_time:.3f}',
            ]))

        if args.save_step and step % args.save_step == 0:
            params = get_params(opt_state)
            save_ckpt(params, step)
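# A minimal sketch of the save_ckpt / load_ckpt pair assumed by these JAX
# scripts: parameters pickled to step-numbered files. The out_dir attribute
# and filename scheme are assumptions; only the call signatures are taken
# from the code above.
import pickle

def save_ckpt(params, step):
    with open(f'{args.out_dir}/ckpt_{step}.pickle', 'wb') as f:
        pickle.dump(params, f)

def load_ckpt(step):
    with open(f'{args.out_dir}/ckpt_{step}.pickle', 'rb') as f:
        return pickle.load(f)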
def main(args):
    model = Model()
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=args.nesterov)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=args.step_size,
                                                gamma=args.gamma)
    if args.scheduler == 'multistep':
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                         args.milestones,
                                                         gamma=args.gamma)
    elif args.scheduler == 'cosine':
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.step_size)
    criterion = torch.nn.CrossEntropyLoss()

    model = model.cuda()
    criterion = criterion.cuda()
    start_epoch = 0

    # Check number of parameters of your model
    pytorch_total_params = sum(p.numel() for p in model.parameters())
    print(f"Number of parameters: {pytorch_total_params}")

    if not os.path.exists('{}'.format(args.savepath)):
        os.makedirs('{}'.format(args.savepath))

    # resume
    if args.resume:
        model, optimizer, start_epoch = load_ckpt(model, optimizer, args)

    # Dataloader
    if args.dataset == 'cifar10':
        normalize = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],
                                         std=[0.2023, 0.1994, 0.2010])
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
        transform_train.transforms.insert(0, RandAugment(args.rand_n, args.rand_m))
        transform_val = transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])
        trainset = CIFAR10(root=args.datapath, train=True, download=True,
                           transform=transform_train)
        valset = CIFAR10(root=args.datapath, train=False, download=True,
                         transform=transform_val)
    elif args.dataset == 'cifar100':
        normalize = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],
                                         std=[0.2023, 0.1994, 0.2010])
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
        transform_val = transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])
        trainset = CIFAR100(root=args.datapath, train=True, download=True,
                            transform=transform_train)
        valset = CIFAR100(root=args.datapath, train=False, download=True,
                          transform=transform_val)
    elif args.dataset == 'ImageNet':
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(image_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
        transform_val = transforms.Compose([
            transforms.Resize(image_size + 32),
            transforms.CenterCrop(image_size),
            transforms.ToTensor(),
            normalize,
        ])
        trainset = ImageNet(root=args.datapath, split='train', download=False,
                            transform=transform_train)
        valset = ImageNet(root=args.datapath, split='val', download=False,
                          transform=transform_val)
    elif args.dataset == 'tiny-imagenet-200':
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(image_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
        transform_val = transforms.Compose([
            transforms.Resize(image_size + 32),
            transforms.CenterCrop(image_size),
            transforms.ToTensor(),
            normalize,
        ])
        # ImageFolder has no split/download arguments; point it at the
        # train/val subdirectories instead
        trainset = ImageFolder(root=os.path.join(args.datapath, 'train'),
                               transform=transform_train)
        valset = ImageFolder(root=os.path.join(args.datapath, 'val'),
                             transform=transform_val)

    train_loader = torch.utils.data.DataLoader(trainset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.num_workers,
                                               pin_memory=False)
    val_loader = torch.utils.data.DataLoader(valset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.num_workers,
                                             pin_memory=False)

    # start training
    last_top1_acc = 0
    acc1_valid = 0
    best_acc1 = 0
    is_best = False
    for epoch in range(start_epoch, args.epochs):
        print("\n----- epoch: {}, lr: {} -----".format(
            epoch, optimizer.param_groups[0]["lr"]))

        # train for one epoch
        start_time = time.time()
        last_top1_acc = train(train_loader, epoch, model, optimizer, criterion)
        elapsed_time = time.time() - start_time
        print('==> {:.2f} seconds to train this epoch\n'.format(elapsed_time))

        # validate for one epoch
        start_time = time.time()
        acc1_valid = validate(val_loader, model, criterion)
        elapsed_time = time.time() - start_time
        print('==> {:.2f} seconds to validate this epoch\n'.format(elapsed_time))

        # learning rate scheduling
        scheduler.step()

        summary = [epoch, last_top1_acc, acc1_valid.item()]
        is_best = acc1_valid > best_acc1
        best_acc1 = max(acc1_valid, best_acc1)
        save_summary('rexnetv1', args.dataset, args.name, summary)

        checkpoint = {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        save_ckpt(checkpoint, is_best, args)

    print(f"Last Top-1 Accuracy: {last_top1_acc}")
    print(f"Best valid Top-1 Accuracy: {best_acc1}")
    print(f"Number of parameters: {pytorch_total_params}")
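# A minimal sketch of the save_ckpt / load_ckpt pair assumed by the loop
# above: write the latest checkpoint, copy it to a best-model file when
# is_best is set, and restore model/optimizer/epoch on resume. Filenames are
# assumptions.
import os
import shutil
import torch

def save_ckpt(state, is_best, args):
    filename = os.path.join(args.savepath, 'checkpoint.pth.tar')
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, os.path.join(args.savepath, 'model_best.pth.tar'))

def load_ckpt(model, optimizer, args):
    ckpt = torch.load(os.path.join(args.savepath, 'checkpoint.pth.tar'))
    model.load_state_dict(ckpt['state_dict'])
    optimizer.load_state_dict(ckpt['optimizer'])
    return model, optimizer, ckpt['epoch']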
def main(args, stream):
    if not args.class_wise_sampling and args.data_seed is None:
        args.data_seed = random.randint(1, 1e8)

    # * Prepare data_module *
    dm = DInterface(**vars(args))
    args.class_dict = dm.init_data['class_dict']
    args.classes = list(args.class_dict.keys())
    args.num_classes = len(args.class_dict)
    global_bs = args.gpus * args.batch_size if args.gpus > 1 else args.batch_size

    # * Build model *
    net = build_model(**vars(args))
    if args.load_pretrained:
        pretrained_path = load_pretrain_path_by_args(args, '.pth.tar')
        bl_layers = None
        if args.mode_name in ['train', 'finetune']:
            bl_layers = ['classifier', 'fc']
        net = load_ckpt(net, pretrained_path,
                        train=(args.mode_name == 'train'),
                        block_layers=bl_layers,
                        map_keys=args.map_keys,
                        verbose=True)
    model = BasicModel(net, **vars(args))

    # Resume
    load_path = load_model_path_by_args(args)
    if load_path is not None:
        # load_from_checkpoint returns a new instance; keep the result
        model = BasicModel.load_from_checkpoint(checkpoint_path=load_path, strict=False)

    # * validate mode *
    if args.mode_name in ['val', 'test']:
        model.final_val = True
        trainer = Trainer.from_argparse_args(args)
        trainer.validate(model, datamodule=dm)
        return

    # * Callbacks *
    # Checkpoint callbacks
    if args.ckpt == 'debug' or not args.save_ckpt:
        ckpt_callback = get_checkpoint_callback(f'Task_models/{args.net_suffix}',
                                                save_last=False, save_top_k=1)
    else:
        cpDir = '{}/{}_{}'.format(args.ckpt, args.model_name, args.net_suffix)
        every_n_train_steps = dm.num_samples // global_bs
        if args.ckpt_ever_n_epoch:
            every_n_train_steps *= args.ckpt_ever_n_epoch
        ckpt_callback = get_checkpoint_callback(
            cpDir, 'val/acc', 'max',
            filename='{epoch}_{val_acc:.2f}',
            every_n_train_steps=every_n_train_steps)

    # Logging callbacks
    if args.train_scale >= 1:
        version_str = f'{args.dataset}_ts={int(args.train_scale):d}'
    else:
        version_str = f'{args.dataset}_ts={args.train_scale:.2%}'
    logger_tb = pl_log.TensorBoardLogger(args.log_dir, args.exp_name, version_str)
    log_dir = logger_tb.log_dir
    args.logger = [logger_tb]
    if pl.utilities.distributed._get_rank() == 0:
        os.makedirs(log_dir)
        stream.all_to_file(log_dir + '/{}.log'.format(args.exp_name), flush=True)
    logger_eren = MyLogger(None)
    args.progress_bar_refresh_rate = 0  # Use MyLogger() instead of the progress bar

    lr_monitor = pl.callbacks.LearningRateMonitor(logging_interval='epoch')
    args.callbacks = [ckpt_callback, logger_eren, lr_monitor]

    # * Accelerating *
    if args.gpus > 1 and (args.accelerator is None and args.plugins is None):
        args.accelerator = 'ddp'
    if args.accelerator == 'ddp':
        args.plugins = pl.plugins.DDPPlugin(find_unused_parameters=False)
    if args.mode_name in ['train', 'finetune']:
        args.benchmark = True

    # * Begin training and testing *
    trainer = Trainer.from_argparse_args(args)
    trainer.fit(model, datamodule=dm)

    # Final test
    model.final_val = True
    trainer.validate(model, ckpt_path='best', datamodule=dm)

    # Other operations
    print('Best ckpt: {}'.format(trainer.checkpoint_callback.best_model_path))
    if args.ckpt != 'debug' and args.save_ckpt:
        checkpoint_standardize(cpDir)
def main():
    start_time = time()
    last_step = get_last_ckpt_step()
    assert last_step >= 0
    my_log(f'Checkpoint found: {last_step}\n')
    print_args()

    net_init, net_apply, net_init_cache, net_apply_fast = get_net()
    params = load_ckpt(last_step)
    in_shape = (args.batch_size, args.L, args.L, 1)
    _, cache_init = net_init_cache(params, jnp.zeros(in_shape), (-1, -1))
    # sample_fun = get_sample_fun(net_apply, None)
    sample_fun = get_sample_fun(net_apply_fast, cache_init)
    log_q_fun = get_log_q_fun(net_apply)

    def sample_energy_fun(rng):
        spins = sample_fun(args.batch_size, params, rng)
        log_q = log_q_fun(params, spins)
        energy = energy_fun(spins)
        return spins, log_q, energy

    @jit
    def update(spins_old, log_q_old, energy_old, step, energy_mean,
               energy_var_sum, rng):
        rng, rng_sample = jrand.split(rng)
        spins, log_q, energy = sample_energy_fun(rng_sample)
        mag = spins.mean(axis=(1, 2, 3))

        step += 1
        energy_per_spin = energy / args.L**2
        energy_mean, energy_var_sum = welford_update(energy_per_spin.mean(),
                                                     step, energy_mean,
                                                     energy_var_sum)
        return (spins, log_q, energy, mag, step, energy_mean, energy_var_sum, rng)

    rng, rng_init = jrand.split(jrand.PRNGKey(args.seed))
    spins, log_q, energy = sample_energy_fun(rng_init)
    step = 0
    energy_mean = 0
    energy_var_sum = 0

    data_filename = args.log_filename.replace('.log', '.hdf5')
    writer_proto = [
        # Uncomment to save all the sampled spins
        # ('spins', bool, (args.L, args.L)),
        ('log_q', np.float32, None),
        ('energy', np.int32, None),
        ('mag', np.float32, None),
    ]
    ensure_dir(data_filename)
    with ChunkedDataWriter(data_filename, writer_proto,
                           args.save_step * args.batch_size) as writer:
        my_log('Sampling...')
        while step < args.max_step:
            (spins, log_q, energy, mag, step, energy_mean, energy_var_sum,
             rng) = update(spins, log_q, energy, step, energy_mean,
                           energy_var_sum, rng)
            # Uncomment to save all the sampled spins
            # writer.write_batch(spins[:, :, :, 0] > 0, log_q, energy, mag)
            writer.write_batch(log_q, energy, mag)

            if args.print_step and step % args.print_step == 0:
                energy_std = jnp.sqrt(energy_var_sum / step)
                my_log(', '.join([
                    f'step = {step}',
                    f'E = {energy_mean:.8g}',
                    f'E_std = {energy_std:.8g}',
                    f'time = {time() - start_time:.3f}',
                ]))
## Data
trainset, testset, num_classes = L.load_dataset(params['data'], data_dir=params['data_dir'])
X_train, y_train = F.get_samples(trainset, args.trainsamples)
X_test, y_test = F.get_samples(testset, args.testsamples)
if args.translatetrain:
    X_train, y_train = F.translate(X_train, y_train, stride=7)
if args.translatetest:
    X_test, y_test = F.translate(X_test, y_test, stride=7)
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

## Architecture
net = L.load_architecture(params['data'], params['arch'])
net = utils.load_ckpt(args.model_dir, 'model', net)
net = net.to(device)

## Forward
with torch.no_grad():
    print('train')
    Z_train = net.batch_forward(X_train, batch_size=args.batch_size,
                                loss=args.loss, device=device)
    X_train, y_train, Z_train = F.to_cpu(X_train, y_train, Z_train)
    utils.save_loss(eval_dir, f'train', net.get_loss())

    print('test')
    Z_test = net.batch_forward(X_test, batch_size=args.batch_size,