def main(args):
    print(args)
    print("Started experiment!")
    utils.print_args(args)
    utils.set_seed(args.seed)
    smoothing_method = {"nist": SmoothingFunction().method3}
    results = {}
    scores = {}
    for name, method in smoothing_method.items():
        scores[name] = utils.evaluate_bleu(
            args.input_file,
            os.path.join("data", "valid.txt"),
            num_real_sentences=args.num_sentences,
            num_generated_sentences=args.num_sentences,
            gram=args.gram,
            smoothing_method=method,
            chunk_size=15)
    print()
    for name in smoothing_method.keys():
        results[name] = {}
        results[name]['scores'] = scores[name]
    print("Results:", results)
    bleu = results['nist']['scores']['bleu5']
    sbleu = results['nist']['scores']['self-bleu5']
    hmean = stats.hmean([bleu, 1.0 / sbleu])
    print("Harmonic Mean:", hmean)
def get_config():
    import argparse
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--mode', '-mode', type=str, default='train')
    parser.add_argument('--gpu_id', '-id', type=str, default='0')
    parser.add_argument('--root_dir', '-sd', type=str,
                        default='/home/caiyi/PycharmProjects/gesture_MP')
    # parser.add_argument('--result_dir', '-rd', type=str, default='/home/caiyi/PycharmProjects/gesture_MP')
    parser.add_argument('--result_dir', '-rd', type=str,
                        default='/home/caiyi/PycharmProjects/gesture_MP/result')
    parser.add_argument('--batch_size', '-bs', type=int, default=64)
    parser.add_argument('--input_size', '-is', type=int, default=64)
    # parser.add_argument('--num_joint', '-nj', type=int, default=14)
    parser.add_argument('--fc_size', '-fc', type=int, default=2048)
    parser.add_argument('--num_class', '-nc', type=int, default=11)
    parser.add_argument('--epoch', '-epoch', type=int, default=100)
    parser.add_argument('--lr_start', '-lr', help='learning rate', type=float, default=1e-3)
    parser.add_argument('--lr_decay_rate', type=float, default=0.9)
    parser.add_argument('--lr_decay_step', type=float, default=100000)
    args = vars(parser.parse_args())
    utils.print_args(args)
    return args
def get_config():
    import argparse
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--mode', '-mode', type=str, default='pretest')
    parser.add_argument('--gpu_id', '-id', type=str, default='0')
    parser.add_argument('--saved_model_path', '-smp', type=str, default='../../results/')
    parser.add_argument('--batch_size', '-bs', type=int, default=64)
    parser.add_argument('--root_weight', '-w', type=float, default=4)
    parser.add_argument('--n_rounds', '-nr', type=int, default=2000)
    parser.add_argument('--train_iters', '-ni', type=int, default=100)
    parser.add_argument('--dataset', '-data', type=str, default='nyu')
    parser.add_argument('--mrsa_test_fold', '-mtf', type=str, default='P8')
    parser.add_argument('--files_per_time', '-fpt', type=int, default=10)
    parser.add_argument('--samples_per_time', '-spt', type=int, default=2000)
    parser.add_argument('--lr_start', '-lr', help='learning rate', type=float, default=0.0001)
    parser.add_argument('--lr_decay_rate', default=0.99)
    parser.add_argument('--lr_decay_iters', default=3000)
    # Note: argparse's type=bool treats any non-empty string as True,
    # so passing '-new 0' on the command line still yields True.
    parser.add_argument('--new_training', '-new', type=bool, default=0)
    parser.add_argument('--test_gap', '-tg', type=int, default=2)
    parser.add_argument('--num_cpus', '-cpus', type=int, default=32)
    args = vars(parser.parse_args())
    utils.print_args(args)
    return args
def main():
    parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
    parser, parser_create = JobManager.register_arguments(parser)
    parser.add_argument('-o', '--output-dir', type=str, default='./results',
                        help="directory to store the results")
    parser_create.add_argument('--project', type=str,
                               help='project name for wandb logging (omit to disable)')
    parser_create.add_argument('-s', '--seed', type=int, default=12345,
                               help='initial random seed')
    parser_create.add_argument('-r', '--repeats', type=int, default=10,
                               help="number of experiment iterations")
    parser_create.add_argument('--hyperopt', action='store_true')
    parser_create.add_argument('--LPGNN', action='store_true')
    parser_create.add_argument('--baselines', action='store_true')
    args = parser.parse_args()
    print_args(args)
    JobManager(args, cmd_generator=experiment_generator).run()
def main():
    # Get the configuration arguments
    args = utils.get_args()
    utils.print_args(args)

    # Start with a small GPU memory allocation and grow it as needed
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)

    # Environment variables: TF_CPP_MIN_LOG_LEVEL='3' silences TensorFlow's C++ logging
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)

    # Create a session with the config options
    with tf.Session(config=config) as sess:
        # Initialize the DNN
        mini = MiniNet(sess, args)
        # Collect all variables that have trainable=True
        model_vars = tf.trainable_variables()
        # slim is a library that makes defining, training, and evaluating NNs simple
        tf.contrib.slim.model_analyzer.analyze_vars(model_vars, print_info=True)
        if args.training:
            mini.train()
        else:
            mini.test()
def get_config():
    import argparse
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--gpu_id', '-id', type=str, default='1')
    parser.add_argument('--saved_model_path', '-smp', type=str, default='../../results/')
    parser.add_argument('--dataset', '-data', type=str, default='icvl')
    parser.add_argument('--mrsa_test_fold', '-mtf', type=str, default='P8')
    parser.add_argument('--batch_size', '-bs', type=int, default=64)
    parser.add_argument('--num_cpus', '-cpus', type=int, default=32)
    parser.add_argument('--n_rounds', '-nr', type=int, default=500)
    parser.add_argument('--train_iters', '-ni', type=int, default=400)
    parser.add_argument('--update_iters', '-ui', type=int, default=40)
    parser.add_argument('--tau', '-tau', type=float, default=0.01)
    parser.add_argument('--files_per_time', '-fpt', type=int, default=5)
    parser.add_argument('--num_batch_samples', '-nb', type=int, default=1023)
    parser.add_argument('--max_iters', '-mi', type=int, default=1)
    parser.add_argument('--test_gap', '-tg', type=int, default=1)
    parser.add_argument('--buffer_size', '-buf', type=int, default=40000)
    parser.add_argument('--actor_lr', '-alr', type=float, default=0.0001)
    parser.add_argument('--critic_lr', '-clr', type=float, default=0.0001)
    parser.add_argument('--step_size', '-step', type=float, default=1.0)
    parser.add_argument('--beta', '-beta', type=float, default=1.0)
    parser.add_argument('--gamma', '-gamma', type=float, default=0.9)
    parser.add_argument('--reward_range', '-range', type=int, default=3)
    args = vars(parser.parse_args())
    utils.print_args(args)
    return args
def main():
    args = get_arguments()
    utils.print_args(args)
    dataset = data_loading.get_dataset(args.dataset, args.normalize_raw,
                                       args.normalize_reps, args.cifar_path)
    print("Collected arguments and raw dataset.")

    if args.network_type == 'simple':
        input_shape = args.dim_red if args.dim_red is not None else dataset.get_raw_input_shape()
        rounds.add_network_to_vector_rounds(args.rounds, dataset, input_shape,
                                            args.neurons,
                                            args.max_iter_optimization,
                                            args.alpha_optimization,
                                            args.n_train, args.dim_red,
                                            network_type='simple')
    elif args.network_type == 'conv':
        input_shape = dataset.get_raw_input_shape(True)
        rounds.add_network_to_vector_rounds(args.rounds, dataset, input_shape,
                                            args.neurons,
                                            args.max_iter_optimization,
                                            args.alpha_optimization,
                                            args.n_train, None,
                                            network_type='conv')
    else:
        raise ValueError("Network type {} not supported".format(args.network_type))

    print("Finished getting Representations")
    print("Getting represented dataset:")
    print("Getting training examples...")
    x, y = dataset.get_training_examples(args.n_train,
                                         dim_reduction=args.dim_red,
                                         print_progress=True)
    print("Getting test examples...")
    x_test, y_test = dataset.get_test_examples(args.n_test,
                                               dim_reduction=args.dim_red,
                                               print_progress=True)

    print("Getting final linear separator")
    w = svm.get_linear_separator(x, y, type_of_classifier='sdca', verbose=2,
                                 alpha=args.alpha_evaluation,
                                 max_iter=args.max_iter_evaluation)
    performance = evaluation.evaluate_model(w, x, y)
    print("train performance is {}".format(performance))
    performance = evaluation.evaluate_model(w, x_test, y_test)
    print("test performance is {}".format(performance))
def main(args, **model_kwargs):
    device = torch.device(args.device)
    args.device = device

    if args.dataset == 'abilene_tm':
        args.nNodes = 12
        args.day_size = 288
    elif args.dataset == 'geant_tm':
        args.nNodes = 22
        args.day_size = 96
    elif args.dataset == 'brain_tm':
        args.nNodes = 9
    elif 'sinet' in args.dataset:
        args.nNodes = 73
        args.day_size = 288
    else:
        raise ValueError('Dataset not found!')

    test_loader = utils.get_dataloader(args)
    args.test_size, args.nSeries = test_loader.dataset.gt_data_set.shape

    in_dim = 1
    args.in_dim = in_dim

    model = models.get_model(args)
    logger = utils.Logger(args)
    engine = utils.Trainer.from_args(model, test_loader.dataset.scaler, args)
    utils.print_args(args)

    if not args.test:
        test_met_df, x_gt, y_gt, y_real, yhat = engine.test(
            test_loader, engine.model, args.out_seq_len)
        test_met_df.round(6).to_csv(
            os.path.join(logger.log_dir, 'test_metrics.csv'))
        print('Prediction Accuracy:')
        print(utils.summary(logger.log_dir))
        np.save(os.path.join(logger.log_dir, 'x_gt'), x_gt)
        np.save(os.path.join(logger.log_dir, 'y_gt'), y_gt)
        np.save(os.path.join(logger.log_dir, 'y_real'), y_real)
        np.save(os.path.join(logger.log_dir, 'yhat'), yhat)
    else:
        x_gt = np.load(os.path.join(logger.log_dir, 'x_gt.npy'))
        y_gt = np.load(os.path.join(logger.log_dir, 'y_gt.npy'))
        y_real = np.load(os.path.join(logger.log_dir, 'y_real.npy'))
        yhat = np.load(os.path.join(logger.log_dir, 'yhat.npy'))

    if args.plot:
        logger.plot(x_gt, y_real, yhat)

    # run TE
    if args.run_te:
        run_te(x_gt, y_gt, yhat, args)
def main(): """Main training program.""" num_of_gpus = 8 num_of_layers = 24 hp = 1024 // num_of_gpus d_binglr = torch.load( '/relevance2-nfs/romittel/binglr_pretrained_model/pytorch_model.bin') emb_per_gpu = d_binglr['bert.embeddings.word_embeddings.weight'].size( )[0] // num_of_gpus # Disable CuDNN. torch.backends.cudnn.enabled = False # Timer. timers = Timers() # Arguments. args = get_args() file_len = 0 for line in open(args.valid_data[0], 'r', encoding='utf-8'): file_len += 1 print("file_len= ", file_len) # Pytorch distributed. initialize_distributed(args) if torch.distributed.get_rank() == 0: print('Pretrain GPT2 model') print_args(args) # Random seeds for reproducability. set_random_seed(args.seed) # Data stuff. train_data, val_data, test_data, args.vocab_size, \ args.eod_token = get_train_val_test_data(args) # Model, optimizer, and learning rate. model, optimizer, lr_scheduler = setup_model_and_optimizer(args) #model.optimizer.dynamic_loss_scale=True j = 0 if j == torch.distributed.get_rank(): # word_embeddings #num_embeddings_per_partition = model.module.module.module.word_embeddings.num_embeddings_per_partition #embedding_dim = model.module.module.module.word_embeddings.embedding_dim print(model.module.module.module.input_layernorm.bias.size()) print(d_binglr['bert.embeddings.LayerNorm.bias'].size())
def main():
    seed_everything(12345)
    logging.getLogger("lightning").setLevel(logging.ERROR)
    logging.captureWarnings(True)

    parser = ArgumentParser()
    parser.add_argument('-d', '--dataset', type=str, choices=available_datasets(), required=True)
    parser.add_argument('-m', '--method', type=str, choices=FeatureTransform.available_methods(), required=True)
    parser.add_argument('-e', '--epsilons', nargs='*', type=float, default=[0.0])
    parser.add_argument('-k', '--steps', nargs='*', type=int, default=[1])
    parser.add_argument('-l', '--label-rates', nargs='*', type=float, default=[1.0])
    parser.add_argument('-r', '--repeats', type=int, default=1)
    parser.add_argument('-o', '--output-dir', type=str, default='./results')
    parser.add_argument('--device', type=str, default='cuda', choices=['cpu', 'cuda'])
    parser = NodeClassifier.add_module_specific_args(parser)
    args = parser.parse_args()

    if args.method in FeatureTransform.private_methods and min(args.epsilons) <= 0:
        parser.error('LDP method requires eps > 0.')

    print_args(args)
    start = time.time()
    batch_train_and_test(args)
    end = time.time()
    print('\nTotal time spent:', end - start, 'seconds.\n\n')
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='Parse pytorch records')
    parser.add_argument('--file-list', type=str,
                        help='the file name for the list of ')
    args = parser.parse_args()
    print_args(args)

    best_model = defaultdict(list)
    with open(args.file_list) as list_fp:
        for line in list_fp:
            fn = line.strip()
            count = 0
            print_str = []
            with open(fn) as fp:
                for line in fp:
                    line = line.strip()
                    if len(line) > 5 and line[:5] == 'valid':
                        count = 3
                        print_str = []
                    if count != 0:
                        count -= 1
                        if count == 2:
                            line = line[-4:-1]
                        elif count == 1:
                            line = line.split(' ')[-1]
                        else:
                            line = line.split(',')[-3]
                        print_str.append(line)
            print_str[2] = float(print_str[2])
            mid_dist = print_str[2]
            acc = (print_str[0])
            if len(best_model[acc]) == 0 or best_model[acc][-1] < mid_dist:
                best_model[acc] = [fn] + print_str
            print(fn, print_str)

    print()
    for acc in sorted(best_model, reverse=True):
        print('acc = {}, # of region = {}, mid dist = {:.4f}, fn = {}'.format(
            best_model[acc][1], best_model[acc][2], best_model[acc][3],
            best_model[acc][0]))
def main():
    import argparse

    # global hyper parameters
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--use_GPU', help='if use gpu', default=True)
    parser.add_argument('--render', help='render the environment', default=True)
    parser.add_argument('--hid-layers-sizes', help='the sizes of each hidden layer', default=[64, 64])
    parser.add_argument('--model_dir', help='model loading directory', type=str, default='../../mamodel_p/')
    parser.add_argument('--model_name', help='model name for initial model', type=str, default='pmodel_18')
    parser.add_argument('--scenario-name', '-sn', help='scenario name', type=str, default='simple_body_language_pt')
    parser.add_argument('--max-env-steps', help='maximum steps in the env', type=int, default=25)
    parser.add_argument('--time-interval', help="time interval between two steps", type=float, default=0.2)
    parser.add_argument('--num-episodes', help='number of episodes', type=int, default=10)

    args = parser.parse_args()
    print_args(args)
    # parse once and reuse the namespace instead of calling parse_args() a second time
    args = vars(args)
    learn(args)
def main():
    seed_everything(12345)

    # parse arguments
    parser = ArgumentParser()
    parser.add_argument('-d', '--datasets', nargs='+', choices=available_datasets(), default=available_datasets())
    parser.add_argument('-m', '--methods', nargs='+', choices=available_mechanisms(), default=available_mechanisms())
    parser.add_argument('-e', '--epsilons', nargs='+', type=float, dest='epsilons', required=True)
    parser.add_argument('-a', '--aggs', nargs='*', type=str, default=['gcn'])
    parser.add_argument('-r', '--repeats', type=int, default=1)
    parser.add_argument('-o', '--output-dir', type=str, default='./results')
    parser.add_argument('--device', type=str, default='cuda', choices=['cpu', 'cuda'])
    args = parser.parse_args()

    # check that eps > 0 for LDP methods
    if min(args.epsilons) <= 0:
        parser.error('LDP methods require eps > 0.')

    print_args(args)
    batch_error_estimation(args)
def main():
    args = utils.get_args()
    utils.print_args(args)

    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)

    with tf.Session(config=config) as sess:
        mini = MiniNet(sess, args)
        model_vars = tf.trainable_variables()
        tf.contrib.slim.model_analyzer.analyze_vars(model_vars, print_info=True)
        if bool(int(args.training)):
            mini.train()
def get_config():
    import argparse
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--mode', '-mode', type=str, default='train')
    parser.add_argument('--gpu_id', '-id', type=str, default='2')
    parser.add_argument('--store_dir', '-sd', type=str, default='/hand_pose_data/nyu')
    # parser.add_argument('--result_dir', '-rd', type=str, default='/home/caiyi/PycharmProjects/noisy_pose_result')
    parser.add_argument('--result_dir', '-rd', type=str,
                        default='/home/data/hands2019_challenge/task1')
    parser.add_argument('--in_channel', type=int, default=1)
    parser.add_argument('--channel_1', type=int, default=4)
    parser.add_argument('--channel_2', type=int, default=8)
    parser.add_argument('--channel_3', type=int, default=16)
    parser.add_argument('--channel_4', type=int, default=32)
    parser.add_argument('--channel_5', type=int, default=64)
    parser.add_argument('--channel_6', type=int, default=128)
    parser.add_argument('--channel_7', type=int, default=256)
    parser.add_argument('--input_dim', type=int, default=1152)
    parser.add_argument('--layer1', type=int, default=256)
    parser.add_argument('--layer2', type=int, default=64)
    parser.add_argument('--output_dim', type=int, default=2)
    parser.add_argument('--batch_size', '-bs', type=int, default=64)
    parser.add_argument('--epoch', '-epoch', type=int, default=20)
    parser.add_argument('--lr_start', '-lr', help='learning rate', type=float, default=0.0005)
    parser.add_argument('--lr_decay_rate', type=float, default=0.9)
    parser.add_argument('--lr_decay_step', type=float, default=20000)
    args = vars(parser.parse_args())
    utils.print_args(args)
    return args
def main(): """Main training program.""" # Disable CuDNN. torch.backends.cudnn.enabled = False # Timer. timers = Timers() # Arguments. args = get_args() file_len = 0 for line in open(args.valid_data[0], 'r', encoding='utf-8'): file_len += 1 print("file_len= ", file_len) # Pytorch distributed. initialize_distributed(args) if torch.distributed.get_rank() == 0: print('Pretrain GPT2 model') print_args(args) # Random seeds for reproducability. set_random_seed(args.seed) # Data stuff. train_data, val_data, test_data, args.vocab_size, \ args.eod_token = get_train_val_test_data(args) # Model, optimizer, and learning rate. model, optimizer, lr_scheduler = setup_model_and_optimizer(args) #model.optimizer.dynamic_loss_scale=True if val_data is not None: val_data_iterator = iter(val_data) else: val_data_iterator = None #TODO: figure out how to properly set this especially when resuming training evaluate(val_data_iterator, model, args, timers, file_len, verbose=False)
def main_nn():
    args = get_network_training_args()
    utils.print_args(args)
    dataset = data_loading.get_dataset(args.dataset, args.normalize_raw, False, args.cifar_path)

    if args.network_type == 'simple':
        d = args.dim_red if args.dim_red is not None else dataset.get_raw_input_shape()
        model = FCNetwork(d, args.neurons, args.layers)
    else:  # args.network_type == 'conv'
        input_shape = dataset.get_raw_input_shape(True)
        model = ConvNetwork(input_shape, args.neurons, args.layers,
                            args.kernel_size, auto_pad=True)

    model.to(utils.get_device())
    print("Model device is cuda? {}".format(next(model.parameters()).is_cuda))

    x, y = dataset.get_training_examples(
        args.n_train, False,
        args.dim_red if args.network_type == 'simple' else None)
    x_test, y_test = dataset.get_test_examples(
        args.n_test, False,
        args.dim_red if args.network_type == 'simple' else None)

    train_network(model, x, y, x_test, y_test, args.epochs, args.batch_size,
                  optimizer=args.optimizer, lr=args.learning_rate,
                  weight_decay=args.weight_decay, verbose=args.verbose)
def get_config():
    import argparse
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--gpu_id', '-id', type=str, default='0')
    parser.add_argument('--saved_model_path', '-smp', type=str, default='../results/unmaintained/')
    parser.add_argument('--actor_model_name', '-amn', type=str, default='actor')
    parser.add_argument('--critic_model_name', '-cmn', type=str, default='critic')
    parser.add_argument('--batch_size', '-bs', type=int, default=128)
    parser.add_argument('--n_rounds', '-nr', type=int, default=1000)
    parser.add_argument('--update_iters', '-ui', type=int, default=1000)
    parser.add_argument('--n_iters', '-ni', type=int, default=10)
    parser.add_argument('--iter_per_joint', '-ipj', type=str, default='(5, 3)')
    parser.add_argument('--beta', '-beta', type=float, default=0.1)
    parser.add_argument('--dataset', '-data', type=str, default='icvl')
    parser.add_argument('--mrsa_test_fold', '-mtf', type=str, default='P9')
    parser.add_argument('--files_per_time', '-nfp', type=int, default=1)
    parser.add_argument('--buffer_size', '-buf', type=int, default=100000)

    # parameters of the models
    parser.add_argument('--root_actor_cnn_layers', '-racl', type=str, default='(8, 16, 32, 64, 128)')
    parser.add_argument('--root_critic_cnn_layers', '-rccl', type=str, default='(8, 16, 32, 64, 128)')
    parser.add_argument('--root_actor_fc_layers', '-rafl', type=str, default='(256, 32)')
    parser.add_argument('--root_critic_fc_layers', '-rcfl', type=str, default='(64, 6, 32, 64)')
    parser.add_argument('--root_obs_dims', '-rod', type=str, default='(40, 40, 20)')
    parser.add_argument('--tau', '-tau', type=float, default=0.001)
    parser.add_argument('--learning_rate', '-lr', type=float, default=1e-5)
    # The chain_* options originally reused the root_* short flags ('-racl', '-rccl',
    # '-rafl', '-rcfl', '-rod'), which makes argparse raise a conflict error;
    # they are given distinct short flags here.
    parser.add_argument('--chain_actor_cnn_layers', '-cacl', type=str, default='(8, 16, 32, 64, 128)')
    parser.add_argument('--chain_critic_cnn_layers', '-cccl', type=str, default='(8, 16, 32, 64, 128)')
    parser.add_argument('--chain_actor_fc_layers', '-cafl', type=str, default='(128, 32)')
    parser.add_argument('--chain_critic_fc_layers', '-ccfl', type=str, default='(64, 6, 32, 64)')
    parser.add_argument('--chain_obs_dims', '-cod', type=str, default='(30, 30, 20)')

    args = vars(parser.parse_args())
    utils.print_args(args)
    args = utils.str2int_tuple(args)
    return args
parser.add_argument("--shuffle", default=True) parser.add_argument("--num_workers", default=8) parser.add_argument("--epoch", default=10, type=int) parser.add_argument("--pre_epoches", default=10, type=int) parser.add_argument("--snapshot", default="") parser.add_argument("--lr", default=0.001) parser.add_argument("--log_interval", default=50) parser.add_argument("--class_num", default=12) parser.add_argument("--extract", default=True) parser.add_argument("--weight_ring", default=0.01) parser.add_argument("--radius", default=25.0) parser.add_argument("--model", default='-1', type=str) parser.add_argument("--post", default='-1', type=str) parser.add_argument("--repeat", default='-1', type=str) args = parser.parse_args() print_args(args) source_root = os.path.join(args.data_root, args.source) source_label = os.path.join(args.data_root, args.source + "_list.txt") target_root = os.path.join(args.data_root, args.target) target_label = os.path.join(args.data_root, args.target + "6_list.txt") train_transform = transforms.Compose([ transforms.Scale((256, 256)), transforms.CenterCrop((224, 224)), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]) source_set = VisDAImage(source_root, source_label, train_transform)
    args.path = path_sorted[-1]
    pass
else:
    args.path = '{}-{}'.format(args.path, get_time())
os.system('mkdir -p {}'.format(args.path))

# print args
logging(str(args), path=args.path)

# init tensorboard
writer = SummaryWriter(args.path)

# print config
configuration_setup = 'SAC-NF'
configuration_setup += '\n'
configuration_setup += print_args(args)
# for arg in vars(args):
#     configuration_setup += '  {} : {}'.format(str(arg), str(getattr(args, arg)))
#     configuration_setup += '\n'
logging(configuration_setup, path=args.path)

# init sac
agent = SAC(env.observation_space.shape[0], env.action_space, args)

logging("----------------------------------------", path=args.path)
logging(str(agent.zf1), path=args.path)
logging("----------------------------------------", path=args.path)
logging(str(agent.policy), path=args.path)
logging("----------------------------------------", path=args.path)

gaussian_params, nf_params = get_params(agent.policy, args.flow_family)
nf_weights = sum(p.numel() for p in nf_params)
np.random.seed(SEED)
random.seed(SEED)
# torch.backends.cudnn.deterministic = True

args.s_dset_path = './data/ssda/' + args.dset + '/labeled_source_images_' \
    + names[args.s] + '.txt'
args.lt_dset_path = './data/ssda/' + args.dset + '/labeled_target_images_' \
    + names[args.t] + '_' + str(args.shot) + '.txt'
args.t_dset_path = './data/ssda/' + args.dset + '/unlabeled_target_images_' \
    + names[args.t] + '_' + str(args.shot) + '.txt'
args.vt_dset_path = './data/ssda/' + args.dset + '/validation_target_images_' \
    + names[args.t] + '_3.txt'
args.test_dset_path = args.t_dset_path

args.output_dir = osp.join(
    args.output, 'mixmatch', args.dset,
    names[args.s][0].upper() + names[args.t][0].upper())
args.name = names[args.s][0].upper() + names[args.t][0].upper()
if not osp.exists(args.output_dir):
    os.system('mkdir -p ' + args.output_dir)
if not osp.exists(args.output_dir):
    os.mkdir(args.output_dir)

args.log = 'mixmatch_' + args.pl + '_' + str(args.shot)
args.out_file = open(osp.join(args.output_dir, "{:}.txt".format(args.log)), "w")

utils.print_args(args)
train(args)
type=str, metavar='<str>', default='restaurant', help="domain of the corpus {restaurant, beer}") parser.add_argument( "--ortho-reg", dest="ortho_reg", type=float, metavar='<float>', default=0.1, help="The weight of orthogonol regularizaiton (default=0.1)") args = parser.parse_args() out_dir = args.out_dir_path + '/' + args.domain U.mkdir_p(out_dir) # 构造输出目录 U.print_args(args) # 打印命令行参数 assert args.algorithm in { 'rmsprop', 'sgd', 'adagrad', 'adadelta', 'adam', 'adamax' } assert args.domain in {'restaurant', 'beer'} if args.seed > 0: np.random.seed(args.seed) # ############################################################################################################################### # ## Prepare data # # from keras.preprocessing import sequence import reader as dataset
    sess.run(iterator.initializer)
    while True:
        try:
            item_id, label = model.predict(sess)
            writer.write(indices=[0, 1], values=[item_id, label])
        except tf.errors.OutOfRangeError:
            break
        step += 1
        if step % 10 == 0:
            now_time = time.strftime('%Y-%m-%d-%H:%M:%S', time.localtime(time.time()))
            print('{} predict {:2d} lines'.format(now_time, step * FLAGS.predict_batch_size))
    print("Done. Write output into {}".format(FLAGS.output_file))
    writer.close()


if __name__ == '__main__':
    # Params Preparation
    print_args(FLAGS)
    vocab_table, _, vocab_size = load_vocab(FLAGS.vocab_file)
    FLAGS.vocab_size = vocab_size

    # Model Preparation
    iterator = get_infer_iterator(
        FLAGS.predict_file,
        vocab_table,
        FLAGS.predict_batch_size,
        question_max_len=FLAGS.question_max_len,
        answer_max_len=FLAGS.answer_max_len,
    )
    mode = tf.estimator.ModeKeys.PREDICT
    model = parse_model(iterator, FLAGS, mode)

    predict()
def main(args, **model_kwargs):
    device = torch.device(args.device)
    args.device = device

    if args.dataset == 'abilene_tm':
        args.nNodes = 12
        args.day_size = 288
    elif args.dataset == 'geant_tm':
        args.nNodes = 22
        args.day_size = 96
    elif args.dataset == 'brain_tm':
        args.nNodes = 9
        args.day_size = 1440
    elif 'sinet' in args.dataset:
        args.nNodes = 73
        args.day_size = 288
    else:
        raise ValueError('Dataset not found!')

    train_loader, val_loader, test_loader, graphs, top_k_index = utils.get_dataloader(args)
    args.train_size, args.nSeries = train_loader.dataset.X.shape
    args.val_size = val_loader.dataset.X.shape[0]
    args.test_size = test_loader.dataset.X.shape[0]

    in_dim = 1
    if args.tod:
        in_dim += 1
    if args.ma:
        in_dim += 1
    if args.mx:
        in_dim += 1
    args.in_dim = in_dim

    model = models.get_model(args)
    logger = utils.Logger(args)
    engine = utils.Trainer.from_args(model, train_loader.dataset.scaler,
                                     train_loader.dataset.scaler_top_k, args)
    utils.print_args(args)

    if not args.test:
        iterator = trange(args.epochs)
        try:
            if os.path.isfile(logger.best_model_save_path):
                print('Model checkpoint exist!')
                print('Load model checkpoint? (y/n)')
                _in = input()
                if _in == 'y' or _in == 'yes':
                    print('Loading model...')
                    engine.model.load_state_dict(
                        torch.load(logger.best_model_save_path))
                else:
                    print('Training new model')

            for epoch in iterator:
                train_loss, train_rse, train_mae, train_mse, train_mape, train_rmse = [], [], [], [], [], []
                for iter, batch in enumerate(train_loader):
                    # x = batch['x']  # [b, seq_x, n, f]
                    # y = batch['y']  # [b, seq_y, n]
                    x = batch['x_top_k']
                    y = batch['y_top_k']
                    if y.max() == 0:
                        continue
                    loss, rse, mae, mse, mape, rmse = engine.train(x, y)
                    train_loss.append(loss)
                    train_rse.append(rse)
                    train_mae.append(mae)
                    train_mse.append(mse)
                    train_mape.append(mape)
                    train_rmse.append(rmse)

                engine.scheduler.step()
                with torch.no_grad():
                    val_loss, val_rse, val_mae, val_mse, val_mape, val_rmse = engine.eval(val_loader)

                m = dict(train_loss=np.mean(train_loss),
                         train_rse=np.mean(train_rse),
                         train_mae=np.mean(train_mae),
                         train_mse=np.mean(train_mse),
                         train_mape=np.mean(train_mape),
                         train_rmse=np.mean(train_rmse),
                         val_loss=np.mean(val_loss),
                         val_rse=np.mean(val_rse),
                         val_mae=np.mean(val_mae),
                         val_mse=np.mean(val_mse),
                         val_mape=np.mean(val_mape),
                         val_rmse=np.mean(val_rmse))

                description = logger.summary(m, engine.model)

                if logger.stop:
                    break

                description = 'Epoch: {} '.format(epoch) + description
                iterator.set_description(description)
        except KeyboardInterrupt:
            pass

    # Metrics on test data
    engine.model.load_state_dict(torch.load(logger.best_model_save_path))
    with torch.no_grad():
        test_met_df, x_gt, y_gt, y_real, yhat = engine.test(
            test_loader, engine.model, args.out_seq_len)
        test_met_df.round(6).to_csv(
            os.path.join(logger.log_dir, 'test_metrics.csv'))
        print('Prediction Accuracy:')
        print(utils.summary(logger.log_dir))

    if args.plot:
        logger.plot(x_gt, y_real, yhat)

    x_gt = x_gt.cpu().data.numpy()  # [timestep, seq_x, seq_y]
    y_gt = y_gt.cpu().data.numpy()
    yhat = yhat.cpu().data.numpy()

    # run TE
    if args.run_te:
        psi = get_psi(args)
        G = get_G(args)
        R = get_R(args)
        A = np.dot(R * G, psi)
        y_cs = np.zeros(y_gt.shape)

        # for i in range(y_gt.shape[0]):
        #     temp = np.linalg.inv(np.dot(A, A.T))
        #     S = np.dot(np.dot(A.T, temp), yhat[i].T)
        #     y_cs[i] = np.dot(psi, S).T

        for i in range(y_gt.shape[0]):
            m = A.shape[1]
            S = cvx.Variable(m)
            objective = cvx.Minimize(cvx.norm(S, p=0))
            constraint = [yhat[i].T == A * S]
            prob = cvx.Problem(objective, constraint)
            prob.solve()
            y_cs[i] = S.value.reshape(1, m)

        run_te(x_gt, y_gt, y_cs, args)
    default='adam',
    help="Optimization algorithm (rmsprop|sgd|adagrad|adadelta|adam|adamax) (default=adam)")
parser.add_argument(
    "--ortho-reg",
    dest="ortho_reg",
    type=float,
    metavar='<float>',
    default=0.1,
    help="The weight of orthogonal regularization (default=0.1)")
args = parser.parse_args()

out_dir = args.out_dir_path + '/' + args.domain
U.mkdir_p(out_dir)
U.print_args(args)

assert args.algorithm in {
    'rmsprop', 'sgd', 'adagrad', 'adadelta', 'adam', 'adamax'
}
# assert args.domain in {'restaurant', 'beer'}

if args.seed > 0:
    np.random.seed(args.seed)

# ###############################################################################
# ## Prepare data
# #

from keras.preprocessing import sequence
import reader as dataset
def main(): """Main training program.""" # Disable CuDNN. torch.backends.cudnn.enabled = False # Timer. timers = Timers() # Arguments. args = get_args() writer = None if args.tensorboard_dir and args.rank == 0: try: from torch.utils.tensorboard import SummaryWriter writer = SummaryWriter(log_dir=args.tensorboard_dir) except ModuleNotFoundError: print_rank_0('WARNING: TensorBoard writing requested but is not ' 'available (are you using PyTorch 1.1.0 or later?), ' 'no TensorBoard logs will be written.') writer = None # Pytorch distributed. initialize_distributed(args) if torch.distributed.get_rank() == 0: print('Pretrain BERT model') print_args(args, writer) # Autoresume. torch.distributed.barrier() if args.adlr_autoresume: enable_adlr_autoresume(args) # Random seeds for reproducability. set_random_seed(args.seed) # Data stuff. train_data, val_data, test_data, args.tokenizer_num_tokens, \ args.tokenizer_num_type_tokens = get_train_val_test_data(args) # Model, optimizer, and learning rate. model, optimizer, lr_scheduler = setup_model_and_optimizer(args) if args.resume_dataloader: if train_data is not None: train_data.batch_sampler.start_iter = args.iteration % \ len(train_data) print_rank_0('setting training data start iteration to {}'.format( train_data.batch_sampler.start_iter)) if val_data is not None: start_iter_val = (args.iteration // args.eval_interval) * \ args.eval_iters val_data.batch_sampler.start_iter = start_iter_val % \ len(val_data) print_rank_0( 'setting validation data start iteration to {}'.format( val_data.batch_sampler.start_iter)) if train_data is not None: train_data_iterator = iter(train_data) else: train_data_iterator = None if val_data is not None: val_data_iterator = iter(val_data) else: val_data_iterator = None iteration = 0 if args.train_iters > 0: if args.do_train: iteration, skipped = train(model, optimizer, lr_scheduler, train_data_iterator, val_data_iterator, timers, args, writer) if args.do_valid: prefix = 'the end of training for val data' val_loss = evaluate_and_print_results(prefix, val_data_iterator, model, args, writer, iteration, timers, False) if args.save and iteration != 0: save_checkpoint(iteration, model, optimizer, lr_scheduler, args) if test_data is not None: test_data_iterator = iter(test_data) else: test_data_iterator = None if args.do_test: # Run on test data. prefix = 'the end of training for test data' evaluate_and_print_results(prefix, test_data_iterator, model, args, None, 0, timers, True)
def main():
    parser = ArgumentParser()

    # Task configuration
    parser.add_argument('-device', default=0, type=int)
    parser.add_argument('-output_name', default='', type=str)
    parser.add_argument('-saved_model_path', default='', type=str)  # for k-fold ensemble prediction, set this to the output path of the corresponding k_fold models
    parser.add_argument('-type', default='train', type=str)
    parser.add_argument('-k_fold', default=-1, type=int)  # -1 disables k-fold; otherwise the index of the fold to use
    parser.add_argument('-merge_classification', default='avg', type=str)  # count prediction: 'vote' uses majority voting, 'avg' averages probabilities
    parser.add_argument('-merge_with_bert_sort', default='yes', type=str)  # whether to merge the similarities computed by the earlier BERT model
    parser.add_argument('-k_fold_cache', default='no', type=str)  # whether to reuse the previous k_fold cache
    parser.add_argument('-generate_candidates', default='', type=str)  # whether to merge the similarities computed by the earlier BERT model
    parser.add_argument('-seed', default=123456, type=int)  # random seed
    parser.add_argument('-cls_position', default='zero', type=str)  # whether the two added [CLS] tokens use position 0
    parser.add_argument('-pretrained_model_path', default='/home/liangming/nas/lm_params/chinese_L-12_H-768_A-12/', type=str)  # path to the pretrained BERT parameters

    # Training parameters
    parser.add_argument('-train_batch_size', default=64, type=int)
    parser.add_argument('-val_batch_size', default=256, type=int)
    parser.add_argument('-lr', default=2e-5, type=float)
    parser.add_argument('-epoch_num', default=20, type=int)
    parser.add_argument('-max_len', default=64, type=int)
    parser.add_argument('-dropout', default=0.3, type=float)
    parser.add_argument('-print_loss_step', default=2, type=int)
    parser.add_argument('-hit_list', default=[2, 5, 7, 10], type=list)

    args = parser.parse_args()
    # assert args.train_batch_size % args.neg_num == 0, print('batch size should be a multiple of neg_num')

    # Timestamp format
    DATE_FORMAT = "%Y-%m-%d-%H:%M:%S"

    # Output directory; created if it does not exist
    if args.output_name == '':
        output_path = os.path.join('./output/rerank_keywords_output',
                                   time.strftime(DATE_FORMAT, time.localtime(time.time())))
    else:
        output_path = os.path.join('./output/rerank_keywords_output', args.output_name)
    # if os.path.exists(output_path):
    #     raise Exception('the output path {} already exists'.format(output_path))
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    # Configure tensorboard
    tensor_board_log_path = os.path.join(
        output_path, 'tensor_board_log{}'.format('' if args.k_fold == -1 else args.k_fold))
    writer = SummaryWriter(tensor_board_log_path)

    # Configure the logger
    logger = Logger(output_path, 'main{}'.format('' if args.k_fold == -1 else args.k_fold)).logger

    # Set the random seed
    logger.info('set seed to {}'.format(args.seed))
    set_seed(args)

    # Print args
    print_args(args, logger)

    # Load the data
    logger.info('#' * 20 + 'loading data and model' + '#' * 20)
    data_path = os.path.join(project_path, 'candidates')
    # data_path = os.path.join(project_path, 'tf_idf_candidates')
    train_list, val_list, test_list, code_to_name, name_to_code, standard_name_list = \
        read_rerank_data(data_path, logger, args)

    # Load the pretrained model
    # pretrained_model_path = '/home/liangming/nas/lm_params/chinese_L-12_H-768_A-12/'
    pretrained_model_path = args.pretrained_model_path
    bert_config, bert_tokenizer, bert_model = get_pretrained_model(pretrained_model_path, logger)

    # Build the datasets
    logger.info('create dataloader')
    train_dataset = RerankKeywordDataset(train_list, bert_tokenizer, args, logger)
    train_dataloader = DataLoader(train_dataset, batch_size=args.train_batch_size,
                                  shuffle=False, collate_fn=train_dataset.collate_fn)

    val_dataset = RerankKeywordDataset(val_list, bert_tokenizer, args, logger)
    val_dataloader = DataLoader(val_dataset, batch_size=args.val_batch_size,
                                shuffle=False, collate_fn=val_dataset.collate_fn)

    test_dataset = RerankKeywordDataset(test_list, bert_tokenizer, args, logger)
    test_dataloader = DataLoader(test_dataset, batch_size=args.val_batch_size,
                                 shuffle=False, collate_fn=test_dataset.collate_fn)

    # Create the model
    logger.info('create model')
    model = BertKeywordsClassification(bert_model, bert_config, args)
    model = model.to(args.device)

    # Configure the optimizer and scheduler
    t_total = len(train_dataloader) * args.epoch_num
    optimizer, _ = get_optimizer_and_scheduler(model, t_total, args.lr, 0)

    if args.type == 'train':
        train(model, train_dataloader, val_dataloader, test_dataloader, optimizer,
              writer, args, logger, output_path, standard_name_list)
    elif args.type == 'evaluate':
        if args.saved_model_path == '':
            raise Exception('saved model path must not be empty')

        # Single model (no k-fold)
        if args.k_fold == -1:
            logger.info('loading saved model')
            checkpoint = torch.load(args.saved_model_path, map_location='cpu')
            model.load_state_dict(checkpoint)
            model = model.to(args.device)

            # # regenerate the latest embeddings of the ICD standard terms
            evaluate(model, test_dataloader, args, logger, writer,
                     standard_name_list, is_test=True)
        else:
            evaluate_k_fold(model, test_dataloader, args, logger, writer,
                            standard_name_list)
def main(): """Main training program.""" # Disable CuDNN. torch.backends.cudnn.enabled = False # Timer. timers = Timers() # Arguments. args = get_args() # Pytorch distributed. initialize_distributed(args) if torch.distributed.get_rank() == 0: print('Pretrain GPT2 model') print_args(args) # Random seeds for reproducability. set_random_seed(args.seed) # Model, optimizer, and learning rate. model, optimizer, lr_scheduler = setup_model_and_optimizer(args) if torch.distributed.get_rank() == 0: print(args.iteration) train_data_iterator, val_data_iterator, test_data_iterator = \ build_train_valid_test_data_iterators( train_valid_test_dataset_provider, args) # Resume data loader if necessary. # if args.resume_dataloader: # if train_data is not None: # train_data.batch_sampler.start_iter = args.iteration % \ # len(train_data) # if val_data is not None: # start_iter_val = (args.train_iters // args.save_interval) * \ # args.eval_interval # val_data.batch_sampler.start_iter = start_iter_val % \ # len(val_data) # if train_data is not None: # train_data_iterator = iter(train_data) # else: # train_data_iterator = None # if val_data is not None: # val_data_iterator = iter(val_data) # else: # val_data_iterator = None # TODO: figure out how to properly set this especially when resuming training iteration = 0 if args.train_iters > 0: iteration, skipped = train(model, optimizer, lr_scheduler, train_data_iterator, val_data_iterator, timers, args) prefix = 'the end of training for val data' val_loss = evaluate_and_print_results(prefix, val_data_iterator, model, args, timers, False) if args.save and iteration != 0: save_checkpoint(iteration, model, optimizer, lr_scheduler, args) # if test_data is not None: # test_data_iterator = iter(test_data) # else: # test_data_iterator = None if args.do_test: # Run on test data. prefix = 'the end of training for test data' evaluate_and_print_results(prefix, test_data_iterator, model, args, timers, True)
def main():
    start_time = time.time()

    init_out_dir()
    if args.clear_checkpoint:
        clear_checkpoint()
    last_step = get_last_checkpoint_step()
    if last_step >= 0:
        my_log('\nCheckpoint found: {}\n'.format(last_step))
    else:
        clear_log()
    print_args()

    if args.net == 'made':
        net = MADE(**vars(args))
    elif args.net == 'pixelcnn':
        net = PixelCNN(**vars(args))
    elif args.net == 'bernoulli':
        net = BernoulliMixture(**vars(args))
    else:
        raise ValueError('Unknown net: {}'.format(args.net))
    net.to(args.device)
    my_log('{}\n'.format(net))

    params = list(net.parameters())
    params = list(filter(lambda p: p.requires_grad, params))
    nparams = int(sum([np.prod(p.shape) for p in params]))
    my_log('Total number of trainable parameters: {}'.format(nparams))
    named_params = list(net.named_parameters())

    if args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(params, lr=args.lr)
    elif args.optimizer == 'sgdm':
        optimizer = torch.optim.SGD(params, lr=args.lr, momentum=0.9)
    elif args.optimizer == 'rmsprop':
        optimizer = torch.optim.RMSprop(params, lr=args.lr, alpha=0.99)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(params, lr=args.lr, betas=(0.9, 0.999))
    elif args.optimizer == 'adam0.5':
        optimizer = torch.optim.Adam(params, lr=args.lr, betas=(0.5, 0.999))
    else:
        raise ValueError('Unknown optimizer: {}'.format(args.optimizer))

    if args.lr_schedule:
        # 0.92**80 ~ 1e-3
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, factor=0.92, patience=100, threshold=1e-4, min_lr=1e-6)

    if last_step >= 0:
        state = torch.load('{}_save/{}.state'.format(args.out_filename, last_step))
        ignore_param(state['net'], net)
        net.load_state_dict(state['net'])
        if state.get('optimizer'):
            optimizer.load_state_dict(state['optimizer'])
        if args.lr_schedule and state.get('scheduler'):
            scheduler.load_state_dict(state['scheduler'])

    init_time = time.time() - start_time
    my_log('init_time = {:.3f}'.format(init_time))

    my_log('Training...')
    sample_time = 0
    train_time = 0
    start_time = time.time()
    for step in range(last_step + 1, args.max_step + 1):
        optimizer.zero_grad()

        sample_start_time = time.time()
        with torch.no_grad():
            sample, x_hat = net.sample(args.batch_size)
        assert not sample.requires_grad
        assert not x_hat.requires_grad
        sample_time += time.time() - sample_start_time

        train_start_time = time.time()

        log_prob = net.log_prob(sample)
        # 0.998**9000 ~ 1e-8
        beta = args.beta * (1 - args.beta_anneal**step)
        with torch.no_grad():
            energy = ising.energy(sample, args.ham, args.lattice, args.boundary)
            loss = log_prob + beta * energy
        assert not energy.requires_grad
        assert not loss.requires_grad
        loss_reinforce = torch.mean((loss - loss.mean()) * log_prob)
        loss_reinforce.backward()

        if args.clip_grad:
            nn.utils.clip_grad_norm_(params, args.clip_grad)
        optimizer.step()

        if args.lr_schedule:
            scheduler.step(loss.mean())

        train_time += time.time() - train_start_time

        if args.print_step and step % args.print_step == 0:
            free_energy_mean = loss.mean() / args.beta / args.L**2
            free_energy_std = loss.std() / args.beta / args.L**2
            entropy_mean = -log_prob.mean() / args.L**2
            energy_mean = energy.mean() / args.L**2
            mag = sample.mean(dim=0)
            mag_mean = mag.mean()
            mag_sqr_mean = (mag**2).mean()
            if step > 0:
                sample_time /= args.print_step
                train_time /= args.print_step
            used_time = time.time() - start_time
            my_log(
                'step = {}, F = {:.8g}, F_std = {:.8g}, S = {:.8g}, E = {:.8g}, M = {:.8g}, Q = {:.8g}, lr = {:.3g}, beta = {:.8g}, sample_time = {:.3f}, train_time = {:.3f}, used_time = {:.3f}'
                .format(
                    step,
                    free_energy_mean.item(),
                    free_energy_std.item(),
                    entropy_mean.item(),
                    energy_mean.item(),
                    mag_mean.item(),
                    mag_sqr_mean.item(),
                    optimizer.param_groups[0]['lr'],
                    beta,
                    sample_time,
                    train_time,
                    used_time,
                ))
            sample_time = 0
            train_time = 0

            if args.save_sample:
                state = {
                    'sample': sample,
                    'x_hat': x_hat,
                    'log_prob': log_prob,
                    'energy': energy,
                    'loss': loss,
                }
                torch.save(state, '{}_save/{}.sample'.format(args.out_filename, step))

        if (args.out_filename and args.save_step
                and step % args.save_step == 0):
            state = {
                'net': net.state_dict(),
                'optimizer': optimizer.state_dict(),
            }
            if args.lr_schedule:
                state['scheduler'] = scheduler.state_dict()
            torch.save(state, '{}_save/{}.state'.format(args.out_filename, step))

        if (args.out_filename and args.visual_step
                and step % args.visual_step == 0):
            torchvision.utils.save_image(
                sample,
                '{}_img/{}.png'.format(args.out_filename, step),
                nrow=int(sqrt(sample.shape[0])),
                padding=0,
                normalize=True)

            if args.print_sample:
                x_hat_np = x_hat.view(x_hat.shape[0], -1).cpu().numpy()
                x_hat_std = np.std(x_hat_np, axis=0).reshape([args.L] * 2)

                x_hat_cov = np.cov(x_hat_np.T)
                x_hat_cov_diag = np.diag(x_hat_cov)
                x_hat_corr = x_hat_cov / (
                    sqrt(x_hat_cov_diag[:, None] * x_hat_cov_diag[None, :]) +
                    args.epsilon)
                x_hat_corr = np.tril(x_hat_corr, -1)
                x_hat_corr = np.max(np.abs(x_hat_corr), axis=1)
                x_hat_corr = x_hat_corr.reshape([args.L] * 2)

                energy_np = energy.cpu().numpy()
                energy_count = np.stack(
                    np.unique(energy_np, return_counts=True)).T

                my_log(
                    '\nsample\n{}\nx_hat\n{}\nlog_prob\n{}\nenergy\n{}\nloss\n{}\nx_hat_std\n{}\nx_hat_corr\n{}\nenergy_count\n{}\n'
                    .format(
                        sample[:args.print_sample, 0],
                        x_hat[:args.print_sample, 0],
                        log_prob[:args.print_sample],
                        energy[:args.print_sample],
                        loss[:args.print_sample],
                        x_hat_std,
                        x_hat_corr,
                        energy_count,
                    ))

            if args.print_grad:
                my_log('grad max_abs min_abs mean std')
                for name, param in named_params:
                    if param.grad is not None:
                        grad = param.grad
                        grad_abs = torch.abs(grad)
                        my_log('{} {:.3g} {:.3g} {:.3g} {:.3g}'.format(
                            name,
                            torch.max(grad_abs).item(),
                            torch.min(grad_abs).item(),
                            torch.mean(grad).item(),
                            torch.std(grad).item(),
                        ))
                    else:
                        my_log('{} None'.format(name))
                my_log('')
def main():
    start_time = time()

    last_step = get_last_ckpt_step()
    assert last_step >= 0
    my_log(f'Checkpoint found: {last_step}\n')
    print_args()

    net_init, net_apply, net_init_cache, net_apply_fast = get_net()
    params = load_ckpt(last_step)
    in_shape = (args.batch_size, args.L, args.L, 1)
    _, cache_init = net_init_cache(params, jnp.zeros(in_shape), (-1, -1))

    # sample_raw_fun = get_sample_fun(net_apply, None)
    sample_raw_fun = get_sample_fun(net_apply_fast, cache_init)
    # sample_k_fun = get_sample_k_fun(net_apply, None)
    sample_k_fun = get_sample_k_fun(net_apply_fast, net_init_cache)
    log_q_fun = get_log_q_fun(net_apply)

    @jit
    def update(spins_old, log_q_old, energy_old, step, accept_count,
               energy_mean, energy_var_sum, rng):
        rng, rng_k, rng_sample, rng_accept = jrand.split(rng, 4)
        k = get_k(rng_k)
        spins = sample_k_fun(k, params, spins_old, rng_sample)
        log_q = log_q_fun(params, spins)
        energy = energy_fun(spins)
        log_uniform = jnp.log(jrand.uniform(rng_accept, (args.batch_size, )))
        accept = log_uniform < (log_q_old - log_q + args.beta *
                                (energy_old - energy))
        spins = jnp.where(jnp.expand_dims(accept, axis=(1, 2, 3)), spins, spins_old)
        log_q = jnp.where(accept, log_q, log_q_old)
        energy = jnp.where(accept, energy, energy_old)
        mag = spins.mean(axis=(1, 2, 3))

        step += 1
        accept_count += accept.sum()
        energy_per_spin = energy / args.L**2
        energy_mean, energy_var_sum = welford_update(energy_per_spin.mean(),
                                                     step, energy_mean,
                                                     energy_var_sum)

        return (spins, log_q, energy, mag, accept, k, step, accept_count,
                energy_mean, energy_var_sum, rng)

    rng, rng_init = jrand.split(jrand.PRNGKey(args.seed))

    # Sample initial configurations from the network
    spins = sample_raw_fun(args.batch_size, params, rng_init)
    log_q = log_q_fun(params, spins)
    energy = energy_fun(spins)
    step = 0
    accept_count = 0
    energy_mean = 0
    energy_var_sum = 0

    data_filename = args.log_filename.replace('.log', '.hdf5')
    writer_proto = [
        # Uncomment to save all the sampled spins
        # ('spins', bool, (args.batch_size, args.L, args.L)),
        ('log_q', np.float32, (args.batch_size, )),
        ('energy', np.int32, (args.batch_size, )),
        ('mag', np.float32, (args.batch_size, )),
        ('accept', bool, (args.batch_size, )),
        ('k', np.int32, None),
    ]
    ensure_dir(data_filename)
    with ChunkedDataWriter(data_filename, writer_proto,
                           args.save_step) as writer:
        my_log('Sampling...')
        while step < args.max_step:
            (spins, log_q, energy, mag, accept, k, step, accept_count,
             energy_mean, energy_var_sum, rng) = update(spins, log_q, energy,
                                                        step, accept_count,
                                                        energy_mean,
                                                        energy_var_sum, rng)
            # Uncomment to save all the sampled spins
            # writer.write(spins[:, :, :, 0] > 0, log_q, energy, mag, accept, k)
            writer.write(log_q, energy, mag, accept, k)

            if args.print_step and step % args.print_step == 0:
                accept_rate = accept_count / (step * args.batch_size)
                energy_std = jnp.sqrt(energy_var_sum / step)
                my_log(', '.join([
                    f'step = {step}',
                    f'P = {accept_rate:.8g}',
                    f'E = {energy_mean:.8g}',
                    f'E_std = {energy_std:.8g}',
                    f'time = {time() - start_time:.3f}',
                ]))