Example #1
def main(args):
    print(args)
    print("Started experiment!")
    utils.print_args(args)
    utils.set_seed(args.seed)

    smoothing_method = {"nist": SmoothingFunction().method3}
    results = {}
    scores = {}
    for name, method in smoothing_method.items():
        scores[name] = utils.evaluate_bleu(
            args.input_file,
            os.path.join("data", "valid.txt"),
            num_real_sentences=args.num_sentences,
            num_generated_sentences=args.num_sentences,
            gram=args.gram,
            smoothing_method=method,
            chunk_size=15)
        print()

    for name in smoothing_method.keys():
        results[name] = {}
        results[name]['scores'] = scores[name]

    print("Results:", results)
    bleu = results['nist']['scores']['bleu5']
    sbleu = results['nist']['scores']['self-bleu5']
    hmean = stats.hmean([bleu, 1.0 / sbleu])
    print("Harmonic Mean:", hmean)
def get_config():
    import argparse
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--mode', '-mode', type=str, default='train')
    parser.add_argument('--gpu_id', '-id', type=str, default='0')
    parser.add_argument('--root_dir',
                        '-sd',
                        type=str,
                        default='/home/caiyi/PycharmProjects/gesture_MP')
    # parser.add_argument('--result_dir', '-rd', type=str, default='/home/caiyi/PycharmProjects/gesture_MP')
    parser.add_argument(
        '--result_dir',
        '-rd',
        type=str,
        default='/home/caiyi/PycharmProjects/gesture_MP/result')
    parser.add_argument('--batch_size', '-bs', type=int, default=64)
    parser.add_argument('--input_size', '-is', type=int, default=64)
    # parser.add_argument('--num_joint', '-nj', type=int, default=14)
    parser.add_argument('--fc_size', '-fc', type=int, default=2048)
    parser.add_argument('--num_class', '-nc', type=int, default=11)
    parser.add_argument('--epoch', '-epoch', type=int, default=100)
    parser.add_argument('--lr_start',
                        '-lr',
                        help='learning rate',
                        type=float,
                        default=1e-3)
    parser.add_argument('--lr_decay_rate', type=float, default=0.9)
    parser.add_argument('--lr_decay_step', type=float, default=100000)
    args = vars(parser.parse_args())
    utils.print_args(args)
    return args
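Every example on this page calls a print_args helper (as utils.print_args, U.print_args, or a bare print_args) whose definition lives in the respective repositories; it is variously given a Namespace, a dict, or an extra logger/writer argument. As a hedged illustration only, and not the implementation from any of these projects, a minimal helper that accepts either a Namespace or a plain dict might look like the following sketch:

# Hypothetical sketch of a print_args helper; the real implementations in the
# projects above may differ (e.g. writing to a logger, a file, or TensorBoard).
import argparse

def print_args(args):
    """Pretty-print parsed arguments, whether given as a Namespace or a dict."""
    if isinstance(args, argparse.Namespace):
        args = vars(args)
    width = max((len(str(k)) for k in args), default=0)
    print('-' * 40)
    for key in sorted(args):
        print('{:<{}} : {}'.format(key, width, args[key]))
    print('-' * 40)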
Example #3
def get_config():
    import argparse
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--mode', '-mode', type=str, default='pretest')
    parser.add_argument('--gpu_id', '-id', type=str, default='0')
    parser.add_argument('--saved_model_path',
                        '-smp',
                        type=str,
                        default='../../results/')
    parser.add_argument('--batch_size', '-bs', type=int, default=64)
    parser.add_argument('--root_weight', '-w', type=float, default=4)
    parser.add_argument('--n_rounds', '-nr', type=int, default=2000)
    parser.add_argument('--train_iters', '-ni', type=int, default=100)
    parser.add_argument('--dataset', '-data', type=str, default='nyu')
    parser.add_argument('--mrsa_test_fold', '-mtf', type=str, default='P8')
    parser.add_argument('--files_per_time', '-fpt', type=int, default=10)
    parser.add_argument('--samples_per_time', '-spt', type=int, default=2000)
    parser.add_argument('--lr_start',
                        '-lr',
                        help='learning rate',
                        type=float,
                        default=0.0001)
    parser.add_argument('--lr_decay_rate', default=0.99)
    parser.add_argument('--lr_decay_iters', default=3000)
    parser.add_argument('--new_training', '-new', type=bool, default=0)
    parser.add_argument('--test_gap', '-tg', type=int, default=2)
    parser.add_argument('--num_cpus', '-cpus', type=int, default=32)
    args = vars(parser.parse_args())
    utils.print_args(args)
    return args
Example #4
def main():
    parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
    parser, parser_create = JobManager.register_arguments(parser)
    parser.add_argument('-o',
                        '--output-dir',
                        type=str,
                        default='./results',
                        help="directory to store the results")
    parser_create.add_argument(
        '--project',
        type=str,
        help='project name for wandb logging (omit to disable)')
    parser_create.add_argument('-s',
                               '--seed',
                               type=int,
                               default=12345,
                               help='initial random seed')
    parser_create.add_argument('-r',
                               '--repeats',
                               type=int,
                               default=10,
                               help="number of experiment iterations")
    parser_create.add_argument('--hyperopt', action='store_true')
    parser_create.add_argument('--LPGNN', action='store_true')
    parser_create.add_argument('--baselines', action='store_true')
    args = parser.parse_args()
    print_args(args)

    JobManager(args, cmd_generator=experiment_generator).run()
Example #5
def main():
    # Get the configuration arguments
    args = utils.get_args()
    utils.print_args(args)

    # Allocate a small fraction of GPU memory and expand the allotment as needed
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)

    # Set environment variables; TF_CPP_MIN_LOG_LEVEL='3' silences most TensorFlow log output
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)

    # Create a session with config options
    with tf.Session(config=config) as sess:
        # initialize the DNN
        mini = MiniNet(sess, args)

        # Gets all variables that have trainable=True
        model_vars = tf.trainable_variables()

        # slim is a library that makes defining, training, and evaluating NNs simple.
        tf.contrib.slim.model_analyzer.analyze_vars(model_vars, print_info=True)

        if args.training == True:
            mini.train()
        else:
            mini.test()
Example #6
def get_config():
    import argparse
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--gpu_id', '-id', type=str, default='1')
    parser.add_argument('--saved_model_path', '-smp', type=str, default='../../results/')
    parser.add_argument('--dataset', '-data', type=str, default='icvl')
    parser.add_argument('--mrsa_test_fold', '-mtf', type=str, default='P8')
    parser.add_argument('--batch_size', '-bs', type=int, default=64)
    parser.add_argument('--num_cpus', '-cpus', type=int, default=32)

    parser.add_argument('--n_rounds', '-nr', type=int, default=500)
    parser.add_argument('--train_iters', '-ni', type=int, default=400)
    parser.add_argument('--update_iters', '-ui', type=int, default=40)
    parser.add_argument('--tau', '-tau', type=float, default=0.01)
    parser.add_argument('--files_per_time', '-fpt', type=int, default=5)
    parser.add_argument('--num_batch_samples', '-nb', type=int, default=1023)
    parser.add_argument('--max_iters', '-mi', type=int, default=1)
    parser.add_argument('--test_gap', '-tg', type=int, default=1)

    parser.add_argument('--buffer_size', '-buf', type=int, default=40000)
    parser.add_argument('--actor_lr', '-alr', type=float, default=0.0001)
    parser.add_argument('--critic_lr', '-clr', type=float, default=0.0001)
    parser.add_argument('--step_size', '-step', type=float, default=1.0)
    parser.add_argument('--beta', '-beta', type=float, default=1.0)
    parser.add_argument('--gamma', '-gamma', type=float, default=0.9)
    parser.add_argument('--reward_range', '-range', type=int, default=3)
    args = vars(parser.parse_args())
    utils.print_args(args)
    return args
Example #7
def main():
    args = get_arguments()
    utils.print_args(args)
    dataset = data_loading.get_dataset(args.dataset, args.normalize_raw,
                                       args.normalize_reps, args.cifar_path)

    print("Collected arguments and raw dataset.")

    if args.network_type == 'simple':
        input_shape = args.dim_red if args.dim_red is not None else dataset.get_raw_input_shape(
        )
        rounds.add_network_to_vector_rounds(args.rounds,
                                            dataset,
                                            input_shape,
                                            args.neurons,
                                            args.max_iter_optimization,
                                            args.alpha_optimization,
                                            args.n_train,
                                            args.dim_red,
                                            network_type='simple')
    elif args.network_type == 'conv':
        input_shape = dataset.get_raw_input_shape(True)
        rounds.add_network_to_vector_rounds(args.rounds,
                                            dataset,
                                            input_shape,
                                            args.neurons,
                                            args.max_iter_optimization,
                                            args.alpha_optimization,
                                            args.n_train,
                                            None,
                                            network_type='conv')
    else:
        raise ValueError("Network type {} not supported".format(
            args.network_type))

    print("Finished getting Representations")
    print("Getting represented dataset:")
    print("Getting training examples...")
    x, y = dataset.get_training_examples(args.n_train,
                                         dim_reduction=args.dim_red,
                                         print_progress=True)
    print("Getting test examples...")
    x_test, y_test = dataset.get_test_examples(args.n_test,
                                               dim_reduction=args.dim_red,
                                               print_progress=True)

    print("Getting final linear separator")

    w = svm.get_linear_separator(x,
                                 y,
                                 type_of_classifier='sdca',
                                 verbose=2,
                                 alpha=args.alpha_evaluation,
                                 max_iter=args.max_iter_evaluation)

    performance = evaluation.evaluate_model(w, x, y)
    print("train performance is {}".format(performance))
    performance = evaluation.evaluate_model(w, x_test, y_test)
    print("test performance is {}".format(performance))
Example #8
def main(args, **model_kwargs):
    device = torch.device(args.device)
    args.device = device
    if args.dataset == 'abilene_tm':
        args.nNodes = 12
        args.day_size = 288
    elif args.dataset == 'geant_tm':
        args.nNodes = 22
        args.day_size = 96
    elif args.dataset == 'brain_tm':
        args.nNodes = 9
    elif 'sinet' in args.dataset:
        args.nNodes = 73
        args.day_size = 288
    else:
        raise ValueError('Dataset not found!')

    test_loader = utils.get_dataloader(args)

    args.test_size, args.nSeries = test_loader.dataset.gt_data_set.shape

    in_dim = 1
    args.in_dim = in_dim

    model = models.get_model(args)
    logger = utils.Logger(args)

    engine = utils.Trainer.from_args(model, test_loader.dataset.scaler, args)

    utils.print_args(args)

    if not args.test:
        test_met_df, x_gt, y_gt, y_real, yhat = engine.test(
            test_loader, engine.model, args.out_seq_len)
        test_met_df.round(6).to_csv(
            os.path.join(logger.log_dir, 'test_metrics.csv'))
        print('Prediction Accuracy:')
        print(utils.summary(logger.log_dir))
        np.save(os.path.join(logger.log_dir, 'x_gt'), x_gt)
        np.save(os.path.join(logger.log_dir, 'y_gt'), y_gt)
        np.save(os.path.join(logger.log_dir, 'y_real'), y_real)
        np.save(os.path.join(logger.log_dir, 'yhat'), yhat)

    else:
        x_gt = np.load(os.path.join(logger.log_dir, 'x_gt.npy'))
        y_gt = np.load(os.path.join(logger.log_dir, 'y_gt.npy'))
        y_real = np.load(os.path.join(logger.log_dir, 'y_real.npy'))
        yhat = np.load(os.path.join(logger.log_dir, 'yhat.npy'))

    if args.plot:
        logger.plot(x_gt, y_real, yhat)

    # run TE
    if args.run_te:
        run_te(x_gt, y_gt, yhat, args)
Example #9
def main():
    """Main training program."""

    num_of_gpus = 8
    num_of_layers = 24
    hp = 1024 // num_of_gpus
    d_binglr = torch.load(
        '/relevance2-nfs/romittel/binglr_pretrained_model/pytorch_model.bin')
    emb_per_gpu = d_binglr['bert.embeddings.word_embeddings.weight'].size(
    )[0] // num_of_gpus
    # Disable CuDNN.
    torch.backends.cudnn.enabled = False

    # Timer.
    timers = Timers()

    # Arguments.
    args = get_args()
    file_len = 0
    for line in open(args.valid_data[0], 'r', encoding='utf-8'):
        file_len += 1
    print("file_len= ", file_len)
    # Pytorch distributed.
    initialize_distributed(args)
    if torch.distributed.get_rank() == 0:
        print('Pretrain GPT2 model')
        print_args(args)

    # Random seeds for reproducibility.
    set_random_seed(args.seed)

    # Data stuff.
    train_data, val_data, test_data, args.vocab_size, \
        args.eod_token = get_train_val_test_data(args)

    # Model, optimizer, and learning rate.

    model, optimizer, lr_scheduler = setup_model_and_optimizer(args)
    #model.optimizer.dynamic_loss_scale=True
    j = 0
    if j == torch.distributed.get_rank():
        # word_embeddings
        #num_embeddings_per_partition = model.module.module.module.word_embeddings.num_embeddings_per_partition
        #embedding_dim = model.module.module.module.word_embeddings.embedding_dim
        print(model.module.module.module.input_layernorm.bias.size())
        print(d_binglr['bert.embeddings.LayerNorm.bias'].size())
Example #10
def main():
    seed_everything(12345)
    logging.getLogger("lightning").setLevel(logging.ERROR)
    logging.captureWarnings(True)

    parser = ArgumentParser()
    parser.add_argument('-d',
                        '--dataset',
                        type=str,
                        choices=available_datasets(),
                        required=True)
    parser.add_argument('-m',
                        '--method',
                        type=str,
                        choices=FeatureTransform.available_methods(),
                        required=True)
    parser.add_argument('-e',
                        '--epsilons',
                        nargs='*',
                        type=float,
                        default=[0.0])
    parser.add_argument('-k', '--steps', nargs='*', type=int, default=[1])
    parser.add_argument('-l',
                        '--label-rates',
                        nargs='*',
                        type=float,
                        default=[1.0])
    parser.add_argument('-r', '--repeats', type=int, default=1)
    parser.add_argument('-o', '--output-dir', type=str, default='./results')
    parser.add_argument('--device',
                        type=str,
                        default='cuda',
                        choices=['cpu', 'cuda'])
    parser = NodeClassifier.add_module_specific_args(parser)
    args = parser.parse_args()

    if args.method in FeatureTransform.private_methods and min(
            args.epsilons) <= 0:
        parser.error('LDP method requires eps > 0.')

    print_args(args)
    start = time.time()
    batch_train_and_test(args)
    end = time.time()
    print('\nTotal time spent:', end - start, 'seconds.\n\n')
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='Parse pytorch records')
    parser.add_argument('--file-list',
                        type=str,
                        help='the file name for the list of ')
    args = parser.parse_args()
    print_args(args)

    best_model = defaultdict(list)

    with open(args.file_list) as list_fp:
        for line in list_fp:
            fn = line.strip()
            count = 0
            print_str = []
            with open(fn) as fp:
                for line in fp:
                    line = line.strip()
                    if len(line) > 5 and line[:5] == 'valid':
                        count = 3
                        print_str = []
                    if count != 0:
                        count -= 1
                        if count == 2:
                            line = line[-4:-1]
                        elif count == 1:
                            line = line.split(' ')[-1]
                        else:
                            line = line.split(',')[-3]
                        print_str.append(line)

                print_str[2] = float(print_str[2])
                mid_dist = print_str[2]
                acc = (print_str[0])
                if len(best_model[acc]) == 0 or best_model[acc][-1] < mid_dist:
                    best_model[acc] = [fn] + print_str

                print(fn, print_str)

    print()
    for acc in sorted(best_model, reverse=True):
        print('acc = {}, # of region = {}, mid dist = {:.4f}, fn = {}'.format(
            best_model[acc][1], best_model[acc][2], best_model[acc][3],
            best_model[acc][0]))
Example #12
def main():
    import argparse
    # global hyper parameters
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--use_GPU', help='if use gpu', default=True)
    parser.add_argument('--render',
                        help='render the environment',
                        default=True)

    parser.add_argument('--hid-layers-sizes',
                        help='the sizes of each hidden layer',
                        default=[64, 64])
    parser.add_argument('--model_dir',
                        help='model loading directory',
                        type=str,
                        default='../../mamodel_p/')
    parser.add_argument('--model_name',
                        help='model name for initial model',
                        type=str,
                        default='pmodel_18')
    parser.add_argument('--scenario-name',
                        '-sn',
                        help='scenario name',
                        type=str,
                        default='simple_body_language_pt')
    parser.add_argument('--max-env-steps',
                        help='maximum steps in the env',
                        type=int,
                        default=25)
    parser.add_argument('--time-interval',
                        help="time interval between two steps",
                        type=float,
                        default=0.2)
    parser.add_argument('--num-episodes',
                        help='number of episodes',
                        type=int,
                        default=10)

    args = parser.parse_args()
    print_args(args)
    args = vars(args)
    learn(args)
Example #13
def main():
    seed_everything(12345)

    # parse arguments
    parser = ArgumentParser()
    parser.add_argument('-d', '--datasets', nargs='+', choices=available_datasets(), default=available_datasets())
    parser.add_argument('-m', '--methods', nargs='+', choices=available_mechanisms(), default=available_mechanisms())
    parser.add_argument('-e', '--epsilons', nargs='+', type=float, dest='epsilons', required=True)
    parser.add_argument('-a', '--aggs', nargs='*', type=str, default=['gcn'])
    parser.add_argument('-r', '--repeats', type=int, default=1)
    parser.add_argument('-o', '--output-dir', type=str, default='./results')
    parser.add_argument('--device', type=str, default='cuda', choices=['cpu', 'cuda'])
    args = parser.parse_args()

    # check if eps > 0 for LDP methods
    if min(args.epsilons) <= 0:
        parser.error('LDP methods require eps > 0.')

    print_args(args)
    batch_error_estimation(args)
Example #14
def main():

    args = utils.get_args()
    utils.print_args(args)

    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(gpu_options=gpu_options,
                            log_device_placement=False)

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)

    with tf.Session(config=config) as sess:
        mini = MiniNet(sess, args)

        model_vars = tf.trainable_variables()
        tf.contrib.slim.model_analyzer.analyze_vars(model_vars,
                                                    print_info=True)

        if bool(int(args.training)):
            mini.train()
Example #15
def get_config():
    import argparse
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--mode', '-mode', type=str, default='train')
    parser.add_argument('--gpu_id', '-id', type=str, default='2')
    parser.add_argument('--store_dir',
                        '-sd',
                        type=str,
                        default='/hand_pose_data/nyu')
    # parser.add_argument('--result_dir', '-rd', type=str, default='/home/caiyi/PycharmProjects/noisy_pose_result')
    parser.add_argument('--result_dir',
                        '-rd',
                        type=str,
                        default='/home/data/hands2019_challenge/task1')
    parser.add_argument('--in_channel', type=int, default=1)
    parser.add_argument('--channel_1', type=int, default=4)
    parser.add_argument('--channel_2', type=int, default=8)
    parser.add_argument('--channel_3', type=int, default=16)
    parser.add_argument('--channel_4', type=int, default=32)
    parser.add_argument('--channel_5', type=int, default=64)
    parser.add_argument('--channel_6', type=int, default=128)
    parser.add_argument('--channel_7', type=int, default=256)
    parser.add_argument('--input_dim', type=int, default=1152)
    parser.add_argument('--layer1', type=int, default=256)
    parser.add_argument('--layer2', type=int, default=64)
    parser.add_argument('--output_dim', type=int, default=2)
    parser.add_argument('--batch_size', '-bs', type=int, default=64)
    parser.add_argument('--epoch', '-epoch', type=int, default=20)
    parser.add_argument('--lr_start',
                        '-lr',
                        help='learning rate',
                        type=float,
                        default=0.0005)
    parser.add_argument('--lr_decay_rate', type=float, default=0.9)
    parser.add_argument('--lr_decay_step', type=float, default=20000)
    args = vars(parser.parse_args())
    utils.print_args(args)
    return args
Example #16
def main():
    """Main training program."""

    # Disable CuDNN.
    torch.backends.cudnn.enabled = False

    # Timer.
    timers = Timers()

    # Arguments.
    args = get_args()
    file_len = 0
    for line in open(args.valid_data[0], 'r', encoding='utf-8'):
        file_len += 1
    print("file_len= ", file_len)
    # Pytorch distributed.
    initialize_distributed(args)
    if torch.distributed.get_rank() == 0:
        print('Pretrain GPT2 model')
        print_args(args)

    # Random seeds for reproducibility.
    set_random_seed(args.seed)

    # Data stuff.
    train_data, val_data, test_data, args.vocab_size, \
        args.eod_token = get_train_val_test_data(args)

    # Model, optimizer, and learning rate.
    model, optimizer, lr_scheduler = setup_model_and_optimizer(args)
    #model.optimizer.dynamic_loss_scale=True

    if val_data is not None:
        val_data_iterator = iter(val_data)
    else:
        val_data_iterator = None

    #TODO: figure out how to properly set this especially when resuming training
    evaluate(val_data_iterator, model, args, timers, file_len, verbose=False)
Example #17
def main_nn():
    args = get_network_training_args()
    utils.print_args(args)
    dataset = data_loading.get_dataset(args.dataset, args.normalize_raw, False,
                                       args.cifar_path)
    if args.network_type == 'simple':
        d = args.dim_red if args.dim_red is not None else dataset.get_raw_input_shape(
        )
        model = FCNetwork(d, args.neurons, args.layers)
    else:  # meaning args.network_type == 'conv'
        input_shape = dataset.get_raw_input_shape(True)
        model = ConvNetwork(input_shape,
                            args.neurons,
                            args.layers,
                            args.kernel_size,
                            auto_pad=True)

    model.to(utils.get_device())
    print("Model device is cuda? {}".format(next(model.parameters()).is_cuda))

    x, y = dataset.get_training_examples(
        args.n_train, False,
        args.dim_red if args.network_type == 'simple' else None)
    x_test, y_test = dataset.get_test_examples(
        args.n_test, False,
        args.dim_red if args.network_type == 'simple' else None)
    train_network(model,
                  x,
                  y,
                  x_test,
                  y_test,
                  args.epochs,
                  args.batch_size,
                  optimizer=args.optimizer,
                  lr=args.learning_rate,
                  weight_decay=args.weight_decay,
                  verbose=args.verbose)
Example #18
def get_config():
    import argparse
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--gpu_id', '-id', type=str, default='0')
    parser.add_argument('--saved_model_path', '-smp', type=str, default='../results/unmaintained/')
    parser.add_argument('--actor_model_name', '-amn', type=str, default='actor')
    parser.add_argument('--critic_model_name', '-cmn', type=str, default='critic')
    parser.add_argument('--batch_size', '-bs', type=int, default=128)
    parser.add_argument('--n_rounds', '-nr', type=int, default=1000)
    parser.add_argument('--update_iters', '-ui', type=int, default=1000)
    parser.add_argument('--n_iters', '-ni', type=int, default=10)
    parser.add_argument('--iter_per_joint', '-ipj', type=str, default='(5, 3)')
    parser.add_argument('--beta', '-beta', type=float, default=0.1)
    parser.add_argument('--dataset', '-data', type=str, default='icvl')
    parser.add_argument('--mrsa_test_fold', '-mtf', type=str, default='P9')
    parser.add_argument('--files_per_time', '-nfp', type=int, default=1)
    parser.add_argument('--buffer_size', '-buf', type=int, default=100000)

    # parameter of models
    parser.add_argument('--root_actor_cnn_layers', '-racl', type=str, default='(8, 16, 32, 64, 128)')
    parser.add_argument('--root_critic_cnn_layers', '-rccl', type=str, default='(8, 16, 32, 64, 128)')
    parser.add_argument('--root_actor_fc_layers', '-rafl', type=str, default='(256, 32)')
    parser.add_argument('--root_critic_fc_layers', '-rcfl', type=str, default='(64, 6, 32, 64)')
    parser.add_argument('--root_obs_dims', '-rod', type=str, default='(40, 40, 20)')
    parser.add_argument('--tau', '-tau', type=float, default=0.001)
    parser.add_argument('--learning_rate', '-lr', type=float, default=1e-5)

    # Short flags must differ from the root_* options above; argparse rejects duplicate option strings.
    parser.add_argument('--chain_actor_cnn_layers', '-cacl', type=str, default='(8, 16, 32, 64, 128)')
    parser.add_argument('--chain_critic_cnn_layers', '-cccl', type=str, default='(8, 16, 32, 64, 128)')
    parser.add_argument('--chain_actor_fc_layers', '-cafl', type=str, default='(128, 32)')
    parser.add_argument('--chain_critic_fc_layers', '-ccfl', type=str, default='(64, 6, 32, 64)')
    parser.add_argument('--chain_obs_dims', '-cod', type=str, default='(30, 30, 20)')

    args = vars(parser.parse_args())
    utils.print_args(args)
    args = utils.str2int_tuple(args)
    return args
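Example #18 above additionally passes the parsed dict through utils.str2int_tuple, presumably to turn string-encoded defaults such as '(8, 16, 32, 64, 128)' into real tuples of ints. A hedged sketch of such a conversion, an assumption rather than this project's actual helper, based on ast.literal_eval:

# Hypothetical str2int_tuple helper: convert string-encoded tuples in an args dict
# into real tuples, leaving every other value untouched.
import ast

def str2int_tuple(args):
    converted = {}
    for key, value in args.items():
        if isinstance(value, str) and value.startswith('(') and value.endswith(')'):
            try:
                converted[key] = tuple(ast.literal_eval(value))
            except (ValueError, SyntaxError):
                converted[key] = value
        else:
            converted[key] = value
    return converted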
Example #19
parser.add_argument("--shuffle", default=True)
parser.add_argument("--num_workers", default=8)
parser.add_argument("--epoch", default=10, type=int)
parser.add_argument("--pre_epoches", default=10, type=int)
parser.add_argument("--snapshot", default="")
parser.add_argument("--lr", default=0.001)
parser.add_argument("--log_interval", default=50)
parser.add_argument("--class_num", default=12)
parser.add_argument("--extract", default=True)
parser.add_argument("--weight_ring", default=0.01)
parser.add_argument("--radius", default=25.0)
parser.add_argument("--model", default='-1', type=str)
parser.add_argument("--post", default='-1', type=str)
parser.add_argument("--repeat", default='-1', type=str)
args = parser.parse_args()
print_args(args)

source_root = os.path.join(args.data_root, args.source)
source_label = os.path.join(args.data_root, args.source + "_list.txt")
target_root = os.path.join(args.data_root, args.target)
target_label = os.path.join(args.data_root, args.target + "6_list.txt")

train_transform = transforms.Compose([
    transforms.Scale((256, 256)),
    transforms.CenterCrop((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

source_set = VisDAImage(source_root, source_label, train_transform)
Example #20
        args.path = path_sorted[-1]
        pass
else:
    args.path = '{}-{}'.format(args.path, get_time())
os.system('mkdir -p {}'.format(args.path))

# print args
logging(str(args), path=args.path)

# init tensorboard
writer = SummaryWriter(args.path)

# print config
configuration_setup='SAC-NF'
configuration_setup+='\n'
configuration_setup+=print_args(args)
#for arg in vars(args):
#    configuration_setup+=' {} : {}'.format(str(arg),str(getattr(args, arg)))
#    configuration_setup+='\n'
logging(configuration_setup, path=args.path)

# init sac
agent = SAC(env.observation_space.shape[0], env.action_space, args)
logging("----------------------------------------", path=args.path)
logging(str(agent.zf1), path=args.path)
logging("----------------------------------------", path=args.path)
logging(str(agent.policy), path=args.path)
logging("----------------------------------------", path=args.path)

gaussian_params, nf_params = get_params(agent.policy,args.flow_family)
nf_weights=sum(p.numel() for p in nf_params)
Example #21
    np.random.seed(SEED)
    random.seed(SEED)
    # torch.backends.cudnn.deterministic = True

    args.s_dset_path  = './data/ssda/' + args.dset + '/labeled_source_images_' \
        + names[args.s] + '.txt'
    args.lt_dset_path = './data/ssda/' + args.dset + '/labeled_target_images_' \
        + names[args.t] + '_' + str(args.shot) + '.txt'
    args.t_dset_path  = './data/ssda/' + args.dset + '/unlabeled_target_images_' \
        + names[args.t] + '_' + str(args.shot) + '.txt'
    args.vt_dset_path  = './data/ssda/' + args.dset + '/validation_target_images_' \
        + names[args.t] + '_3.txt'

    args.test_dset_path = args.t_dset_path

    args.output_dir = osp.join(
        args.output, 'mixmatch', args.dset,
        names[args.s][0].upper() + names[args.t][0].upper())

    args.name = names[args.s][0].upper() + names[args.t][0].upper()
    if not osp.exists(args.output_dir):
        os.system('mkdir -p ' + args.output_dir)
    if not osp.exists(args.output_dir):
        os.mkdir(args.output_dir)

    args.log = 'mixmatch_' + args.pl + '_' + str(args.shot)
    args.out_file = open(osp.join(args.output_dir, "{:}.txt".format(args.log)),
                         "w")

    utils.print_args(args)
    train(args)
                    type=str,
                    metavar='<str>',
                    default='restaurant',
                    help="domain of the corpus {restaurant, beer}")
parser.add_argument(
    "--ortho-reg",
    dest="ortho_reg",
    type=float,
    metavar='<float>',
    default=0.1,
    help="The weight of orthogonol regularizaiton (default=0.1)")

args = parser.parse_args()
out_dir = args.out_dir_path + '/' + args.domain
U.mkdir_p(out_dir)  # create the output directory
U.print_args(args)  # print the command-line arguments

assert args.algorithm in {
    'rmsprop', 'sgd', 'adagrad', 'adadelta', 'adam', 'adamax'
}
assert args.domain in {'restaurant', 'beer'}

if args.seed > 0:
    np.random.seed(args.seed)

# ###############################################################################################################################
# ## Prepare data
# #

from keras.preprocessing import sequence
import reader as dataset
Example #23
        sess.run(iterator.initializer)
        while True:
            try:
                item_id, label = model.predict(sess)
                writer.write(indices=[0, 1], values=[item_id, label])

            except tf.errors.OutOfRangeError:
                break
            step += 1
            if step % 10 == 0:
                now_time = time.strftime('%Y-%m-%d-%H:%M:%S', time.localtime(time.time()))
                print('{} predict {:2d} lines'.format(now_time, step*FLAGS.predict_batch_size))
        print("Done. Write output into {}".format(FLAGS.output_file))
        writer.close()

if __name__ == '__main__':
    # Params Preparation
    print_args(FLAGS)
    vocab_table, _, vocab_size = load_vocab(FLAGS.vocab_file)
    FLAGS.vocab_size = vocab_size

    # Model Preparation
    iterator = get_infer_iterator(
        FLAGS.predict_file, vocab_table, FLAGS.predict_batch_size,
        question_max_len=FLAGS.question_max_len,
        answer_max_len=FLAGS.answer_max_len,
    )
    mode = tf.estimator.ModeKeys.PREDICT
    model = parse_model(iterator, FLAGS, mode)
    predict()
Example #24
def main(args, **model_kwargs):
    device = torch.device(args.device)
    args.device = device
    if args.dataset == 'abilene_tm':
        args.nNodes = 12
        args.day_size = 288
    elif args.dataset == 'geant_tm':
        args.nNodes = 22
        args.day_size = 96
    elif args.dataset == 'brain_tm':
        args.nNodes = 9
        args.day_size = 1440
    elif 'sinet' in args.dataset:
        args.nNodes = 73
        args.day_size = 288
    else:
        raise ValueError('Dataset not found!')

    train_loader, val_loader, test_loader, graphs, top_k_index = utils.get_dataloader(
        args)

    args.train_size, args.nSeries = train_loader.dataset.X.shape
    args.val_size = val_loader.dataset.X.shape[0]
    args.test_size = test_loader.dataset.X.shape[0]

    in_dim = 1
    if args.tod:
        in_dim += 1
    if args.ma:
        in_dim += 1
    if args.mx:
        in_dim += 1

    args.in_dim = in_dim

    model = models.get_model(args)
    logger = utils.Logger(args)

    engine = utils.Trainer.from_args(model, train_loader.dataset.scaler, \
                        train_loader.dataset.scaler_top_k, args)

    utils.print_args(args)

    if not args.test:
        iterator = trange(args.epochs)

        try:
            if os.path.isfile(logger.best_model_save_path):
                print('Model checkpoint exist!')
                print('Load model checkpoint? (y/n)')
                _in = input()
                if _in == 'y' or _in == 'yes':
                    print('Loading model...')
                    engine.model.load_state_dict(
                        torch.load(logger.best_model_save_path))
                else:
                    print('Training new model')

            for epoch in iterator:
                train_loss, train_rse, train_mae, train_mse, train_mape, train_rmse = [], [], [], [], [], []
                for iter, batch in enumerate(train_loader):

                    # x = batch['x']  # [b, seq_x, n, f]
                    # y = batch['y']  # [b, seq_y, n]

                    x = batch['x_top_k']
                    y = batch['y_top_k']

                    if y.max() == 0: continue
                    loss, rse, mae, mse, mape, rmse = engine.train(x, y)
                    train_loss.append(loss)
                    train_rse.append(rse)
                    train_mae.append(mae)
                    train_mse.append(mse)
                    train_mape.append(mape)
                    train_rmse.append(rmse)

                engine.scheduler.step()
                with torch.no_grad():
                    val_loss, val_rse, val_mae, val_mse, val_mape, val_rmse = engine.eval(
                        val_loader)
                m = dict(train_loss=np.mean(train_loss),
                         train_rse=np.mean(train_rse),
                         train_mae=np.mean(train_mae),
                         train_mse=np.mean(train_mse),
                         train_mape=np.mean(train_mape),
                         train_rmse=np.mean(train_rmse),
                         val_loss=np.mean(val_loss),
                         val_rse=np.mean(val_rse),
                         val_mae=np.mean(val_mae),
                         val_mse=np.mean(val_mse),
                         val_mape=np.mean(val_mape),
                         val_rmse=np.mean(val_rmse))

                description = logger.summary(m, engine.model)

                if logger.stop:
                    break

                description = 'Epoch: {} '.format(epoch) + description
                iterator.set_description(description)
        except KeyboardInterrupt:
            pass

    # Metrics on test data
    engine.model.load_state_dict(torch.load(logger.best_model_save_path))
    with torch.no_grad():
        test_met_df, x_gt, y_gt, y_real, yhat = engine.test(
            test_loader, engine.model, args.out_seq_len)
        test_met_df.round(6).to_csv(
            os.path.join(logger.log_dir, 'test_metrics.csv'))
        print('Prediction Accuracy:')
        print(utils.summary(logger.log_dir))

    if args.plot:
        logger.plot(x_gt, y_real, yhat)

    x_gt = x_gt.cpu().data.numpy()  # [timestep, seq_x, seq_y]
    y_gt = y_gt.cpu().data.numpy()
    yhat = yhat.cpu().data.numpy()

    # run TE
    if args.run_te:
        psi = get_psi(args)
        G = get_G(args)
        R = get_R(args)

        A = np.dot(R * G, psi)
        y_cs = np.zeros(y_gt.shape)

        # for i in range(y_gt.shape[0]):
        #     temp = np.linalg.inv(np.dot(A, A.T))
        #     S = np.dot(np.dot(A.T, temp), yhat[i].T)
        #     y_cs[i] = np.dot(psi, S).T

        for i in range(y_gt.shape[0]):
            m = A.shape[1]
            S = cvx.Variable(m)
            objective = cvx.Minimize(cvx.norm(S, p=0))
            constraint = [yhat[i].T == A * S]

            prob = cvx.Problem(objective, constraint)
            prob.solve()

            y_cs[i] = S.value.reshape(1, m)
        run_te(x_gt, y_gt, y_cs, args)
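Note that the objective above asks cvxpy to minimize an l0 "norm", which is not convex and is generally rejected as non-DCP, so the loop may not run as written. A common workaround, stated here as an assumption rather than what the original authors necessarily ran, is the l1 relaxation (basis pursuit):

# Hypothetical l1 relaxation of the sparse-recovery step above; the original
# snippet requests an l0 objective, which cvxpy cannot minimize directly.
import cvxpy as cvx
import numpy as np

def recover_sparse(A, b):
    """Recover a sparse vector S with A @ S == b by minimizing the l1 norm."""
    m = A.shape[1]
    S = cvx.Variable(m)
    prob = cvx.Problem(cvx.Minimize(cvx.norm(S, 1)), [A @ S == b])
    prob.solve()
    return np.asarray(S.value).reshape(-1)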
    default='adam',
    help=
    "Optimization algorithm (rmsprop|sgd|adagrad|adadelta|adam|adamax) (default=adam)"
)
parser.add_argument(
    "--ortho-reg",
    dest="ortho_reg",
    type=float,
    metavar='<float>',
    default=0.1,
    help="The weight of orthogonal regularization (default=0.1)")
args = parser.parse_args()

out_dir = args.out_dir_path + '/' + args.domain
U.mkdir_p(out_dir)
U.print_args(args)

assert args.algorithm in {
    'rmsprop', 'sgd', 'adagrad', 'adadelta', 'adam', 'adamax'
}
# assert args.domain in {'restaurant', 'beer'}

if args.seed > 0:
    np.random.seed(args.seed)

# ###############################################################################################################################
# ## Prepare data
# #

from keras.preprocessing import sequence
import reader as dataset
Example #26
def main():
    """Main training program."""

    # Disable CuDNN.
    torch.backends.cudnn.enabled = False

    # Timer.
    timers = Timers()

    # Arguments.
    args = get_args()

    writer = None
    if args.tensorboard_dir and args.rank == 0:
        try:
            from torch.utils.tensorboard import SummaryWriter
            writer = SummaryWriter(log_dir=args.tensorboard_dir)
        except ModuleNotFoundError:
            print_rank_0('WARNING: TensorBoard writing requested but is not '
                         'available (are you using PyTorch 1.1.0 or later?), '
                         'no TensorBoard logs will be written.')
            writer = None

    # Pytorch distributed.
    initialize_distributed(args)
    if torch.distributed.get_rank() == 0:
        print('Pretrain BERT model')
        print_args(args, writer)

    # Autoresume.
    torch.distributed.barrier()
    if args.adlr_autoresume:
        enable_adlr_autoresume(args)

    # Random seeds for reproducibility.
    set_random_seed(args.seed)

    # Data stuff.
    train_data, val_data, test_data, args.tokenizer_num_tokens, \
        args.tokenizer_num_type_tokens = get_train_val_test_data(args)

    # Model, optimizer, and learning rate.
    model, optimizer, lr_scheduler = setup_model_and_optimizer(args)

    if args.resume_dataloader:
        if train_data is not None:
            train_data.batch_sampler.start_iter = args.iteration % \
                                                  len(train_data)
            print_rank_0('setting training data start iteration to {}'.format(
                train_data.batch_sampler.start_iter))
        if val_data is not None:
            start_iter_val = (args.iteration // args.eval_interval) * \
                             args.eval_iters
            val_data.batch_sampler.start_iter = start_iter_val % \
                                                len(val_data)
            print_rank_0(
                'setting validation data start iteration to {}'.format(
                    val_data.batch_sampler.start_iter))

    if train_data is not None:
        train_data_iterator = iter(train_data)
    else:
        train_data_iterator = None
    if val_data is not None:
        val_data_iterator = iter(val_data)
    else:
        val_data_iterator = None

    iteration = 0
    if args.train_iters > 0:
        if args.do_train:
            iteration, skipped = train(model, optimizer, lr_scheduler,
                                       train_data_iterator, val_data_iterator,
                                       timers, args, writer)
        if args.do_valid:
            prefix = 'the end of training for val data'
            val_loss = evaluate_and_print_results(prefix, val_data_iterator,
                                                  model, args, writer,
                                                  iteration, timers, False)

    if args.save and iteration != 0:
        save_checkpoint(iteration, model, optimizer, lr_scheduler, args)

    if test_data is not None:
        test_data_iterator = iter(test_data)
    else:
        test_data_iterator = None

    if args.do_test:
        # Run on test data.
        prefix = 'the end of training for test data'
        evaluate_and_print_results(prefix, test_data_iterator, model, args,
                                   None, 0, timers, True)
Example #27
def main():
    parser = ArgumentParser()

    # Task configuration
    parser.add_argument('-device', default=0, type=int)
    parser.add_argument('-output_name', default='', type=str)
    parser.add_argument('-saved_model_path', default='', type=str) # For prediction with merged k-fold models, just set this to the output path of the corresponding k_fold model
    parser.add_argument('-type', default='train', type=str)
    parser.add_argument('-k_fold', default=-1, type=int) # -1 means no k-fold; otherwise, which fold of the k-fold split to use
    parser.add_argument('-merge_classification', default='avg', type=str) # count prediction: 'vote' uses majority voting, 'avg' averages the probabilities
    parser.add_argument('-merge_with_bert_sort', default='yes', type=str) # whether to fuse the similarities computed by the earlier BERT model
    parser.add_argument('-k_fold_cache', default='no', type=str) # whether to reuse the earlier k_fold cache
    parser.add_argument('-generate_candidates', default='', type=str) # whether to fuse the similarities computed by the earlier BERT model
    parser.add_argument('-seed', default=123456, type=int) # random seed
    parser.add_argument('-cls_position', default='zero', type=str) # whether the two added CLS tokens use position 0
    parser.add_argument('-pretrained_model_path', default='/home/liangming/nas/lm_params/chinese_L-12_H-768_A-12/', type=str) # path to the BERT parameters

    # Training parameters
    parser.add_argument('-train_batch_size', default=64, type=int)
    parser.add_argument('-val_batch_size', default=256, type=int)
    parser.add_argument('-lr', default=2e-5, type=float)
    parser.add_argument('-epoch_num', default=20, type=int)

    parser.add_argument('-max_len', default=64, type=int)
    parser.add_argument('-dropout', default=0.3, type=float)
    parser.add_argument('-print_loss_step', default=2, type=int)
    parser.add_argument('-hit_list', default=[2, 5, 7, 10], type=list)

    args = parser.parse_args()
    # assert args.train_batch_size % args.neg_num == 0, print('batch size should be an integer multiple of neg_num')

    # Define the time format
    DATE_FORMAT = "%Y-%m-%d-%H:%M:%S"
    # Define the output directory and create it if it does not exist
    if args.output_name == '':
        output_path = os.path.join('./output/rerank_keywords_output', time.strftime(DATE_FORMAT,time.localtime(time.time())))
    else:
        output_path = os.path.join('./output/rerank_keywords_output', args.output_name)
        # if os.path.exists(output_path):
            # raise Exception('the output path {} already exists'.format(output_path))
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    # Configure TensorBoard
    tensor_board_log_path = os.path.join(output_path, 'tensor_board_log{}'.format('' if args.k_fold == -1 else args.k_fold))
    writer = SummaryWriter(tensor_board_log_path)

    # Set up the logger
    logger = Logger(output_path,'main{}'.format('' if args.k_fold == -1 else args.k_fold)).logger

    # Set the random seed
    logger.info('set seed to {}'.format(args.seed))
    set_seed(args)
    
    # Print the parsed arguments
    print_args(args, logger)

    # Load the data
    logger.info('#' * 20 + 'loading data and model' + '#' * 20)
    data_path = os.path.join(project_path, 'candidates')
    # data_path = os.path.join(project_path, 'tf_idf_candidates')
    train_list, val_list, test_list, code_to_name, name_to_code, standard_name_list = read_rerank_data(data_path, logger, args)

    #load model
    # pretrained_model_path = '/home/liangming/nas/lm_params/chinese_L-12_H-768_A-12/'
    pretrained_model_path = args.pretrained_model_path
    bert_config, bert_tokenizer, bert_model = get_pretrained_model(pretrained_model_path, logger)

    # Build the datasets and dataloaders
    logger.info('create dataloader')
    train_dataset = RerankKeywordDataset(train_list, bert_tokenizer, args, logger)
    train_dataloader = DataLoader(train_dataset, batch_size=args.train_batch_size, shuffle=False, collate_fn=train_dataset.collate_fn)

    val_dataset = RerankKeywordDataset(val_list, bert_tokenizer, args, logger)
    val_dataloader = DataLoader(val_dataset, batch_size=args.val_batch_size, shuffle=False, collate_fn=val_dataset.collate_fn)

    test_dataset = RerankKeywordDataset(test_list, bert_tokenizer, args, logger)
    test_dataloader = DataLoader(test_dataset, batch_size=args.val_batch_size, shuffle=False, collate_fn=test_dataset.collate_fn)

    # Create the model
    logger.info('create model')
    model = BertKeywordsClassification(bert_model, bert_config, args)
    model = model.to(args.device)

    # Configure the optimizer and scheduler
    t_total = len(train_dataloader) * args.epoch_num
    optimizer, _ = get_optimizer_and_scheduler(model, t_total, args.lr, 0)

    if args.type == 'train':
        train(model, train_dataloader, val_dataloader, test_dataloader, optimizer, writer, args, logger, output_path, standard_name_list)

    elif args.type == 'evaluate':
        if args.saved_model_path == '':
            raise Exception('saved_model_path must not be empty')

        # Non-k-fold model
        if args.k_fold == -1:
            logger.info('loading saved model')
            checkpoint = torch.load(args.saved_model_path, map_location='cpu')
            model.load_state_dict(checkpoint)
            model = model.to(args.device)
            # # Generate the latest embeddings for the ICD standard terms
            evaluate(model, test_dataloader, args, logger, writer, standard_name_list, is_test=True)

        else:
            evaluate_k_fold(model, test_dataloader, args, logger, writer, standard_name_list)
def main():
    """Main training program."""

    # Disable CuDNN.
    torch.backends.cudnn.enabled = False

    # Timer.
    timers = Timers()

    # Arguments.
    args = get_args()

    # Pytorch distributed.
    initialize_distributed(args)
    if torch.distributed.get_rank() == 0:
        print('Pretrain GPT2 model')
        print_args(args)

    # Random seeds for reproducibility.
    set_random_seed(args.seed)

    # Model, optimizer, and learning rate.
    model, optimizer, lr_scheduler = setup_model_and_optimizer(args)
    if torch.distributed.get_rank() == 0:
        print(args.iteration)

    train_data_iterator, val_data_iterator, test_data_iterator = \
            build_train_valid_test_data_iterators(
                    train_valid_test_dataset_provider, args)

    # Resume data loader if necessary.
    # if args.resume_dataloader:
    #    if train_data is not None:
    #        train_data.batch_sampler.start_iter = args.iteration % \
    #                                              len(train_data)
    #    if val_data is not None:
    #        start_iter_val = (args.train_iters // args.save_interval) * \
    #                         args.eval_interval
    #        val_data.batch_sampler.start_iter = start_iter_val % \
    #                                            len(val_data)
    # if train_data is not None:
    #    train_data_iterator = iter(train_data)
    # else:
    #    train_data_iterator = None
    # if val_data is not None:
    #    val_data_iterator = iter(val_data)
    # else:
    #    val_data_iterator = None

    # TODO: figure out how to properly set this especially when resuming training
    iteration = 0
    if args.train_iters > 0:
        iteration, skipped = train(model, optimizer,
                                   lr_scheduler,
                                   train_data_iterator,
                                   val_data_iterator,
                                   timers, args)

        prefix = 'the end of training for val data'
        val_loss = evaluate_and_print_results(prefix, val_data_iterator,
                                                  model, args, timers, False)

    if args.save and iteration != 0:
        save_checkpoint(iteration, model, optimizer, lr_scheduler, args)

    # if test_data is not None:
    #    test_data_iterator = iter(test_data)
    # else:
    #    test_data_iterator = None

    if args.do_test:
        # Run on test data.
        prefix = 'the end of training for test data'
        evaluate_and_print_results(prefix, test_data_iterator,
                                   model, args, timers, True)
Example #29
def main():
    start_time = time.time()

    init_out_dir()
    if args.clear_checkpoint:
        clear_checkpoint()
    last_step = get_last_checkpoint_step()
    if last_step >= 0:
        my_log('\nCheckpoint found: {}\n'.format(last_step))
    else:
        clear_log()
    print_args()

    if args.net == 'made':
        net = MADE(**vars(args))
    elif args.net == 'pixelcnn':
        net = PixelCNN(**vars(args))
    elif args.net == 'bernoulli':
        net = BernoulliMixture(**vars(args))
    else:
        raise ValueError('Unknown net: {}'.format(args.net))
    net.to(args.device)
    my_log('{}\n'.format(net))

    params = list(net.parameters())
    params = list(filter(lambda p: p.requires_grad, params))
    nparams = int(sum([np.prod(p.shape) for p in params]))
    my_log('Total number of trainable parameters: {}'.format(nparams))
    named_params = list(net.named_parameters())

    if args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(params, lr=args.lr)
    elif args.optimizer == 'sgdm':
        optimizer = torch.optim.SGD(params, lr=args.lr, momentum=0.9)
    elif args.optimizer == 'rmsprop':
        optimizer = torch.optim.RMSprop(params, lr=args.lr, alpha=0.99)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(params, lr=args.lr, betas=(0.9, 0.999))
    elif args.optimizer == 'adam0.5':
        optimizer = torch.optim.Adam(params, lr=args.lr, betas=(0.5, 0.999))
    else:
        raise ValueError('Unknown optimizer: {}'.format(args.optimizer))

    if args.lr_schedule:
        # 0.92**80 ~ 1e-3
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, factor=0.92, patience=100, threshold=1e-4, min_lr=1e-6)

    if last_step >= 0:
        state = torch.load('{}_save/{}.state'.format(args.out_filename,
                                                     last_step))
        ignore_param(state['net'], net)
        net.load_state_dict(state['net'])
        if state.get('optimizer'):
            optimizer.load_state_dict(state['optimizer'])
        if args.lr_schedule and state.get('scheduler'):
            scheduler.load_state_dict(state['scheduler'])

    init_time = time.time() - start_time
    my_log('init_time = {:.3f}'.format(init_time))

    my_log('Training...')
    sample_time = 0
    train_time = 0
    start_time = time.time()
    for step in range(last_step + 1, args.max_step + 1):
        optimizer.zero_grad()

        sample_start_time = time.time()
        with torch.no_grad():
            sample, x_hat = net.sample(args.batch_size)
        assert not sample.requires_grad
        assert not x_hat.requires_grad
        sample_time += time.time() - sample_start_time

        train_start_time = time.time()

        log_prob = net.log_prob(sample)
        # 0.998**9000 ~ 1e-8
        beta = args.beta * (1 - args.beta_anneal**step)
        with torch.no_grad():
            energy = ising.energy(sample, args.ham, args.lattice,
                                  args.boundary)
            loss = log_prob + beta * energy
        assert not energy.requires_grad
        assert not loss.requires_grad
        loss_reinforce = torch.mean((loss - loss.mean()) * log_prob)
        loss_reinforce.backward()

        if args.clip_grad:
            nn.utils.clip_grad_norm_(params, args.clip_grad)

        optimizer.step()

        if args.lr_schedule:
            scheduler.step(loss.mean())

        train_time += time.time() - train_start_time

        if args.print_step and step % args.print_step == 0:
            free_energy_mean = loss.mean() / args.beta / args.L**2
            free_energy_std = loss.std() / args.beta / args.L**2
            entropy_mean = -log_prob.mean() / args.L**2
            energy_mean = energy.mean() / args.L**2
            mag = sample.mean(dim=0)
            mag_mean = mag.mean()
            mag_sqr_mean = (mag**2).mean()
            if step > 0:
                sample_time /= args.print_step
                train_time /= args.print_step
            used_time = time.time() - start_time
            my_log(
                'step = {}, F = {:.8g}, F_std = {:.8g}, S = {:.8g}, E = {:.8g}, M = {:.8g}, Q = {:.8g}, lr = {:.3g}, beta = {:.8g}, sample_time = {:.3f}, train_time = {:.3f}, used_time = {:.3f}'
                .format(
                    step,
                    free_energy_mean.item(),
                    free_energy_std.item(),
                    entropy_mean.item(),
                    energy_mean.item(),
                    mag_mean.item(),
                    mag_sqr_mean.item(),
                    optimizer.param_groups[0]['lr'],
                    beta,
                    sample_time,
                    train_time,
                    used_time,
                ))
            sample_time = 0
            train_time = 0

            if args.save_sample:
                state = {
                    'sample': sample,
                    'x_hat': x_hat,
                    'log_prob': log_prob,
                    'energy': energy,
                    'loss': loss,
                }
                torch.save(state, '{}_save/{}.sample'.format(
                    args.out_filename, step))

        if (args.out_filename and args.save_step
                and step % args.save_step == 0):
            state = {
                'net': net.state_dict(),
                'optimizer': optimizer.state_dict(),
            }
            if args.lr_schedule:
                state['scheduler'] = scheduler.state_dict()
            torch.save(state, '{}_save/{}.state'.format(
                args.out_filename, step))

        if (args.out_filename and args.visual_step
                and step % args.visual_step == 0):
            torchvision.utils.save_image(
                sample,
                '{}_img/{}.png'.format(args.out_filename, step),
                nrow=int(sqrt(sample.shape[0])),
                padding=0,
                normalize=True)

            if args.print_sample:
                x_hat_np = x_hat.view(x_hat.shape[0], -1).cpu().numpy()
                x_hat_std = np.std(x_hat_np, axis=0).reshape([args.L] * 2)

                x_hat_cov = np.cov(x_hat_np.T)
                x_hat_cov_diag = np.diag(x_hat_cov)
                x_hat_corr = x_hat_cov / (
                    sqrt(x_hat_cov_diag[:, None] * x_hat_cov_diag[None, :]) +
                    args.epsilon)
                x_hat_corr = np.tril(x_hat_corr, -1)
                x_hat_corr = np.max(np.abs(x_hat_corr), axis=1)
                x_hat_corr = x_hat_corr.reshape([args.L] * 2)

                energy_np = energy.cpu().numpy()
                energy_count = np.stack(
                    np.unique(energy_np, return_counts=True)).T

                my_log(
                    '\nsample\n{}\nx_hat\n{}\nlog_prob\n{}\nenergy\n{}\nloss\n{}\nx_hat_std\n{}\nx_hat_corr\n{}\nenergy_count\n{}\n'
                    .format(
                        sample[:args.print_sample, 0],
                        x_hat[:args.print_sample, 0],
                        log_prob[:args.print_sample],
                        energy[:args.print_sample],
                        loss[:args.print_sample],
                        x_hat_std,
                        x_hat_corr,
                        energy_count,
                    ))

            if args.print_grad:
                my_log('grad max_abs min_abs mean std')
                for name, param in named_params:
                    if param.grad is not None:
                        grad = param.grad
                        grad_abs = torch.abs(grad)
                        my_log('{} {:.3g} {:.3g} {:.3g} {:.3g}'.format(
                            name,
                            torch.max(grad_abs).item(),
                            torch.min(grad_abs).item(),
                            torch.mean(grad).item(),
                            torch.std(grad).item(),
                        ))
                    else:
                        my_log('{} None'.format(name))
                my_log('')
def main():
    start_time = time()
    last_step = get_last_ckpt_step()
    assert last_step >= 0
    my_log(f'Checkpoint found: {last_step}\n')
    print_args()

    net_init, net_apply, net_init_cache, net_apply_fast = get_net()

    params = load_ckpt(last_step)
    in_shape = (args.batch_size, args.L, args.L, 1)
    _, cache_init = net_init_cache(params, jnp.zeros(in_shape), (-1, -1))

    # sample_raw_fun = get_sample_fun(net_apply, None)
    sample_raw_fun = get_sample_fun(net_apply_fast, cache_init)
    # sample_k_fun = get_sample_k_fun(net_apply, None)
    sample_k_fun = get_sample_k_fun(net_apply_fast, net_init_cache)
    log_q_fun = get_log_q_fun(net_apply)

    @jit
    def update(spins_old, log_q_old, energy_old, step, accept_count,
               energy_mean, energy_var_sum, rng):
        rng, rng_k, rng_sample, rng_accept = jrand.split(rng, 4)
        k = get_k(rng_k)
        spins = sample_k_fun(k, params, spins_old, rng_sample)
        log_q = log_q_fun(params, spins)
        energy = energy_fun(spins)

        log_uniform = jnp.log(jrand.uniform(rng_accept, (args.batch_size, )))
        accept = log_uniform < (log_q_old - log_q + args.beta *
                                (energy_old - energy))

        spins = jnp.where(jnp.expand_dims(accept, axis=(1, 2, 3)), spins,
                          spins_old)
        log_q = jnp.where(accept, log_q, log_q_old)
        energy = jnp.where(accept, energy, energy_old)
        mag = spins.mean(axis=(1, 2, 3))

        step += 1
        accept_count += accept.sum()
        energy_per_spin = energy / args.L**2
        energy_mean, energy_var_sum = welford_update(energy_per_spin.mean(),
                                                     step, energy_mean,
                                                     energy_var_sum)

        return (spins, log_q, energy, mag, accept, k, step, accept_count,
                energy_mean, energy_var_sum, rng)

    rng, rng_init = jrand.split(jrand.PRNGKey(args.seed))
    # Sample initial configurations from the network
    spins = sample_raw_fun(args.batch_size, params, rng_init)
    log_q = log_q_fun(params, spins)
    energy = energy_fun(spins)

    step = 0
    accept_count = 0
    energy_mean = 0
    energy_var_sum = 0

    data_filename = args.log_filename.replace('.log', '.hdf5')
    writer_proto = [
        # Uncomment to save all the sampled spins
        # ('spins', bool, (args.batch_size, args.L, args.L)),
        ('log_q', np.float32, (args.batch_size, )),
        ('energy', np.int32, (args.batch_size, )),
        ('mag', np.float32, (args.batch_size, )),
        ('accept', bool, (args.batch_size, )),
        ('k', np.int32, None),
    ]
    ensure_dir(data_filename)
    with ChunkedDataWriter(data_filename, writer_proto,
                           args.save_step) as writer:
        my_log('Sampling...')
        while step < args.max_step:
            (spins, log_q, energy, mag, accept, k, step, accept_count,
             energy_mean, energy_var_sum,
             rng) = update(spins, log_q, energy, step, accept_count,
                           energy_mean, energy_var_sum, rng)
            # Uncomment to save all the sampled spins
            # writer.write(spins[:, :, :, 0] > 0, log_q, energy, mag, accept, k)
            writer.write(log_q, energy, mag, accept, k)

            if args.print_step and step % args.print_step == 0:
                accept_rate = accept_count / (step * args.batch_size)
                energy_std = jnp.sqrt(energy_var_sum / step)
                my_log(', '.join([
                    f'step = {step}',
                    f'P = {accept_rate:.8g}',
                    f'E = {energy_mean:.8g}',
                    f'E_std = {energy_std:.8g}',
                    f'time = {time() - start_time:.3f}',
                ]))