Example #1
0
def main():
    """Load MNIST classifier parameters trained by ``classification.py``,
    rebuild the inference graph, and export it as an NNP file (used later
    for C++ inference).
    """
    HERE = os.path.dirname(__file__)
    # Make the MNIST vision example directory importable.
    sys.path.append(
        os.path.realpath(os.path.join(HERE, '..', '..', 'vision', 'mnist')))
    from mnist_data import data_iterator_mnist
    from args import get_args
    from classification import mnist_lenet_prediction, mnist_resnet_prediction

    args = get_args(description=__doc__)

    # Pick the network architecture (LeNet unless resnet is requested).
    mnist_cnn_prediction = mnist_lenet_prediction
    if args.net == 'resnet':
        mnist_cnn_prediction = mnist_resnet_prediction

    # Infer parameter file name and read it.
    model_save_path = os.path.join('../../vision/mnist',
                                   args.model_save_path)
    parameter_file = os.path.join(
        model_save_path,
        '{}_params_{:06}.h5'.format(args.net, args.max_iter))
    try:
        nn.load_parameters(parameter_file)
    except IOError:
        # Fix: corrected "runnning" typo in the error message.
        logger.error("Run classification.py before running this script.")
        # Fix: sys.exit instead of the interactive-only `exit` builtin.
        sys.exit(1)

    # Create a computation graph to be saved.
    image = nn.Variable([args.batch_size, 1, 28, 28])
    pred = mnist_cnn_prediction(image, test=True)

    # Save NNP file (used in C++ inference later.).
    nnp_file = '{}_{:06}.nnp'.format(args.net, args.max_iter)
    runtime_contents = {
        'networks': [
            {'name': 'runtime',
             'batch_size': args.batch_size,
             'outputs': {'y': pred},
             'names': {'x': image}}],
        'executors': [
            {'name': 'runtime',
             'network': 'runtime',
             'data': ['x'],
             'output': ['y']}]}
    nn.utils.save.save(nnp_file, runtime_contents)
def train():
    """
    Naive Multi-Device Training

    NOTE: the communicator exposes low-level interfaces

    * Parse command line arguments.
    * Instantiate a communicator and set parameter variables.
    * Specify contexts for computation.
    * Initialize DataIterator.
    * Construct a computation graph for training and one for validation.
    * Initialize solver and set parameter variables to that.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Computate error rate for validation data (periodically)
      * Get a next minibatch.
      * Execute forwardprop
      * Set parameter gradients zero
      * Execute backprop.
      * Inplace allreduce (THIS IS THE MAIN difference from a single device training)
      * Solver updates parameters by using gradients computed by backprop.
      * Compute training error
    """
    # Parse args
    args = get_args()
    n_train_samples = 50000
    bs_valid = args.batch_size

    # Communicator and Context
    extension_module = "cuda.cudnn"
    ctx = extension_context(extension_module)
    # NOTE(review): "Paralell" matches the historical (misspelled) NNabla
    # communicator class name — verify against the installed nnabla version
    # before "fixing" the spelling.
    comm = C.MultiProcessDataParalellCommunicator(ctx)
    comm.init()
    n_devices = comm.size
    mpi_rank = comm.rank
    device_id = mpi_rank
    # Re-create the context pinned to this process's own device (one GPU per rank).
    ctx = extension_context(extension_module, device_id=device_id)

    # Create training graphs
    test = False
    image_train = nn.Variable((args.batch_size, 3, 32, 32))
    label_train = nn.Variable((args.batch_size, 1))
    pred_train = cifar100_resnet23_prediction(
        image_train, ctx, test)
    # NOTE(review): the prediction helper is "resnet23" but the loss helper is
    # named "resnet32" — confirm that this naming is intentional.
    loss_train = cifar100_resnet32_loss(pred_train, label_train)
    input_image_train = {"image": image_train, "label": label_train}

    # add parameters to communicator
    comm.add_context_and_parameters((ctx, nn.get_parameters()))

    # Create validation graph
    test = True
    image_valid = nn.Variable((bs_valid, 3, 32, 32))
    pred_valid = cifar100_resnet23_prediction(
        image_valid, ctx, test)
    input_image_valid = {"image": image_valid}

    # Solvers
    solver = S.Adam()
    solver.set_parameters(nn.get_parameters())
    base_lr = args.learning_rate
    # Linear-warmup schedule: ramp the LR over `warmup_epoch` epochs' worth of
    # per-device iterations, then hold at base_lr * n_devices.
    warmup_iter = int(1. * n_train_samples /
                      args.batch_size / n_devices) * args.warmup_epoch
    warmup_slope = 1. * n_devices / warmup_iter

    # Create monitor
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=10)
    with data_iterator_cifar100(args.batch_size, True) as tdata, \
            data_iterator_cifar100(bs_valid, False) as vdata:
        # Training-loop (total iteration count is divided across devices)
        for i in range(int(args.max_iter / n_devices)):
            # Validation and checkpointing run on rank 0 only
            if mpi_rank == 0:
                if i % int(n_train_samples / args.batch_size / n_devices) == 0:
                    ve = 0.
                    for j in range(args.val_iter):
                        image, label = vdata.next()
                        input_image_valid["image"].d = image
                        pred_valid.forward()
                        ve += categorical_error(pred_valid.d, label)
                    ve /= args.val_iter
                    monitor_verr.add(i * n_devices, ve)
                if i % int(args.model_save_interval / n_devices) == 0:
                    nn.save_parameters(os.path.join(
                        args.model_save_path, 'params_%06d.h5' % i))

            # Forward/Zerograd/Backward
            image, label = tdata.next()
            input_image_train["image"].d = image
            input_image_train["label"].d = label
            loss_train.forward()
            solver.zero_grad()
            loss_train.backward()

            # In-place Allreduce: average gradients across all devices
            comm.allreduce(division=True)

            # Solvers update
            solver.update()

            # Linear Warmup
            if i < warmup_iter:
                lr = base_lr * n_devices * warmup_slope * i
                solver.set_learning_rate(lr)
            else:
                lr = base_lr * n_devices
                solver.set_learning_rate(lr)

            # Monitor training stats on rank 0 (global-step x-axis: i * n_devices)
            if mpi_rank == 0:
                e = categorical_error(
                    pred_train.d, input_image_train["label"].d)
                monitor_loss.add(i * n_devices, loss_train.d.copy())
                monitor_err.add(i * n_devices, e)
                monitor_time.add(i * n_devices)
    if mpi_rank == 0:
        nn.save_parameters(os.path.join(
            args.model_save_path,
            'params_%06d.h5' % (args.max_iter / n_devices)))
def main():
    """Iteratively explore with the current policy, fit a dynamics ensemble,
    mine high-reward (and CEM-corrected low-reward) state-action constraints,
    train candidate "branch" policies on them, and keep the best performer.
    """
    args = get_args()
    dir_name = "results/%s/%s-%s"%(args.env, "3setup", strftime("%m_%d_%H_%M", gmtime()))
    os.makedirs(dir_name, exist_ok=True)
    logfile = open(dir_name+"/log.txt", "w")

    # Persist the run configuration next to the logs.
    with open(os.path.join(dir_name,'args.txt'), 'w') as f:
        json.dump(args.__dict__, f, indent=2)

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    env = gym.make(args.env)
    #env = outer_env.wrapped_env

    num_hidden = args.hidden_size
    VARIANCE = args.var
    iter_steps = args.iter_steps

    # has to be neural network policy
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    N_SAMPLES = args.n_samples if args.n_samples>0 else int(env.observation_space.shape[0]*4)
    LOW_REW_SET = int(N_SAMPLES*0.2)
    TOP_N_CONSTRAINTS = int(N_SAMPLES*1.5)

    def make_policy(mean, var):
        # Build a fresh policy, optionally seeded with observation statistics
        # for input normalization.
        if mean is not None:
            mean = torch.Tensor(mean).to(device)
        if var is not None:
            var = torch.Tensor(var).to(device)
        return Policy_quad_norm(env.observation_space.shape[0],
                            env.action_space.shape[0],
                            num_hidden=num_hidden,
                            mean=mean,
                            var=var).to(device)

    print('Using device:', device)

    sample_policy, sample_eval = make_policy(None, None), -1700

    replay_buffer = Replay_buffer(args.gamma)

    dynamics = DynamicsEnsemble(args.env, num_models=3)

    ep_no_improvement = 0

    for i_episode in count(1):

        # hack: when progress stalls, widen the constraint sets and reduce
        # exploration noise.
        if ep_no_improvement > 3:
            N_SAMPLES = int(N_SAMPLES * 1.2)
            TOP_N_CONSTRAINTS = int(N_SAMPLES*1.5) #-1
            LOW_REW_SET = int(LOW_REW_SET*1.2)
            iter_steps = TOP_N_CONSTRAINTS*2

            if VARIANCE>1e-4:
                VARIANCE = VARIANCE/1.2
                print("Updated Var to: %.3f"%(VARIANCE))
            ep_no_improvement = 0

        print("constraints: {}, to correct: {}".format(N_SAMPLES, TOP_N_CONSTRAINTS))
        # Exploration with the current sample policy (noisy actions).
        num_steps = 0
        explore_episodes = 0
        explore_rew = 0
        state_action_rew = []
        lowest_rew = []

        while num_steps < iter_steps:
            state = env.reset()
            for t in range(1000):
                action = sample_policy.select_action(state, VARIANCE)
                action = action.flatten()
                name_str = "expl_var" #explore
                next_state, reward, done, _ = env.step(action)
                explore_rew += reward

                replay_buffer.push((state,next_state,action, reward, done, (name_str, explore_episodes, t)))

                # Track the LOW_REW_SET worst transitions for later correction.
                if args.correct and i_episode>0:
                    if (args.env == "Hopper-v2" or args.env == "Walker2d-v2") and done:
                        reward = float('-inf')
                    if len(state_action_rew) < LOW_REW_SET:# or (args.env == "Hopper-v2" or args.env == "Walker2d-v2" and done):
                        state_action_rew.append([state,action,reward])
                        lowest_rew.append(reward)
                    elif reward < max(lowest_rew):
                        state_action_rew = sorted(state_action_rew, key=lambda l: l[2]) #sort by reward
                        state_action_rew[-1] = [state,action,reward]
                        lowest_rew.remove(max(lowest_rew))
                        lowest_rew.append(reward)

                if done:
                    break

                state = next_state

            # NOTE(review): (t-1) undercounts the steps actually taken — confirm.
            num_steps += (t-1)
            explore_episodes += 1

        explore_rew /= explore_episodes
        print('\nEpisode {}\tExplore reward: {:.2f}\tAverage ep len: {:.1f}\n'.format(i_episode, explore_rew, num_steps/explore_episodes))

        # do corrections: replace low-reward actions by CEM search on the
        # learned dynamics model.
        low_rew_constraints_set = []
        if args.correct and i_episode>1:
            print("exploring better actions", len(state_action_rew))
            #sample possible corrections
            for s, a, r in state_action_rew:
                max_a, _ = run_cem(dynamics, s)
                low_rew_constraints_set.append((s, max_a, "bad_states", 0, 0))

        # Train Dynamics
        X, Y, A, _, _, _ = replay_buffer.sample(-1)

        if i_episode!=1:
            print("Previous model evaluation:", dynamics.get_accuracy(X,Y,A))

        # Top up with data from the previous episode, capped at 1500 samples.
        # Fix: guard on i_episode > 1 — prev_X/prev_Y/prev_A do not exist yet
        # during the first episode and would raise NameError.
        if len(X) < 1500 and i_episode > 1:
            X = np.concatenate([X, prev_X])
            X = X if len(X)<1500 else X[:1500]
            Y = np.concatenate([Y, prev_Y])
            Y = Y if len(Y)<1500 else Y[:1500]
            A = np.concatenate([A, prev_A])
            A = A if len(A)<1500 else A[:1500]

        dynamics.fit(X, Y, A, epoch=args.model_training_epoch)

        prev_X, prev_Y, prev_A = X, Y, A

        best_tuples = replay_buffer.best_state_actions_replace(top_n_constraints=TOP_N_CONSTRAINTS, by='rewards', discard = True)

        mean, var = replay_buffer.get_mean_var()

        # support: random states, sorted by dynamics-model uncertainty
        num_support = int(N_SAMPLES*0.7)
        support_states = np.random.uniform(low=-5, high=5, size=[num_support,
                                                    env.observation_space.shape[0]])
        confidence = sorted([(x, dynamics.get_uncertainty(x,
                                sample_policy.select_action(x, 0)[0]))
                            for x in support_states],
                            key = lambda t: t[1])
        support_tuples = []
        print("confidence here:")
        print(confidence[:5])
        # (Fix: removed unused local `sliice`.)
        for s, conf in confidence:
            if conf < 10: #arbitrary bound. for later
                max_a, _ = run_cem(dynamics, s)
                support_tuples.append((s, max_a, "model", 0, 0))
            else:
                a = sample_policy.select_action(s, 0)[0].tolist()
                support_tuples.append((s, a, "support", 0, 0))

        # sample and solve
        max_policy, max_eval, max_set = sample_policy, sample_eval, best_tuples
        branch_buffer = Replay_buffer(args.gamma)

        print(TOP_N_CONSTRAINTS)
        print(len(best_tuples))
        print(len(low_rew_constraints_set))

        for branch in range(args.branches):

            branch_policy = make_policy(None, None)
            branch_buffer = Replay_buffer(args.gamma)

            if N_SAMPLES >= len(best_tuples):
                constraints = best_tuples
            else:
                constraints = random.sample(best_tuples+support_tuples, N_SAMPLES)

            # Get metadata of constraints
            states, actions, info, rewards, _ = zip(*constraints)
            print("ep %d b %d: %d constraints mean: %.3f  std: %.3f  max: %.3f" % ( i_episode, branch, len(constraints), np.mean(rewards), np.std(rewards), max(rewards)))

            print(info)

            if isinstance(states[0], torch.Tensor):
                states = torch.cat(states)
            else:
                states = torch.Tensor(states)

            if isinstance(actions[0], torch.Tensor):
                actions = torch.cat(actions)
            else:
                actions = torch.Tensor(actions)

            branch_policy.train(states.to(device), actions.to(device), epoch=args.training_epoch)

            # Evaluate the branch policy greedily (zero action variance).
            eval_rew = 0
            for i in range(EVAL_TRAJ):
                state, done = env.reset(), False
                step = 0
                while not done: # Don't infinite loop while learning
                    action = branch_policy.select_action(state,0)
                    action = action.flatten()
                    next_state, reward, done, _ = env.step(action)
                    eval_rew += reward
                    branch_buffer.push((state, next_state, action, reward, done, ("eval", i, step)))
                    state = next_state
                    step += 1
                    if args.render:
                        env.render()
                    if done:
                        break
            eval_rew /= EVAL_TRAJ

            #log
            print('Episode {}\tBranch: {}\tEval reward: {:.2f}\tExplore reward: {:.2f}'.format(
                i_episode, branch, eval_rew, explore_rew))
            logfile.write('Episode {}\tBranch: {}\tConstraints:{}\tEval reward: {:.2f}\n'.format(i_episode, branch, len(constraints), eval_rew))

            if eval_rew > max_eval:
                print("updated to this policy")
                max_eval, max_policy, max_set = eval_rew, branch_policy, constraints
                replay_buffer = branch_buffer

        # the end of branching: adopt the best branch if it improved,
        # checkpointing constraints and the policy state dict.
        if max_eval > sample_eval:
            with open("%s/%d_constraints.p"%(dir_name,i_episode), "wb") as f:
                pickle.dump({"all": best_tuples, "constraints": max_set}, f)

            with open("%s/%d_policy.p"%(dir_name,i_episode), 'wb') as out:
                policy_state_dict = OrderedDict({k:v.to('cpu') for k, v in max_policy.state_dict().items()})
                pickle.dump(policy_state_dict, out)

            sample_policy, sample_eval = max_policy, max_eval
            ep_no_improvement = 0
        else:
            ep_no_improvement +=1

        if i_episode>50:
            break

    # Fix: release the log file handle (was left open for process lifetime).
    logfile.close()
Example #4
0
def main():
    """Parse command-line arguments, persist them, then launch training."""
    parsed_args = get_args()
    save_args(parsed_args)
    train(parsed_args)
Example #5
0
def train():
    """
    Main script.

    Steps:

    * Parse command line arguments.
    * Specify contexts for computation.
    * Initialize DataIterator.
    * Construct a computation graph for training and one for validation.
    * Initialize solver and set parameter variables to that.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Computate error rate for validation data (periodically)
      * Get a next minibatch.
      * Execute forwardprop
      * Set parameter gradients zero
      * Execute backprop.
      * Solver updates parameters by using gradients computed by backprop.
      * Compute training error
    """
    # Parse args
    args = get_args()
    n_train_samples = 50000
    bs_valid = args.batch_size
    extension_module = args.context
    ctx = get_extension_context(extension_module,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)
    # Select the network/data-iterator pair. Fix: use elif/else so an unknown
    # net name fails fast with ValueError instead of a later NameError on
    # `prediction` (matches the error handling used elsewhere in this file).
    if args.net == "cifar10_resnet23":
        prediction = functools.partial(resnet23_prediction,
                                       ncls=10,
                                       nmaps=64,
                                       act=F.relu)
        data_iterator = data_iterator_cifar10
    elif args.net == "cifar100_resnet23":
        prediction = functools.partial(resnet23_prediction,
                                       ncls=100,
                                       nmaps=384,
                                       act=F.elu)
        data_iterator = data_iterator_cifar100
    else:
        raise ValueError("Unknown network type {}".format(args.net))

    # Create training graphs
    test = False
    image_train = nn.Variable((args.batch_size, 3, 32, 32))
    label_train = nn.Variable((args.batch_size, 1))
    pred_train = prediction(image_train, test)
    loss_train = loss_function(pred_train, label_train)
    input_image_train = {"image": image_train, "label": label_train}

    # Create validation graph
    test = True
    image_valid = nn.Variable((bs_valid, 3, 32, 32))
    pred_valid = prediction(image_valid, test)
    input_image_valid = {"image": image_valid}

    # Solvers
    solver = S.Adam()
    solver.set_parameters(nn.get_parameters())

    # Create monitor
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=10)
    monitor_verr = MonitorSeries("Test error", monitor, interval=1)

    # Data Iterator (validation uses bs_valid, consistent with image_valid)
    tdata = data_iterator(args.batch_size, True)
    vdata = data_iterator(bs_valid, False)

    # Training-loop
    for i in range(args.max_iter):
        # Validation once per epoch
        if i % int(n_train_samples / args.batch_size) == 0:
            ve = 0.
            for j in range(args.val_iter):
                image, label = vdata.next()
                input_image_valid["image"].d = image
                pred_valid.forward()
                ve += categorical_error(pred_valid.d, label)
            ve /= args.val_iter
            monitor_verr.add(i, ve)
        if int(i % args.model_save_interval) == 0:
            nn.save_parameters(
                os.path.join(args.model_save_path, 'params_%06d.h5' % i))

        # Forward/Zerograd/Backward
        image, label = tdata.next()
        input_image_train["image"].d = image
        input_image_train["label"].d = label
        loss_train.forward()
        solver.zero_grad()
        loss_train.backward()

        # Solvers update
        solver.update()

        e = categorical_error(pred_train.d, input_image_train["label"].d)
        monitor_loss.add(i, loss_train.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    nn.save_parameters(
        os.path.join(args.model_save_path, 'params_%06d.h5' % (args.max_iter)))
Example #6
0
from pathlib import Path

import torch
import numpy as np
from allennlp.models import Model
from allennlp.data.vocabulary import Vocabulary
from allennlp.common import JsonDict
from pytorch_transformers.optimization import AdamW

import args
import readers
import common
from predictor import McScriptPredictor
from util import example_input, is_cuda, train_model, load_data, print_args

# Parse command-line arguments once at import time and register them globally.
ARGS = args.get_args()
common.set_args(ARGS)


def make_prediction(model: Model,
                    reader: readers.BaseReader,
                    verbose: bool = False) -> JsonDict:
    "Create a predictor to run our model and get predictions."
    # Switch to evaluation mode (e.g. disables dropout) before predicting.
    model.eval()
    predictor = McScriptPredictor(model, reader)

    if verbose:
        print()
        print('#' * 5, 'EXAMPLE', '#' * 5)

    # NOTE(review): the function is annotated to return JsonDict but falls off
    # the end after fetching one example input — the body appears truncated;
    # confirm against the original source.
    passage, question, answer1, label1 = example_input(0)
def train():
    """
    Main script.

    Steps:

    * Parse command line arguments.
    * Specify a context for computation.
    * Initialize DataIterator for MNIST.
    * Construct a computation graph for training and validation.
    * Initialize a solver and set parameter variables to it.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Computate error rate for validation data (periodically)
      * Get a next minibatch.
      * Execute forwardprop on the training graph.
      * Compute training error
      * Set parameter gradients zero
      * Execute backprop.
      * Solver updates parameters by using gradients computed by backprop.
    """
    args = get_args()

    from numpy.random import seed
    seed(0)  # fix NumPy's RNG for reproducibility

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(
        args.context, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    if args.net == 'lenet':
        mnist_cnn_prediction = mnist_lenet_prediction
    elif args.net == 'resnet':
        mnist_cnn_prediction = mnist_resnet_prediction
    else:
        raise ValueError("Unknown network type {}".format(args.net))

    # TRAIN
    # Create input variables.
    image = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    pred = mnist_cnn_prediction(image, test=False, aug=args.augment_train)
    pred.persistent = True  # keep pred's buffer so the error can be read later
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    vpred = mnist_cnn_prediction(vimage, test=True, aug=args.augment_test)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=10)

    # Initialize DataIterator for MNIST.
    from numpy.random import RandomState
    data = data_iterator_mnist(args.batch_size, True, rng=RandomState(1223))
    vdata = data_iterator_mnist(args.batch_size, False)
    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                # Cast output to float32 before error computation
                # (presumably relevant when type_config selects half
                # precision — confirm).
                vpred.data.cast(np.float32, ctx)
                ve += categorical_error(vpred.d, vlabel.d)
            monitor_verr.add(i, ve / args.val_iter)
        if i % args.model_save_interval == 0:
            nn.save_parameters(os.path.join(
                args.model_save_path, 'params_%06d.h5' % i))
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        loss.data.cast(np.float32, ctx)
        pred.data.cast(np.float32, ctx)
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    # Final validation pass after training completes.
    ve = 0.0
    for j in range(args.val_iter):
        vimage.d, vlabel.d = vdata.next()
        vpred.forward(clear_buffer=True)
        ve += categorical_error(vpred.d, vlabel.d)
    monitor_verr.add(i, ve / args.val_iter)

    # Save final parameters, then export the validation graph as an NNP file.
    parameter_file = os.path.join(
        args.model_save_path, '{}_params_{:06}.h5'.format(args.net, args.max_iter))
    nn.save_parameters(parameter_file)

    runtime_contents = {
        'networks': [
            {'name': 'Validation',
             'batch_size': args.batch_size,
             'outputs': {'y': vpred},
             'names': {'x': vimage}}],
        'executors': [
            {'name': 'Runtime',
             'network': 'Validation',
             'data': ['x'],
             'output': ['y']}]}
    save.save(os.path.join(args.model_save_path,
                           '{}_result.nnp'.format(args.net)), runtime_contents)
Example #8
0
def main():
    """Parse arguments, save them under the "match" tag, then run matching."""
    cli_args = get_args()
    save_args(cli_args, "match")
    match(cli_args)
Example #9
0
def train():
    """Train a LeNet "reference" MNIST network, checkpointing parameters
    whenever validation error improves, and save the final parameters.
    """
    args = get_args()

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    mnist_cnn_prediction = mnist_lenet_prediction

    # TRAIN
    reference = "reference"  # parameter scope name for the reference network
    # Create input variables.
    image = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size, 1])
    # Create `reference` prediction graph.
    pred = mnist_cnn_prediction(image, scope=reference, test=False)
    pred.persistent = True  # keep pred's buffer so the error can be read later
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create reference prediction graph.
    vpred = mnist_cnn_prediction(vimage, scope=reference, test=True)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=10)

    # Initialize DataIterator for MNIST.
    data = data_iterator_mnist(args.batch_size, True)
    vdata = data_iterator_mnist(args.batch_size, False)
    # Track the best validation error for best-model checkpointing.
    best_ve = 1.0
    ve = 1.0
    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            ve /= args.val_iter
            monitor_verr.add(i, ve)
        # Checkpoint whenever validation error improves.
        if ve < best_ve:
            nn.save_parameters(
                os.path.join(args.model_save_path, 'params_%06d.h5' % i))
            best_ve = ve
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    # Final validation pass after the last update.
    ve = 0.0
    for j in range(args.val_iter):
        vimage.d, vlabel.d = vdata.next()
        vpred.forward(clear_buffer=True)
        ve += categorical_error(vpred.d, vlabel.d)
    monitor_verr.add(i, ve / args.val_iter)

    parameter_file = os.path.join(args.model_save_path,
                                  'params_{:06}.h5'.format(args.max_iter))
    nn.save_parameters(parameter_file)
def infer():
    """
    Main script.

    Steps:

    * Parse command line arguments.
    * Specify a context for computation.
    * Initialize DataIterator for MNIST.
    * Construct a computation graph for inference.
    * Load parameter variables to infer.
    * Create monitor instances for saving and displaying infering stats.
    """
    args = get_args()

    from numpy.random import seed
    seed(0)  # fix NumPy's RNG for reproducibility

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    if args.net == 'lenet':
        mnist_cnn_prediction = mnist_lenet_prediction
    elif args.net == 'resnet':
        mnist_cnn_prediction = mnist_resnet_prediction
    else:
        raise ValueError("Unknown network type {}".format(args.net))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    vpred = mnist_cnn_prediction(vimage, test=True, aug=args.augment_test)

    # Create Solver.
    # NOTE(review): the solver, training monitors and training data iterator
    # below are never used later in this function — possibly leftovers from a
    # training script; confirm before removing.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=10)

    # Initialize DataIterator for MNIST.
    from numpy.random import RandomState
    data = data_iterator_mnist(args.batch_size, True, rng=RandomState(1223))
    vdata = data_iterator_mnist(1, False)  # batch size 1 for per-sample eval

    from nnabla.utils.nnp_graph import NnpLoader

    # Read a .nnp file.
    nnp = NnpLoader(args.pretrained)
    # Assume a graph `graph_a` is in the nnp file.
    net = nnp.get_network(nnp.get_network_names()[0], batch_size=1)
    # `x` is an input of the graph.
    x = net.inputs['x']
    # 'y' is an outputs of the graph.
    y = net.outputs['y']
    ve = 0.0

    # Accumulate classification error over 10000 test samples.
    for j in range(10000):
        x.d, vlabel.d = vdata.next()
        y.forward(clear_buffer=True)
        ve += categorical_error(y.d, vlabel.d)
    #monitor_verr.add(1, ve / args.val_iter)

    print("acc=", 1 - ve / 10000, ".")
    # append F.Softmax to the prediction graph so users see intuitive outputs
    # NOTE(review): runtime_contents is constructed but never written out in
    # the visible body — the function appears truncated; confirm that a
    # save.save(...) call follows in the original source.
    runtime_contents = {
        'networks': [{
            'name': 'Validation',
            'batch_size': args.batch_size,
            'outputs': {
                'y': F.softmax(vpred)
            },
            'names': {
                'x': vimage
            }
        }],
        'executors': [{
            'name': 'Runtime',
            'network': 'Validation',
            'data': ['x'],
            'output': ['y']
        }]
    }
Example #11
0
        "test_beg": opt['test_beg'],
    }
    data_setting = {
        'path': opt['base_img_path'],
        'protected_attribute': opt['protected_attribute'],
        'attribute': opt['attribute'],
        'data_params': data_params,
        'batch_size': opt['batch_size']
    }
    opt['data_setting'] = data_setting
    return opt


if __name__ == "__main__":

    # Parse options and resolve dataset/IO settings.
    opt = args.get_args()
    opt = get_data_settings(opt)
    attr_list = utils.get_all_attr()
    # Bind the default NNabla context (device/precision) before building graphs.
    ctx = get_extension_context(opt['context'],
                                device_id=opt['device_id'],
                                type_config=opt['type_config'])
    nn.set_default_context(ctx)
    batch_size = opt['data_setting']['batch_size']
    # CelebA test split, unaugmented and unshuffled for deterministic evaluation.
    test = dl.actual_celeba_dataset(opt['data_setting'],
                                    batch_size,
                                    augment=False,
                                    split='test',
                                    shuffle=False)
    # Load the best-accuracy checkpoint for the selected attribute's classifier.
    AC = clf.attribute_classifier(
        model_load_path="{}/{}/best/best_acc.h5".format(
            opt['model_save_path'], attr_list[opt['attribute']]))
Example #12
0
    data = data_iterator_mnist(batch_size, train=False, shuffle=True, rng=rng)
    for i in range(10000 / batch_size):
        image_data, label_data = data.next()
        image.d = image_data / 255.
        feature.forward(clear_buffer=True)
        features.append(feature.d.copy())
        labels.append(label_data.copy())
    features = np.vstack(features)
    labels = np.vstack(labels)

    # Visualize
    f = plt.figure(figsize=(16, 9))
    for i in range(10):
        c = plt.cm.Set1(i / 10.)
        plt.plot(features[labels.flat == i, 0].flatten(),
                 features[labels.flat == i, 1].flatten(),
                 '.',
                 c=c)
    plt.legend(map(str, range(10)))
    plt.grid()
    plt.savefig(os.path.join(args.monitor_path, "embed.png"))


if __name__ == '__main__':
    # Reuse one directory for both monitor output and saved models.
    monitor_path = 'tmp.monitor.siamese'
    args = get_args(monitor_path=monitor_path,
                    model_save_path=monitor_path,
                    max_iter=5000)
    train(args)
    visualize(args)
Example #13
0
def main():
    """Train a two-layer LSTM language model and report perplexity.

    Flow: parse args, load train/valid/test token streams, select the
    compute context, build a training graph and a validation graph that
    share parameters, optimize with SGD (weight decay + gradient
    clipping, LR decayed x0.25 when validation stops improving), and
    finally evaluate on the test split with batch size 1.
    """

    args = get_args()
    state_size = args.state_size
    batch_size = args.batch_size
    num_steps = args.num_steps  # BPTT truncation length
    num_layers = args.num_layers  # NOTE(review): parsed but never used below
    max_epoch = args.max_epoch
    max_norm = args.gradient_clipping_max_norm
    num_words = 10000  # vocabulary size (standard PTB setting)
    lr = args.learning_rate

    train_data, val_data, test_data = get_data()

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Monitors: validation/test intervals equal one full pass over the
    # respective split, so each emits once per evaluation.
    from nnabla.monitor import Monitor, MonitorSeries
    monitor = Monitor(args.work_dir)
    monitor_perplexity = MonitorSeries("Training perplexity",
                                       monitor,
                                       interval=10)
    monitor_vperplexity = MonitorSeries("Validation perplexity",
                                        monitor,
                                        interval=(len(val_data) //
                                                  (num_steps * batch_size)))
    monitor_tperplexity = MonitorSeries("Test perplexity",
                                        monitor,
                                        interval=(len(test_data) //
                                                  (num_steps * 1)))

    # Two stacked LSTM layers.
    l1 = LSTMWrapper(batch_size, state_size)
    l2 = LSTMWrapper(batch_size, state_size)

    # train graph

    x = nn.Variable((batch_size, num_steps))
    t = nn.Variable((batch_size, num_steps))
    w = I.UniformInitializer((-0.1, 0.1))
    b = I.ConstantInitializer(1)
    loss = get_loss(l1, l2, x, t, w, b, num_words, batch_size, state_size,
                    True)
    # share_data() presumably ties the wrappers' internal buffers to the
    # graph just built so later graphs reuse them -- TODO confirm in
    # LSTMWrapper.
    l1.share_data()
    l2.share_data()

    # validation graph

    vx = nn.Variable((batch_size, num_steps))
    vt = nn.Variable((batch_size, num_steps))
    vloss = get_loss(l1, l2, vx, vt, w, b, num_words, batch_size, state_size)
    solver = S.Sgd(lr)
    solver.set_parameters(nn.get_parameters())

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
    best_val = 10000  # sentinel: any real perplexity should beat this
    for epoch in range(max_epoch):
        # Fresh hidden state at each epoch boundary.
        l1.reset_state()
        l2.reset_state()
        for i in range(len(train_data) // (num_steps * batch_size)):
            x.d, t.d = get_batch(train_data, i * num_steps, batch_size,
                                 num_steps)
            solver.zero_grad()
            loss.forward()
            loss.backward(clear_buffer=True)
            solver.weight_decay(1e-5)
            # Clip gradients before the SGD step.
            gradient_clipping(nn.get_parameters().values(), max_norm)
            solver.update()
            perp = perplexity(loss.d.copy())
            monitor_perplexity.add(
                (len(train_data) // (num_steps * batch_size)) * (epoch) + i,
                perp)
        # Validation pass (forward only) with re-initialized state.
        l1.reset_state()
        l2.reset_state()
        vloss_avg = 0
        for i in range(len(val_data) // (num_steps * batch_size)):
            vx.d, vt.d = get_batch(val_data, i * num_steps, batch_size,
                                   num_steps)
            vloss.forward()
            vloss_avg += vloss.d.copy()
        vloss_avg /= float((len(val_data) // (num_steps * batch_size)))
        vper = perplexity(vloss_avg)

        if vper < best_val:
            best_val = vper
            if vper < 200:  # only save once the model is reasonably good
                save_name = "params_epoch_{:02d}.h5".format(epoch)
                nn.save_parameters(os.path.join(args.save_dir, save_name))
        else:
            # No improvement: decay the learning rate by 4x.
            solver.set_learning_rate(solver.learning_rate() * 0.25)
            logger.info("Decreased learning rate to {:05f}".format(
                solver.learning_rate()))
        # `i` here is the final index left over from the validation loop.
        monitor_vperplexity.add(
            (len(val_data) // (num_steps * batch_size)) * (epoch) + i, vper)

    # for final test split
    # Test evaluation uses batch size 1, so separate wrappers and a
    # separate graph are built.
    t_batch_size = 1
    tl1 = LSTMWrapper(t_batch_size, state_size)
    tl2 = LSTMWrapper(t_batch_size, state_size)
    tloss_avg = 0
    tx = nn.Variable((t_batch_size, num_steps))
    tt = nn.Variable((t_batch_size, num_steps))
    tloss = get_loss(tl1, tl2, tx, tt, w, b, num_words, 1, state_size)

    tl1.share_data()
    tl2.share_data()

    for i in range(len(test_data) // (num_steps * t_batch_size)):
        tx.d, tt.d = get_batch(test_data, i * num_steps, 1, num_steps)
        tloss.forward()
        tloss_avg += tloss.d.copy()
    tloss_avg /= float((len(test_data) // (num_steps * t_batch_size)))
    tper = perplexity(tloss_avg)
    monitor_tperplexity.add(
        (len(test_data) // (num_steps * t_batch_size)) * (epoch) + i, tper)
Example #14
0
        self.i = i

    def __getitem__(self, item):
        """Return the (X, Y) pair at *item* from the split selected by
        ``self.i`` (0 = train, 1 = valid, 2 = test); other values yield
        None, matching the original fall-through behavior."""
        if self.i == 0:
            inputs, targets = self.texti.trainX, self.texti.trainY
        elif self.i == 1:
            inputs, targets = self.texti.validX, self.texti.validY
        elif self.i == 2:
            inputs, targets = self.texti.testX, self.texti.testY
        else:
            return None
        return inputs[item], targets[item]

    def __len__(self):
        """Dataset length is defined by the training split's first axis,
        regardless of which split ``self.i`` selects."""
        train_inputs = self.texti.trainX
        return train_inputs.shape[0]


if __name__ == "__main__":
    config = args.get_args()
    texti = TextIterator(config)
    trainDataLoader = DataLoader(dataset=MyDataSet(config, texti, 0),
                                 batch_size=config.batchSize,
                                 shuffle=True,
                                 num_workers=0,
                                 drop_last=True)
    testDataLoader = DataLoader(dataset=MyDataSet(config, texti, 2),
                                batch_size=config.batchSize,
                                shuffle=False,
                                num_workers=0,
                                drop_last=True)
    for epoch in range(2):
        for i, data in enumerate(trainDataLoader):
            inputs, labels = data
            print("epoch: ", epoch, " ", inputs, " ", inputs.shape, " ",
Example #15
0
def main():
    """Entry point: parse CLI arguments, load the referenced config
    file, and run the application pipeline."""
    cli = get_args()
    settings = utils.read_config(cli.config)
    Application(settings).process()
Example #16
0
def train():
    """
    Main script.

    Steps:

    * Parse command line arguments.
    * Specify a context for computation.
    * Initialize DataIterator for MNIST.
    * Construct a computation graph for training and validation.
    * Initialize a solver and set parameter variables to it.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Computate error rate for validation data (periodically)
      * Get a next minibatch.
      * Execute forwardprop on the training graph.
      * Compute training error
      * Set parameter gradients zero
      * Execute backprop.
      * Solver updates parameters by using gradients computed by backprop.
    """
    args = get_args()

    # Get context.
    # NOTE(review): nnabla.contrib.context is the legacy context API;
    # other examples in this file use nnabla.ext_utils.get_extension_context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'  # default to CPU when no context is given
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    mnist_cnn_prediction = mnist_lenet_prediction
    if args.net == 'resnet':
        mnist_cnn_prediction = mnist_resnet_prediction

    # TRAIN
    # Create input variables.
    image = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    pred = mnist_cnn_prediction(image, test=False)
    # Keep pred's buffer alive through backward(clear_buffer=True) so the
    # training error can still be computed from pred.d afterwards.
    pred.persistent = True
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create prediction graph (inference variant via test=True).
    vpred = mnist_cnn_prediction(vimage, test=True)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=10)

    # Initialize DataIterator for MNIST (True -> train split, False -> test).
    data = data_iterator_mnist(args.batch_size, True)
    vdata = data_iterator_mnist(args.batch_size, False)
    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation: average error over args.val_iter minibatches.
            ve = 0.0
            for j in range(args.val_iter):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            monitor_verr.add(i, ve / args.val_iter)
        if i % args.model_save_interval == 0:
            nn.save_parameters(os.path.join(
                args.model_save_path, 'params_%06d.h5' % i))
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    # Final validation after the last update.
    ve = 0.0
    for j in range(args.val_iter):
        vimage.d, vlabel.d = vdata.next()
        vpred.forward(clear_buffer=True)
        ve += categorical_error(vpred.d, vlabel.d)
    monitor_verr.add(i, ve / args.val_iter)

    # Save final parameters; the name encodes net type and iteration count
    # so downstream inference scripts can locate the file.
    parameter_file = os.path.join(
        args.model_save_path, '{}_params_{:06}.h5'.format(args.net, args.max_iter))
    nn.save_parameters(parameter_file)
def train():
    """
    Naive Multi-Device Training

    NOTE: the communicator exposes low-level interfaces

    * Parse command line arguments.
    * Specify contexts for computation.
    * Initialize DataIterator.
    * Construct computation graphs for training and one for validation.
    * Initialize solvers and set parameter variables to those.
    * Instantiate a communicator and set parameter variables.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Computate error rate for validation data (periodically)
      * Get a next minibatch.
      * Execute forwardprops
      * Set parameter gradients zero
      * Execute backprop.
      * Inplace allreduce (THIS IS THE MAIN difference from a single device training)
      * Solver updates parameters by using gradients computed by backprop.
      * Compute training error
    """
    # Parse args
    args = get_args()
    n_train_samples = 50000  # CIFAR-100 training-set size
    bs_valid = args.batch_size

    # Create one context per device; this scheme only works with CUDA
    # backends, so reject anything else up front.
    extension_module = args.context
    if extension_module != "cuda" and \
            extension_module != "cuda.cudnn":
        raise Exception("Use `cuda` or `cuda.cudnn` extension_module.")
    n_devices = args.n_devices
    ctxs = []
    for i in range(n_devices):
        ctx = extension_context(extension_module, device_id=i)
        ctxs.append(ctx)
    ctx = ctxs[-1]

    # Create training graphs: one model replica per device, each with its
    # own inputs and parameters under a "device<i>" parameter scope.
    input_image_train = []
    preds_train = []
    losses_train = []
    test = False
    for i in range(n_devices):
        image = nn.Variable((args.batch_size, 3, 32, 32))
        label = nn.Variable((args.batch_size, 1))
        device_scope_name = "device{}".format(i)

        pred = cifar100_resnet23_prediction(
            image, ctxs[i], device_scope_name, test)
        loss = cifar100_resnet32_loss(pred, label)

        input_image_train.append({"image": image, "label": label})
        preds_train.append(pred)
        losses_train.append(loss)

    # Create validation graph on device 0.
    # Bug fix: this previously passed ctxs[i] -- `i` being a stale leftover
    # from the loop above, i.e. the LAST device's context -- while the
    # parameters live in the "device0" scope. Use the device-0 context so
    # the context matches the parameter scope.
    test = True
    device_scope_name = "device{}".format(0)
    image_valid = nn.Variable((bs_valid, 3, 32, 32))
    pred_valid = cifar100_resnet23_prediction(
        image_valid, ctxs[0], device_scope_name, test)
    input_image_valid = {"image": image_valid}

    # Solvers: one Adam per device, bound to that device's parameters.
    solvers = []
    for i in range(n_devices):
        with nn.context_scope(ctxs[i]):
            solver = S.Adam()
            device_scope_name = "device{}".format(i)
            with nn.parameter_scope(device_scope_name):
                params = nn.get_parameters()
                solver.set_parameters(params)
            solvers.append(solver)

    # Communicator for in-place gradient allreduce across devices.
    # (The misspelled class name matches nnabla's actual API; see also
    # MultiProcessDataParalellCommunicator used elsewhere in this file.)
    comm = C.DataParalellCommunicator(ctx)
    for i in range(n_devices):
        device_scope_name = "device{}".format(i)
        with nn.parameter_scope(device_scope_name):
            ctx = ctxs[i]
            params = nn.get_parameters()
            comm.add_context_and_parameters((ctx, params))
    comm.init()

    # Create threadpools with one thread each so the per-device
    # forward/backward passes can run concurrently.
    pools = []
    for _ in range(n_devices):
        pool = ThreadPool(processes=1)
        pools.append(pool)

    # Once forward/backward to safely secure memory
    for device_id in range(n_devices):
        data, label = \
            (np.random.randn(*input_image_train[device_id]["image"].shape),
             (np.random.rand(*input_image_train[device_id]["label"].shape) * 10).astype(np.int32))

        ret = pools[device_id].apply_async(forward_backward,
                                           (input_image_train[device_id]["image"], data,
                                            input_image_train[device_id]["label"], label,
                                               losses_train[device_id], solvers[device_id]))
        ret.get()
        losses_train[device_id].d  # sync to host

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=10)
    with data_iterator_cifar100(args.batch_size, True) as tdata, \
            data_iterator_cifar100(bs_valid, False) as vdata:
        # Training-loop. Each iteration consumes n_devices minibatches,
        # hence monitors are indexed by i * n_devices.
        for i in range(int(args.max_iter / n_devices)):
            # Validation
            if i % int(n_train_samples / args.batch_size / n_devices) == 0:
                ve = 0.
                for j in range(args.val_iter):
                    image, label = vdata.next()
                    input_image_valid["image"].d = image
                    pred_valid.forward()
                    ve += categorical_error(pred_valid.d, label)
                ve /= args.val_iter
                monitor_verr.add(i * n_devices, ve)
            if i % int(args.model_save_interval / n_devices) == 0:
                nn.save_parameters(os.path.join(
                    args.model_save_path, 'params_%06d.h5' % i))

            # Forwards/Zerograd/Backwards dispatched asynchronously per device.
            fb_results = []
            for device_id in range(n_devices):
                image, label = tdata.next()

                res = pools[device_id].apply_async(forward_backward,
                                                   (input_image_train[device_id]["image"], image,
                                                    input_image_train[device_id]["label"], label,
                                                    losses_train[device_id], solvers[device_id]))
                fb_results.append(res)
            for device_id in range(n_devices):
                fb_results[device_id].get()  # wait for all replicas

            # In-place Allreduce
            comm.allreduce()

            # Solvers update
            for device_id in range(n_devices):
                solvers[device_id].update()

            # Training stats are reported from the last replica only.
            e = categorical_error(
                preds_train[-1].d, input_image_train[-1]["label"].d)
            monitor_loss.add(i * n_devices, losses_train[-1].d.copy())
            monitor_err.add(i * n_devices, e)
            monitor_time.add(i * n_devices)

    # Integer division so the filename index is an int (true division
    # yields a float under Python 3).
    nn.save_parameters(os.path.join(
        args.model_save_path,
        'params_%06d.h5' % (args.max_iter // n_devices)))
Example #18
0
def train():
    """Train WaveNet on LibriSpeech.

    Builds a graph mapping quantized waveform inputs (one-hot encoded)
    to per-sample class logits, optionally conditions on a learned
    speaker embedding, optimizes softmax cross-entropy with Adam, and
    periodically mu-law-decodes the model's argmax output to audio files
    for inspection.
    """
    args = get_args()

    # Set context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in {}:{}".format(args.context, args.type_config))
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    data_iterator = data_iterator_librispeech(args.batch_size, args.data_dir)
    _data_source = data_iterator._data_source  # dirty hack...

    # model
    x = nn.Variable(
        shape=(args.batch_size, data_config.duration, 1))  # (B, T, 1)
    onehot = F.one_hot(x, shape=(data_config.q_bit_len, ))  # (B, T, C)
    wavenet_input = F.transpose(onehot, (0, 2, 1))  # (B, C, T)

    # speaker embedding
    if args.use_speaker_id:
        s_id = nn.Variable(shape=(args.batch_size, 1))
        with nn.parameter_scope("speaker_embedding"):
            s_emb = PF.embed(s_id, n_inputs=_data_source.n_speaker,
                             n_features=WavenetConfig.speaker_dims)
            s_emb = F.transpose(s_emb, (0, 2, 1))
    else:
        s_emb = None  # unconditioned WaveNet

    net = WaveNet()
    wavenet_output = net(wavenet_input, s_emb)

    # Back to (B, T, C): classes on the last axis, matching t's (B, T, 1).
    pred = F.transpose(wavenet_output, (0, 2, 1))

    # (B, T, 1)
    t = nn.Variable(shape=(args.batch_size, data_config.duration, 1))

    loss = F.mean(F.softmax_cross_entropy(pred, t))

    # for generation
    prob = F.softmax(pred)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)

    # setup save env.
    audio_save_path = os.path.join(os.path.abspath(
        args.model_save_path), "audio_results")
    if audio_save_path and not os.path.exists(audio_save_path):
        os.makedirs(audio_save_path)

    # Training loop.
    for i in range(args.max_iter):
        # todo: validation

        x.d, _speaker, t.d = data_iterator.next()
        if args.use_speaker_id:
            s_id.d = _speaker.reshape(-1, 1)

        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.update()

        # Cast to float32 on the device before reading for logging
        # (presumably guards against a half-precision type_config).
        loss.data.cast(np.float32, ctx)
        monitor_loss.add(i, loss.d.copy())

        if i % args.model_save_interval == 0:
            # Greedy decode: argmax over classes, then inverse mu-law.
            prob.forward()
            audios = mu_law_decode(
                np.argmax(prob.d, axis=-1), quantize=data_config.q_bit_len)  # (B, T)
            save_audio(audios, i, audio_save_path)
Example #19
0
    model_times = "model_1/"  # 第几次保存的模型,主要是用来获取最佳结果

    bert_vocab_file = "../bert-base-uncased/vocab.txt"
    bert_model_dir = "../bert-base-uncased"

    do_train = True
    do_test = True

    # map(lambda: x, y: os.path.join(x, y),

    from Processors.Yelp2Processor import Yelp2Processor

    if model_name == "BertOrigin":
        main(
            args.get_args(model_name, data_dir, output_dir, cache_dir, log_dir,
                          bert_vocab_file, bert_model_dir), model_times,
            Yelp2Processor)
    elif model_name == "BertCNN":
        from BertCNN import args_model

        main(
            args.get_args(model_name, data_dir, output_dir, cache_dir, log_dir,
                          bert_vocab_file, bert_model_dir), model_times,
            Yelp2Processor, args_model.get_args())
    elif model_name == "BertATT":

        main(
            args.get_args(model_name, data_dir, output_dir, cache_dir, log_dir,
                          bert_vocab_file, bert_model_dir), model_times,
            Yelp2Processor)
    elif model_name == "BertRCNN":
Example #20
0
        if len(all_scores) > 0 and topk == 1:
            index = torch.argmax(all_scores)
            output_data[q] = paths[index]
        elif len(all_scores) > 0 and topk > 1:
            sorted_scores, index = torch.sort(all_scores, descending=True)
            output_data[q] = [paths[i] for i in index[:topk]]
        else:
            print(q, 'no path')

    with open(fn_out, 'w') as f:
        json.dump(output_data, f, ensure_ascii=False)


if __name__ == "__main__":

    # CLI options for prediction mode.
    args = get_args(mode='predict')

    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    # tokenize
    tokenizer = BertTokenizer.from_pretrained(args.bert_vocab,
                                              do_lower_case=args.do_lower_case)
    bert_field = BertField('BERT', tokenizer=tokenizer)
    print("loaded tokenizer")

    # gpu
    # NOTE(review): CUDA_VISIBLE_DEVICES is assigned twice in this block;
    # one of the two assignments is redundant.
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    print('使用%s号GPU' % args.gpu)

    # model
    # model = Bert_Comparing(args)
Example #21
0
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    with nn.parameter_scope("gen"):
        nn.save_parameters(
            os.path.join(args.model_save_path, "generator_param_%06d.h5" % i))
    with nn.parameter_scope("dis"):
        nn.save_parameters(
            os.path.join(args.model_save_path,
                         "discriminator_param_%06d.h5" % i))


if __name__ == '__main__':
    # Route DCGAN logs and checkpoints to a single monitor directory.
    out_dir = 'tmp.monitor.dcgan'
    args = get_args(monitor_path=out_dir,
                    model_save_path=out_dir,
                    max_iter=20000,
                    learning_rate=0.0002,
                    batch_size=64,
                    weight_decay=0.0001)
    train(args)
Example #22
0
def main():
    """Train (or evaluate) an HMM language model on PTB / WikiText-2.

    Builds the vocabulary and batch iterators with torchtext, constructs
    a FactoredHmmLm, then either runs evaluation only (--eval_only) or
    the full train/validate loop with LR scheduling, finishing with a
    test-split evaluation of the last (not best) model.
    """
    global WANDB_STEP
    args = get_args()
    print(args)

    set_seed(args.seed)

    # Primary device for the model plus an auxiliary device for side
    # computations; a negative id selects CPU.
    device = th.device("cpu" if args.devid < 0 else f"cuda:{args.devid}")
    args.device = device
    aux_device = th.device(
        "cpu" if args.aux_devid < 0 else f"cuda:{args.aux_devid}")
    args.aux_device = aux_device

    TEXT = torchtext.data.Field(batch_first=True)

    if args.dataset == "ptb":
        Dataset = PennTreebank
    elif args.dataset == "wikitext2":
        Dataset = WikiText2

    train, valid, test = Dataset.splits(
        TEXT,
        newline_eos=True,
    )

    TEXT.build_vocab(train)
    V = TEXT.vocab

    # Batch-size functions for the bucket iterator: size batches either by
    # token count (longest sentence so far) or by sentence count.
    def batch_size_tokens(new, count, sofar):
        return max(len(new.text), sofar)

    def batch_size_sents(new, count, sofar):
        return count

    if args.iterator == "bucket":
        train_iter, valid_iter, test_iter = BucketIterator.splits(
            (train, valid, test),
            batch_sizes=[args.bsz, args.eval_bsz, args.eval_bsz],
            device=device,
            sort_key=lambda x: len(x.text),
            batch_size_fn=batch_size_tokens
            if args.bsz_fn == "tokens" else batch_size_sents,
        )
    elif args.iterator == "bptt":
        train_iter, valid_iter, test_iter = BPTTIterator.splits(
            (train, valid, test),
            batch_sizes=[args.bsz, args.eval_bsz, args.eval_bsz],
            device=device,
            bptt_len=args.bptt,
            sort=False,
        )
    else:
        raise ValueError(f"Invalid iterator {args.iterator}")

    if args.no_shuffle_train:
        train_iter.shuffle = False

    name = get_name(args)
    import tempfile
    #wandb.init(project="hmm-lm", name=name, config=args, dir=tempfile.mkdtemp())
    args.name = name

    model = None
    from models.factoredhmmlm import FactoredHmmLm
    model = FactoredHmmLm(V, args)
    model.to(device)
    print(model)
    num_params, num_trainable_params = count_params(model)
    print(f"Num params, trainable: {num_params:,}, {num_trainable_params:,}")
    #wandb.run.summary["num_params"] = num_params

    if args.eval_only:
        # Evaluation-only path: load checkpoint, score valid and test, exit.
        model.load_state_dict(th.load(args.eval_only)["model"])
        v_start_time = time.time()
        # Cached eval loops are faster; the "mixed" variant is needed when
        # the state space exceeds 2**15.
        if args.model == "mshmm" or args.model == "factoredhmm":
            if args.num_classes > 2**15:
                eval_fn = mixed_cached_eval_loop
            else:
                eval_fn = cached_eval_loop
        elif args.model == "hmm":
            eval_fn = cached_eval_loop
        else:
            eval_fn = eval_loop
        valid_losses, valid_n = eval_fn(
            args,
            V,
            valid_iter,
            model,
        )
        report(valid_losses, valid_n, f"Valid perf", v_start_time)

        t_start_time = time.time()
        test_losses, test_n = eval_fn(
            args,
            V,
            test_iter,
            model,
        )
        report(test_losses, test_n, f"Test perf", t_start_time)

        sys.exit()

    parameters = list(model.parameters())
    if args.optimizer == "adamw":
        optimizer = AdamW(
            parameters,
            lr=args.lr,
            betas=(args.beta1, args.beta2),
            weight_decay=args.wd,
        )
    elif args.optimizer == "sgd":
        optimizer = SGD(
            parameters,
            lr=args.lr,
        )
    if args.schedule == "reducelronplateau":
        scheduler = ReduceLROnPlateau(
            optimizer,
            factor=1. / args.decay,
            patience=args.patience,
            verbose=True,
            mode="max",
        )
    elif args.schedule == "noam":
        warmup_steps = args.warmup_steps

        def get_lr(step):
            # Noam schedule: linear warmup, then inverse-sqrt decay.
            scale = warmup_steps**0.5 * min(step**(-0.5),
                                            step * warmup_steps**(-1.5))
            return args.lr * scale

        scheduler = LambdaLR(
            optimizer,
            get_lr,
            last_epoch=-1,
            # Bug fix: the keyword was misspelled `verbse`, which raised
            # TypeError whenever the "noam" schedule was selected.
            verbose=True,
        )
    else:
        raise ValueError("Invalid schedule options")

    # training loop, factor out later if necessary
    for e in range(args.num_epochs):
        start_time = time.time()
        if args.log_counts > 0 and args.keep_counts > 0:
            # reset at START of epoch
            model.state_counts.fill_(0)
        train_losses, train_n = train_loop(
            args,
            V,
            train_iter,
            model,
            parameters,
            optimizer,
            scheduler,
            valid_iter=valid_iter if not args.overfit else None,
            verbose=True,
        )
        total_time = report(train_losses, train_n, f"Train epoch {e}",
                            start_time)

        v_start_time = time.time()
        #eval_fn = cached_eval_loop if args.model == "mshmm" else eval_loop
        if args.model == "mshmm" or args.model == "factoredhmm":
            if args.num_classes > 2**15:
                eval_fn = mixed_cached_eval_loop
            else:
                eval_fn = cached_eval_loop
        elif args.model == "hmm":
            eval_fn = cached_eval_loop
        else:
            eval_fn = eval_loop
        valid_losses, valid_n = eval_fn(args, V, valid_iter, model)
        report(valid_losses, valid_n, f"Valid epoch {e}", v_start_time)

        if args.schedule in valid_schedules:
            # Plateau-style schedulers step on the validation metric.
            scheduler.step(valid_losses.evidence
                           if not args.overfit else train_losses.evidence)

        update_best_valid(valid_losses, valid_n, model, optimizer, scheduler,
                          args.name)

        #wandb.log({
        #"train_loss": train_losses.evidence / train_n,
        #"train_ppl": math.exp(-train_losses.evidence / train_n),
        #"epoch_time": total_time,
        #"valid_loss": valid_losses.evidence / valid_n,
        #"valid_ppl": math.exp(-valid_losses.evidence / valid_n),
        #"best_valid_loss": BEST_VALID / valid_n,
        #"best_valid_ppl": math.exp(-BEST_VALID / valid_n),
        #"epoch": e,
        #}, step=WANDB_STEP)

        if args.log_counts > 0 and args.keep_counts > 0:
            # TODO: FACTOR OUT
            # only look at word tokens
            counts = (model.counts / model.counts.sum(0, keepdim=True))[:, 4:]
            c, v = counts.shape
            #cg4 = counts > 1e-4
            #cg3 = counts > 1e-3
            cg2 = counts > 1e-2

            # state counts
            # log these once per epoch, then set back to zero
            sc0 = (model.state_counts == 0).sum()
            sc1 = (model.state_counts == 1).sum()
            sc2 = (model.state_counts == 2).sum()
            sc3 = (model.state_counts == 3).sum()
            sc4 = (model.state_counts == 4).sum()
            sc5 = (model.state_counts >= 5).sum()

            #wandb.log({
            #"avgcounts@1e-4": cg4.sum().item() / float(v),
            #"avgcounts@1e-3": cg3.sum().item() / float(v),
            #"avgcounts@1e-2": cg2.sum().item() / float(v),
            #"maxcounts@1e-4": cg4.sum(0).max().item() / float(v),
            #"maxcounts@1e-3": cg3.sum(0).max().item() / float(v),
            #"maxcounts@1e-2": cg2.sum(0).max().item(),
            #"mincounts@1e-4": cg4.sum(0).min().item() / float(v),
            #"mincounts@1e-3": cg3.sum(0).min().item() / float(v),
            #"mincounts@1e-2": cg2.sum(0).min().item(),
            #"maxcounts": counts.sum(0).max().item(),
            #"mincounts": counts.sum(0).min().item(),

            #"statecounts=0": sc0,
            #"statecounts=1": sc1,
            #"statecounts=2": sc2,
            #"statecounts=3": sc3,
            #"statecounts=4": sc4,
            #"statecounts>=5": sc5,
            #}, step=WANDB_STEP)
            del cg2
            del counts

    # won't use best model. Rerun with eval_only
    t_start_time = time.time()
    test_losses, test_n = eval_fn(
        args,
        V,
        test_iter,
        model,
    )
    report(test_losses, test_n, f"Test perf", t_start_time)
Example #23
0
import os
import sys

src_dir = os.path.join(os.getcwd(), 'src')
sys.path.append(src_dir)

from utils.doc_utils import *
from utils.searcher import *
from args import get_args
from shutil import copyfileobj

if __name__ == '__main__':
    # get_args returns (parsed_args, remaining); the remainder is ignored.
    args, _ = get_args()
    collection = args.collection
    anserini_path = args.anserini_path
    data_path = args.data_path
    index_path = args.index_path
    # Ensure <data_path>/datasets exists for the generated sentence CSV.
    dataset_path = os.path.join(args.data_path, 'datasets')
    if not os.path.exists(dataset_path):
        os.mkdir(dataset_path)
    output_fn = os.path.join(dataset_path, collection + '_sents.csv')

    # Per-collection qrels (relevance judgments) and topics (queries) files.
    fqrel = os.path.join(data_path, 'qrels', 'qrels.' + collection + '.txt')
    ftopic = os.path.join(data_path, 'topics', 'topics.' + collection + '.txt')

    if os.path.exists(fqrel):
        qid2docid = get_relevant_docids(fqrel)
    else:
        # No qrels, label all as 0.
        qid2docid = {}
    qid2text = get_query(ftopic, collection=collection)
Example #24
0
def main():
    """CLI dispatcher: the ``create`` command builds a point-cloud
    dataset from the given mesh file; any other command is a no-op."""
    parsed = get_args()
    if parsed.command != "create":
        return
    create_pcd_dataset_from_mesh(parsed.mesh_data_path)
def train():
    """
    Main script.

    Naive Multi-Device Training

    NOTE: the communicator exposes low-level interfaces

    * Parse command line arguments.
    * Instantiate a communicator and set parameter variables.
    * Specify contexts for computation.
    * Initialize DataIterator.
    * Construct a computation graph for training and one for validation.
    * Initialize solver and set parameter variables to that.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Computate error rate for validation data (periodically)
      * Get a next minibatch.
      * Execute forwardprop
      * Set parameter gradients zero
      * Execute backprop.
      * Inplace allreduce (THIS IS THE MAIN difference from a single device training)
      * Solver updates parameters by using gradients computed by backprop.
      * Compute training error

    """

    args = get_args()
    # Training-set size, used below to derive the warmup iteration count.
    if args.tiny_mode:
        n_train_samples = 100000
    else:
        n_train_samples = 1282167

    # Communicator and Context
    from nnabla.ext_utils import get_extension_context
    extension_module = "cudnn"
    ctx = get_extension_context(extension_module, type_config=args.type_config)
    # NOTE(review): the "Paralell" spelling appears to be the actual nnabla
    # API name used here -- confirm against the installed nnabla version
    # before "fixing" it.
    comm = C.MultiProcessDataParalellCommunicator(ctx)
    comm.init()
    n_devices = comm.size
    mpi_rank = comm.rank
    device_id = mpi_rank
    ctx.device_id = str(device_id)
    nn.set_default_context(ctx)

    # Per-device RNG (seeded by device id): passed to the training data
    # iterator below so each worker shuffles its data differently.
    rng = np.random.RandomState(device_id)
    if args.tiny_mode:
        # We use Tiny ImageNet from Stanford CS231N class.
        # (Tiny ImageNet, https://tiny-imagenet.herokuapp.com/)
        # Tiny ImageNet consists of 200 categories, each category has 500 images
        # in training set. The image size is 64x64. To adapt ResNet into 64x64
        # image inputs, the input image size of ResNet is set as 56x56, and
        # the stride in the first conv and the first max pooling are removed.
        # Please check README.
        data = data_iterator_tiny_imagenet(args.batch_size, 'train')
        vdata = data_iterator_tiny_imagenet(args.batch_size, 'val')
        num_classes = 200
    else:
        # We use ImageNet.
        # (ImageNet, https://imagenet.herokuapp.com/)
        # ImageNet consists of 1000 categories, each category has 1280 images
        # in training set. The image size is various. To adapt ResNet into
        # 320x320 image inputs, the input image size of ResNet is set as
        # 224x224. We need to get tar file and create cache file(320x320 images).
        # Please check README.
        data = data_iterator_imagenet(args.batch_size,
                                      args.train_cachefile_dir,
                                      rng=rng)
        vdata = data_iterator_imagenet(args.batch_size, args.val_cachefile_dir)
        # Shard the validation set so each device evaluates a disjoint slice.
        vdata = vdata.slice(rng=None,
                            num_of_slices=n_devices,
                            slice_pos=device_id)
        num_classes = 1000
    # Workaround to start with the same initialized weights for all workers.
    np.random.seed(313)
    t_model = get_model(args, num_classes, test=False, tiny=args.tiny_mode)
    t_model.pred.persistent = True  # Not clearing buffer of pred in backward
    t_pred2 = t_model.pred.unlinked()
    t_e = F.mean(F.top_n_error(t_pred2, t_model.label))
    v_model = get_model(args, num_classes, test=True, tiny=args.tiny_mode)
    v_model.pred.persistent = True  # Not clearing buffer of pred in forward
    v_pred2 = v_model.pred.unlinked()
    v_e = F.mean(F.top_n_error(v_pred2, v_model.label))

    # Add parameters to communicator.
    comm.add_context_and_parameters((ctx, nn.get_parameters()))

    # Create Solver.
    solver = S.Momentum(args.learning_rate, 0.9)
    solver.set_parameters(nn.get_parameters())

    # Setting warmup: ramp linearly from base_lr (= learning_rate / n_devices)
    # up to args.learning_rate over warmup_epoch epochs' worth of iterations.
    base_lr = args.learning_rate / n_devices
    warmup_iter = int(1. * n_train_samples / args.batch_size /
                      args.accum_grad / n_devices) * args.warmup_epoch
    warmup_slope = base_lr * (n_devices - 1) / warmup_iter
    solver.set_learning_rate(base_lr)

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = M.MonitorSeries("Training error", monitor, interval=10)
    monitor_vloss = M.MonitorSeries("Validation loss", monitor, interval=1)
    monitor_verr = M.MonitorSeries("Validation error", monitor, interval=1)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=10)
    monitor_vtime = M.MonitorTimeElapsed("Validation time",
                                         monitor,
                                         interval=1)

    # Training loop.
    # Scalar Variables used only to all-reduce (average) validation stats
    # across devices.
    vl = nn.Variable()
    ve = nn.Variable()
    for i in range(int(args.max_iter / n_devices)):
        # Save parameters (rank 0 only)
        if i % (args.model_save_interval // n_devices) == 0 and device_id == 0:
            nn.save_parameters(
                os.path.join(args.model_save_path, 'param_%06d.h5' % i))

        # Validation
        if i % (args.val_interval // n_devices) == 0 and i != 0:
            ve_local = 0.
            vl_local = 0.
            val_iter_local = args.val_iter // n_devices
            for j in range(val_iter_local):
                images, labels = vdata.next()
                v_model.image.d = images
                v_model.label.d = labels
                v_model.image.data.cast(np.uint8, ctx)
                v_model.label.data.cast(np.int32, ctx)
                v_model.loss.forward(clear_buffer=True)
                v_e.forward(clear_buffer=True)
                vl_local += v_model.loss.d.copy()
                ve_local += v_e.d.copy()
            # Average local stats, then average again across devices.
            vl_local /= val_iter_local
            vl.d = vl_local
            comm.all_reduce(vl.data, division=True, inplace=True)
            ve_local /= val_iter_local
            ve.d = ve_local
            comm.all_reduce(ve.data, division=True, inplace=True)

            if device_id == 0:
                monitor_vloss.add(i * n_devices, vl.d.copy())
                monitor_verr.add(i * n_devices, ve.d.copy())
                monitor_vtime.add(i * n_devices)

        # Training
        l = 0.0
        e = 0.0
        solver.zero_grad()

        def accumulate_error(l, e, t_model, t_e):
            l += t_model.loss.d
            e += t_e.d
            return l, e

        # Gradient accumulation loop
        for j in range(args.accum_grad):
            images, labels = data.next()
            if j != 0:
                # Update e and l according to previous results of forward
                # propagation.
                # The update of last iteration is performed
                # after solver update to avoid unnecessary CUDA synchronization.
                # This is performed after data.next() in order to overlap
                # the data loading and graph execution.
                # TODO: Move this to the bottom of the loop when prefetch
                # data loader is available.
                l, e = accumulate_error(l, e, t_model, t_e)
            t_model.image.d = images
            t_model.label.d = labels
            t_model.image.data.cast(np.uint8, ctx)
            t_model.label.data.cast(np.int32, ctx)
            t_model.loss.forward(clear_no_need_grad=True)
            t_model.loss.backward(clear_buffer=True)  # Accumulating gradients
            t_e.forward(clear_buffer=True)

        # AllReduce gradients across devices.
        # division=False: gradients are summed; the per-device base_lr set
        # above (learning_rate / n_devices) compensates for the sum.
        params = [x.grad for x in nn.get_parameters().values()]
        comm.all_reduce(params, division=False, inplace=False)

        # Update
        solver.weight_decay(args.weight_decay)
        solver.update()

        # Accumulate errors after solver update
        l, e = accumulate_error(l, e, t_model, t_e)

        # Linear Warmup
        if i <= warmup_iter:
            lr = base_lr + warmup_slope * i
            solver.set_learning_rate(lr)

        # Synchronize by averaging the weights over devices using allreduce
        if (i + 1) % args.sync_weight_every_itr == 0:
            weights = [x.data for x in nn.get_parameters().values()]
            comm.all_reduce(weights, division=True, inplace=True)

        if device_id == 0:
            monitor_loss.add(i * n_devices, l / args.accum_grad)
            monitor_err.add(i * n_devices, e / args.accum_grad)
            monitor_time.add(i * n_devices)

        # Learning rate decay at scheduled iter (schedule is in global iters)
        if i * n_devices in args.learning_rate_decay_at:
            solver.set_learning_rate(solver.learning_rate() * 0.1)

    if device_id == 0:
        nn.save_parameters(
            os.path.join(args.model_save_path,
                         'param_%06d.h5' % (args.max_iter / n_devices)))
Example #26
0
def main():
    """Dispatch on --mode: run training, run inference, or (default branch)
    score/rerank with a cross-validated BM25+BERT interpolation over folds.
    """
    args, other = get_args()

    experiment = args.experiment
    anserini_path = args.anserini_path
    datasets_path = os.path.join(args.data_path, 'datasets')

    if not os.path.isdir('log'):
        os.mkdir('log')

    if args.mode == 'training':
        train(args)
    elif args.mode == 'inference':
        scores = test(args)
        print_scores(scores)
    else:
        folds_path = os.path.join(anserini_path, 'src', 'main', 'resources',
                                  'fine_tuning', args.folds_file)
        qrels_path = os.path.join(anserini_path, 'src', 'main', 'resources',
                                  'topics-and-qrels', args.qrels_file)

        # Positional extras: number of top docs, interpolation weights
        # (used directly in 'test' mode; as grid upper bounds in 'train'
        # mode), held-out fold index, and the sub-mode selector.
        topK = int(other[0])
        alpha = float(other[1])
        beta = float(other[2])
        gamma = float(other[3])
        test_folder_set = int(other[4])
        mode = other[5]

        # Divide topics according to fold parameters
        train_topics, test_topics, all_topics = [], [], []
        with open(folds_path) as f:
            folds = json.load(f)
        for i in range(0, len(folds)):
            all_topics.extend(folds[i])
            if i != test_folder_set:
                train_topics.extend(folds[i])
            else:
                test_topics.extend(folds[i])

        if args.interactive:
            sentid2text = query_sents(args)
            test(args)  # inference over each sentence

        collection_path = os.path.join(
            datasets_path, args.collection +
            '.csv') if not args.interactive else args.interactive_path
        predictions_path = os.path.join(
            args.data_path, 'predictions', 'predict.' +
            experiment) if not args.interactive else os.path.join(
                args.data_path, 'predictions', args.predict_path)

        top_doc_dict, doc_bm25_dict, sent_dict, q_dict, doc_label_dict = eval_bm25(
            collection_path)
        score_dict = load_bert_scores(predictions_path, q_dict, sent_dict)

        if args.interactive:
            # Dump the 100 highest- and 100 lowest-ranked sentences, tagged
            # with whether BM25 or BERT drove the score, for inspection.
            top_rank_docs = visualize_scores(collection_path, score_dict)
            with open(os.path.join(args.data_path, 'query_sent_scores.csv'),
                      'w') as scores_file:
                for doc in top_rank_docs[:100]:
                    scores_file.write('{}\t{}\t{}\t{}\t{}\n'.format(
                        doc[0], sentid2text[doc[0]], doc[1], doc[2],
                        'BM25' if doc[3] > 0 else 'BERT'))
                for doc in top_rank_docs[-100:]:
                    scores_file.write('{}\t{}\t{}\t{}\t{}\n'.format(
                        doc[0], sentid2text[doc[0]], doc[1], doc[2],
                        'BM25' if doc[3] > 0 else 'BERT'))

        if not os.path.isdir('runs'):
            os.mkdir('runs')

        if mode == 'train':
            topics = train_topics if not args.interactive else list(
                q_dict.keys())
            # Grid search for best parameters
            for a in np.arange(0.0, alpha, 0.1):
                for b in np.arange(0.0, beta, 0.1):
                    for g in np.arange(0.0, gamma, 0.1):
                        calc_q_doc_bert(score_dict,
                                        'run.' + experiment + '.cv.train',
                                        topics, top_doc_dict, doc_bm25_dict,
                                        topK, a, b, g)
                        base = 'runs/run.' + experiment + '.cv.train'
                        # NOTE(review): shell command assembled by string
                        # formatting; paths with spaces or shell
                        # metacharacters would break (or abuse) this.
                        # Consider subprocess.run with an argument list.
                        os.system(
                            '{}/eval/trec_eval.9.0.4/trec_eval -M1000 -m map {} {}> eval.base'
                            .format(anserini_path, qrels_path, base))
                        with open('eval.base', 'r') as f:
                            for line in f:
                                metric, qid, score = line.split('\t')
                                map_score = float(score)
                                print(test_folder_set, round(a, 2),
                                      round(b, 2), round(g, 2), map_score)

        elif mode == 'test':
            topics = test_topics if not args.interactive else list(
                q_dict.keys())
            calc_q_doc_bert(
                score_dict,
                'run.' + experiment + '.cv.test.' + str(test_folder_set),
                topics, top_doc_dict, doc_bm25_dict, topK, alpha, beta, gamma)
        else:
            topics = all_topics if not args.interactive else list(
                q_dict.keys())
            calc_q_doc_bert(score_dict, 'run.' + experiment + '.cv.all',
                            topics, top_doc_dict, doc_bm25_dict, topK, alpha,
                            beta, gamma)
Example #27
0
def train():
    """Train the CIFAR-10 'slim' ResNet-23 starting from pretrained weights,
    with a mask set on all parameters (see create_and_set_mask), saving
    parameters whenever validation error improves and after the final iter.
    """
    args = get_args()

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # Pretrained weights are required: they are loaded before graph creation.
    if args.model_load_path == "":
        raise Exception("Set `model_load_path`")
    nn.load_parameters(args.model_load_path)
    model_prediction = cifar10_resnet23_slim_prediction
    # TRAIN
    maps = 64
    data_iterator = data_iterator_cifar10
    c = 3
    h = w = 32
    n_train = 50000
    n_valid = 10000

    # Create input variables.
    image = nn.Variable([args.batch_size, c, h, w])
    label = nn.Variable([args.batch_size, 1])
    # Create model_prediction graph.
    pred = model_prediction(image, maps=maps, test=False)
    pred.persistent = True
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, c, h, w])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create predition graph.
    vpred = model_prediction(vimage, maps=maps, test=True)

    # Set mask over ALL parameters (grad_only=False) at the requested
    # reduction rate.
    create_and_set_mask(nn.get_parameters(grad_only=False),
                        rrate=args.reduction_rate)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=1)

    # Initialize DataIterator
    data = data_iterator(args.batch_size, True)
    vdata = data_iterator(args.batch_size, False)
    best_ve = 1.0
    ve = 1.0
    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(int(n_valid / args.batch_size)):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            ve /= int(n_valid / args.batch_size)
            monitor_verr.add(i, ve)
        # Snapshot when validation error improved; best_ve is updated below
        # so this fires at most once per validation pass.
        if ve < best_ve:
            nn.save_parameters(
                os.path.join(args.model_save_path, 'params_%06d.h5' % i))
            best_ve = ve
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    # Final validation pass, then save the last-iteration parameters.
    ve = 0.0
    for j in range(int(n_valid / args.batch_size)):
        vimage.d, vlabel.d = vdata.next()
        vpred.forward(clear_buffer=True)
        ve += categorical_error(vpred.d, vlabel.d)
    ve /= int(n_valid / args.batch_size)
    monitor_verr.add(i, ve)

    parameter_file = os.path.join(args.model_save_path,
                                  'params_{:06}.h5'.format(args.max_iter))
    nn.save_parameters(parameter_file)
Example #28
0
def main():
    """Entry point: parse arguments, record them, then run interpolation."""
    cfg = get_args()
    # Persist the parsed configuration under the "generate" tag before running.
    save_args(cfg, "generate")
    interpolate(cfg)
Example #29
0
# -------------------------------------------
def print_info(peak_data):
    """Print a human-readable report of one peak's measured properties.

    ``peak_data`` is a mapping providing the keys read below
    (nu, f1, xd, a12sq, a13sq, Euler1, v_peculiar, epsilon, omega, Euler2).
    """
    # Pull every field into a named local first; the prints below then read
    # like a report template.
    nu = peak_data['nu']
    grad_fg = peak_data['f1']
    xd = peak_data['xd']
    a12sq = peak_data['a12sq']
    a13sq = peak_data['a13sq']
    euler1 = peak_data['Euler1']
    v_pec = peak_data['v_peculiar']
    eps = peak_data['epsilon']
    omega = peak_data['omega']
    euler2 = peak_data['Euler2']

    print("Significance = %.2f" % nu)
    print("dfG/dx, dfG/dy, dfG/dz = ", grad_fg)
    print("xd = %.2f" % xd, "a12sq = %.2f" % a12sq, "a13sq = %.2f" % a13sq)
    print("Euler1: a1, b1, p1 = ", euler1)
    print("vx,vy,vz (peak velocity in km/s) :", v_pec)
    print("epsilon = %.2f" % eps, "omega = %.2f" % omega)
    print("Euler2: a2, b2, p2 = ", euler2)


# load args
# -------------------------------------------
args = get_args()

# Simulation parameters: random seed, RG (presumably a smoothing radius --
# confirm against get_args()), grid size Ng, and box length Lbox.
Rdm_seed = args.Rdm_seed
RG = args.RG
Ng = args.Ng
Lbox = args.Lbox
print("Seed = ", Rdm_seed)
print("Lbox = %.1f" % Lbox, "Ng = %d" % Ng, "RG = %.2f" % RG)

# Choose cosmology
# -------------------------------------------
# WMAP9 parameter set wrapped by the project's Cosmos helper (FLRW background).
cosmology = nbcosmos.WMAP9
mycosmo = Cosmos(FLRW=True, obj=cosmology)

# generate linear density field at z=0
# -------------------------------------------
Example #30
0
def train():
    """
    Main script.

    Single-device ImageNet / Tiny-ImageNet training with gradient
    accumulation, periodic validation, checkpoint save/restore, and NNP
    export of the inference graph before and after training.
    """

    args = get_args()

    # Get context.
    from nnabla.ext_utils import get_extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = get_extension_context(extension_module,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    if args.tiny_mode:
        # We use Tiny ImageNet from Stanford CS231N class.
        # (Tiny ImageNet, https://tiny-imagenet.herokuapp.com/)
        # Tiny ImageNet consists of 200 categories, each category has 500 images
        # in training set. The image size is 64x64. To adapt ResNet into 64x64
        # image inputs, the input image size of ResNet is set as 56x56, and
        # the stride in the first conv and the first max pooling are removed.
        # Please check README.
        data = data_iterator_tiny_imagenet(args.batch_size, 'train')
        vdata = data_iterator_tiny_imagenet(args.batch_size, 'val')
        num_classes = 200
    else:
        # We use ImageNet.
        # (ImageNet, https://imagenet.herokuapp.com/)
        # ImageNet consists of 1000 categories, each category has 1280 images
        # in training set. The image size is various. To adapt ResNet into
        # 320x320 image inputs, the input image size of ResNet is set as
        # 224x224. We need to get tar file and create cache file(320x320 images).
        # Please check README.
        data = data_iterator_imagenet(args.batch_size,
                                      args.train_cachefile_dir)
        vdata = data_iterator_imagenet(args.batch_size, args.val_cachefile_dir)
        num_classes = 1000
    t_model = get_model(args, num_classes, test=False, tiny=args.tiny_mode)
    t_model.pred.persistent = True  # Not clearing buffer of pred in backward

    # TODO: need_grad should be passed to get_unlinked_variable after v1.0.3 fix.
    t_pred2 = t_model.pred.get_unlinked_variable()
    t_pred2.need_grad = False

    t_e = F.mean(F.top_n_error(t_pred2, t_model.label))
    v_model = get_model(args, num_classes, test=True, tiny=args.tiny_mode)
    v_model.pred.persistent = True  # Not clearing buffer of pred in forward

    # TODO: need_grad should be passed to get_unlinked_variable after v1.0.3 fix.
    v_pred2 = v_model.pred.get_unlinked_variable()
    v_pred2.need_grad = False

    v_e = F.mean(F.top_n_error(v_pred2, v_model.label))

    # Save_nnp_Epoch0: export the untrained inference graph.
    contents = save_nnp({'x': v_model.image}, {'y': v_model.pred},
                        args.batch_size)
    save.save(os.path.join(args.model_save_path, 'Imagenet_result_epoch0.nnp'),
              contents)

    # Create Solver.
    solver = S.Momentum(args.learning_rate, 0.9)
    solver.set_parameters(nn.get_parameters())

    start_point = 0
    if args.checkpoint is not None:
        # load weights and solver state info from specified checkpoint file.
        start_point = load_checkpoint(args.checkpoint, solver)

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = M.MonitorSeries("Training error", monitor, interval=10)
    monitor_vloss = M.MonitorSeries("Validation loss", monitor, interval=10)
    monitor_verr = M.MonitorSeries("Validation error", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=10)
    monitor_vtime = M.MonitorTimeElapsed("Validation time",
                                         monitor,
                                         interval=10)

    # Training loop.
    for i in range(start_point, args.max_iter):
        # Save parameters
        if i % args.model_save_interval == 0:
            # save checkpoint file
            save_checkpoint(args.model_save_path, i, solver)

        # Validation
        if i % args.val_interval == 0 and i != 0:

            # Clear all intermediate memory to save memory.
            # t_model.loss.clear_recursive()

            l = 0.0
            e = 0.0
            for j in range(args.val_iter):
                images, labels = vdata.next()
                v_model.image.d = images
                v_model.label.d = labels
                # Cast input buffers to compact dtypes on the device context.
                v_model.image.data.cast(np.uint8, ctx)
                v_model.label.data.cast(np.int32, ctx)
                v_model.loss.forward(clear_buffer=True)
                v_e.forward(clear_buffer=True)
                l += v_model.loss.d
                e += v_e.d
            monitor_vloss.add(i, l / args.val_iter)
            monitor_verr.add(i, e / args.val_iter)
            monitor_vtime.add(i)

            # Clear all intermediate memory to save memory.
            # v_model.loss.clear_recursive()

        # Training
        l = 0.0
        e = 0.0
        solver.zero_grad()

        def accumulate_error(l, e, t_model, t_e):
            l += t_model.loss.d
            e += t_e.d
            return l, e

        # Gradient accumulation loop
        for j in range(args.accum_grad):
            images, labels = data.next()
            t_model.image.d = images
            t_model.label.d = labels
            t_model.image.data.cast(np.uint8, ctx)
            t_model.label.data.cast(np.int32, ctx)
            t_model.loss.forward(clear_no_need_grad=True)
            t_model.loss.backward(clear_buffer=True)  # Accumulating gradients
            t_e.forward(clear_buffer=True)
            l, e = accumulate_error(l, e, t_model, t_e)

        solver.weight_decay(args.weight_decay)
        solver.update()

        monitor_loss.add(i, l / args.accum_grad)
        monitor_err.add(i, e / args.accum_grad)
        monitor_time.add(i)

        # Learning rate decay at scheduled iter
        if i in args.learning_rate_decay_at:
            solver.set_learning_rate(solver.learning_rate() * 0.1)
    nn.save_parameters(
        os.path.join(args.model_save_path, 'param_%06d.h5' % args.max_iter))

    # Save_nnp: export the trained inference graph.
    contents = save_nnp({'x': v_model.image}, {'y': v_model.pred},
                        args.batch_size)
    save.save(os.path.join(args.model_save_path, 'Imagenet_result.nnp'),
              contents)
Example #31
0
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    with nn.parameter_scope("gen"):
        nn.save_parameters(os.path.join(
            args.model_save_path, "generator_param_%06d.h5" % i))
    with nn.parameter_scope("dis"):
        nn.save_parameters(os.path.join(
            args.model_save_path, "discriminator_param_%06d.h5" % i))


if __name__ == '__main__':
    # Monitor logs and model snapshots share one directory for this DCGAN run.
    monitor_path = 'tmp.monitor.dcgan'
    args = get_args(monitor_path=monitor_path, model_save_path=monitor_path,
                    max_iter=20000, learning_rate=0.0002, batch_size=64,
                    weight_decay=0.0001)
    train(args)
    def __init__(self,
                 n_states,
                 actions,
                 batch_size=int(128),
                 epsilon=0.1,
                 alpha=0.2,
                 gamma=0.9):
        """Set up a Q-learning agent: nnabla context, Q-network, frozen
        target network, loss, and solver.

        NOTE: this is Python 2 code (print statements).

        Parameters:
            n_states: number of state features (Q-network input width).
            actions: list of available actions; its length fixes the
                Q-network output width.
            batch_size: minibatch size used for the network variables.
            epsilon: exploration parameter (stored on the instance).
            alpha: Q-learning parameter (stored on the instance).
            gamma: discount factor (stored on the instance).
        """
        # Get context.
        from nnabla.contrib.context import extension_context
        args = get_args()
        print "weight_decay:", args.weight_decay
        extension_module = args.context
        if args.context is None:
            extension_module = 'cpu'
        logger.info("Running in %s" % extension_module)
        ctx = extension_context(extension_module, device_id=args.device_id)
        nn.set_default_context(ctx)

        # Q-Learing parametes
        self.epsilon = epsilon
        self.alpha = alpha
        self.gamma = gamma

        self.actions = actions
        self.n_actions = len(actions)
        self.n_states = n_states

        # Neural network's training parametes
        self.learning_rate = 1e-3
        self.batch_size = batch_size
        self.model_save_path = 'models'
        self.model_save_interval = 1000
        self.weight_decay = 0

        # State-Action Plot's parametes
        # (position/velocity ranges -- presumably the MountainCar task's
        # standard bounds; confirm against the environment used.)
        self.plim = [-1.2, 0.6]
        self.vlim = [-0.07, 0.07]
        self.N_position = 27
        self.N_velocity = 27
        self.positions = np.linspace(self.plim[0],
                                     self.plim[1],
                                     num=self.N_position,
                                     endpoint=True)
        self.velocities = np.linspace(self.vlim[0],
                                      self.vlim[1],
                                      num=self.N_velocity,
                                      endpoint=True)

        # --------------------------------------------------
        print "Initializing the Neural Network."
        # --------------------------------------------------
        # Hidden layer's neuron number
        hn = 50
        # Preparing the Computation Graph for Q
        self.Q_x = nn.Variable([self.batch_size, self.n_states])
        self.Q_y = nn.Variable([self.batch_size, self.n_actions])

        # Construct Q-Network for Q-Learning.
        # Two affine layers with tanh in between; output is one Q-value per action.
        l1 = F.tanh(PF.affine(self.Q_x, hn, name='affine1'))
        self.Q_Network = PF.affine(l1, self.n_actions, name='affine2')
        self.Q_Network.persistent = True

        # Create loss function.
        #self.loss = F.mean(F.squared_error(self.train_model, self.yt))
        self.loss = F.mean(F.huber_loss(self.Q_Network, self.Q_y))

        # Preparing the Computation Graph for target Q-Network
        # (need_grad=False: target weights are frozen, updated only by copy)
        self.Q_target_x = nn.Variable([self.batch_size, self.n_states])
        self.Q_target_w1 = nn.Variable([self.n_states, hn],
                                       need_grad=False)  # Weights
        self.Q_target_b1 = nn.Variable([hn], need_grad=False)  # Biases
        self.Q_target_w2 = nn.Variable([hn, self.n_actions],
                                       need_grad=False)  # Weights
        self.Q_target_b2 = nn.Variable([self.n_actions],
                                       need_grad=False)  # Biases

        # Construct target Q-Network for Q-Learning.
        h1 = F.tanh(
            F.affine(self.Q_target_x, self.Q_target_w1, self.Q_target_b1))
        self.Q_target_Network = F.affine(h1, self.Q_target_w2,
                                         self.Q_target_b2)
        # Presumably copies current Q-network weights into the target
        # variables -- see update_Q_target (defined elsewhere).
        self.update_Q_target()

        # --------------------------------------------------
        print "Initializing the Solver."
        # --------------------------------------------------
        # Create Solver
        # self.solver = S.Sgd(self.learning_rate)
        self.solver = S.RMSprop(self.learning_rate, 0.95)
        self.solver.set_parameters(nn.get_parameters())

        # Presumably the target-network refresh period in iterations -- verify
        # against the training-step method.
        self.update_Q = 100
        self.iter = 0
        #
        self.plot_reset = True
Example #33
0
        if args.dev: validDays.extend(devDays)
        if args.test: validDays.extend(testDays)
        validDays = sorted(validDays)

    ######################
    outputDays = validDays
    if args.output == "d": outputDays = devDays
    elif args.output == "t": outputDays = testDays

    return devDays, testDays, validDays, outputDays, timeWindow, Para_newsDayWindow

####################################################
# Script entry point (Python 2: uses print statements).
if __name__ == "__main__":
    print "Program starts at ", time.asctime()

    args = get_args()
    print "**Para setting"
    print args

    ##############
    # Resolve dev/test/validation/output day windows from the CLI arguments.
    devDays, testDays, validDays, outputDays, timeWindow, Para_newsDayWindow = params(args, dataSelect=1)
    print "validDays", validDays
    print "outputDays", outputDays

    # Derive output locations: dataset suffix from the input directory name
    # plus a UTC timestamp flag; models go under ../ni_data/models/<suffix>/.
    fileSuf_data = os.path.basename(os.path.dirname(args.input+"/")) # eg: "word201505"
    time_flag = "." + time.strftime("%Y%m%d%H%M%S", time.gmtime()) # eg: ".20170912035918"
    output_dir = "../ni_data/models/"+fileSuf_data+"/"
    if not os.path.exists(output_dir): os.mkdir(output_dir)
    # Encode the clustering hyperparameter in a filename tag.
    if args.cluster == "dbscan": cluster_arg = "eps" + str(args.dbscan_eps)
    else: cluster_arg = "cnum" + str(args.num_cls)
Example #34
0
def train():
    """
    Train an image classifier on Tiny ImageNet.

    Steps:
    * Parse command line arguments.
    * Set the default computation context (CPU or an extension device).
    * Create training and validation data iterators.
    * Build training and validation computation graphs.
    * Run the training loop with gradient accumulation, periodic
      validation, parameter snapshots, and scheduled learning-rate decay.
    """
    args = get_args()

    # Get context; fall back to CPU when none was requested.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    # Lazy %-style args: the message is only formatted if actually logged.
    logger.info("Running in %s", extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Dataset
    # We use Tiny ImageNet from Stanford CS231N class.
    # https://tiny-imagenet.herokuapp.com/
    # Tiny ImageNet consists of 200 categories, each category has 500 images
    # in training set. The image size is 64x64. To adapt ResNet into 64x64
    # image inputs, the input image size of ResNet is set as 56x56, and
    # the stride in the first conv and the first max pooling are removed.
    data = data_iterator_tiny_imagenet(args.batch_size, 'train')
    vdata = data_iterator_tiny_imagenet(args.batch_size, 'val')

    num_classes = 200
    tiny = True  # TODO: Switch ILSVRC2012 dataset and TinyImageNet.
    # Separate graphs for training (test=False) and validation (test=True).
    t_model = get_model(
        args, num_classes, test=False, tiny=tiny)
    t_model.pred.persistent = True  # Not clearing buffer of pred in backward
    v_model = get_model(
        args, num_classes, test=True, tiny=tiny)
    v_model.pred.persistent = True  # Not clearing buffer of pred in forward

    # Create Solver.
    solver = S.Momentum(args.learning_rate, 0.9)
    solver.set_parameters(nn.get_parameters())

    # Create monitors for loss/error/time series logging.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = M.MonitorSeries("Training error", monitor, interval=10)
    monitor_vloss = M.MonitorSeries("Validation loss", monitor, interval=10)
    monitor_verr = M.MonitorSeries("Validation error", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=10)

    # Training loop.
    for i in range(args.max_iter):
        # Save a parameter snapshot at the scheduled interval.
        if i % args.model_save_interval == 0:
            nn.save_parameters(os.path.join(
                args.model_save_path, 'param_%06d.h5' % i))

        # Validation: average loss/error over val_iter mini-batches.
        if i % args.val_interval == 0:
            vloss_sum = 0.0
            verr_sum = 0.0
            for _ in range(args.val_iter):
                images, labels = vdata.next()
                v_model.image.d = images
                v_model.label.d = labels
                # Cast input buffers on the device context before forward.
                v_model.image.data.cast(np.uint8, ctx)
                v_model.label.data.cast(np.int32, ctx)
                v_model.loss.forward(clear_buffer=True)
                vloss_sum += v_model.loss.d
                verr_sum += categorical_error(v_model.pred.d, v_model.label.d)
            monitor_vloss.add(i, vloss_sum / args.val_iter)
            monitor_verr.add(i, verr_sum / args.val_iter)

        # Training: accumulate gradients over accum_grad mini-batches
        # before a single solver update (effective batch size =
        # batch_size * accum_grad).
        loss_sum = 0.0
        err_sum = 0.0
        solver.zero_grad()
        for _ in range(args.accum_grad):
            images, labels = data.next()
            t_model.image.d = images
            t_model.label.d = labels
            t_model.image.data.cast(np.uint8, ctx)
            t_model.label.data.cast(np.int32, ctx)
            t_model.loss.forward(clear_no_need_grad=True)
            t_model.loss.backward(clear_buffer=True)  # Accumulating gradients
            loss_sum += t_model.loss.d
            err_sum += categorical_error(t_model.pred.d, t_model.label.d)
        solver.weight_decay(args.weight_decay)
        solver.update()
        monitor_loss.add(i, loss_sum / args.accum_grad)
        monitor_err.add(i, err_sum / args.accum_grad)
        monitor_time.add(i)

        # Learning rate decay (x0.1) at scheduled iterations.
        if i in args.learning_rate_decay_at:
            solver.set_learning_rate(solver.learning_rate() * 0.1)
    # Final snapshot after the last iteration.
    nn.save_parameters(os.path.join(args.model_save_path,
                                    'param_%06d.h5' % args.max_iter))