Example #1
from caffe2.python import optimizer


def add_optimizer_rmsprop(model, config):
    # Apply L2 weight decay, then attach an RMSProp optimizer configured from the solver settings.
    optimizer.add_weight_decay(model, config['solver']['weight_decay'])
    optimizer.build_rms_prop(
        model,
        base_learning_rate=config['solver']['base_learning_rate'],
        epsilon=config['solver']['epsilon'],
        decay=config['solver']['decay'],
        momentum=config['solver']['momentum'],
        policy=config['solver']['lr_policy'],
        stepsize=config['solver']['stepsize'],
    )
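For reference, a minimal sketch of the nested config this helper reads; only the key names come from the calls above, the values below are illustrative assumptions.
# Hypothetical config for add_optimizer_rmsprop; all values are placeholders.
config = {
    'solver': {
        'weight_decay': 1e-4,
        'base_learning_rate': 0.01,
        'epsilon': 1e-5,
        'decay': 0.9,
        'momentum': 0.9,
        'lr_policy': 'step',
        'stepsize': 10000,
    }
}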
Example #2
from caffe2.python.optimizer import build_rms_prop


def build_optimizer(self, model, **kwargs):
    # Test helper: attach an RMSProp optimizer with fixed hyperparameters.
    self._skip_gpu = False
    return build_rms_prop(model,
                          base_learning_rate=0.1,
                          epsilon=0.1,
                          **kwargs)
Example #3
    # Needs: from caffe2.python import core, optimizer
    def add_training_operators(self, model, output, label, device_opts, loss,
                               opt_type, base_learning_rate, policy, stepsize,
                               epsilon, beta1, beta2, gamma, momentum):
        with core.DeviceScope(device_opts):
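            # Build the loss: label cross-entropy for classification,
            # squared L2 distance for regression.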
            if loss == 'cross_entropy':
                xent = model.LabelCrossEntropy([output, label], 'xent')
                loss = model.AveragedLoss(xent, "loss")
            elif loss == 'euclidean':
                dist = model.net.SquaredL2Distance([label, output], 'dist')
                loss = dist.AveragedLoss([], ['loss'])

            model.AddGradientOperators([loss])

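            # Dispatch on opt_type; the 'fixed' and 'inv' policies omit stepsize, 'step' requires it.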
            if opt_type == 'adam':
                if policy == 'step':
                    opt = optimizer.build_adam(
                        model,
                        base_learning_rate=base_learning_rate,
                        policy=policy,
                        stepsize=stepsize,
                        beta1=beta1,
                        beta2=beta2,
                        epsilon=epsilon)
                elif policy == 'fixed' or policy == 'inv':
                    opt = optimizer.build_adam(
                        model,
                        base_learning_rate=base_learning_rate,
                        policy=policy,
                        beta1=beta1,
                        beta2=beta2,
                        epsilon=epsilon)
                print("adam optimizer selected")
            elif opt_type == 'sgd':
                if policy == 'step':
                    opt = optimizer.build_sgd(
                        model,
                        base_learning_rate=base_learning_rate,
                        policy=policy,
                        stepsize=stepsize,
                        gamma=gamma,
                        momentum=momentum)
                elif policy == 'fixed' or policy == 'inv':
                    opt = optimizer.build_sgd(
                        model,
                        base_learning_rate=base_learning_rate,
                        policy=policy,
                        gamma=gamma,
                        momentum=momentum)
                print("sgd optimizer selected")
            elif opt_type == 'rmsprop':
                if policy == 'step':
                    opt = optimizer.build_rms_prop(
                        model,
                        base_learning_rate=base_learning_rate,
                        policy=policy,
                        stepsize=stepsize,
                        decay=gamma,
                        momentum=momentum,
                        epsilon=epsilon)
                elif policy == 'fixed' or policy == 'inv':
                    opt = optimizer.build_rms_prop(
                        model,
                        base_learning_rate=base_learning_rate,
                        policy=policy,
                        decay=gamma,
                        momentum=momentum,
                        epsilon=epsilon)
                print("rmsprop optimizer selected")
            elif opt_type == 'adagrad':
                if policy == 'step':
                    opt = optimizer.build_adagrad(
                        model,
                        base_learning_rate=base_learning_rate,
                        policy=policy,
                        stepsize=stepsize,
                        decay=gamma,
                        epsilon=epsilon)
                elif policy == 'fixed' or policy == 'inv':
                    opt = optimizer.build_adagrad(
                        model,
                        base_learning_rate=base_learning_rate,
                        policy=policy,
                        decay=gamma,
                        epsilon=epsilon)
                print("adagrad optimizer selected")
Example #4
from caffe2.python.optimizer import build_rms_prop


def build_optimizer(self, model, **kwargs):
    # Same test helper as Example #2, formatted differently in its source file.
    self._skip_gpu = False
    return build_rms_prop(
        model, base_learning_rate=0.1, epsilon=0.1, **kwargs
    )
Example #5
import os
import time

from caffe2.python import model_helper, optimizer, workspace


def main():
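    # `args`, `mode`, `GetInput`, `GetModel`, `warmup_steps`, `train_steps`,
    # `batch_size`, `layer`, `node`, `input_size`, and `output_size` are presumably
    # defined elsewhere in the original script (argparse options and module-level globals).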

    workspace.ResetWorkspace()

    if args.profile:
        workspace.GlobalInit([
            'caffe2', '--caffe2_log_level=2',
            '--caffe2_net_async_names_to_trace=benchmark',
            '--caffe2_net_async_tracing_dumping_nth=2',
            '--caffe2_net_async_tracing_nth=2'
        ])
    else:
        workspace.GlobalInit([
            'caffe2',
            '--caffe2_log_level=2',
            #'--caffe2_cpu_numa_enabled=1',
            '--caffe2_net_async_thread_pool_size=' + str(args.async_threads)
        ])

    model = model_helper.ModelHelper(name="FC")

    input_data, output_data = GetInput()
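    # Feed the generated inputs (plus a transposed copy) and the labels into the workspace.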
    workspace.FeedBlob("input", input_data)
    workspace.FeedBlob("input_T", input_data.T)
    workspace.FeedBlob("output", output_data)

    out = GetModel(model)

    if mode == 'train':
        xent = model.LabelCrossEntropy([out, "output"], 'xent')
        loss = model.AveragedLoss(xent, "loss")
        model.AddGradientOperators([loss])
        if args.optimizer == 'rms':
            optimizer.build_rms_prop(model,
                                     base_learning_rate=0.1,
                                     max_gradient_norm=None,
                                     allow_lr_injection=False)
        elif args.optimizer == 'sgd':
            optimizer.build_sgd(
                model,
                base_learning_rate=0.1,
                policy="step",
                stepsize=1,
                gamma=0.999,
            )

    #CAFFE2_NET_TYPE = types.ENUM(
    #            "simple", "dag", "async_dag", "singlethread_async", "async_scheduling"
    #            )
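    # 'prof_dag' enables per-operator profiling; otherwise the executor type comes from args.proto_type.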
    if args.profile:
        model.Proto().type = 'prof_dag'
    if args.proto_type != '':
        model.net.Proto().type = args.proto_type

    model.net.Proto().num_workers = args.intra_threads

    #warmup_runs = iterations
    #main_runs = iterations
    #run_individual = True
    #stats = workspace.BenchmarkNet(model.name, warmup_runs, main_runs, run_individual)
    #print(stats)

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net, overwrite=True)

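    # Warm up the net first, then time `train_steps` iterations for the throughput numbers below.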
    workspace.RunNet(model.net.Proto().name, num_iter=warmup_steps)
    t1 = time.time()
    workspace.RunNet(model.net.Proto().name, num_iter=train_steps)
    t2 = time.time()
    total_time = t2 - t1
    example_per_sec = batch_size * train_steps / total_time
    global_step_per_sec = train_steps / total_time
    print("--------------------CAFFE2-------------------------")
    for arg in vars(args):
        print("***%s: %s" % (arg, getattr(args, arg)))
    if 'OMP_NUM_THREADS' in os.environ:
        print("***OMP_NUM_THREADS: ", os.environ['OMP_NUM_THREADS'])
    if 'MKL_NUM_THREADS' in os.environ:
        print("***MKL_NUM_THREADS: ", os.environ['MKL_NUM_THREADS'])
    print("***Total time: %s" % total_time)
    print("***Average time: %s" % (total_time / train_steps / (layer - 1)))
    flops = batch_size * (node * node *
                          (layer - 1) + node * input_size + node * output_size)
    if args.mode == 'train':
        # FWD 2x and BWD 4x
        flops *= 6 * train_steps
    else:
        flops *= 2 * train_steps
    print('***TFLOPS: {}'.format(flops / total_time / 1e12))
    print("---------------------------------------------")
    print("---------------------------------------------")