Example #1
 def forward(self, arch):
     # initialize the first two nodes
     op0_list = []
     op1_list = []
     for idx, (op, f, t) in enumerate(arch):
         if idx%2 == 0:
             op0_list.append(op)
         else:
             op1_list.append(op)
     assert len(op0_list) == len(op1_list), 'inconsistent size between op0_list and op1_list'
     node_list = utils.get_variable(list(range(0, 2, 1)), self.device, requires_grad=False)
     op0_list = utils.get_variable(op0_list, self.device, requires_grad=False)
     op1_list = utils.get_variable(op1_list, self.device, requires_grad=False)
     # first two nodes
     x_node_hidden = self.node_hidden(node_list)
     x_op0_hidden = self.op_hidden(op0_list)
     x_op1_hidden = self.op_hidden(op1_list)
     '''
         node0
         node1
         op0, op1
     '''
     x_op_hidden = torch.cat((x_op0_hidden, x_op1_hidden), dim=1)
     x_hidden = torch.cat((x_node_hidden, x_op_hidden), dim=0)
     # initialize x and adj
     x = self.emb_attn(x_hidden)
     adj = utils.parse_arch(arch, self.steps+2).to(self.device)
     # normalize features and adj
     if self.normalize:
         x = utils.sum_normalize(x)
         adj = utils.sum_normalize(adj)
     x = F.relu(self.gc1(x, adj))
     x = F.dropout(x, self.dropout, training=self.training)
     x = self.gc2(x, adj)
     x = x[2:]
     logits = self.fc(x)
     logits = logits.view(self.steps*2, -1)
     entropy = 0
     log_p = 0
     arch_list = []
     try:
         COMPACT_PRIMITIVES = eval(self.op_type)
     except (NameError, SyntaxError):
         raise ValueError('unsupported op type %s' % self.op_type)
     transition_dict = LooseEnd_Transition_Dict if self.op_type == 'LOOSE_END_PRIMITIVES' else FullyConcat_Transition_Dict
     for idx, (op, f, t) in enumerate(arch):
         select_op = transition_dict[COMPACT_PRIMITIVES[op]]
         selected_arch_index = [COMPACT_PRIMITIVES.index(i) for i in select_op]
         tmp = logits[idx, selected_arch_index]
         prob = F.softmax(tmp, dim=-1)
         prob = prob + 1e-5
         log_prob = torch.log(prob)
         entropy += -(log_prob * prob).sum()
         act = prob.multinomial(num_samples=1)
         pruner_op_name = select_op[act]
         f_op = COMPACT_PRIMITIVES.index(pruner_op_name)
         arch_list.append((f_op, f, t))
         selected_log_p = log_prob.gather(-1, act)
         log_p += selected_log_p.sum()
     return arch_list, log_p, entropy
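
In the snippet above, `arch` is a list of `(op, from, to)` index triples and `utils.parse_arch(arch, self.steps + 2)` turns it into an adjacency matrix over `steps + 2` nodes. The repository's own `parse_arch` is not shown on this page; a minimal sketch consistent with that call site (everything beyond the call itself is an assumption) could look like:

import torch

def parse_arch(arch, num_nodes):
    # Hypothetical sketch, not the original implementation: mark an edge
    # from node f to node t for every (op, f, t) triple in the architecture.
    adj = torch.zeros(num_nodes, num_nodes)
    for op, f, t in arch:
        adj[t, f] = 1.0
    return adj
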
Example #2
def get_optimizer(strategy, lr, lr_schedule, dataset, batch_size):
    strategy_name = strategy.split("/")[0]
    if strategy_name == "oneshot":
        print("using oneshot strategy")

        lr = get_lr(lr, dataset, batch_size, schedule_mode=lr_schedule)

        return [
            # beta_1=0.5 follows https://github.com/alexalemi/vib_demo/blob/master/MNISTVIB.ipynb
            tf.keras.optimizers.Adam(lr, 0.5)
        ], strategy, {}
    elif strategy == "algo1":
        slugs = strategy.split("/")
        print(f"using {slugs[0]} strategy with {slugs[1]}")

        opt_params = utils.parse_arch(slugs[1])

        if lr_schedule != "constant":
            raise ValueError(
                f"{strategy_name} is not yet supported lr_schedule")

        lr = get_lr(lr, dataset, batch_size, schedule_mode=lr_schedule)

        # one optimizer for the encoder and one for the decoder
        return (tf.keras.optimizers.Adam(lr, 0.5),
                tf.keras.optimizers.Adam(lr, 0.5)), slugs[0], opt_params

    elif strategy_name == "algo2":
        slugs = strategy.split("/")
        print(f"using {slugs[0]} strategy with {slugs[1]}")

        opt_params = utils.parse_arch(slugs[1])

        # one optimizer for the encoder and one for the decoder
        return (
            tf.keras.optimizers.Adam(
                get_lr(lr, dataset, batch_size, schedule_mode=lr_schedule),
                0.5),
            tf.keras.optimizers.Adam(
                get_lr(lr,
                       dataset,
                       batch_size,
                       schedule_mode=lr_schedule,
                       step_factor=opt_params["k"]), 0.5),
        ), slugs[0], opt_params
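
Every branch returns the same triple: a sequence of optimizers, the strategy name, and an `opt_params` dict (which the "algo2" path reads `"k"` from). A hedged usage sketch follows; the concrete strategy spec string, dataset name, and hyperparameter values are placeholders, since the spec format expected by `utils.parse_arch` is not shown here.

# Hypothetical call; "algo2/k_2" stands in for whatever spec string
# utils.parse_arch actually expects after the strategy name.
optimizers, strategy_name, opt_params = get_optimizer(
    strategy="algo2/k_2", lr=1e-4, lr_schedule="constant",
    dataset="mnist", batch_size=128)
print(strategy_name, opt_params)   # e.g. "algo2" and a dict containing "k"
print(len(optimizers))             # one optimizer for the encoder, one for the decoder
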
Example #3
 def forward(self, arch):
     # initialize the first two nodes
     op0_list = []
     op1_list = []
     for idx, (op, f, t) in enumerate(arch):
         if idx % 2 == 0:
             op0_list.append(op)
         else:
             op1_list.append(op)
     assert len(op0_list) == len(
         op1_list), 'inconsistent size between op0_list and op1_list'
     node_list = utils.get_variable(list(range(0, 2, 1)),
                                    self.device,
                                    requires_grad=False)
     op0_list = utils.get_variable(op0_list,
                                   self.device,
                                   requires_grad=False)
     op1_list = utils.get_variable(op1_list,
                                   self.device,
                                   requires_grad=False)
     # first two nodes
     x_node_hidden = self.node_hidden(node_list)
     x_op0_hidden = self.op_hidden(op0_list)
     x_op1_hidden = self.op_hidden(op1_list)
     '''
         node0
         node1
         op0, op1
     '''
     x_op_hidden = torch.cat((x_op0_hidden, x_op1_hidden), dim=1)
     x_hidden = torch.cat((x_node_hidden, x_op_hidden), dim=0)
     # initialize x and adj
     x = self.emb_attn(x_hidden)
     adj = utils.parse_arch(arch, self.steps + 2).to(self.device)
     # normalize features and adj
     if self.normalize:
         x = utils.sum_normalize(x)
         adj = utils.sum_normalize(adj)
     x = F.relu(self.gc1(x, adj))
     x = F.dropout(x, self.dropout, training=self.training)
     x = self.gc2(x, adj)
     x = x[2:]
     logits = self.fc(x)
     logits = logits.view(self.steps * 2, -1)
     probs = F.softmax(logits, dim=-1)
     probs = probs + 1e-5
     log_probs = torch.log(probs)
     action = probs.multinomial(num_samples=1)
     selected_log_p = log_probs.gather(-1, action)
     log_p = selected_log_p.sum()
     entropy = -(log_probs * probs).sum()
     arch = utils.translate_arch(arch, action, self.op_type)
     return arch, log_p, entropy
Example #4
def load_model(path):

    with open(f"{path}/summary.yml", "r") as fh:
        summary = yaml.safe_load(fh)

        model_name, model_config = summary['model'].split("/")
        model_config = utils.parse_arch(model_config)

        input_shape = datasets.input_dims[summary["dataset"]]
        model_cls = get_network(model_name)
        model = model_cls(model_config, input_shape,
                          datasets.num_classes[summary["dataset"]],
                          summary["cov_type"], summary['beta'], summary['M'])

        model.load_weights(f"{path}/model")

    return model, summary
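
Together with a training run like the one in Example #7 (which writes `summary.yml` and the weight files), this gives a round trip for restoring a model. A small usage sketch, with a made-up artifact path:

# Hypothetical artifact directory produced by an earlier training run.
model, summary = load_model("./artifacts/vib-mnist-run")
print(summary["dataset"], summary["cov_type"], summary["beta"], summary["M"])
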
Example #5
        torch.cuda.manual_seed_all(args.seed)

corpus = data.Corpus(args.data)

eval_batch_size = 10
test_batch_size = 1
train_data = batchify(corpus.train, args.batch_size, args)
val_data = batchify(corpus.valid, eval_batch_size, args)
test_data = batchify(corpus.test, test_batch_size, args)


ntokens = len(corpus.dictionary)
try:
    # args.arch may name a genotype defined in genotypes.py ...
    genotype = eval("genotypes.%s" % args.arch)
except (AttributeError, SyntaxError):
    # ... otherwise fall back to parsing it as an architecture string
    genotype = parse_arch(args.arch)

if os.path.exists(os.path.join(args.save, 'model.pt')):
    print("Found model.pt in {}, automatically continue training.".format(args.save))
    args.continue_train = True

if args.continue_train:
    model = torch.load(os.path.join(args.save, 'model.pt'))
else:
    model = model.RNNModel(ntokens, args.emsize, args.nhid, args.nhidlast,
                           args.dropout, args.dropouth, args.dropoutx,
                           args.dropouti, args.dropoute,
                           cell_cls=model.DARTSCell, genotype=genotype)

if args.cuda:
    if args.single_gpu:
        parallel_model = model.cuda()
Example #6
        cudnn.benchmark = True
        cudnn.enabled = True
        torch.cuda.manual_seed_all(args.seed)

corpus = data.Corpus(args.data)

eval_batch_size = 10
test_batch_size = 1
train_data = batchify(corpus.train, args.batch_size, args.cuda)
val_data = batchify(corpus.valid, eval_batch_size, args.cuda)
test_data = batchify(corpus.test, test_batch_size, args.cuda)

ntokens = len(corpus.dictionary)

assert args.arch
args.arch = parse_arch(args.arch)

if os.path.exists(os.path.join(args.save, 'model.pt')):
    print("Found model.pt in {}, automatically continue training.".format(
        args.save))
    args.continue_train = True

if args.continue_train:
    model = torch.load(os.path.join(args.save, 'model.pt'))
else:
    model = model.RNNModel(ntokens,
                           args.emsize,
                           args.nhid,
                           args.nhidlast,
                           args.dropout,
                           args.dropouth,
Example #7
def train(model, dataset, data_augmentation, epochs, batch_size, beta, M,
          initial_lr, lr_schedule, strategy, output_dir, class_loss, cov_type):

    model_conf = model

    train_set, test_set, small_set = datasets.get_dataset(dataset)

    TRAIN_BUF, TEST_BUF = datasets.dataset_size[dataset]

    if data_augmentation:
        base_dataset = dataset.split("-")[0]
        print(f"Using image generator params from {base_dataset}")
        with open(f"./datasets/image-generator-config/{base_dataset}.yml",
                  "r") as fh:
            params = yaml.safe_load(fh)
            print(params)
        train_dataset = tf.keras.preprocessing.image.ImageDataGenerator(
            **params)
        train_dataset.fit(train_set[0])

    else:
        train_dataset = tf.data.Dataset.from_tensor_slices(train_set) \
            .shuffle(TRAIN_BUF).batch(batch_size)

    test_dataset = tf.data.Dataset.from_tensor_slices(test_set) \
        .shuffle(TEST_BUF).batch(batch_size)

    print(
        f"Training with {model} on {dataset} for {epochs} epochs (lr={initial_lr}, schedule={lr_schedule})"
    )
    print(
        f"Params: batch-size={batch_size} beta={beta} M={M} lr={initial_lr} strategy={strategy}"
    )

    optimizers, strategy_name, opt_params = losses.get_optimizer(
        strategy, initial_lr, lr_schedule, dataset, batch_size)

    network_name, architecture = model.split("/")
    experiment_name = utils.get_experiment_name(
        f"{network_name}-{class_loss}-{cov_type}-{dataset}")

    print(f"Experiment name: {experiment_name}")
    artifact_dir = f"{output_dir}/{experiment_name}"
    print(f"Artifact directory: {artifact_dir}")

    train_log_dir = f"{artifact_dir}/logs/train"
    test_log_dir = f"{artifact_dir}/logs/test"

    train_summary_writer = tf.summary.create_file_writer(train_log_dir)
    test_summary_writer = tf.summary.create_file_writer(test_log_dir)

    # Instantiate model
    architecture = utils.parse_arch(architecture)

    model = nets.get_network(network_name)(architecture,
                                           datasets.input_dims[dataset],
                                           datasets.num_classes[dataset],
                                           cov_type,
                                           beta=beta,
                                           M=M)

    model.build(input_shape=(batch_size, *datasets.input_dims[dataset]))
    model.summary()

    print(f"Class loss: {class_loss}")
    model.class_loss = getattr(losses, f"compute_{class_loss}_class_loss")

    lr_labels = list(map(lambda x: f"lr_{x}", range(len(optimizers))))

    train_step = train_algo2 if strategy.split(
        "/")[0] == "algo2" else train_algo1

    print("Using trainstep: ", train_step)

    train_start_time = time.time()

    steps_per_epoch = int(np.ceil(train_set[0].shape[0] / batch_size))

    for epoch in range(1, epochs + 1):
        start_time = time.time()

        print(f"Epoch {epoch}")

        m, am = train_step(
            model, optimizers,
            train_dataset.flow(
                train_set[0], train_set[1], batch_size=batch_size)
            if data_augmentation else train_dataset, train_summary_writer, M,
            lr_labels, strategy_name, opt_params, epoch, steps_per_epoch)

        m = m.result().numpy()
        am = am.result().numpy()

        print(utils.format_metrics("Train", m, am))

        tfutils.log_metrics(train_summary_writer, metric_labels, m, epoch)
        tfutils.log_metrics(train_summary_writer, acc_labels, am, epoch)

        tfutils.log_metrics(
            train_summary_writer, lr_labels,
            map(lambda opt: opt._decayed_lr(tf.float32), optimizers), epoch)

        train_metrics = m.astype(float).tolist() + am.astype(float).tolist()
        end_time = time.time()

        test_metrics = evaluate(model, test_dataset, test_summary_writer, M,
                                epoch)

        print(f"--- Time elapse for current epoch {end_time - start_time}")

    train_end_time = time.time()
    elapsed_time = (train_end_time - train_start_time) / 60.

    test_metrics_dict = dict(zip(metric_labels + acc_labels, test_metrics))
    summary = dict(
        dataset=dataset,
        model=model_conf,
        strategy=strategy,
        beta=beta,
        epoch=epoch,
        M=M,
        lr=initial_lr,
        lr_schedule=lr_schedule,
        metrics=dict(
            train=dict(zip(metric_labels + acc_labels, train_metrics)),
            test=test_metrics_dict,
        ),
        class_loss=class_loss,
        cov_type=cov_type,
        batch_size=batch_size,
        elapsed_time=elapsed_time,  # in minutes
        test_accuracy_L12=test_metrics_dict["accuracy_L12"],
        data_augmentation=data_augmentation)

    if model.latent_dim == 2:
        plot_helper.plot_2d_representation(
            model,
            small_set,
            title="Epoch=%d Strategy=%s  Beta=%f M=%f" %
            (epoch, strategy, beta, M),
            path=f"{artifact_dir}/latent-representation.png")

    with train_summary_writer.as_default():
        tf.summary.text("setting",
                        json.dumps(summary, sort_keys=True, indent=4),
                        step=0)

    with open(f"{artifact_dir}/summary.yml", 'w') as f:
        print(summary)
        yaml.dump(summary, f, default_flow_style=False)

    model.save_weights(f"{artifact_dir}/model")

    print(f"Training took {elapsed_time:.4f} minutes")
    print(f"Please see artifact at: {artifact_dir}")