def forward(self, arch):
    # initialize the first two nodes
    op0_list = []
    op1_list = []
    for idx, (op, f, t) in enumerate(arch):
        if idx % 2 == 0:
            op0_list.append(op)
        else:
            op1_list.append(op)
    assert len(op0_list) == len(op1_list), \
        'inconsistent size between op0_list and op1_list'

    node_list = utils.get_variable(list(range(2)), self.device,
                                   requires_grad=False)
    op0_list = utils.get_variable(op0_list, self.device, requires_grad=False)
    op1_list = utils.get_variable(op1_list, self.device, requires_grad=False)

    # first two nodes
    x_node_hidden = self.node_hidden(node_list)
    x_op0_hidden = self.op_hidden(op0_list)
    x_op1_hidden = self.op_hidden(op1_list)

    '''
    node0 node1
    op0,  op1
    '''
    x_op_hidden = torch.cat((x_op0_hidden, x_op1_hidden), dim=1)
    x_hidden = torch.cat((x_node_hidden, x_op_hidden), dim=0)

    # initialize x and adj
    x = self.emb_attn(x_hidden)
    adj = utils.parse_arch(arch, self.steps + 2).to(self.device)

    # normalize features and adj
    if self.normalize:
        x = utils.sum_normalize(x)
        adj = utils.sum_normalize(adj)

    x = F.relu(self.gc1(x, adj))
    x = F.dropout(x, self.dropout, training=self.training)
    x = self.gc2(x, adj)
    x = x[2:]
    logits = self.fc(x)
    logits = logits.view(self.steps * 2, -1)

    entropy = 0
    log_p = 0
    arch_list = []
    try:
        COMPACT_PRIMITIVES = eval(self.op_type)
    except NameError:
        raise ValueError('unsupported op type %s' % self.op_type)
    transition_dict = (LooseEnd_Transition_Dict
                       if self.op_type == 'LOOSE_END_PRIMITIVES'
                       else FullyConcat_Transition_Dict)
    for idx, (op, f, t) in enumerate(arch):
        # restrict sampling to the transitions that are legal for the current op
        select_op = transition_dict[COMPACT_PRIMITIVES[op]]
        selected_arch_index = [COMPACT_PRIMITIVES.index(i) for i in select_op]
        tmp = logits[idx, selected_arch_index]
        prob = F.softmax(tmp, dim=-1)
        prob = prob + 1e-5  # avoid log(0)
        log_prob = torch.log(prob)
        entropy += -(log_prob * prob).sum()
        act = prob.multinomial(num_samples=1)
        pruner_op_name = select_op[act]
        f_op = COMPACT_PRIMITIVES.index(pruner_op_name)
        arch_list.append((f_op, f, t))
        selected_log_p = log_prob.gather(-1, act)
        log_p += selected_log_p.sum()
    return arch_list, log_p, entropy
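
# A minimal, self-contained sketch of the constrained sampling step in
# forward() above: the logits for one edge are masked down to the
# transitions that are legal for its current op, an action is drawn, and
# its log-probability is accumulated for a REINFORCE-style update. The
# primitive list and transition table below are illustrative assumptions,
# not the repository's actual definitions.
import torch
import torch.nn.functional as F

PRIMITIVES = ['none', 'skip_connect', 'sep_conv_3x3', 'max_pool_3x3']
TRANSITIONS = {'sep_conv_3x3': ['sep_conv_3x3', 'skip_connect', 'none']}

logits = torch.randn(len(PRIMITIVES))            # one row of the pruner's logits
legal = [PRIMITIVES.index(o) for o in TRANSITIONS['sep_conv_3x3']]
prob = F.softmax(logits[legal], dim=-1) + 1e-5   # epsilon avoids log(0)
log_prob = torch.log(prob)
act = prob.multinomial(num_samples=1)            # sample a legal transition
new_op = PRIMITIVES.index(TRANSITIONS['sep_conv_3x3'][act.item()])
log_p = log_prob.gather(-1, act).sum()           # term for the policy gradient
entropy = -(log_prob * prob).sum()               # exploration bonus
print(new_op, log_p.item(), entropy.item())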
def get_optimizer(strategy, lr, lr_schedule, dataset, batch_size):
    strategy_name = strategy.split("/")[0]
    if strategy_name == "oneshot":
        print("using oneshot strategy")
        lr = get_lr(lr, dataset, batch_size, schedule_mode=lr_schedule)
        return [
            # beta_1=0.5 follows https://github.com/alexalemi/vib_demo/blob/master/MNISTVIB.ipynb
            tf.keras.optimizers.Adam(lr, 0.5)
        ], strategy, {}
    elif strategy_name == "algo1":
        slugs = strategy.split("/")
        print(f"using {slugs[0]} strategy with {slugs[1]}")
        opt_params = utils.parse_arch(slugs[1])
        if lr_schedule != "constant":
            raise ValueError(
                f"{strategy_name} does not support lr_schedule={lr_schedule!r} yet")
        lr = get_lr(lr, dataset, batch_size, schedule_mode=lr_schedule)
        # one optimizer each for the encoder and the decoder
        return (tf.keras.optimizers.Adam(lr, 0.5),
                tf.keras.optimizers.Adam(lr, 0.5)), slugs[0], opt_params
    elif strategy_name == "algo2":
        slugs = strategy.split("/")
        print(f"using {slugs[0]} strategy with {slugs[1]}")
        opt_params = utils.parse_arch(slugs[1])
        # one optimizer each for the encoder and the decoder; the decoder's
        # schedule is stretched by the step factor k
        return (
            tf.keras.optimizers.Adam(
                get_lr(lr, dataset, batch_size, schedule_mode=lr_schedule), 0.5),
            tf.keras.optimizers.Adam(
                get_lr(lr, dataset, batch_size, schedule_mode=lr_schedule,
                       step_factor=opt_params["k"]), 0.5),
        ), slugs[0], opt_params
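
# Illustrative sketch of the "name/params" strategy-string convention that
# get_optimizer() relies on. The "k_2" encoding and the parse_params helper
# are assumptions about what utils.parse_arch does with the slug, not its
# actual implementation.
def parse_params(slug):
    # e.g. "k_2" -> {"k": 2}: underscore-separated key/value pairs
    parts = slug.split("_")
    return {k: int(v) for k, v in zip(parts[::2], parts[1::2])}

strategy = "algo2/k_2"
name, _, slug = strategy.partition("/")
print(name, parse_params(slug))  # algo2 {'k': 2}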
def forward(self, arch):
    # initialize the first two nodes
    op0_list = []
    op1_list = []
    for idx, (op, f, t) in enumerate(arch):
        if idx % 2 == 0:
            op0_list.append(op)
        else:
            op1_list.append(op)
    assert len(op0_list) == len(op1_list), \
        'inconsistent size between op0_list and op1_list'

    node_list = utils.get_variable(list(range(2)), self.device,
                                   requires_grad=False)
    op0_list = utils.get_variable(op0_list, self.device, requires_grad=False)
    op1_list = utils.get_variable(op1_list, self.device, requires_grad=False)

    # first two nodes
    x_node_hidden = self.node_hidden(node_list)
    x_op0_hidden = self.op_hidden(op0_list)
    x_op1_hidden = self.op_hidden(op1_list)

    '''
    node0 node1
    op0,  op1
    '''
    x_op_hidden = torch.cat((x_op0_hidden, x_op1_hidden), dim=1)
    x_hidden = torch.cat((x_node_hidden, x_op_hidden), dim=0)

    # initialize x and adj
    x = self.emb_attn(x_hidden)
    adj = utils.parse_arch(arch, self.steps + 2).to(self.device)

    # normalize features and adj
    if self.normalize:
        x = utils.sum_normalize(x)
        adj = utils.sum_normalize(adj)

    x = F.relu(self.gc1(x, adj))
    x = F.dropout(x, self.dropout, training=self.training)
    x = self.gc2(x, adj)
    x = x[2:]
    logits = self.fc(x)
    logits = logits.view(self.steps * 2, -1)

    # sample one op per edge over the full primitive set (unconstrained)
    probs = F.softmax(logits, dim=-1)
    probs = probs + 1e-5  # avoid log(0)
    log_probs = torch.log(probs)
    action = probs.multinomial(num_samples=1)
    selected_log_p = log_probs.gather(-1, action)
    log_p = selected_log_p.sum()
    entropy = -(log_probs * probs).sum()
    arch = utils.translate_arch(arch, action, self.op_type)
    return arch, log_p, entropy
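
# A minimal sketch of what a translate_arch-style helper would do with the
# sampled action tensor: each row of `action` indexes the op chosen for the
# corresponding edge, while the from/to node wiring is kept. This is an
# assumption about utils.translate_arch's behavior, not its source.
import torch

def translate_arch_sketch(arch, action):
    # arch: list of (op, from_node, to_node); action: LongTensor [len(arch), 1]
    return [(int(action[i, 0]), f, t) for i, (op, f, t) in enumerate(arch)]

arch = [(2, 0, 2), (1, 1, 2)]
action = torch.tensor([[3], [0]])
print(translate_arch_sketch(arch, action))  # [(3, 0, 2), (0, 1, 2)]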
def load_model(path):
    with open(f"{path}/summary.yml", "r") as fh:
        summary = yaml.safe_load(fh)
    model_name, model_config = summary['model'].split("/")
    model_config = utils.parse_arch(model_config)
    input_shape = datasets.input_dims[summary["dataset"]]
    model_cls = get_network(model_name)
    model = model_cls(model_config, input_shape,
                      datasets.num_classes[summary["dataset"]],
                      summary["cov_type"], summary['beta'], summary['M'])
    model.load_weights(f"{path}/model")
    return model, summary
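
# Self-contained sketch of the summary.yml round-trip that load_model()
# depends on: the "model" field packs network name and architecture config
# into one "name/config" string. The field values here are illustrative
# assumptions, not a real experiment summary.
import yaml

doc = """
model: convnet/e_256_d_256
dataset: mnist
cov_type: diag
beta: 0.001
M: 1
"""
summary = yaml.safe_load(doc)
model_name, model_config = summary["model"].split("/")
print(model_name, model_config)  # convnet e_256_d_256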
torch.cuda.manual_seed_all(args.seed)

corpus = data.Corpus(args.data)
eval_batch_size = 10
test_batch_size = 1
train_data = batchify(corpus.train, args.batch_size, args)
val_data = batchify(corpus.valid, eval_batch_size, args)
test_data = batchify(corpus.test, test_batch_size, args)

ntokens = len(corpus.dictionary)
try:
    # a named genotype defined in genotypes.py, e.g. "DARTS"
    genotype = eval("genotypes.%s" % args.arch)
except (AttributeError, SyntaxError):
    # otherwise treat args.arch as a serialized architecture string
    genotype = parse_arch(args.arch)

if os.path.exists(os.path.join(args.save, 'model.pt')):
    print("Found model.pt in {}, automatically continuing training.".format(args.save))
    args.continue_train = True

if args.continue_train:
    model = torch.load(os.path.join(args.save, 'model.pt'))
else:
    model = model.RNNModel(ntokens, args.emsize, args.nhid, args.nhidlast,
                           args.dropout, args.dropouth, args.dropoutx,
                           args.dropouti, args.dropoute,
                           cell_cls=model.DARTSCell, genotype=genotype)

if args.cuda:
    if args.single_gpu:
        parallel_model = model.cuda()
cudnn.benchmark = True
cudnn.enabled = True
torch.cuda.manual_seed_all(args.seed)

corpus = data.Corpus(args.data)
eval_batch_size = 10
test_batch_size = 1
train_data = batchify(corpus.train, args.batch_size, args.cuda)
val_data = batchify(corpus.valid, eval_batch_size, args.cuda)
test_data = batchify(corpus.test, test_batch_size, args.cuda)

ntokens = len(corpus.dictionary)
assert args.arch
args.arch = parse_arch(args.arch)

if os.path.exists(os.path.join(args.save, 'model.pt')):
    print("Found model.pt in {}, automatically continuing training.".format(
        args.save))
    args.continue_train = True

if args.continue_train:
    model = torch.load(os.path.join(args.save, 'model.pt'))
else:
    model = model.RNNModel(ntokens, args.emsize, args.nhid, args.nhidlast,
                           args.dropout, args.dropouth, args.dropoutx,
                           args.dropouti, args.dropoute,
                           cell_cls=model.DARTSCell, genotype=args.arch)
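
# Self-contained sketch of the batchify() helper both scripts above rely on
# (the standard AWD-LSTM/DARTS data layout): trim the token stream to a
# multiple of the batch size, then fold it into [seq_len, batch] columns.
# The toy token stream is illustrative.
import torch

def batchify_sketch(data, bsz):
    nbatch = data.size(0) // bsz
    data = data.narrow(0, 0, nbatch * bsz)   # drop the ragged tail
    return data.view(bsz, -1).t().contiguous()

tokens = torch.arange(10)
print(batchify_sketch(tokens, 2))  # shape [5, 2], column-major batches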
def train(model, dataset, data_augmentation, epochs, batch_size, beta, M,
          initial_lr, lr_schedule, strategy, output_dir, class_loss, cov_type):
    model_conf = model
    train_set, test_set, small_set = datasets.get_dataset(dataset)
    TRAIN_BUF, TEST_BUF = datasets.dataset_size[dataset]

    if data_augmentation:
        base_dataset = dataset.split("-")[0]
        print(f"Using image generator params from {base_dataset}")
        with open(f"./datasets/image-generator-config/{base_dataset}.yml",
                  "r") as fh:
            params = yaml.safe_load(fh)
        print(params)
        train_dataset = tf.keras.preprocessing.image.ImageDataGenerator(**params)
        train_dataset.fit(train_set[0])
    else:
        train_dataset = tf.data.Dataset.from_tensor_slices(train_set) \
            .shuffle(TRAIN_BUF).batch(batch_size)
    test_dataset = tf.data.Dataset.from_tensor_slices(test_set) \
        .shuffle(TEST_BUF).batch(batch_size)

    print(f"Training with {model} on {dataset} for {epochs} epochs "
          f"(lr={initial_lr}, schedule={lr_schedule})")
    print(f"Params: batch-size={batch_size} beta={beta} M={M} "
          f"lr={initial_lr} strategy={strategy}")

    optimizers, strategy_name, opt_params = losses.get_optimizer(
        strategy, initial_lr, lr_schedule, dataset, batch_size)

    network_name, architecture = model.split("/")
    experiment_name = utils.get_experiment_name(
        f"{network_name}-{class_loss}-{cov_type}-{dataset}")
    print(f"Experiment name: {experiment_name}")
    artifact_dir = f"{output_dir}/{experiment_name}"
    print(f"Artifact directory: {artifact_dir}")

    train_log_dir = f"{artifact_dir}/logs/train"
    test_log_dir = f"{artifact_dir}/logs/test"
    train_summary_writer = tf.summary.create_file_writer(train_log_dir)
    test_summary_writer = tf.summary.create_file_writer(test_log_dir)

    # Instantiate model
    architecture = utils.parse_arch(architecture)
    model = nets.get_network(network_name)(architecture,
                                           datasets.input_dims[dataset],
                                           datasets.num_classes[dataset],
                                           cov_type, beta=beta, M=M)
    model.build(input_shape=(batch_size, *datasets.input_dims[dataset]))
    model.summary()
    print(f"Class loss: {class_loss}")
    model.class_loss = getattr(losses, f"compute_{class_loss}_class_loss")

    lr_labels = [f"lr_{i}" for i in range(len(optimizers))]
    train_step = train_algo2 if strategy.split("/")[0] == "algo2" else train_algo1
    print("Using train step: ", train_step)

    train_start_time = time.time()
    steps_per_epoch = int(np.ceil(train_set[0].shape[0] / batch_size))
    for epoch in range(1, epochs + 1):
        start_time = time.time()
        print(f"Epoch {epoch}")
        m, am = train_step(
            model, optimizers,
            train_dataset.flow(train_set[0], train_set[1], batch_size=batch_size)
            if data_augmentation else train_dataset,
            train_summary_writer, M, lr_labels, strategy_name, opt_params,
            epoch, steps_per_epoch)
        m = m.result().numpy()
        am = am.result().numpy()
        print(utils.format_metrics("Train", m, am))
        tfutils.log_metrics(train_summary_writer, metric_labels, m, epoch)
        tfutils.log_metrics(train_summary_writer, acc_labels, am, epoch)
        tfutils.log_metrics(
            train_summary_writer, lr_labels,
            map(lambda opt: opt._decayed_lr(tf.float32), optimizers), epoch)
        train_metrics = m.astype(float).tolist() + am.astype(float).tolist()
        end_time = time.time()

        test_metrics = evaluate(model, test_dataset, test_summary_writer, M,
                                epoch)
        print(f"--- Time elapsed for current epoch: {end_time - start_time}")

    train_end_time = time.time()
    elapsed_time = (train_end_time - train_start_time) / 60.
    test_metrics_dict = dict(zip(metric_labels + acc_labels, test_metrics))
    summary = dict(
        dataset=dataset,
        model=model_conf,
        strategy=strategy,
        beta=beta,
        epoch=epoch,
        M=M,
        lr=initial_lr,
        lr_schedule=lr_schedule,
        metrics=dict(
            train=dict(zip(metric_labels + acc_labels, train_metrics)),
            test=test_metrics_dict,
        ),
        class_loss=class_loss,
        cov_type=cov_type,
        batch_size=batch_size,
        elapsed_time=elapsed_time,  # in minutes
        test_accuracy_L12=test_metrics_dict["accuracy_L12"],
        data_augmentation=data_augmentation)

    if model.latent_dim == 2:
        plot_helper.plot_2d_representation(
            model, small_set,
            title="Epoch=%d Strategy=%s Beta=%f M=%f" % (epoch, strategy, beta, M),
            path=f"{artifact_dir}/latent-representation.png")

    with train_summary_writer.as_default():
        tf.summary.text("setting",
                        json.dumps(summary, sort_keys=True, indent=4),
                        step=0)

    with open(f"{artifact_dir}/summary.yml", 'w') as f:
        print(summary)
        yaml.dump(summary, f, default_flow_style=False)

    model.save_weights(f"{artifact_dir}/model")
    print(f"Training took {elapsed_time:.4f} minutes")
    print(f"Please see artifact at: {artifact_dir}")
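
# Two self-contained sketches of details train() above depends on; the data
# shapes and values are illustrative assumptions, not the project's inputs.
#
# (1) Both input paths yield (images, labels) batches, so a single train_step
# handles either; but ImageDataGenerator.flow() loops forever, which is why
# train() passes an explicit steps_per_epoch.
import json
import numpy as np
import tensorflow as tf

x = np.random.rand(8, 4, 4, 1).astype("float32")
y = np.arange(8)

for bx, by in tf.data.Dataset.from_tensor_slices((x, y)).batch(4):
    print("tf.data batch:", bx.shape, by.shape)   # finite iterator

gen = tf.keras.preprocessing.image.ImageDataGenerator()
for bx, by in gen.flow(x, y, batch_size=4):
    print("generator batch:", bx.shape, by.shape)
    break  # infinite iterator: stop after one batch

# (2) numpy scalars are not JSON/YAML friendly, hence the
# .astype(float).tolist() casts before the summary dict is dumped.
m = np.array([0.1, 0.9], dtype=np.float32)
print(json.dumps(dict(metrics=m.astype(float).tolist())))  # serializes fine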