    by = bys[0]
    bx = bx.astype(np.float32) / 255.
    bx = np.expand_dims(bx, axis=1)
    by = np.squeeze(by.astype(np.int64))
    # Remap the sampled class ids to consecutive indices 0..ways-1.
    classes = sorted(list(set(by.tolist())))
    for i, c in enumerate(classes):
        by[by == c] = i
    # Split the batch: the first `ways` samples are the query inputs,
    # the rest form the labelled support set.
    inp_x = bx[:ways]
    sup_x = bx[ways:]
    inp_y = by[:ways]
    sup_y = by[ways:]
    bxs = [inp_x, sup_x, sup_y]
    bys = inp_y
    return (bxs, bys)


train_file = f'{root}/omniglot_bg.h5'
dstr = dl.DataReader(train_file, num_workers=5, transform_func=trans)
gntr = dstr.few_shot_reader(batch_size, shots + 1, ways)

test_file = f'{root}/omniglot_eval.h5'
dste = dl.DataReader(test_file, num_workers=5, transform_func=trans)
gnte = dste.few_shot_seq_reader(batch_size * 2, shots=shots + 1, selected_classes=[0, 1, 2, 3, 4])
gnte1 = dste.few_shot_seq_reader(batch_size * 2, shots=shots + 1, selected_classes=[5, 6, 7, 8, 9])
listeners = [dl.Listener('test', gnte, [acc]), dl.Listener('test1', gnte1, [acc])]


def loss_func(y_, y):
    # Move the class dimension in front of the query dimension for CrossEntropyLoss.
    return nn.CrossEntropyLoss()(y_.transpose(-2, -1), y)


dlmodel = dl.DlModel(network, ckpt)
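# Note on the transpose in loss_func: nn.CrossEntropyLoss expects logits shaped
# (N, C, ...) with the class dimension second, while the model output here is
# assumed to be (N, ..., C). A self-contained illustration with made-up shapes:
import torch
import torch.nn as nn

logits = torch.randn(4, 5, 10)          # (batch, queries, num_classes)
targets = torch.randint(0, 10, (4, 5))  # (batch, queries)
loss = nn.CrossEntropyLoss()(logits.transpose(-2, -1), targets)  # logits -> (batch, num_classes, queries)
print(loss.item())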
drop_prob = 0.1


def input_trans(bxs, bys):
    bx, = bxs
    by, = bys
    bx = bx.astype(np.int64)
    by = by.astype(np.int64)
    # The decoder input is the target shifted by one position
    # relative to the prediction target (teacher forcing).
    return ((bx, by[:-1]), by[1:])


file_root = '/data/examples/pt2en'
tokenizer = torch.load(f'{file_root}/tokenizer.pt')
enc_vocab_size = len(tokenizer[0][1]) + 2
dec_vocab_size = len(tokenizer[1][1]) + 2

dstr = dl.DataReader(f'{file_root}/pt2en_tr.h5', transform_func=input_trans, num_workers=5)
gntr = dstr.common_reader(batch_size)
dstv = dl.DataReader(f'{file_root}/pt2en_tv.h5', transform_func=input_trans, num_workers=5)
gntv = dstv.common_reader(batch_size * 3, shuffle=False)

network = Transformer(num_layers, num_heads, d_model, dff, enc_vocab_size, dec_vocab_size).cuda(0)
optimizer = torch.optim.Adam(network.parameters())


class CustomSchedule(torch.optim.lr_scheduler._LRScheduler):
    def __init__(self, optimizer, d_model, warmup_steps=4000, last_epoch=-1):
        self.d_model = d_model
        self.warmup_steps = warmup_steps
        super(CustomSchedule, self).__init__(optimizer, last_epoch)
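    # The original get_lr is not shown above; this is a minimal sketch assuming
    # the warmup schedule from "Attention Is All You Need":
    #   lr = d_model^(-0.5) * min(step^(-0.5), step * warmup_steps^(-1.5))
    # With such a schedule, scheduler.step() is usually called once per optimizer
    # step (per batch) rather than once per epoch.
    def get_lr(self):
        step = max(self.last_epoch, 1)
        scale = (self.d_model ** -0.5) * min(step ** -0.5, step * self.warmup_steps ** -1.5)
        # The scheduled value replaces (rather than scales) the optimizer's base lr.
        return [scale for _ in self.base_lrs]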
    bx = bx.astype(np.float32).transpose([2, 0, 1])  # HWC -> CHW
    by = by.astype(np.int64).squeeze()
    return ((bx, ), by)


num_rounds = 5
num_ops = 11
stem = nn.Sequential(ConvOp(3, 64, kernel_size=1), nn.ReLU())
arch = ArchBuilder(stem, 10, 64, [2, 2, 2], num_rounds=num_rounds).cuda(0)
optimizer = torch.optim.Adam(arch.parameters())
loss_func = nn.CrossEntropyLoss()
ckpt = dl.Checkpoint('temp/evo/e1', max_to_keep=10, device=0)
acc = dl.MetricAccuracy(device=0, name='acc')

batch_size = 32
ds = dl.DataReader('/data/testdata/cifar10/cifar10_test.h5', transform_func=input_trans)
gntr = ds.common_cls_reader(batch_size, selected_classes=['tr'])
gnte = ds.common_cls_reader(batch_size * 3, selected_classes=['te'], shuffle=False)
listeners = [EvoListener('test', gnte, [acc])]

emodel = EvoModel(arch, ckpt, num_ops, num_rounds, device=0)
warmup_num_epochs = 10
emodel.warm_up(gntr, loss_func, optimizer, num_epochs=warmup_num_epochs,
               metrics=[acc], listeners=listeners, from_scratch=True)
stem_layer = CatLayer(1, 8, kernel_size=3)
output_layer = nn.Sequential(nn.Flatten(), nn.Linear(49, 10))
builder = ArchBuilder(stem_layer, lc, output_layer).cuda(0)


def tran_func(bxs, bys):
    bx = bxs[0]
    by = bys[0]
    bx = bx.astype(np.float32) / 255.
    bx = np.expand_dims(bx, axis=0)  # add the channel dimension
    by = np.squeeze(by.astype(np.int64))
    return (bx, ), by


ds = dl.DataReader('../data/mnist/mnist.h5', transform_func=tran_func, num_workers=0)
gntr = ds.common_cls_reader(32, selected_classes=['train'], shuffle=True)
gntv = ds.common_cls_reader(32, selected_classes=['valid'], shuffle=False)
gnte = ds.common_cls_reader(32, selected_classes=['test'], shuffle=False)

optimizer = torch.optim.Adam(builder.parameters())
loss_func = nn.CrossEntropyLoss()
ckpt = dl.Checkpoint('temp/enas_t1/builder', max_to_keep=10, device=0,
                     save_best_only=True, saving_metric='test_acc')
acc = dl.MetricAccuracy(device=0, name='acc')
total_steps = 20000