Example #2
def get_model(args):
    ''' define model '''
    model = ConvNet(use_batch_norm=True, use_resnet=False)

    print('---Model Information---')
    print('Net:', model)
    print('Use GPU:', args.use_cuda)
    return model.to(args.device)
@dataclass
class ModelsHandler:
    input_shape: tuple
    num_actions: int
    lr: float = field(default=0.001)

    def __post_init__(self):
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.model = ConvNet(self.input_shape, self.num_actions,
                             self.lr).to(self.device)
        self.tgt_model = ConvNet(self.input_shape, self.num_actions,
                                 self.lr).to(self.device)
        self.model_update_count = 0
        self.current_loss = 0

    def train_step(self, rb: ReplayBuffer, sample_size=300):
        # loss calculation
        trans_sts = rb.sample(sample_size)
        states = torch.stack([trans.state_tensor
                              for trans in trans_sts]).to(self.device)
        next_states = torch.stack(
            [trans.next_state_tensor for trans in trans_sts]).to(self.device)
        not_done = torch.Tensor([trans.not_done_tensor
                                 for trans in trans_sts]).to(self.device)
        actions = [trans.action for trans in trans_sts]
        rewards = torch.stack([trans.reward_tensor
                               for trans in trans_sts]).to(self.device)

        with torch.no_grad():
            qvals_predicted = self.tgt_model(next_states).max(-1)

        self.model.optimizer.zero_grad()
        qvals_current = self.model(states)
        one_hot_actions = torch.nn.functional.one_hot(
            torch.LongTensor(actions), self.num_actions).to(self.device)
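        # TD target: reward + not_done * max_a' Q_target(s', a'); no discount
        # factor is applied here (effectively gamma = 1), and the squared TD
        # error is averaged over the sampled batch.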
        loss = ((rewards + (not_done * qvals_predicted.values) -
                 torch.sum(qvals_current * one_hot_actions, -1))**2).mean()
        loss.backward()
        self.model.optimizer.step()
        return loss.detach().item()

    def update_target_model(self):
        state_dict = deepcopy(self.model.state_dict())
        self.tgt_model.load_state_dict(state_dict)
        self.model_update_count += 1

    def save_target_model(self):
        file_name = f"{datetime.now().strftime('%H:%M:%S')}.pth"
        temp_dir = os.environ.get('TMPDIR', '/tmp')
        file_name = os.path.join(temp_dir, file_name)
        torch.save(self.model, file_name)
        wandb.save(file_name)
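For context, a minimal loop driving this handler could look like the sketch below; the shapes, warm-up size and target-sync interval are illustrative assumptions, and the ReplayBuffer is assumed to support len() and to be filled from environment interaction elsewhere.

handler = ModelsHandler(input_shape=(4, 84, 84), num_actions=6)
rb = ReplayBuffer()
for step in range(100000):
    # ... interact with the environment and push transitions into rb ...
    if len(rb) > 1000:
        loss = handler.train_step(rb)
    if step % 1000 == 0:
        handler.update_target_model()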
Example #4
def example2():
    cm = ConfigManager('testset')
    imgs = DataLoader.get_images_objects(cm.get_dataset_path(),
                                         'processed_x.pt',
                                         'processed_y.pt',
                                         to_tensor=True)
    print(type(imgs))
    dm = DatasetsManager(cm, imgs)

    n_output = 2
    net = ConvNet(n_output)
    optimizer = optim.Adam(net.parameters(), lr=1e-3)
    loss_function = nn.MSELoss()

    EPOCHS = 10
    BATCH_SIZE = 128

    print('Start training')
    for epoch in range(EPOCHS):
        for k in tqdm(range(0, len(dm.train), BATCH_SIZE)):
            batch_x = torch.cat(dm.train.get_x(start=k, end=k + BATCH_SIZE),
                                dim=0)
            batch_y = torch.Tensor(dm.train.get_y(start=k, end=k + BATCH_SIZE))
            print(type(batch_x))
            net.zero_grad()

            out = net(batch_x)
            loss = loss_function(out, batch_y)
            loss.backward()
            optimizer.step()

        print(f'Epoch: {epoch}. Loss: {loss}')

    correct = 0
    total = 0

    # with torch.no_grad():
    #     for k in tqdm(range(len(x_test))):
    #         real_class = torch.argmax(y_test[k])
    #         net_out = net(x_test[k].view(-1, 1, IMG_SIZE, IMG_SIZE))[0]  # returns list
    #         predicted_class = torch.argmax(net_out)

    #         if predicted_class == real_class:
    #             correct += 1
    #         total += 1

    # The evaluation loop above is commented out, so guard the division.
    print('Accuracy: ', round(correct / total, 3) if total else 'n/a')

    torch.save(net, 'data/cnn_cats_dogs_model.pt')
Example #5
def main():
    results = []
    # Modification starts
    sess = tf.Session()
    # if we don't have the trained model, simply do:
    # Trainer(sess)
    # pass the session and the image to find_circle function
    checkpoint_path = 'checkpoints/dump-63'
    inputs = tf.placeholder(tf.float32, shape=(None, 200, 200, 1))
    outputs = tf.placeholder(tf.float32, shape=(None, 3))
    predictions = ConvNet(inputs, outputs, mode='predict')

    saved_variables = tf.global_variables()
    saver = tf.train.Saver(saved_variables)
    saver.restore(sess, checkpoint_path)
    # End of modification
    for idx in range(1000):
        print('Inference on image: ' + str(idx))
        params, img = noisy_circle(200, 50, 2)
        detected = find_circle(img, sess, inputs, outputs, predictions)
        results.append(iou(params, detected))
    results = np.array(results)
    print((results > 0.7).mean())

    sess.close()
def get_model(args):
    # TODO
    model_type = args.model
    if model_type.lower() == 'convnet':
        return ConvNet()
    else:
        raise NotImplementedError(
            'Unknown model type: {}'.format(model_type))
def main():
    parser = get_command_line_parser()
    args = parser.parse_args()
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    use_gpu = torch.cuda.is_available()

    if use_gpu:
        print("Currently using GPU: {}".format(args.gpu))
        torch.backends.cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU")

    trainloader, testloader = get_mnist_data(train_batch_size=args.batch_size,
                                             workers=args.workers)

    print("Creating model: {}".format(args.model))
    feature_extractor = ConvNet(depth=6, input_channel=1)
    model = BaseLine(feature_extractor=feature_extractor,
                     num_base_class=10,
                     embed_size=2)

    if use_gpu:
        model = model.cuda()

    # optimizer_model = torch.optim.SGD(model.parameters(), lr=args.lr_model, weight_decay=5e-04, momentum=0.9)
    optimizer_model = torch.optim.Adam(model.parameters(), lr=args.lr_model)

    if args.stepsize > 0:
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer_model,
                                                    step_size=args.stepsize,
                                                    gamma=args.gamma)

    start_time = time.time()

    for epoch in range(args.max_epoch):
        print("==> Epoch {}/{}".format(epoch + 1, args.max_epoch))
        train(model, optimizer_model, trainloader, use_gpu, 10, epoch, args)

        if args.stepsize > 0:
            scheduler.step()

        if args.eval_freq > 0 and (epoch + 1) % args.eval_freq == 0 or (
                epoch + 1) == args.max_epoch:
            print("==> Test")
            acc, err = evaluate(model,
                                testloader,
                                use_gpu,
                                10,
                                epoch,
                                args=args)
            print("Accuracy (%): {}\t Error rate (%): {}".format(acc, err))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    print("Finished. Total elapsed time (h:m:s): {}".format(elapsed))
def initialise():
    game_controller = GameController(game_cfg.start_bbox, game_cfg.end_bbox,
                                     game_cfg.start_thres)
    player_controller = PlayerController(general_cfg.app)
    rl_recorder = RlRecorder()
    # TODO, read replay from disk
    player_controller.activate_chrome()  # switch to chrome
    timer = Timer(game_cfg.space_time_gap)
    performances = {'iter': [], 'score': []}
    if cnn_cfg.load_model and os.path.isfile(cnn_cfg.chkpnt_path):
        cnn = torch.load(cnn_cfg.chkpnt_path)
        cnn.cnn_cfg = cnn_cfg
        print("Load cnn model from ", cnn_cfg.chkpnt_path)
    else:
        cnn = ConvNet(cnn_cfg, num_classes=cnn_cfg.num_classes, lr=cnn_cfg.lr)
        print("Create new CNN done!")
    if torch.cuda.is_available():
        cnn = cnn.cuda()
        print("Cuda is available!")
    return game_controller, player_controller, rl_recorder, timer, performances, cnn
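The returned tuple is intended to be unpacked by the caller, e.g.:

game_controller, player_controller, rl_recorder, timer, performances, cnn = initialise()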
Example #9
def demo_main(char_set, weight, name):

    _, valid_transform = get_transform()
    demo_data = DemoDataset('cleaned_data', name, valid_transform)

    test_loader = DataLoader(
        dataset=demo_data,
        batch_size=3,
        shuffle=False,
        num_workers=1,
        pin_memory=True,
    )

    model = ConvNet(1, len(char_set))

    if torch.cuda.is_available():
        model = model.cuda()

    print('load weights from {}'.format(weight))
    model.load_state_dict(torch.load(weight))
    model.eval()

    def map_indexlist_char(ind_list, char_set):
        return ''.join([char_set[i] for i in ind_list])

    with torch.no_grad():
        for batch_idx, (x, imgpath) in enumerate(test_loader):
            if batch_idx > 0:
                break
            x = x.cuda()
            out = model(x)
            _, pred_label = torch.max(out, 1)
            pred_name = map_indexlist_char(pred_label.tolist(), char_set)

    print('name {} pred name {}'.format(name, pred_name))

    def get_concat(im1, im2):
        dst = Image.new('RGB', (im1.width + im2.width, im1.height))
        dst.paste(im1, (0, 0))
        dst.paste(im2, (im1.width, 0))
        return dst

    concat_im = None
    for img in demo_data.images():
        im = Image.open(img)
        if concat_im is None:
            concat_im = im
        else:
            concat_im = get_concat(concat_im, im)
    #concat_im.show()
    concat_im.save('demo.jpg')
Example #10
def get_model(args):
    ''' define model '''
    model = None
    if args.fc:
        model = FCNet()
    else:
        model = ConvNet()
    if args.cuda:
        model.cuda()

    print('\n---Model Information---')
    print('Net:', model)
    print('Use GPU:', args.cuda)

    return model
def get_model(args):
    ''' define model '''
    model = None
    if args.model == 'Net':
        model = Net()
    elif args.model == 'FCNet':
        model = FCNet()
    elif args.model == 'ConvNet':
        model = ConvNet()
    else:
        raise ValueError('The model is not defined!!')

    print('---Model Information---')
    print('Net:', model)
    print('Use GPU:', args.use_cuda)
    return model.to(args.device)
Example #12
def train():
    g = ConvNet(is_training=True)

    # Start train
    with tf.Session(graph=g.graph) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())

        if tf.train.latest_checkpoint('checkpoint'):
            saver.restore(sess, tf.train.latest_checkpoint('checkpoint'))
            print("Loaded parameter from {}".format(
                tf.train.latest_checkpoint('checkpoint')))

        n_batches = g.mnist.train.num_examples // g.batch_size
        print("Start to train")
        for i in range(10):
            total_loss = 0

            for j in range(n_batches):
                X_batch, Y_batch = g.mnist.train.next_batch(g.batch_size)

                X_batch = np.reshape(X_batch, [-1, 28, 28, 1])

                _, loss_batch = sess.run([g.optimizer, g.loss], {
                    g.X: X_batch,
                    g.Y: Y_batch
                })
                total_loss += loss_batch
            print('Epoch {}: {}'.format(i + 1, total_loss / n_batches))

        X_test = np.reshape(g.mnist.test.images, [-1, 28, 28, 1])
        print(
            'Accuracy:',
            sess.run(g.accuracy,
                     feed_dict={
                         g.X: X_test,
                         g.Y: g.mnist.test.labels
                     }))
        saver.save(sess, 'checkpoint/model')
Example #13
def train(opt, split):
    model = ConvNet(opt.nClasses, GAP=opt.GAP)
    optimizer = chainer.optimizers.NesterovAG(lr=opt.LR, momentum=opt.momentum)
    trainer = Trainer(model, optimizer, train_iter, val_iter, opt)
    log = {'train_acc': [], 'val_acc': [], 'lr': [], 'train_loss': []}

    if opt.testOnly:
        chainer.serializers.load_npz(
            os.path.join(opt.save, 'model_split{}.npz'.format(split)), trainer.model)
        val_top1 = trainer.val()
        print('| Val: top1 {:.2f}'.format(val_top1))        
        return

    for epoch in range(1, opt.nEpochs + 1):
        train_loss, train_top1 = trainer.train(epoch)
        val_top1 = trainer.val()
        sys.stderr.write('\r\033[K')
        sys.stdout.write(
            '| Epoch: {}/{} | Train: LR {}  Loss {:.3f}  top1 {:.2f} | Val: top1 {:.2f}\n'.format(
                epoch, opt.nEpochs, trainer.optimizer.lr, train_loss, train_top1, val_top1))
        sys.stdout.flush()
        log['lr'].append(trainer.optimizer.lr)
        log['train_loss'].append(train_loss)
        log['train_acc'].append(train_top1)
        log['val_acc'].append(val_top1)


    if opt.save != 'None':
        # Save weights
        chainer.serializers.save_npz(
            os.path.join(opt.save, 'model_split{}.npz'.format(split)), model)
        # Save logs
        with open(os.path.join(opt.save, 'logger{}.txt'.format(split)), "w") as f:
            for k, v in log.items():
                f.write(str(k) + ': ' + str(v) + '\n')
        # Save parameters
        with open(os.path.join(opt.save, 'opt{}.pkl'.format(split)), "wb") as f:
            pickle.dump(opt, f)
def run_exp(args, update_lambda, fix_weight):

    if args.predata is False:
        X_elementary, Y_elementary, X_hyper, Y_hyper, X_valid, Y_valid, X_test, Y_test = read_preprocess(params=args)
        np.savez(args.processedDataName, X_elementary=X_elementary, Y_elementary=Y_elementary, X_hyper=X_hyper,
             Y_hyper=Y_hyper, X_v=X_valid, Y_v=Y_valid, X_test=X_test, Y_test=Y_test)
    else:
        tmpload = np.load(args.processedDataName)
        X_elementary, Y_elementary, X_hyper, Y_hyper, X_valid, Y_valid, X_test, Y_test = \
            tmpload['X_elementary'], tmpload['Y_elementary'], tmpload['X_hyper'], tmpload['Y_hyper'],\
            tmpload['X_v'], tmpload['Y_v'], tmpload['X_test'], tmpload['Y_test']

    """
        Build Theano functions

    """

    if args.model == 'convnet':
        x = T.ftensor4('x')
    elif args.model == 'mlp':
        x = T.matrix('x')
    else:
        raise AttributeError
    y = T.matrix('y')
    lr_ele = T.fscalar('lr_ele')

    lr_ele_true = np.array(args.lrEle, theano.config.floatX)
    mom = 0.95 # Michael: momentum
    lr_hyper = T.fscalar('lr_hyper')
    grad_valid_weight = T.tensor4('grad_valid_weight')

    if args.model == 'mlp':
        model = MLP(x=x, y=y, args=args)
    elif args.model == 'convnet':
        model = ConvNet(x=x, y=y, args=args) #Michael: check here for model.params_theta

        if args.dataset == 'mnist':
            nc = 1
            nPlane = 28
        else:
            nc = 3
            nPlane = 32
        X_elementary = X_elementary.reshape(-1, nc, nPlane, nPlane)
        X_hyper = X_hyper.reshape(-1, nc, nPlane, nPlane)
        X_valid = X_valid.reshape(-1, nc, nPlane, nPlane)
        X_test = X_test.reshape(-1, nc, nPlane, nPlane)
    else:
        raise AttributeError
    
    # Michael: this computes the updated parameters
    # Michael: these aren't the new parameters themselves, but the update functions
    update_ele, update_valid, output_valid_list, share_var_dloss_dweight = update(model.params_theta, model.params_lambda, model.params_weight,
                                      model.loss, model.penalty, model.lossWithPenalty,
                                      lr_ele, lr_hyper, mom)

    if update_lambda:
        for up, origin in zip(update_lambda, model.params_lambda):
            origin.set_value(np.array(up))
            boo = origin.get_value()
            # print 'update', type(up), type(boo), boo[1]
            # TIME.sleep(20)

    if fix_weight:
        for fix, origin in zip(fix_weight, model.params_weight):
            origin.set_value(np.array(fix))
    else:
        fix_weight = []
        for origin in model.params_weight:
            fix_weight.append(origin.get_value())

    # Phase 1
    # Michael: ???
    func_elementary = theano.function(
        inputs=[x, y, lr_ele],
        outputs=[model.lossWithPenalty, model.prediction],
        updates=update_ele, #Michael: update_ele is the updating function, not the new parameters
        on_unused_input='ignore',
        allow_input_downcast=True)

    func_eval = theano.function(
        inputs=[x, y],
        outputs=[model.loss, model.prediction],
        on_unused_input='ignore',
        allow_input_downcast=True)

    # Phase 2
    # actually, in the backward phase
    func_hyper_valid = theano.function(
        inputs=[x, y],
        outputs=[model.loss, model.prediction] + output_valid_list,
        updates=update_valid,
        on_unused_input='ignore',
        allow_input_downcast=True)


    """
         Phase 1: meta-forward

    """
    X_mix = np.concatenate((X_valid, X_test), axis=0)
    Y_mix = np.concatenate((Y_valid, Y_test), axis=0)
    print(X_valid.shape, X_mix.shape)
    X_valid, Y_valid = X_mix[:len(X_mix) // 2], Y_mix[:len(X_mix) // 2]
    X_test, Y_test = X_mix[len(X_mix) // 2:], Y_mix[len(X_mix) // 2:]
    n_ele, n_valid, n_test = X_elementary.shape[0], X_valid.shape[0], X_test.shape[0]
    # TODO: remove this override
    n_ele = 20000
    X_elementary, Y_elementary = X_elementary[:n_ele], Y_elementary[:n_ele]

    print("# of ele, valid, test: ", n_ele, n_valid, n_test)
    n_batch_ele = n_ele // args.batchSizeEle
    test_perm, ele_perm = range(0, n_test), range(0, n_ele)
    last_iter = args.maxEpoch * n_batch_ele - 1
    temp_err_ele = []
    temp_cost_ele = []
    eval_loss = 0.
    t_start = time()

    iter_index_cache = []

    # save the model parameters into theta_initial
    theta_initial = []
    for i, w in enumerate(model.params_theta): # Michael: doesn't actually go through parameters, only [W, b, W, b, W, b, W, b]
        theta_initial.append(w.get_value())
    
    
    """
    # Michael: pick two random parameters, construct two lists to store the paths 
    # Michael: "i, w in enumerate(model.params_theta)", but random?
    # i is the layer (list index), w is the weight
    # model.params_theta = [W, b, W, b, W, b, W, b]
    # W's, b's are type theano.tensor.sharedvar.TensorSharedVariable
    # model.params_theta[0].get_value()[0][0][0][0] gives a weight, possibly repeated/shared?
    # model.params_theta[i1].get_value()[i2][i3][i4][i5]
    
    # Get coordinates of first weight
    coords1 = [np.random.randint(0, len(model.params_theta))] #[len(model.params_theta)-2] #[0] #[np.random.randint(0, len(model.params_theta))]
    layer_value = model.params_theta[coords1[0]].get_value()
    while not isinstance(layer_value, (int, float, np.float32, np.float64)): #while we haven't gotten to a weight
        coords1.append(np.random.randint(0, len(layer_value)))
        layer_value = layer_value[coords1[-1]]
    # Access and create list initialized with first value
    layer_value = model.params_theta[coords1[0]].get_value()
    for l in range(1, len(coords1)):
        layer_value = layer_value[coords1[l]]
    w_1 = [layer_value]
    
    #for l in range(1, len(coords1)-1):
    #    layer_value = layer_value[coords1[l]]
    #layer_value[coords1[-1]] = 1.0
    #w_1 = [1.0]
    
    
    # Get coordinates of second weight
    coords2 = [np.random.randint(0, len(model.params_theta))] #[len(model.params_theta)-2] #[0] #
    layer_value = model.params_theta[coords2[0]].get_value()
    while not isinstance(layer_value, (int, float, np.float32, np.float64)): #while we haven't gotten to a weight
        coords2.append(np.random.randint(0, len(layer_value)))
        layer_value = layer_value[coords2[-1]]
    # Access and create list initialized with first value
    layer_value = model.params_theta[coords2[0]].get_value()
    for l in range(1, len(coords2)):
        layer_value = layer_value[coords2[l]]
    w_2 = [layer_value]"""

    #for l in range(1, len(coords2)-1):
    #    layer_value = layer_value[coords2[l]]
    #layer_value[coords2[-1]] = 1.30
    #w_2 = [1.30]


    for i in range(0, args.maxEpoch * n_batch_ele): # Michael: SGD steps
        curr_epoch = i // n_batch_ele
        curr_batch = i % n_batch_ele
        
        

        """
            Learning rate and momentum schedules.

        """
        t = 1. * i // (args.maxEpoch * n_batch_ele) #Michael: never used?

        """
            Update

        """
        sample_idx_ele = ele_perm[(curr_batch * args.batchSizeEle):((curr_batch + 1) * args.batchSizeEle)] #Michael: batch indices
        iter_index_cache.append(sample_idx_ele)
        batch_x, batch_y = X_elementary[sample_idx_ele], Y_elementary[sample_idx_ele] #Michael: batch data
        if i == 399:
            print("399!!!!!!!!!!!", batch_y) #Michael: ???
            # TODO: last elementary step before hyperparameter update?
        #Michael: what's this for?
        tmp_y = np.zeros((args.batchSizeEle, 10)) #Michael: 10 for 10 classes; put a 1 in row=idx and column=class=element of idx 
        for idx, element in enumerate(batch_y): #Michael: idx = index, element = element at that index
            tmp_y[idx][element] = 1
        batch_y = tmp_y
        
        # Michael: This where the elementary parameters are updated
        res = func_elementary(batch_x, batch_y, lr_ele_true)
        (cost_ele, pred_ele, debugs) = (res[0], res[1], res[2:])



        # Michael: add new parameters to lists
        """layer_value = model.params_theta[coords1[0]].get_value()
        for l in range(1, len(coords1)):
            layer_value = layer_value[coords1[l]]
        w_1.append(layer_value)
        layer_value = model.params_theta[coords2[0]].get_value()
        for l in range(1, len(coords2)):
            layer_value = layer_value[coords2[l]]
        w_2.append(layer_value)"""
        
        # Michael: plot them right away
        if i%20 == 0: #only every 20
            #plt.plot(w_1, w_2, marker='o', ms=3.)
            #plt.plot(w_1[0], w_2[0], marker='o')
            #plt.plot(w_1[len(w_1)-1], w_2[len(w_1)-1], marker='o', ms=10.)
            #plt.show()
            print(i)
        
        # print("Epoch %d, batch %d, time = %ds, train_loss = %.4f" %
        #       (curr_epoch, curr_batch, time() - t_start, cost_ele))

        # temp_err_ele += [1. * sum(batch_y != pred_ele) / args.batchSizeEle]
        temp_cost_ele += [cost_ele]
        eval_error = 0.

        # if np.isnan(cost_ele):
        #     print 'NANS', cost_ele


        """
            Evaluate

        """
        if args.verbose or (curr_batch == n_batch_ele - 1):

            if args.model == 'mlp':
                n_eval = n_test
            else:
                n_eval = 1000

            temp_idx = test_perm[:n_eval]
            batch_x, batch_y = X_test[temp_idx], Y_test[temp_idx]
            tmp_y = np.zeros((n_eval, 10))
            for idx, element in enumerate(batch_y):
                tmp_y[idx][element] = 1
            batch_y = tmp_y
            eval_loss, y_test = func_eval(batch_x, batch_y)

            wrong = 0
            for e1, e2 in zip(y_test, Y_test[temp_idx]):
                if e1 != e2:
                    wrong += 1
            # eval_error = 1. * sum(int(Y_test[temp_idx] != batch_y)) / n_eval
            eval_error = 100. * wrong / n_eval
            print("test sample", n_eval)
            print("Valid on Test Set: Epoch %d, batch %d, time = %ds, eval_loss = %.4f, eval_error = %.4f" %
                  (curr_epoch, curr_batch + 1, time() - t_start, eval_loss, eval_error))







    # save the model parameters after T1 into theta_final
    theta_final = []
    for i, w in enumerate(model.params_theta):
        theta_final.append(w.get_value())

    
    # Michael: plot paths
    #plt.plot(w_1, w_2, marker='o', ms=3.)
    #plt.plot(w_1[0], w_2[0], marker='o')
    #plt.plot(w_1[len(w_1)-1], w_2[len(w_1)-1], marker='o', ms=10.)
    #plt.show()
    
    # Michael: plot paths
    #plt.plot(range(0,len(w_2)), w_2, marker='o', ms=3.)
    #plt.plot(0, w_2[0], marker='o')
    #plt.plot(len(w_2)-1, w_2[len(w_1)-1], marker='o', ms=10.)
    #plt.show()





    """
        Phase 2: Validation on Hyper set

    """
    n_hyper = X_hyper.shape[0]
    n_batch_hyper = n_hyper // args.batchSizeHyper
    hyper_perm = range(0, n_hyper)
    # np.random.shuffle(hyper_perm)

    err_valid = 0.
    cost_valid = 0.
    t_start = time()
    grad_l_theta = []
    for i in range(0, n_batch_hyper):
        sample_idx = hyper_perm[(i * args.batchSizeHyper):((i + 1) * args.batchSizeHyper)]
        batch_x, batch_y = X_elementary[sample_idx], Y_elementary[sample_idx]
        # TODO: refactor, too slow
        tmp_y = np.zeros((args.batchSizeEle, 10))
        for idx, element in enumerate(batch_y):
            tmp_y[idx][element] = 1
        batch_y = tmp_y
        res = func_hyper_valid(batch_x, batch_y)
        valid_cost, pred_hyper, grad_temp = res[0], res[1], res[2:]
        err_tmp = 0.
        # err_tmp = 1. * sum(batch_y != pred_hyper) / args.batchSizeHyper
        err_valid += err_tmp
        # print "err_temp", err_tmp
        cost_valid += valid_cost

        # accumulate gradient and then take the average
        if i == 0:
            for grad in grad_temp:
                grad_l_theta.append(np.asarray(grad))
        else:
            for k, grad in enumerate(grad_temp):
                grad_l_theta[k] += grad

    err_valid /= n_batch_hyper
    cost_valid /= n_batch_hyper

    # get average grad of all iterations on validation set

    for i, grad in enumerate(grad_l_theta):
        print(grad.shape)
        grad_l_theta[i] = grad / (np.array(n_hyper * 1., dtype=theano.config.floatX))


    print("Valid on Hyper Set: time = %ds, valid_err = %.2f, valid_loss = %.4f" %
          (time() - t_start, err_valid * 100, cost_valid))

    """
        Phase 3: meta-backward

    """

    # updates for phase 3

    update_hyper, output_hyper_list, phase_3_input = updates_hyper(model.params_lambda, model.params_weight,
                                                    model.lossWithPenalty, grad_l_theta, output_valid_list)

    # Phase 3
    # dloss_dpenalty = T.grad(model.loss, model.params_lambda)
    func_hyper = theano.function(
        inputs=[x, y],
        outputs=output_hyper_list + output_valid_list,
        updates=update_hyper,
        on_unused_input='ignore',
        allow_input_downcast=True)

    # Michael: this is the backwards approximating path
    # init for pseudo params
    pseudo_params = []
    for i, v in enumerate(model.params_theta):
        pseudo_params.append(v.get_value())

    def replace_pseudo_params(ratio):
        for i, param in enumerate(model.params_theta):
            pseudo_params[i] = (1 - ratio) * theta_initial[i] + ratio * theta_final[i]
            param.set_value(pseudo_params[i])
            
    n_backward = len(iter_index_cache)//10
    print("n_backward", n_backward)

    rho = np.linspace(0.001, 0.999, n_backward)

    # initialization
    up_lambda, up_v = [], []
    for param in model.params_lambda:
        temp_param = np.zeros_like(param.get_value() * 0., dtype=theano.config.floatX)
        up_lambda += [temp_param]

    for param in model.params_weight:
        temp_v = np.zeros_like(param.get_value() * 0., dtype=theano.config.floatX)
        up_v += [temp_v]

    # time.sleep(20)
    up_theta = grad_l_theta

    iter_index_cache = iter_index_cache[:n_backward]

    for iteration in range(n_backward)[::-1]:
        # Michael: this is the backwards approximating path
        replace_pseudo_params(rho[iteration])         # line 4
        curr_epoch = iteration // n_batch_ele
        curr_batch = iteration % n_batch_ele
        if iteration % 40 == 0:
            print("Phase 3, ep{} iter{}, total{}".format(curr_epoch, curr_batch, iteration))
        sample_idx_ele = iter_index_cache[iteration]
        # sample_idx_ele = ele_perm[(curr_batch * args.batchSizeEle):((curr_batch + 1) * args.batchSizeEle)]
        batch_x, batch_y = X_elementary[sample_idx_ele], Y_elementary[sample_idx_ele]
        if curr_batch == 399:
            print("399!!!!!!!!!!!", batch_y)
        tmp_y = np.zeros((args.batchSizeEle, 10))
        for idx, element in enumerate(batch_y):
            tmp_y[idx][element] = 1
        batch_y = tmp_y

        if args.model == 'mlp':
            for p3, p1, input_p in zip(up_v, up_theta, phase_3_input):
                # print p3.shape, p1.shape
                p3 += lr_ele_true * p1
                input_p.set_value(p3)
                tmp = input_p.get_value()
                # print 'set up_v to obtain hypergrad', tmp[1][1]
                # TIME.sleep(2)
        else:
            for p3, p1, input_p in zip(up_v, up_theta, phase_3_input):
                p3 += lr_ele_true * p1
                input_p.set_value(p3)

        # hessian vector product
        HVP_value = func_hyper(batch_x, batch_y)
        HVP_weight_value = HVP_value[:4]
        HVP_lambda_value = HVP_value[4:8]
        debug_orz = HVP_value[8:]

        # return
        cnt = 0
        for p1, p2, p3, hvp1, hvp2 in zip(up_theta, up_lambda, up_v, HVP_weight_value, HVP_lambda_value):
            # this code is to monitor the up_lambda
            if cnt == 3:
                tmp2 = np.array(hvp2)
                tmp1 = np.array(hvp1)
                if iteration % 40 == 0:
                    print("up_lambda", p2[3][0])
            else:
                cnt += 1
            p1 -= (1. - mom) * np.array(hvp1)
            p2 -= (1. - mom) * np.array(hvp2)
            p3 *= mom

        # print up_lambda[2][0][0]

    return model.params_lambda, up_lambda, fix_weight, eval_loss, eval_error
Example #15
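# Each per-layer setting below is given as a tuple: the conv kernel, filter,
# stride, padding, dilation and pooling entries have one element per
# convolutional layer (three here), act_params lists four activations
# (presumably one per conv layer plus one for the fully connected layer), and
# fc_params adds a single 64-unit hidden layer.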
 model = ConvNet(
     conv_params={
         'kernel': ((1, 16), (1, 8), (1, 8)),
         'num_filter': (
             16,
             32,
             64,
         ),
         'stride': (
             (1, 1),
             (1, 1),
             (1, 1),
         ),
         'padding': (
             (0, 0),
             (0, 0),
             (0, 0),
         ),
         'dilate': (
             (1, 1),
             (1, 1),
             (1, 1),
         )
     },
     act_params={'act_type': (
         'relu',
         'relu',
         'relu',
         'relu',
     )},
     pool_params={
         'pool_type': (
             'avg',
             'avg',
             'avg',
         ),
         'kernel': (
             (1, 16),
             (1, 16),
             (1, 16),
         ),
         'stride': (
             (1, 2),
             (1, 2),
             (1, 2),
         ),
         'padding': (
             (0, 0),
             (0, 0),
             (0, 0),
         ),
         'dilate': (
             (1, 1),
             (1, 1),
             (1, 1),
         )
     },
     fc_params={'hidden_dim': (64, )},
     drop_prob=0,
     #                         input_dim = (2,1,8192)
     input_dim=(1, 1, 8192))
def perform_experiments(n_runs=10,
                        n_points=1000,
                        n_epochs=200,
                        run_best=False,
                        verbose=False):
    """
    Perform experiments for 5 different neural network architectures and losses.
    
    To run all experiments call this function with default params
    
    :param n_runs: number of runs for which experiment should be repeated
    :param n_points: number of training and testing data points used in the experiments
    :param n_epochs: number of epochs every architecture should be trained on
    :param run_best: If True only the best architecture (Siamese Network with auxiliary loss) is trained
    :param verbose: If True, print training and validation loss every epoch
    :returns: dictionary containing history of training (training, validation loss and accuracy)
    """
    history_mlp_net = []
    history_conv_net = []
    history_conv_net_aux = []
    history_siamese = []
    history_siamese_aux = []

    for n_run in range(n_runs):
        data_set = generate_pair_sets(n_points)
        MAX_VAL = 255.0

        TRAIN_INPUT = Variable(data_set[0]) / MAX_VAL
        TRAIN_TARGET = Variable(data_set[1])
        TRAIN_CLASSES = Variable(data_set[2])

        TEST_INPUT = Variable(data_set[3]) / MAX_VAL
        TEST_TARGET = Variable(data_set[4])
        TEST_CLASSES = Variable(data_set[5])

        if not run_best:
            ##############################################################################
            # Creates a Multilayer Perceptron Network with ReLU activations
            mlp_net = MLPNet(in_features=392,
                             out_features=2,
                             n_layers=3,
                             n_hidden=16)

            # Set train flag on (for dropouts)
            mlp_net.train()

            # Train the model and append the history
            history_mlp_net.append(
                train_model(mlp_net,
                            train_input=TRAIN_INPUT.view((n_points, -1)),
                            train_target=TRAIN_TARGET,
                            val_input=TEST_INPUT.view((n_points, -1)),
                            val_target=TEST_TARGET,
                            n_epochs=n_epochs,
                            verbose=verbose))

            # Set train flag to False for getting accuracies on validation data
            mlp_net.eval()
            acc = get_accuracy(mlp_net, TEST_INPUT.view(
                (n_points, -1)), TEST_TARGET) * 100.0
            print("Run: {}, Mlp_net Test Accuracy: {:.3f} %".format(
                n_run, acc))

            ##############################################################################
            # Create ConvNet without auxiliary outputs
            conv_net = ConvNet(n_classes=2, n_layers=3, n_features=16)

            # Set train flag on (for dropouts)
            conv_net.train()

            # Train the model and append the history
            history_conv_net.append(
                train_model(conv_net,
                            train_input=TRAIN_INPUT,
                            train_target=TRAIN_TARGET,
                            val_input=TEST_INPUT,
                            val_target=TEST_TARGET,
                            n_epochs=n_epochs,
                            verbose=verbose))

            # Set train flag to False for getting accuracies on validation data
            conv_net.eval()
            acc = get_accuracy(conv_net, TEST_INPUT, TEST_TARGET) * 100.0
            print("Run: {}, ConvNet Test Accuracy: {:.3f} %".format(
                n_run, acc))

            ##############################################################################
            # Create ConvNet with auxiliary outputs
            conv_net_aux = ConvNet(n_classes=22, n_layers=3, n_features=16)

            # Set train flag on (for dropouts)
            conv_net_aux.train()

            # Train the model and append the history
            history_conv_net_aux.append(
                train_model(conv_net_aux,
                            train_input=TRAIN_INPUT,
                            train_target=TRAIN_TARGET,
                            aux_param=1.0,
                            train_classes=TRAIN_CLASSES,
                            val_input=TEST_INPUT,
                            val_target=TEST_TARGET,
                            val_classes=TEST_CLASSES,
                            n_epochs=n_epochs,
                            verbose=verbose))

            # Set train flag to False for getting accuracies on validation data
            conv_net_aux.eval()
            acc = get_accuracy(conv_net_aux, TEST_INPUT, TEST_TARGET) * 100.0
            print("Run: {}, ConvNet Auxilary Test Accuracy: {:.3f} %".format(
                n_run, acc))

            ##############################################################################
            # Create Siamese Network without auxiliary outputs
            conv_net = BlockConvNet()
            conv_net_siamese = DeepSiameseNet(conv_net)

            # Set train flag on (for dropouts)
            conv_net.train()
            conv_net_siamese.train()

            # Train the model and append the history
            history_siamese.append(
                train_model(conv_net_siamese,
                            train_input=TRAIN_INPUT,
                            train_target=TRAIN_TARGET,
                            val_input=TEST_INPUT,
                            val_target=TEST_TARGET,
                            n_epochs=n_epochs,
                            verbose=verbose))

            # Set train flag to False for getting accuracies on validation data
            conv_net.eval()
            conv_net_siamese.eval()

            acc = get_accuracy(conv_net_siamese, TEST_INPUT,
                               TEST_TARGET) * 100.0
            print("Run: {}, Siamese Test Accuracy: {:.3f} %".format(
                n_run, acc))

        ##############################################################################
        # Create Siamese Network with auxiliary outputs
        conv_net = BlockConvNet()
        conv_net_siamese_aux = DeepSiameseNet(conv_net)

        # Set train flag on (for dropouts)
        conv_net.train()
        conv_net_siamese_aux.train()

        # Train the model and append the history
        history_siamese_aux.append(
            train_model(conv_net_siamese_aux,
                        train_input=TRAIN_INPUT,
                        train_target=TRAIN_TARGET,
                        train_classes=TRAIN_CLASSES,
                        val_input=TEST_INPUT,
                        val_target=TEST_TARGET,
                        val_classes=TEST_CLASSES,
                        aux_param=3.0,
                        n_epochs=n_epochs,
                        verbose=verbose))

        # Set train flag to False for getting accuracies on validation data
        conv_net.eval()
        conv_net_siamese_aux.eval()

        acc = get_accuracy(conv_net_siamese_aux, TEST_INPUT,
                           TEST_TARGET) * 100.0
        print("Run: {}, Siamese Auxilary Test Accuracy: {:.3f} %".format(
            n_run, acc))
        ##############################################################################

    # Return the accumulated histories after all runs have completed.
    return {
        'history_mlp_net': history_mlp_net,
        'history_conv_net': history_conv_net,
        'history_conv_net_aux': history_conv_net_aux,
        'history_siamese': history_siamese,
        'history_siamese_aux': history_siamese_aux
    }
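As the docstring notes, calling the function with its defaults runs every architecture; a minimal driver might look like this (a sketch, not part of the original example):

if __name__ == '__main__':
    histories = perform_experiments(n_runs=10, n_points=1000, n_epochs=200)
    print(histories['history_siamese_aux'])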
Example #17
def baseline_fitness(state_dict, num_epochs=600):

    # Hyper Parameters
    param = {
        'batch_size': 4,
        'test_batch_size': 50,
        'num_epochs': num_epochs,
        'learning_rate': 0.001,
        'weight_decay': 5e-4,
    }

    num_cnn_layer = sum(
        [int(len(v.size()) == 4) for d, v in state_dict.items()])

    num_fc_layer = sum(
        [int(len(v.size()) == 2) for d, v in state_dict.items()])

    state_key = [k for k, v in state_dict.items()]
        
    cfg = []
    first = True
    for d, v in state_dict.items():
        #print(v.data.size())    
        if len(v.data.size()) == 4 or len(v.data.size()) ==2:
            if first:
                first = False
                cfg.append(v.data.size()[1]) 
            cfg.append(v.data.size()[0])
    

    assert num_cnn_layer + num_fc_layer == len(cfg) - 1
    
    net = ConvNet(cfg, num_cnn_layer)
         
#    masks = []

    for i, p in enumerate(net.parameters()):
        
        p.data = state_dict[ state_key[i] ]
        
        if len(p.data.size()) == 4:
            pass
            #p_np = p.data.cpu().numpy()
            
            #masks.append(np.ones(p_np.shape).astype('float32'))
                    
            #value_this_layer = np.abs(p_np).sum(axis=(2,3))        
                                    
#            for j in range(len(value_this_layer)):
#                
#                for k in range(len(value_this_layer[0])):
#                    
#                    if abs( value_this_layer[j][k] ) < 1e-4:
#                    
#                        masks[-1][j][k] = 0.
                        
        elif len(p.data.size()) == 2:
            pass
            #p_np = p.data.cpu().numpy()
            
            #masks.append(np.ones(p_np.shape).astype('float32'))
                    
            #value_this_layer = np.abs(p_np)   
                                    
#            for j in range(len(value_this_layer)):
#                
#                for k in range(len(value_this_layer[0])):
#                    
#                    if abs( value_this_layer[j][k] ) < 1e-4:
#                    
#                        masks[-1][j][k] = 0.                                        
                        
    #net.set_masks(masks)   
    
    
    ## Retraining    
    loader_train, loader_test = load_dataset()
    
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(net.parameters(), lr=param['learning_rate'], 
                                    weight_decay=param['weight_decay'])
    #if num_epochs > 0:
    #    test(net, loader_test)
    
    #train(net, criterion, optimizer, param, loader_train)
    
    test_acc_list = []

    for t in range(num_epochs):

        param['num_epochs'] = 10
        train(net, criterion, optimizer, param, loader_train)

        #print("--- After training ---")

        test_acc_list.append(test(net, loader_test))

    plt.plot(test_acc_list)

    with open('baseline_result.csv', 'a', newline='') as csvfile:
        writer = csv.writer(csvfile)
        for row in test_acc_list:
            writer.writerow([row])
def retrain(state_dict, part=1, num_epochs=5):

    # Hyper Parameters
    param = {
        'batch_size': 4,
        'test_batch_size': 50,
        'num_epochs': num_epochs,
        'learning_rate': 0.001,
        'weight_decay': 5e-4,
    }

    num_cnn_layer = sum(
        [int(len(v.size()) == 4) for d, v in state_dict.items()])

    num_fc_layer = sum(
        [int(len(v.size()) == 2) for d, v in state_dict.items()])

    state_key = [k for k, v in state_dict.items()]

    cfg = []
    first = True
    for d, v in state_dict.items():
        #print(v.data.size())
        if len(v.data.size()) == 4 or len(v.data.size()) == 2:
            if first:
                first = False
                cfg.append(v.data.size()[1])
            cfg.append(v.data.size()[0])

    assert num_cnn_layer + num_fc_layer == len(cfg) - 1

    net = ConvNet(cfg, num_cnn_layer, part)

    masks = []
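    # Build binary masks from the loaded weights: for each 4-D conv weight the
    # absolute values are summed over the two spatial axes and the whole
    # (out-channel, in-channel) kernel slice is zeroed when that sum falls
    # below 1e-4; for 2-D fc weights the same threshold is applied per entry.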

    for i, p in enumerate(net.parameters()):

        p.data = state_dict[state_key[i]]

        if len(p.data.size()) == 4:

            p_np = p.data.cpu().numpy()

            masks.append(np.ones(p_np.shape).astype('float32'))

            value_this_layer = np.abs(p_np).sum(axis=(2, 3))

            for j in range(len(value_this_layer)):

                for k in range(len(value_this_layer[0])):

                    if abs(value_this_layer[j][k]) < 1e-4:

                        masks[-1][j][k] = 0.

        elif len(p.data.size()) == 2:

            p_np = p.data.cpu().numpy()

            masks.append(np.ones(p_np.shape).astype('float32'))

            value_this_layer = np.abs(p_np)

            for j in range(len(value_this_layer)):

                for k in range(len(value_this_layer[0])):

                    if abs(value_this_layer[j][k]) < 1e-4:

                        masks[-1][j][k] = 0.

    net.set_masks(masks)

    ## Retraining
    loader_train, loader_test = load_dataset()

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(net.parameters(),
                                    lr=param['learning_rate'],
                                    weight_decay=param['weight_decay'])
    #if num_epochs > 0:
    #    test(net, loader_test)

    train(net, criterion, optimizer, param, loader_train)

    for i, p in enumerate(net.parameters()):

        state_dict[state_key[i]] = p.data
        #print(p.data == state_dict[ state_key[i] ])

    #print("--- After retraining ---")
    #test(net, loader_test)

    #return net.state_dict()
    return state_dict
            else:
                batch_e_i = batch_s_i + batch_size

            # print("batch_s_i: ", batch_s_i)
            # print("batch_e_i: ", batch_e_i)

            x_batch = X[batch_s_i:batch_e_i]
            y_batch = Y[batch_s_i:batch_e_i]

            x_batch = np.concatenate(x_batch, axis=0)
            y_batch = np.concatenate(y_batch, axis=0)
            yield x_batch, y_batch


if __name__ == '__main__':
    from models import ConvNet

    game_index_now = 10
    replays_paths = 'replays'
    batch_size = 2
    epoch = 20
    cnn = ConvNet(num_classes=2, lr=1e-3)

    random_samples, step_size = load_replays(
        game_index_now,
        pos_sample_factor=1.0,
        max_N=None,
        valid_game_index_range=float('inf'))
    cnn_data_loader = data_loader(batch_size, random_samples, step_size)
    cnn.train_model(cnn_data_loader, epoch, step_size)
Example #20
def Solver(train, test, Debug, batch_size, lr
          , smoothing_constant, num_fc1, num_fc2, num_outputs, epochs, SNR
          , sl, pool_type ,pool_size ,pool_stride, params_init=None, period=None):
    
    num_examples = train.shape[0]
    # Convert the training-set data types
    y = nd.array(~train.sigma.isnull() +0)
    X = nd.array(Normolise(train.drop(['mass','positions','gaps','max_peak','sigma','SNR_mf','SNR_mf0'],axis=1)))
    print('Label for training:', y.shape)
    print('Dataset for training:', X.shape, end='\n\n')

    dataset_train = gluon.data.ArrayDataset(X, y)
    train_data = gluon.data.DataLoader(dataset_train, batch_size, shuffle=True, last_batch='keep')

    y = nd.array(~test.sigma.isnull() +0)
    X = nd.array(Normolise(test.drop(['mass','positions','gaps','max_peak','sigma','SNR_mf','SNR_mf0'],axis=1)))
    print('Label for testing:', y.shape)
    print('Dataset for testing:', X.shape, end='\n\n')
    
    # Use the data module to read the data and build the test set (shuffled).
    dataset_test = gluon.data.ArrayDataset(X, y)
    test_data = gluon.data.DataLoader(dataset_test, batch_size, shuffle=True, last_batch='keep')

    
    # Train
    loss_history = []
    loss_v_history = []
    moving_loss_history = []
    test_accuracy_history = []
    train_accuracy_history = []
    
#     assert period >= batch_size and period % batch_size == 0
    
    # Initialize parameters
    if params_init:
        print('Loading params...')
        params = params_init

        [W1, b1, W2, b2, W3, b3, W4, b4, W5, b5, W6, b6] = params

        # random fc layers
        weight_scale = .01
        W7 = nd.random_normal(loc=0, scale=weight_scale, shape=(sl, num_fc1), ctx=ctx )
        W8 = nd.random_normal(loc=0, scale=weight_scale, shape=(num_fc1, num_fc2), ctx=ctx )        
        W9 = nd.random_normal(loc=0, scale=weight_scale, shape=(num_fc2, num_outputs), ctx=ctx )
        b7 = nd.random_normal(shape=num_fc1, scale=weight_scale, ctx=ctx)
        b8 = nd.random_normal(shape=num_fc2, scale=weight_scale, ctx=ctx)    
        b9 = nd.random_normal(shape=num_outputs, scale=weight_scale, ctx=ctx)  

        params = [W1, b1, W2, b2, W3, b3, W4, b4, W5, b5, W6, b6]
        print('Randomly re-initializing the FC1 & FC2 layers...')

        vs = []
        sqrs = [] 
        for param in params:
            param.attach_grad()
            vs.append(param.zeros_like())
            sqrs.append(param.zeros_like())              
    else:
        params, vs, sqrs = init_params(num_fc1 = 64, num_fc2 = 64, num_outputs = 2, sl=sl)
        print('Initializing weights randomly...')

    # Debug
    if Debug:
        print('Debuging...')
        if params_init:
            params = params_init
        else:
            params, vs, sqrs = init_params(num_fc1 = 64, num_fc2 = 64, num_outputs = 2, sl=sl)
        for data, _ in train_data:
            data = data.as_in_context(ctx).reshape((batch_size,1,1,-1))
            break
        print(pool_type, pool_size, pool_stride)
        _, _ = ConvNet(data, params, debug=Debug, pool_type=pool_type,pool_size = pool_size,pool_stride=pool_stride)
        print()
    
#     total_loss = [Total_loss(train_data_10, params, batch_size, num_outputs)]
    
    t = 0
#   Epoch starts from 1.
    print('pool_type: ', pool_type)
    print('pool_size: ', pool_size)
    print('pool_stride: ', pool_stride)
    print('sl: ', sl)
    for epoch in range(1, epochs + 1):
        Epoch_loss = []
#         Learning-rate self-decay.
        if epoch > 2:
#             lr *= 0.1
            lr /= (1+0.01*epoch)
        for batch_i, ((data, label),(data_v, label_v)) in enumerate(zip(train_data, test_data)):
            data = data.as_in_context(ctx).reshape((data.shape[0],1,1,-1))
            label = label.as_in_context(ctx)
            label_one_hot = nd.one_hot(label, num_outputs)
            with autograd.record():
                output, _ = ConvNet(data, params, pool_type=pool_type,pool_size = pool_size,pool_stride=pool_stride)
                loss = softmax_cross_entropy(output, label_one_hot)
            loss.backward()
#             print(output)
            # params = sgd(params, lr, batch_size)

#           Increment t before invoking adam.
            t += 1
            params, vs, sqrs = adam(params, vs, sqrs, lr, batch_size, t)

            data_v = data_v.as_in_context(ctx).reshape((data_v.shape[0],1,1,-1))
            label_v = label_v.as_in_context(ctx)
            label_v_one_hot = nd.one_hot(label_v, num_outputs)
            output_v, _ = ConvNet(data_v, params, pool_type=pool_type,pool_size = pool_size,pool_stride=pool_stride)
            loss_v = softmax_cross_entropy(output_v, label_v_one_hot)            
            
#             #########################
#              Keep a moving average of the losses
#             #########################
            curr_loss = nd.mean(loss).asscalar()
            curr_loss_v = nd.mean(loss_v).asscalar()
            moving_loss = (curr_loss if ((batch_i == 0) and (epoch-1 == 0))
                           else (1 - smoothing_constant) * moving_loss + (smoothing_constant) * curr_loss)

            loss_history.append(curr_loss)
            loss_v_history.append(curr_loss_v)
            moving_loss_history.append(moving_loss)
            Epoch_loss.append(curr_loss)
#             if batch_i * batch_size % period == 0:
#                 print('Curr_loss: ', curr_loss)
                
            print('Working on epoch %d. Curr_loss: %.5f (complete percent: %.2f/100' %(epoch, curr_loss*1.0, 1.0 * batch_i / (num_examples//batch_size) * 100) +')' , end='')
            sys.stdout.write("\r")
            # print('{"metric": "Training Loss for ALL", "value": %.5f}' %(curr_loss*1.0) )
            # print('{"metric": "Testing Loss for ALL", "value": %.5f}' %(curr_loss_v*1.0) )
#             print('{"metric": "Training Loss for SNR=%s", "value": %.5f}' %(str(SNR), curr_loss*1.0) )
#             print('{"metric": "Testing Loss for SNR=%s", "value": %.5f}' %(str(SNR), curr_loss_v*1.0) )
        train_accuracy = evaluate_accuracy(train_data, num_examples, batch_size, params, ConvNet,pool_type=pool_type,pool_size = pool_size,pool_stride=pool_stride)
        test_accuracy = evaluate_accuracy(test_data, num_examples, batch_size, params, ConvNet,pool_type=pool_type,pool_size = pool_size,pool_stride=pool_stride)
        test_accuracy_history.append(test_accuracy)
        train_accuracy_history.append(train_accuracy)


        print("Epoch %d, Moving_loss: %.6f, Epoch_loss(mean): %.6f, Train_acc %.4f, Test_acc %.4f" %
              (epoch, moving_loss, np.mean(Epoch_loss), train_accuracy, test_accuracy))
#         print('{"metric": "Train_acc. for SNR=%s in epoches", "value": %.4f}' %(str(SNR), train_accuracy) )
#         print('{"metric": "Test_acc. for SNR=%s in epoches", "value": %.4f}' %(str(SNR), test_accuracy) )
        yield (params, loss_history, loss_v_history, moving_loss_history, test_accuracy_history, train_accuracy_history)
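Since Solver is a generator that yields its state once per epoch, a caller can iterate over it directly. The sketch below is illustrative only; the DataFrames and hyperparameter values are assumptions, not taken from the example above.

for (params, loss_history, loss_v_history, moving_loss_history,
     test_accuracy_history, train_accuracy_history) in Solver(
        train_df, test_df, Debug=False, batch_size=64, lr=1e-3,
        smoothing_constant=0.01, num_fc1=64, num_fc2=64, num_outputs=2,
        epochs=10, SNR=None, sl=128, pool_type='max', pool_size=2,
        pool_stride=2):
    print('latest test accuracy:', test_accuracy_history[-1])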
Example #21
    args = parser.parse_args()
    data_dir, model_dir = get_data_and_model_dir(args.model)
    json_path = os.path.join(model_dir, 'params.json')
    params = utils.Params(json_path)

    params.device = "cuda" if torch.cuda.is_available() else "cpu"
    params.seed = args.seed

    params.writer = SummaryWriter()
    # set random seed for reproducibility
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if params.device == "cuda":
        torch.cuda.manual_seed(args.seed)

    model_and_loss = {
        'cnn': (ConvNet(params), cnn_loss),
        'capsule': (CapsuleNet(params), capsule_loss),
        'darknet_d': (DarkNetD(params), dark_d_loss),
        'darknet_r': (DarkNetR(params), dark_r_loss),
        'darkcapsule': (DarkCapsuleNet(params), darkcapsule_loss),
    }
    model, loss_fn = model_and_loss[args.model]
    if args.summary:
        summary(model, config.input_shape[args.model])

    optimizer = Adam(model.parameters(), lr=args.lr)
    if args.mode == 'train':
        train_and_evaluate(model, optimizer, loss_fn, params,
                           data_dir + '/train.p', data_dir + '/eval.p',
                           model_dir)
train_dataset = datasets.MNIST(root='../data/',
                               train=True,
                               download=False,
                               transform=transforms.ToTensor())
loader_train = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=param['batch_size'],
                                           shuffle=True)

test_dataset = datasets.MNIST(root='../data/',
                              train=False,
                              download=False,
                              transform=transforms.ToTensor())
loader_test = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=param['test_batch_size'],
                                          shuffle=True)

# Load the pretrained model
net = ConvNet()
net.load_state_dict(torch.load('models/convnet_pretrained1.pkl'))
#if torch.cuda.is_available():
#    print('CUDA enabled.')
#    net.cuda()

# Pretraining
#criterion = nn.CrossEntropyLoss()
#optimizer = torch.optim.RMSprop(net.parameters(), lr=param1['learning_rate'],
#                                weight_decay=param['weight_decay'])
#
#train(net, criterion, optimizer, param1, loader_train)

# Save and load the entire model
#torch.save(net.state_dict(), 'models/convnet_pretrained1.pkl')
Example #23
def example1():
    """ Train convnet and then save the model """
    DATASETS_DICT = './data'
    IMG_SIZE = CONFIG['img_size']

    # x_train = DataLoader.load(os.path.join(DATASETS_DICT, 'x_train_cats_dogs.npy'))
    # y_train = DataLoader.load(os.path.join(DATASETS_DICT, 'y_train_cats_dogs.npy'))
    # x_train = DataLoader.load(os.path.join(DATASETS_DICT, 'x_cats_dogs_skimage.npy'))
    # y_train = DataLoader.load(os.path.join(DATASETS_DICT, 'y_cats_dogs_skimage.npy'))

    # x_train = DataLoader.load(os.path.join(DATASETS_DICT, 'x_rps_skimage.npy'))
    # y_train = DataLoader.load(os.path.join(DATASETS_DICT, 'y_rps_skimage.npy'))
    x_train = DataLoader.load_npy(CONFIG['data']['x_path'])
    y_train = DataLoader.load_npy(CONFIG['data']['y_path'])

    x_train = torch.Tensor(x_train).view(-1, IMG_SIZE, IMG_SIZE)
    y_train = torch.Tensor(y_train)

    N_TRAIN = CONFIG['n_train']
    N_EVAL = CONFIG['n_eval']
    N_TEST = CONFIG['n_test']

    if N_TRAIN + N_EVAL + N_TEST > len(x_train):
        raise Exception('Not enough data!')

    # resnet50 works with 224, 244 input size
    n_output = 2
    net = ConvNet(n_output)
    optimizer = optim.Adam(net.parameters(), lr=1e-3)
    loss_function = nn.MSELoss()

    # split data
    x_eval = x_train[:N_EVAL]
    y_eval = y_train[:N_EVAL]

    x_test = x_train[N_EVAL:N_EVAL + N_TEST]
    y_test = y_train[N_EVAL:N_EVAL + N_TEST]

    x_train = x_train[N_EVAL + N_TEST:N_EVAL + N_TEST + N_TRAIN]
    y_oracle = y_train[N_EVAL + N_TEST:N_EVAL + N_TEST + N_TRAIN]

    # show_grid_imgs(x_train[:16], y_oracle[:16], (4, 4))

    EPOCHS = 10
    BATCH_SIZE = 128

    print('Start training')
    for epoch in range(EPOCHS):
        for k in tqdm(range(0, len(x_train), BATCH_SIZE)):
            batch_x = x_train[k:k + BATCH_SIZE].view(-1, 1, IMG_SIZE, IMG_SIZE)
            batch_y = y_oracle[k:k + BATCH_SIZE]

            net.zero_grad()

            out = net(batch_x)
            loss = loss_function(out, batch_y)
            loss.backward()
            optimizer.step()

        print(f'Epoch: {epoch}. Loss: {loss}')

    correct = 0
    total = 0

    with torch.no_grad():
        for k in tqdm(range(len(x_test))):
            real_class = torch.argmax(y_test[k])
            net_out = net(x_test[k].view(-1, 1, IMG_SIZE,
                                         IMG_SIZE))[0]  # batch of one; take its single output
            predicted_class = torch.argmax(net_out)

            if predicted_class == real_class:
                correct += 1
            total += 1

    print('Accuracy: ', round(correct / total, 3))

    torch.save(net, f'{DATASETS_DICT}/cnn_rps_model.pt')
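    # The full module object is saved above; a later session could restore it
    # with net = torch.load(f'{DATASETS_DICT}/cnn_rps_model.pt'); recent
    # PyTorch versions may additionally need weights_only=False for
    # full-module checkpoints.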
Example #24
train_dataset = datasets.MNIST(root='../data/',
                               train=True,
                               download=True,
                               transform=transforms.ToTensor())
loader_train = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=param['batch_size'],
                                           shuffle=True)

test_dataset = datasets.MNIST(root='../data/',
                              train=False,
                              download=True,
                              transform=transforms.ToTensor())
loader_test = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=param['test_batch_size'],
                                          shuffle=True)

# Load the pretrained model
net = ConvNet()
net.load_state_dict(torch.load('models/convnet_pretrained.pkl'))
if torch.cuda.is_available():
    print('CUDA enabled.')
    net.cuda()
print("--- Pretrained network loaded ---")
test(net, loader_test)

# prune the weights
masks = filter_prune(net, param['pruning_perc'])
net.set_masks(masks)
print("--- {}% parameters pruned ---".format(param['pruning_perc']))
test(net, loader_test)
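# filter_prune and set_masks are defined elsewhere in that project. A minimal
# sketch of what magnitude-based filter pruning typically computes, assuming
# the returned masks are later multiplied into the conv weights and that nn
# refers to torch.nn as in the rest of this file (illustrative names, not the
# project's actual API):
def magnitude_filter_masks(model, prune_perc):
    masks = []
    for layer in model.modules():
        if isinstance(layer, nn.Conv2d):
            weight = layer.weight.data
            # L2 norm of every output filter
            norms = weight.view(weight.size(0), -1).norm(p=2, dim=1)
            n_prune = int(weight.size(0) * prune_perc / 100.0)
            threshold = norms.sort().values[n_prune] if n_prune > 0 else -1.0
            # Keep filters whose norm exceeds the threshold
            mask = (norms > threshold).float().view(-1, 1, 1, 1).expand_as(weight)
            masks.append(mask)
    return masks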

# Retraining
criterion = nn.CrossEntropyLoss()
        for i in range(self.num_stacks):
            self.stack_frames(image_processed)
        return self.buffer.copy()

    def get_grid(self):
        stacked = np.expand_dims(self.buffer, 1)
        imgs_tensor = torch.tensor(stacked)
        grid_image = utils.make_grid(imgs_tensor, 1)
        return grid_image.numpy().transpose((1, 2, 0))

if __name__ == '__main__':
    save_images = True
    env = gym.make("Breakout-v0")
    obs = env.reset()
    f = Frame(640, 480, 4)
    for i in range(40):
        if i == 0:
            f.step(env, 1)
        obs, reward, done, info = f.step(env)
        if save_images:
            if i % 4 == 0:
                cur_date = datetime.now().isoformat()
                # save in temp directory
                file_save_path = get_temp_dir("image {:0>2}.jpg".format(i))
                cv2.imwrite(file_save_path, f.get_grid())
                print("file saved at:", file_save_path)

    input_buffer = torch.unsqueeze(torch.Tensor(obs), dim=0)
    model = ConvNet(f.observation_shape, 4)
    print(model(input_buffer))
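# Frame's constructor and stack_frames are cut off in this snippet. A minimal
# sketch of the usual frame-stacking buffer it appears to implement, assuming
# a fixed-length deque of preprocessed frames (illustrative, not the
# project's exact code):
from collections import deque

class FrameStackSketch:
    def __init__(self, height, width, num_stacks):
        self.num_stacks = num_stacks
        self.buffer = deque([np.zeros((height, width), dtype=np.float32)] * num_stacks,
                            maxlen=num_stacks)

    def stack_frames(self, frame):
        # Oldest frame falls out, newest is appended.
        self.buffer.append(frame)

    def get_state(self):
        return np.stack(self.buffer, axis=0)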
Example #26
def train(name):
    record = pd.DataFrame(data=np.zeros((1, 4), dtype=float),
                          columns=['precision', 'accuracy', 'recall', 'F1'])
    for _ in range(opt.runs):
        seed = random.randint(1, 10000)
        print("Random Seed: ", seed)
        torch.manual_seed(seed)

        # mkdirs for checkpoints output
        os.makedirs(opt.checkpoints_folder, exist_ok=True)
        os.makedirs('%s/%s' % (opt.checkpoints_folder, name), exist_ok=True)
        os.makedirs('report_metrics', exist_ok=True)

        root_dir = 'report_metrics/%s_aug_%s_IMBA/%s' % (
            opt.model, str(opt.n_group), name)
        os.makedirs(root_dir, exist_ok=True)

        # Load the dataset
        path = 'UCRArchive_2018/' + name + '/' + name + '_TRAIN.tsv'
        train_set, n_class = load_ucr(path)

        print('Balanced data augmentation enabled!')
        stratified_train_set = stratify_by_label(train_set)
        data_aug_set = data_aug_by_dft(stratified_train_set, opt.n_group)
        total_set = np.concatenate((train_set, data_aug_set))
        print('Shape of total set', total_set.shape)
        dataset = UcrDataset(total_set, channel_last=opt.channel_last)

        batch_size = int(min(len(dataset) / 10, 16))
        dataloader = UCR_dataloader(dataset, batch_size)

        # Common behavior
        seq_len = dataset.get_seq_len()  # initialize the sequence length
        # Create the classifier, loss function, and optimizer
        if opt.model == 'r':
            net = ResNet(n_in=seq_len, n_classes=n_class).to(device)
        if opt.model == 'f':
            net = ConvNet(n_in=seq_len, n_classes=n_class).to(device)
        criterion = nn.CrossEntropyLoss().to(device)
        optimizer = optim.Adam(net.parameters(), lr=opt.lr)
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                         mode='min',
                                                         factor=0.5,
                                                         patience=50,
                                                         min_lr=0.0001)

        min_loss = 10000
        print('############# Start to Train ###############')
        net.train()
        for epoch in range(opt.epochs):
            for i, (data, label) in enumerate(dataloader):
                data = data.float()
                data = data.to(device)
                label = label.long()
                label = label.to(device)
                optimizer.zero_grad()
                output = net(data)
                loss = criterion(output, label.view(label.size(0)))
                loss.backward()
                optimizer.step()
                scheduler.step(loss)
                # print('[%d/%d][%d/%d] Loss: %.8f ' % (epoch, opt.epochs, i + 1, len(dataloader), loss.item()))
            if loss < min_loss:
                min_loss = loss
                # End of the epoch, save the model
                print('MinLoss: %.10f Saving the best epoch model.....' %
                      min_loss)
                torch.save(
                    net, '%s/%s/%s_%s_best_IMBA.pth' %
                    (opt.checkpoints_folder, name, opt.model, str(
                        opt.n_group)))
        net_path = '%s/%s/%s_%s_best_IMBA.pth' % (opt.checkpoints_folder, name,
                                                  opt.model, str(opt.n_group))
        one_record = eval_accuracy(net_path, name)
        print('The minimum loss is %.8f' % min_loss)
        record = record.append(one_record, ignore_index=True)
    record = record.drop(index=[0])
    record.loc['mean'] = record.mean()
    record.loc['std'] = record.std()
    record.to_csv(root_dir + '/metrics.csv')
    # all_reprot_metrics.loc[name, 'acc_mean'] = record.at['mean', 'accuracy']
    # all_reprot_metrics.loc[name, 'acc_std'] = record.at['std', 'accuracy']
    # all_reprot_metrics.loc[name, 'F1_mean'] = record.at['mean', 'F1']
    # all_reprot_metrics.loc[name, 'F1_std'] = record.at['std', 'F1']

    print('\n')
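# stratify_by_label and data_aug_by_dft above come from that project and are
# not shown in this snippet. As a rough sketch of the stratification step
# only, assuming the UCR convention that the class label sits in column 0 of
# each row (illustrative, not the project's actual implementation):
def stratify_by_label_sketch(dataset):
    labels = dataset[:, 0]
    # One sub-array per class, so augmentation can be balanced per label.
    return [dataset[labels == lab] for lab in np.unique(labels)]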
Example #27
    OURs_modified = ConvNet(
        conv_params={
            'kernel': ((1, 16), (1, 8), (1, 8)),
            'num_filter': (32, 64, 128),
            'stride': ((1, 1), (1, 1), (1, 1)),
            'padding': ((0, 0), (0, 0), (0, 0)),
            'dilate': ((1, 1), (1, 1), (1, 1))
        },
        act_params={'act_type': ('elu', 'elu', 'elu', 'elu')},
        pool_params={
            'pool_type': ('max', 'max', 'max'),
            'kernel': ((1, 4), (1, 4), (1, 4)),
            'stride': ((1, 2), (1, 2), (1, 2)),
            'padding': ((0, 0), (0, 0), (0, 0)),
            'dilate': ((1, 1), (1, 1), (1, 1))
        },
        fc_params={'hidden_dim': (128, )},
        drop_prob=0,
        # input_dim = (2, 1, 8192)
        input_dim=(1, 1, 8192))
    print('num_layers:', num_layers)

    param = nd.load(pretrained_add + param_add)

    OURS_ori = ConvNet(
        conv_params={
            'kernel': ((1, 16), ) + ((1, 8), ) * (num_layers - 1),
            'num_filter': temp(1 + (num_layers - 1)),
            'stride': ((1, 1), ) + ((1, 1), ) * (num_layers - 1),
            'padding': ((0, 0), ) + ((0, 0), ) * (num_layers - 1),
            'dilate': ((1, 1), ) + ((1, 1), ) * (num_layers - 1)
        },
        act_params={
            'act_type': ('relu', ) * 2 + ('relu', ) * (num_layers - 1)
        },
        pool_params={
            'pool_type': ('avg', ) + ('avg', ) * (num_layers - 1),
            'kernel': ((1, 16), ) + ((1, 16), ) * (num_layers - 1),
            'stride': ((1, 2), ) + ((1, 2), ) * (num_layers - 1),
            'padding': ((0, 0), ) + ((0, 0), ) * (num_layers - 1),
            'dilate': ((1, 1), ) + ((1, 1), ) * (num_layers - 1)
        },
        fc_params={'hidden_dim': (64, )},
        drop_prob=0,
        params_inits=param,
        input_dim=(1, 1, 8192))

    auc_list = []
    snr_list = np.linspace(0.1, 1, 10)
    j = 0
    while True:
Example #29
    OURS_ori = ConvNet(
        conv_params={
            'kernel': ((1, 16), (1, 8), (1, 8)),
            'num_filter': (16, 32, 64),
            'stride': ((1, 1), (1, 1), (1, 1)),
            'padding': ((0, 0), (0, 0), (0, 0)),
            'dilate': ((1, 1), (1, 1), (1, 1))
        },
        act_params={'act_type': ('relu', 'relu', 'relu', 'relu')},
        pool_params={
            'pool_type': ('avg', 'avg', 'avg'),
            'kernel': ((1, 16), (1, 16), (1, 16)),
            'stride': ((1, 2), (1, 2), (1, 2)),
            'padding': ((0, 0), (0, 0), (0, 0)),
            'dilate': ((1, 1), (1, 1), (1, 1))
        },
        fc_params={'hidden_dim': (64, )},
        drop_prob=0,
        params_inits=param,
        input_dim=(1, 1, 8192))
    args = parse_args()

    # unpack args
    device = args.device
    epoch = 1
    lmbda = args.lmbda
    lr = args.lr
    criterion = make_criterion(args)

    train_loss_tracker, train_acc_tracker = [], []
    test_loss_tracker, test_acc_tracker = [], []

    # ADD FILENAMES FOR MODEL WEIGHTS TO QUANTIZE AND EVALUATE THEM
    filenames = ['control']

    experiment_net = ConvNet()
    experiment_net = experiment_net.to(device)
    base_accuracies = []
    for h in range(len(filenames)):
        experiment_net.load_state_dict(torch.load(filenames[h] + '.pt'))
        print('Test Accuracy without Quantization for ' + filenames[h] + '.pt')
        acc = test(experiment_net, testloader, criterion, epoch, lmbda,
                   test_loss_tracker, test_acc_tracker)
        base_accuracies.append(acc)
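    # The quantization step applied in the bitwidth loop below is not shown in
    # this snippet. A minimal sketch of symmetric uniform n-bit weight
    # quantization over a state_dict, assuming torch is imported as in the
    # rest of this file (illustrative helper, not the project's actual
    # routine):
    def quantize_state_dict_sketch(state_dict, n_bits):
        quantized = {}
        for name, tensor in state_dict.items():
            t = tensor.float()
            # Largest representable level of a signed n-bit integer grid
            scale = t.abs().max() / (2 ** (n_bits - 1) - 1)
            if scale == 0:
                quantized[name] = tensor.clone()
                continue
            # Snap each weight to the nearest grid point, then rescale
            quantized[name] = (torch.round(t / scale) * scale).to(tensor.dtype)
        return quantized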

    # CHANGE FOR LOOP RANGE TO QUANTIZE FOR DIFFERENT BITWIDTHS
    for n_bits in range(4, 9):
        print('{} BITWIDTH'.format(n_bits))
        # L1 AND L2
        for n in range(len(filenames)):
            experiment_net.load_state_dict(torch.load(filenames[n] + '.pt'))