Example #1
			entropy += dist.entropy().mean()
			
			log_probs.append(log_prob)
			values.append(value)
			rewards.append(torch.FloatTensor(reward).unsqueeze(1).to(device))
			masks.append(torch.FloatTensor(1 - done).unsqueeze(1).to(device))
			
			states.append(state)
			actions.append(action)
			
			state = next_state
			frame_idx += 1
			
			if frame_idx % 1000 == 0:
				test_reward = np.mean([test_env() for _ in range(1)])
				logger.log_scalar('Reward', test_reward, frame_idx)
	#				plot(frame_idx, test_rewards)

		next_state = torch.FloatTensor(next_state).to(device)
		_, next_value = model(next_state)
		returns = compute_gae(next_value, rewards, masks, values)

		returns   = torch.cat(returns).detach()
		log_probs = torch.cat(log_probs).detach()
		values    = torch.cat(values).detach()
		states    = torch.cat(states)
		actions   = torch.cat(actions)
		advantage = returns - values
		ppo_update(ppo_epochs, mini_batch_size, states, actions, log_probs, returns, advantage)
	# save model when training ends
	torch.save(model.state_dict(), 'model.pth')
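
compute_gae and test_env are helpers defined elsewhere in this project. For context, a typical generalized advantage estimation routine matching the call compute_gae(next_value, rewards, masks, values) looks roughly like the sketch below; the gamma and tau defaults are illustrative, not taken from this repository.

def compute_gae(next_value, rewards, masks, values, gamma=0.99, tau=0.95):
    # walk the rollout backwards, accumulating the GAE advantage,
    # and return per-step returns (advantage + value baseline)
    values = values + [next_value]
    gae = 0
    returns = []
    for step in reversed(range(len(rewards))):
        delta = rewards[step] + gamma * values[step + 1] * masks[step] - values[step]
        gae = delta + gamma * tau * masks[step] * gae
        returns.insert(0, gae + values[step])
    return returns
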
Example #2
def main():
    global args
    args = parser.parse_args()
    
    torch.manual_seed(args.seed)
    torch.cuda.set_device(0)


    #build model
    model = CausualLSTM((IMG_HEIGHT, IMG_WIDTH), IMG_CHANNELS, args.nhid, args.kernel_size, args.nlayers,
                     True, args.seq_length, args.input_length).cuda()

    model = torch.nn.DataParallel(model, device_ids=[0, 2, 3]).cuda()

    l2_loss = nn.MSELoss(reduction='sum').cuda()
    l1_loss = nn.L1Loss(reduction='sum').cuda()
    criterion = (l1_loss,l2_loss)

    if args.test:
        test_input_param = {'path': os.path.join(args.path,'moving-mnist-test.npz'),
                            'minibatch_size': args.batch_size,
                            'input_data_type': 'float32',
                            'is_output_sequence': True}
        test_input_handle = InputHandle(test_input_param)
        test_input_handle.begin(do_shuffle = False)
        # load the model.
        with open(args.save, 'rb') as f:
            model = torch.load(f)
        #run the model on test data.
        test_mae, test_mse, test_psnr = evaluate(model,test_input_handle,criterion)
        print('=' * 120)
        print('| test mae {:5.2f} | test mse {:5.2f} | test psnr {:5.2f}|'.format(
            test_mae, test_mse, test_psnr))
        print('=' * 120)
        return 

    optimizer = torch.optim.Adamax(model.parameters(), lr = args.lr, betas=(0.9, 0.999))
    #load data
  
    
    logger = Logger(os.path.join('./log','convLSTM'))
    train_input_param = {'path': os.path.join(args.path,'moving-mnist-train.npz'),
                            'minibatch_size': args.batch_size,
                            'input_data_type': 'float32',
                            'is_output_sequence': True}
    train_input_handle = InputHandle(train_input_param)

    valid_input_param = {'path': os.path.join(args.path,'moving-mnist-valid.npz'),
                            'minibatch_size': args.batch_size,
                            'input_data_type': 'float32',
                            'is_output_sequence': True}
    valid_input_handle = InputHandle(valid_input_param)

    best_val_loss = None

    # sanity check: run evaluate once before training starts

    valid_mae,valid_mse,valid_psnr = evaluate(model,valid_input_handle,criterion)

    for epoch in range(1, args.epochs+1):
        train_input_handle.begin(do_shuffle = True)
        epoch_start_time = time.time()
        train_loss, train_mae, train_mse = train(model,train_input_handle,criterion,optimizer,epoch)
        valid_mae,valid_mse,valid_psnr = evaluate(model,valid_input_handle,criterion)
        print('\n| end of epoch {:3d} | time: {:5.5f}s | valid mae {:5.2f} |' 
            ' valid mse {:5.2f} | valid psnr {:5.2f}'
                .format(epoch, (time.time() - epoch_start_time),valid_mae,valid_mse,valid_psnr))
        print('-' * 120)
        logger.log_scalar('train_loss',train_loss, epoch)
        logger.log_scalar('train_mae',train_mae, epoch)
        logger.log_scalar('train_mse',train_mse, epoch)
        logger.log_scalar('valid_mae',valid_mae,epoch)
        logger.log_scalar('valid_mse',valid_mse, epoch)
        logger.log_scalar('valid_psnr',valid_psnr, epoch)
        # Save the model if the validation loss is the best we've seen so far.
        if not best_val_loss or valid_mae+valid_mse < best_val_loss:
            with open(args.save, 'wb') as f:
                torch.save(model, f)
            best_val_loss = valid_mae+valid_mse
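
Every example on this page calls logger.log_scalar(tag, value, step), but the Logger class itself is never shown. A minimal TensorFlow 1.x-style implementation matching that interface is sketched below as an assumption; some examples omit the step argument or use extra methods such as log_text, so their actual Logger is richer than this.

import tensorflow as tf

class Logger(object):
    def __init__(self, log_dir):
        # one event-file writer per log directory
        self.writer = tf.summary.FileWriter(log_dir)

    def log_scalar(self, tag, value, step):
        # write a single scalar to TensorBoard at the given global step
        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
        self.writer.add_summary(summary, step)
        self.writer.flush()
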
Example #3
class ValidCallBack(keras.callbacks.Callback):
    def __init__(self):
        super(ValidCallBack, self).__init__()

        # h5py file containing validation features and validation word embeddings
        self.F = h5py.File("./processed_features/validation_features.h5", "r")
        self.val_features = self.F["data/features"]

        wordF = h5py.File("./processed_features/embeddings.h5", 'r')
        self.word_embed = wordF["data/word_embeddings"][:, :]
        self.word_names = map(lambda a: a[0], wordF["data/word_names"][:])
        wordF.close()

        self.image_fnames = map(lambda a: a[0], self.F["data/fnames"][:])

        print "[LOG] ValidCallBack: "
        print "val_feats: {} -- word_embed: {} -- word_names: {} -- image_fnames: {}".format(
            self.val_features.shape, self.word_embed.shape,
            len(self.word_names), len(self.image_fnames))

        # find all classes present in validation set
        validation_classes = [cl.split("/")[-2] for cl in self.image_fnames]

        # Keep only those word_embed and word_names that are present in dataset
        self.unique_classes = list(set(validation_classes))
        self.unique_classes_embed = []
        for cl in self.unique_classes:
            idx = self.word_names.index(cl)
            self.unique_classes_embed.append(self.word_embed[idx])
        self.unique_classes_embed = np.array(self.unique_classes_embed)
        self.unique_classes_embed = self.unique_classes_embed / np.linalg.norm(
            self.unique_classes_embed, axis=1, keepdims=True)

        self.mylogger = Logger("logs/top_{}".format(time()))

    def on_epoch_end(self, epoch, logs={}):

        accuracy_list = []
        for i in range(len(self.val_features)):

            trueclass = self.image_fnames[i].split("/")[-2]
            feat = self.val_features[i]

            preds = self.model.predict(feat.reshape((1, 4096)))
            preds = preds / np.linalg.norm(preds)

            diff = self.unique_classes_embed - preds
            diff = np.linalg.norm(diff, axis=1)

            # min_idx = sorted(range(len(diff)), key=lambda x: diff[x])
            min_idx = np.argsort(diff)
            min_idx = min_idx[0:3]

            # print "current image of class {} | is closest to embedding of words:".format(trueclass)
            closest_words = []
            for idx in min_idx:
                # print self.unique_classes[idx]
                closest_words.append(self.unique_classes[idx])

            # save filename, true class and closest words to accuracy_list
            accuracy_list.append([self.image_fnames[i], trueclass, closest_words])

        # Display accuracy
        top_1_acc = 0.0
        top_3_acc = 0.0
        for accuracy_data in accuracy_list:
            if accuracy_data[1] in accuracy_data[2][0:3]:  # --- Top 3 Accuracy ---
                top_3_acc += 1
                if accuracy_data[1] == accuracy_data[2][0]:  # --- Top 1 Accuracy ---
                    top_1_acc += 1

        top_1_acc = round(top_1_acc / len(accuracy_list), 3)
        top_3_acc = round(top_3_acc / len(accuracy_list), 3)

        print "top 1: {} | top 3: {} ".format(top_1_acc, top_3_acc)

        print epoch
        self.mylogger.log_scalar("top1", float(top_1_acc), epoch)
        self.mylogger.log_scalar("top3", float(top_3_acc), epoch)

    def custom_for_keras(self, ALL_word_embeds):
        ## only the top 20 rows from word_vectors is legit!
        def top_accuracy(true_word_indices, image_vectors):
            l2 = lambda x, axis: K.sqrt(
                K.sum(K.square(x), axis=axis, keepdims=True))
            l2norm = lambda x, axis: x / l2(x, axis)

            l2_words = l2norm(ALL_word_embeds, axis=1)
            l2_images = l2norm(image_vectors, axis=1)

            tiled_words = K.tile(K.expand_dims(l2_words, axis=1), (1, 200, 1))
            tiled_images = K.tile(K.expand_dims(l2_images, axis=1), (1, 20, 1))

            diff = K.squeeze(l2(l2_words - l2_images, axis=2), axis=2)

            # slice_top3 = lambda x: x[:, 0:3]
            # slice_top1 = lambda x: x[:, 0:1]

            diff_top5 = metrics.top_k_categorical_accuracy(tiled_images, diff)
            return diff_top5

        return top_accuracy
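
Like any Keras callback, ValidCallBack is attached through the callbacks argument of fit. A hypothetical usage sketch follows; the training array names are placeholders, not taken from the original project.

valid_cb = ValidCallBack()
model.fit(train_features, train_word_embeddings,   # placeholder data names
          batch_size=32, epochs=20,
          callbacks=[valid_cb])
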
Example #4
            if train_steps % max(1, args.train_interval //
                                 args.n_proc) == 0 and train_steps > 0:
                train_steps = 0
                dqn.update()

            obs = obs_new
            episode_duration += 1
            for j, terminal in enumerate(is_terminal):
                if terminal:
                    if is_mario:
                        try:
                            total_x_pos += info[j]['x_pos']
                            n_info += 1
                        except IndexError:
                            pass
                    TB_LOGGER.log_scalar(tag='Episode Duration',
                                         value=episode_duration[j])
                    episode_duration[j] = 0
                    i += 1
                    total_episodes += 1
                    if total_episodes % 5 == 0:
                        logging.info('Simulated %s episodes' % total_episodes)

            if i > episodes_per_epoch:
                break

        if is_mario:
            TB_LOGGER.log_scalar(tag='Mean End X', value=total_x_pos / n_info)
        TB_LOGGER.log_scalar(tag='Train Reward:', value=tot_succ / i)
        tot_succ = 0
        total_x_pos = 0
Example #5
def main():


    train_x, train_y, valid_x, valid_y, test_x, test_y = get_cifar10('./cifar-10-batches-py/')
    labels = unpickle('./cifar-10-batches-py/batches.meta')['label_names']

    train_x = train_x.astype(np.float32) / 255.0
    valid_x = valid_x.astype(np.float32) / 255.0
    test_x  = test_x.astype(np.float32) / 255.0


    num_epochs = args.epochs
    eta        = args.lr
    batch_size = args.batch_size

    # input 
    x = T.tensor4("x")
    y = T.ivector("y")
    
    # test values
    # x.tag.test_value = np.random.randn(6, 3, 32, 32).astype(np.float32)
    # y.tag.test_value = np.array([1,2,1,4,5]).astype(np.int32)
    # x.tag.test_value = x.tag.test_value / x.tag.test_value.max()

    # import ipdb; ipdb.set_trace()

    # network definition 
    conv1 = BinaryConv2D(input=x, num_filters=50, input_channels=3, size=3, strides=(1,1), padding=1,  name="conv1")
    act1  = Activation(input=conv1.output, activation="relu", name="act1")
    pool1 = Pool2D(input=act1.output, stride=(2,2), name="pool1")
    
    conv2 = BinaryConv2D(input=pool1.output, num_filters=100, input_channels=50, size=3, strides=(1,1), padding=1,  name="conv2")
    act2  = Activation(input=conv2.output, activation="relu", name="act2")
    pool2 = Pool2D(input=act2.output, stride=(2,2), name="pool2")

    conv3 = BinaryConv2D(input=pool2.output, num_filters=200, input_channels=100, size=3, strides=(1,1), padding=1,  name="conv3")
    act3  = Activation(input=conv3.output, activation="relu", name="act3")
    pool3 = Pool2D(input=act3.output, stride=(2,2), name="pool3")

    flat  = Flatten(input=pool3.output)
    fc1   = BinaryDense(input=flat.output, n_in=200*4*4, n_out=500, name="fc1")
    act4  = Activation(input=fc1.output, activation="relu", name="act4")
    fc2   = BinaryDense(input=act4.output, n_in=500, n_out=10, name="fc2")
    softmax  = Activation(input=fc2.output, activation="softmax", name="softmax")

    # loss
    xent     = T.nnet.nnet.categorical_crossentropy(softmax.output, y)
    cost     = xent.mean()

    # errors 
    y_pred   = T.argmax(softmax.output, axis=1)
    errors   = T.mean(T.neq(y, y_pred))

    # updates + clipping (+-1) 
    params   = conv1.params + conv2.params + conv3.params + fc1.params + fc2.params 
    params_bin = conv1.params_bin + conv2.params_bin + conv3.params_bin + fc1.params_bin + fc2.params_bin
    grads    = [T.grad(cost, param) for param in params_bin] # calculate grad w.r.t binary parameters

    updates  = []
    for p,g in zip(params, grads):
        updates.append(
                (p, clip_weights(p - eta*g)) #sgd + clipping update
            )

    # compiling train, predict and test fxns     
    train   = theano.function(
                inputs  = [x,y],
                outputs = cost,
                updates = updates
            )
    predict = theano.function(
                inputs  = [x],
                outputs = y_pred
            )
    test    = theano.function(
                inputs  = [x,y],
                outputs = errors
            )

    # train 
    checkpoint = ModelCheckpoint(folder="snapshots")
    logger = Logger("logs/{}".format(time()))
    for epoch in range(num_epochs):
        
        print "Epoch: ", epoch
        print "LR: ", eta
        epoch_hist = {"loss": []}
        
        t = tqdm(range(0, len(train_x), batch_size))
        for lower in t:
            upper = min(len(train_x), lower + batch_size)
            loss  = train(train_x[lower:upper], train_y[lower:upper].astype(np.int32))     
            t.set_postfix(loss="{:.2f}".format(float(loss)))
            epoch_hist["loss"].append(loss.astype(np.float32))
        
        # epoch loss
        average_loss = sum(epoch_hist["loss"])/len(epoch_hist["loss"])         
        t.set_postfix(loss="{:.2f}".format(float(average_loss)))
        logger.log_scalar(
                tag="Training Loss", 
                value= average_loss,
                step=epoch
                )

        # validation accuracy 
        val_acc  =  1.0 - test(valid_x, valid_y.astype(np.int32))
        print "Validation Accuracy: ", val_acc
        logger.log_scalar(
                tag="Validation Accuracy", 
                value= val_acc,
                step=epoch
                )  
        checkpoint.check(val_acc, params)

    # Report Results on test set (w/ best val acc file)
    best_val_acc_filename = checkpoint.best_val_acc_filename
    print "Using ", best_val_acc_filename, " to calculate best test acc."
    load_model(path=best_val_acc_filename, params=params)
    test_acc = 1.0 - test(test_x, test_y.astype(np.int32))    
    print "Test accuracy: ",test_acc
Example #6
            entropy += dist.entropy().mean()

            log_probs.append(log_prob)
            values.append(value)
            rewards.append(torch.FloatTensor(reward).unsqueeze(1).to(device))
            masks.append(torch.FloatTensor(1 - done).unsqueeze(1).to(device))

            states.append(state)
            actions.append(action)

            state = next_state
            frame_idx += 1

            if frame_idx % 1000 == 0:
                test_reward = np.mean([test_env() for _ in range(1)])
                logger.log_scalar('Reward', test_reward, frame_idx)
    #				plot(frame_idx, test_rewards)

        next_state = torch.FloatTensor(next_state).to(device)
        _, next_value = model(next_state)
        returns = compute_gae(next_value, rewards, masks, values)

        returns = torch.cat(returns).detach()
        log_probs = torch.cat(log_probs).detach()
        values = torch.cat(values).detach()
        states = torch.cat(states)
        actions = torch.cat(actions)
        advantage = returns - values
        ppo_update(ppo_epochs, mini_batch_size, states, actions, log_probs,
                   returns, advantage)
    # save model when training ends
Example #7
def main():
    tLog = Logger("./logs")
    trainFeatures=h5py.File("./data/train_feat.hdf5",'r')["train_feat"]
    trainLabels=h5py.File("./data/train_label.hdf5", 'r')['train_label']
    valFeatures=h5py.File("./data/val_feat_v2.hdf5",'r')['val_feat']
    valLabels=h5py.File("./data/val_label.hdf5", 'r')['val_label']
    print("Loading is done")
    print("trainFeatures.shape", trainFeatures.shape)
    print("trainLabels.shape", trainLabels.shape)
    print("valFeatures.shape", valFeatures.shape)
    print("valLabels.shape", valLabels.shape)

    batchSize=64
    model=stDecoder(batchSize, 512, 51)
    model=model.to(DEVICE)

    criterion = stAttentionLoss(0.1, 0.01)
    #criterion =nn.CrossEntropyLoss()
    criterion=criterion.to(DEVICE)

    optimizer=torch.optim.Adam(model.parameters())

    indexList=list(range(trainFeatures.shape[0]))
    batches=trainFeatures.shape[0]//batchSize
    epochs=50
    batchID=0

    val(model, valFeatures, valLabels, batchSize)
    for epoch in range(epochs):

        random.shuffle(indexList)
        begin=time.time()

        for j in range(batches):
            model.train()

            optimizer.zero_grad()

            batchIndexList=indexList[(j*batchSize):(j+1)*batchSize]
            batchIndexList.sort()

            videos=torch.from_numpy(trainFeatures[ batchIndexList ])
            videos=videos.to(DEVICE)

            labels=torch.from_numpy(trainLabels[ batchIndexList ]).long()
            labels=labels.to(DEVICE)

            logits, alphas, betas=model(videos)

            #print("alphas", alphas)
            #print("betas", betas[0])
            #print("logits", logits)
            #print("labels", labels)

            #loss = criterion(logits, labels)
            loss = criterion(logits, labels, alphas, betas)

            loss.backward()

            optimizer.step()

            batchID+=1
            if batchID%20 ==0 :
                print("batch %d loss is %f" %(batchID, loss.cpu().detach().numpy()))
                train_prediction = logits.cpu().detach().argmax(dim=1)
                train_accuracy = (train_prediction.numpy()==labels.cpu().numpy()).mean()
                print("train_accracy is %f" % train_accuracy)
                tr_info = { 'Train Loss': loss.cpu().detach().numpy(), 'Train Accuracy': train_accuracy }
                for tag, value in tr_info.items():
                    tLog.log_scalar(tag, value, batchID+1)

        end=time.time()

        print("Epoch %d training time: %.2fs" %(epoch,(end-begin)) )
        valAcc=val(model, valFeatures, valLabels, batchSize)
        val_info= {"Validation Accuracy": valAcc}
        for tag, value in val_info.items():
            tLog.log_scalar(tag, value, epoch+1)
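
The val helper used above is not part of this snippet. A sketch consistent with how it is called (batched evaluation returning top-1 accuracy; everything except the argument names is an assumption) could be:

def val(model, valFeatures, valLabels, batchSize):
    # evaluate in full batches, mirroring the training loop above
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for j in range(valFeatures.shape[0] // batchSize):
            videos = torch.from_numpy(valFeatures[j*batchSize:(j+1)*batchSize]).to(DEVICE)
            labels = torch.from_numpy(valLabels[j*batchSize:(j+1)*batchSize]).long().to(DEVICE)
            logits, alphas, betas = model(videos)
            correct += (logits.argmax(dim=1) == labels).sum().item()
            total += labels.size(0)
    accuracy = correct / max(total, 1)
    print("validation accuracy is %f" % accuracy)
    return accuracy
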
Example #8
        # cv2.imwrite("./test_results/test_t_input.png" ,
        #             np.uint8(input_image_t[:, :, 0:3]*255))  # output transmission layer
        # cv2.imwrite("./test_results/test_r_output.png",
        #             np.uint8(output_image_t[:, :, 0:3]*255))  # output reflection layer

    sum_p /= cnt
    sum_g /= cnt
    sum_d /= cnt
    sum_grad /= cnt
    sum_l1 /= cnt
    sum_loss /= cnt
    sum_ssim /= num_dev
    sum_psnr /= num_dev

    # print('==========', sum_p, sum_g, sum_d)
    logger.log_scalar('generator loss', sum_g, epoch)
    logger.log_scalar('perceptual loss', sum_p, epoch)
    logger.log_scalar('discriminator loss', sum_d, epoch)
    logger.log_scalar('gradient loss', sum_grad, epoch)
    logger.log_scalar('L1 loss', sum_l1, epoch)
    logger.log_scalar('total loss', sum_loss, epoch)
    logger.log_scalar('SSIM', sum_ssim, epoch)
    logger.log_scalar('PSNR', sum_psnr, epoch)

    # save model and images every epoch
    if epoch % ARGS.save_model_freq == 0:
        os.makedirs("%s/%04d" % (task, epoch))
        saver.save(sess, "%s/model.ckpt" % task)
        saver.save(sess, "%s/%04d/model.ckpt" % (task, epoch))
Example #9
def main():

    train_x, train_y, valid_x, valid_y, test_x, test_y = get_mnist()

    num_epochs = args.epochs
    eta = args.lr
    batch_size = args.batch_size

    # input
    x = T.matrix("x")
    y = T.ivector("y")

    #x.tag.test_value = np.random.randn(3, 784).astype("float32")
    #y.tag.test_value = np.array([1,2,3])
    #drop_switch.tag.test_value = 0
    #import ipdb; ipdb.set_trace()
    hidden_1 = BinaryDense(input=x, n_in=784, n_out=2048, name="hidden_1")
    act_1 = Activation(input=hidden_1.output, activation="relu", name="act_1")
    hidden_2 = BinaryDense(input=act_1.output,
                           n_in=2048,
                           n_out=2048,
                           name="hidden_2")
    act_2 = Activation(input=hidden_2.output, activation="relu", name="act_2")
    hidden_3 = BinaryDense(input=act_2.output,
                           n_in=2048,
                           n_out=2048,
                           name="hidden_3")
    act_3 = Activation(input=hidden_3.output, activation="relu", name="act_3")
    output = BinaryDense(input=act_3.output,
                         n_in=2048,
                         n_out=10,
                         name="output")
    softmax = Activation(input=output.output,
                         activation="softmax",
                         name="softmax")

    # loss
    xent = T.nnet.nnet.categorical_crossentropy(softmax.output, y)
    cost = xent.mean()

    # errors
    y_pred = T.argmax(softmax.output, axis=1)
    errors = T.mean(T.neq(y, y_pred))

    # updates + clipping (+-1)
    params_bin = hidden_1.params_bin + hidden_2.params_bin + hidden_3.params_bin
    params = hidden_1.params + hidden_2.params + hidden_3.params
    grads = [T.grad(cost, param)
             for param in params_bin]  # calculate grad w.r.t binary parameters
    updates = []
    for p, g in zip(
            params, grads
    ):  # gradient update on full precision weights (NOT binarized wts)
        updates.append((p, clip_weights(p - eta * g))  #sgd + clipping update
                       )

    # compiling train, predict and test fxns
    train = theano.function(inputs=[x, y], outputs=cost, updates=updates)
    predict = theano.function(inputs=[x], outputs=y_pred)
    test = theano.function(inputs=[x, y], outputs=errors)

    # train
    checkpoint = ModelCheckpoint(folder="snapshots")
    logger = Logger("logs/{}".format(time()))
    for epoch in range(num_epochs):

        print "Epoch: ", epoch
        print "LR: ", eta
        epoch_hist = {"loss": []}

        t = tqdm(range(0, len(train_x), batch_size))
        for lower in t:
            upper = min(len(train_x), lower + batch_size)
            loss = train(train_x[lower:upper],
                         train_y[lower:upper].astype(np.int32))
            t.set_postfix(loss="{:.2f}".format(float(loss)))
            epoch_hist["loss"].append(loss.astype(np.float32))

        # epoch loss
        average_loss = sum(epoch_hist["loss"]) / len(epoch_hist["loss"])
        t.set_postfix(loss="{:.2f}".format(float(average_loss)))
        logger.log_scalar(tag="Training Loss", value=average_loss, step=epoch)

        # validation accuracy
        val_acc = 1.0 - test(valid_x, valid_y.astype(np.int32))
        print "Validation Accuracy: ", val_acc
        logger.log_scalar(tag="Validation Accuracy", value=val_acc, step=epoch)
        checkpoint.check(val_acc, params)

    # Report Results on test set
    best_val_acc_filename = checkpoint.best_val_acc_filename
    print "Using ", best_val_acc_filename, " to calculate best test acc."
    load_model(path=best_val_acc_filename, params=params)
    test_acc = 1.0 - test(test_x, test_y.astype(np.int32))
    print "Test accuracy: ", test_acc
Example #10
class TensorboardCallback(Callback):
    def __init__(self,
                 path,
                 args=None,
                 events_dir=None,
                 max_step=None,
                 save_period=10):
        self.save_period = save_period
        self.path = path
        train_dir = os.path.join(path, 'training')
        if not os.path.exists(train_dir): os.makedirs(train_dir)
        self.train_logger = Logger(train_dir)
        valid_dir = os.path.join(path, 'validation')
        if not os.path.exists(valid_dir): os.makedirs(valid_dir)
        self.valid_logger = Logger(valid_dir)
        if args:
            text = 'Parameters\n---------\n'
            for (key, val) in args.items():
                text += '- ' + key + ' = ' + str(val) + '\n'
            self.train_logger.log_text('Description', text)
            self.valid_logger.log_text('Description', text)
        if events_dir and max_step:
            events_files = [
                F for F in scan_dir(events_dir, '')[1]
                if os.path.basename(F).startswith('events')
            ]
            for events_file in events_files:
                parent_dir = os.path.dirname(events_file).split(os.sep)[-1]
                if 'training' == parent_dir:
                    train_events_file = events_file
                elif 'validation' == parent_dir:
                    valid_events_file = events_file
            self.train_logger.copyFrom(train_events_file, max_step=max_step)
            self.valid_logger.copyFrom(valid_events_file, max_step=max_step)

    def on_epoch_begin(self, epoch, logs={}):
        self.starttime = time()

    def on_epoch_end(self, epoch, logs={}):
        self.train_logger.log_scalar("Speed", time() - self.starttime, epoch)
        self.train_logger.log_scalar("sparse_categorical_accuracy_%",
                                     logs['sparse_categorical_accuracy'] * 100,
                                     epoch)
        self.train_logger.log_scalar("loss", logs['loss'], epoch)
        self.valid_logger.log_scalar("Speed", time() - self.starttime, epoch)
        self.valid_logger.log_scalar(
            "sparse_categorical_accuracy_%",
            logs['val_sparse_categorical_accuracy'] * 100, epoch)
        self.valid_logger.log_scalar("loss", logs['val_loss'], epoch)
        # Model save
        if ((epoch + 1) % self.save_period) == 0:
            self.model.save(
                os.path.join(self.path, 'save_' + str(epoch) + '.h5'))
            _, oldsaves = scan_dir(self.path, '.h5')
            for save in oldsaves:
                try:
                    if int(save.split('.')[-2].split('_')[-1]) < epoch:
                        os.remove(save)
                except:
                    continue
Example #11
            # print("next state observation: ",observation[:3])###

            # Digitize the observation to get a state
            joint1_position, joint2_position, joint3_position  = observation[:3]
            nextState = build_state([to_bin(joint1_position, joint1_bins),
                            to_bin(joint2_position, joint2_bins),
                            to_bin(joint3_position, joint3_bins)])

            # print("nextState", nextState)
            # summary = tf.Summary(value=[tf.Summary.Value(tag="episode_reward",simple_value=episode_reward)])
            # summary_writer.add_summary(summary, i_episode * max_number_of_steps + t)

            if done:
                last_time_steps = numpy.append(last_time_steps, [int(t + 1)])
                break
            else:
                # Q-learn stuff
                #qlearn.learn(state, action, reward, nextState)
                qlearn.learn(state, action, reward, nextState, save_model_with_prefix, it)
                state = nextState

                it += 1

        logger.log_scalar("episode_reward", episode_reward, i_episode)
        print("episode reward: ",episode_reward)

# l = last_time_steps.tolist()
# l.sort()
# print("Overall score: {:0.2f}".format(last_time_steps.mean()))
# print("Best 100 score: {:0.2f}".format(reduce(lambda x, y: x + y, l[-100:]) / len(l[-100:])))