Exemple #1
0
    def medicines(self, args):
        """Export the medicines inventory as a PDF report (optionally HTML too).

        args: mapping expected to contain "filename" (an object exposing a
        ``.name`` attribute — presumably an argparse ``FileType`` result,
        TODO confirm against the caller) and, optionally, a truthy "html"
        flag to additionally write an HTML copy of the report.

        Terminates the process via ``exit()`` when no filename is given or
        when the report stylesheet cannot be read.
        """
        log.info("Export medicines")

        # A target filename is mandatory; abort early without one.
        if "filename" not in args:
            log.error("No filename provided")
            exit()

        template_filename = "gui/templates/medicines_report.html"

        html_string = inventory.medicines.utils.export_html(template_filename)

        # Optionally keep an HTML copy next to the PDF; failure to write it
        # is logged but does not abort the PDF export below.
        if "html" in args and args["html"]:
            try:
                with open(args["filename"].name + ".html", "w") as fdesc:
                    fdesc.write(html_string)
            except IOError as error:
                log.error("File not writable: %s", error)

        # The stylesheet is required for PDF rendering; abort if missing.
        try:
            with open("gui/templates/report.css", "r") as fdesc:
                css_string = fdesc.read()
        except IOError as error:
            log.error("CSS file not readable: %s", error)
            exit()

        inventory.medicines.utils.export_pdf(
            pdf_filename=args["filename"].name,
            html_string=html_string,
            css_string=css_string)
Exemple #2
0
    def add_player(self, id_, name):
        """Register a new player; the first player to join becomes admin.

        id_: unique player identifier (e.g. a remote address).
        name: display name chosen by the player.

        Each player is stored as a mutable ``[id_, name, bid]`` triple with
        a starting bid of 0.
        """
        # First user to join administrates the game.
        if not self.players:
            self.admin = id_

        # add player to list
        self.players.append([id_, name, 0])

        # Lazy %-args: the logging module formats only if the record is
        # actually emitted (the original formatted eagerly with `%`).
        log.info("joined [%s] as [%s]", id_, name)
Exemple #3
0
 def add_bid(self, id_, bid):
     """Raise a player's bid to ``bid`` if it is strictly higher.

     Generator: yields once after handling the matching player so the
     caller controls when iteration resumes.  NOTE(review): if the caller
     resumes past the yield, the loop finishes and the "could not find"
     error below fires even for a player that WAS found — confirm whether
     a ``return`` after the yield is intended.
     """
     for player in self.players:
         if player[0] == id_:
             if player[2] < bid:
                 # Capture the old bid BEFORE overwriting it; the original
                 # assigned first and then logged player[2], so the
                 # "from X to Y" message printed the new value twice.
                 previous_bid = player[2]
                 player[2] = bid
                 log.info("[%s][%s] changed bid from %s to %s" %
                          (player[0], player[1], previous_bid, bid))
                 yield
             else:
                 log.warning(
                     "[%s][%s] tried to bid from %s to %s. Not cool, bro. Not cool."
                     % (player[0], player[1], player[2], bid))
                 yield
     log.error("add_bid: could not find [%s] on list" % id_)
    # Policy optimization phase: refine the policy against the learned
    # dynamics model (PILCO-style), evaluate it, then collect fresh rollouts.
    # NOTE(review): this fragment runs inside an outer loop over `i`
    # (overall algorithm iterations) that is not visible here; env, dynamics,
    # policy, optimizers and hyper-parameters come from that outer scope.
    log.infov('Policy optimization...' )

    policy.update_dataset_statistics(exp_data)
    for j in range(num_iter_policy):
        # One policy-learning step with moment matching and gradient clipping.
        _, list_costs, list_moments = learn_policy_pilco(env, dynamics, policy, policy_optimizer, K=K, T= 1000, gamma=0.99,
                                                   moment_matching=True,   grad_norm = grad_clip, pre_prcess=True , shaping_state_delta= shaping_state_delta)

        # Loggings
        if (j + 1) % log_interval_policy == 1 or (j + 1) == args.num_iter_policy:

            # Total predicted trajectory cost (legacy 0-dim tensor indexing).
            loss_mean = torch.sum( torch.cat(list_costs)) .data.cpu().numpy()[0]
            grad_norm = _grad_norm(policy)
            log_str ='[Itr #{}/{} policy optim # {}/{} ]: loss mean: {:.5f},   grad norm:{:.3f}'

            log.info(log_str.format( (i+1),args.num_iter_algo,
                                  (j+1),args.num_iter_policy,
                                  loss_mean,   grad_norm ))

    # Evaluate the optimized policy over 5 rendered episodes of length T.
    cost_mean ,cost_std = test_episodic_cost2(env, policy,dynamics, N=5, T=T, render=True)
    log.info('Policy Test : # {}  cost mean {:.5f}  cost std {:.5f} '.format((i+1) ,cost_mean,cost_std ))

    # Execute system and record data
    for num in range(10):
        exp_data.push(rollout(env, policy, max_steps=T))
    
    # Save model checkpoints for this outer iteration `i`.
    save_dir = log_dir
    utils.save_net_param(policy, save_dir, name='policy_'+str(i))
    utils.save_net_param(dynamics, save_dir, name='dynamics_' + str(i))

    # Record data
Exemple #5
0
 def equipment(self, args):
     """Print pk and string form of every Equipment record when asked to list."""
     log.info("Equipment")
     # Nothing to do unless a "list" action was requested.
     if "list" not in args:
         return
     for record in inventory.models.Equipment.objects.all():
         print(record.pk, record)
Exemple #6
0
 def molecule(self, args):
     """Print pk and string form of every Molecule record when asked to list."""
     log.info("Molecule")
     # Nothing to do unless a "list" action was requested.
     if "list" not in args:
         return
     for record in inventory.models.Molecule.objects.all():
         print(record.pk, record)
Exemple #7
0
 def allowance(self, args):
     """Print pk and name of every Allowance record when asked to list."""
     log.info("Allowance")
     # Nothing to do unless a "list" action was requested.
     if "list" not in args:
         return
     for record in inventory.models.Allowance.objects.all():
         print(record.pk, record.name)
Exemple #8
0
def train_dynamics_model_pilco2(dynamics, dynamics_optimizer, trainset, epochs=1, batch_size=1, eval_fn=None,
                               logger=None, **kwargs):
    """Train the dynamics model on ``trainset`` and return per-epoch losses.

    dynamics: model exposing ``get_loss``, ``set_sampling``, ``train`` and
        ``update_dataset_statistics``; ``dynamics.env.spec.id`` selects
        whether held-out MSE evaluation runs.
    dynamics_optimizer: torch optimizer over the dynamics parameters.
    trainset: dataset with ``.data`` (array-like, first axis = samples)
        and ``.buffer`` (list of rollouts).
    epochs / batch_size: training schedule; batch_size is clamped to the
        dataset size so the DataLoader never over-asks.
    eval_fn: unused; kept for interface compatibility.
    logger: optional experiment logger with ``log``/``write``/``close``.
    kwargs: must provide 'pre_process' (bool-ish) and 'plot_train'
        (callable or None).

    Returns: np.array of the mean training loss of each epoch.
    """
    # Deterministic (non-sampling) forward passes during training.
    dynamics.set_sampling(sampling=False)

    log.infov('Dynamics training...')

    dynamics.update_dataset_statistics(trainset)

    batch_size = trainset.data.shape[0] if trainset.data.shape[0] < batch_size else batch_size
    trainloader = Data.DataLoader(trainset, batch_size=batch_size, shuffle=True, drop_last=True)

    # Held-out evaluation sets (the second from a fixed on-disk log file).
    (_, _), (x_test, y_test) = load_data()
    (_, _), (x_test2, y_test2) = load_data(
        dir_name='/home/drl/PycharmProjects/DeployedProjects/deepPILCO/MB/data/log-test1.csv', data_num=1000)

    log.infov('Num of rollout: {} Data set size: {}'.format(len(trainset.buffer), trainset.data.shape[0]))
    dynamics.train()
    list_train_loss = []
    # Defaults so the progress log below never hits an unbound name when an
    # epoch triggers logging (e.g. epoch % 5 == 0) without an evaluation
    # pass having run (bug fixed vs. original).
    eval_mse = 0
    eval_mse2 = 0
    for epoch in range(epochs):  # Loop over dataset multiple times
        running_train_losses = []

        start_time = time.time()
        for i, data in enumerate(trainloader):  # Loop over batches of data
            # Get input batch
            X, Y = data

            # Loss
            loss = dynamics.get_loss(X, Y, pre_prcess=kwargs['pre_process'])

            # Reset gradients before backprop; without this the gradients
            # of every batch accumulate (bug fixed vs. original — the
            # sibling train_dynamics_model_pilco already did this).
            dynamics_optimizer.zero_grad()

            # Backward pass
            loss.backward()

            # Update params
            dynamics_optimizer.step()

            # Legacy pre-0.4 PyTorch 0-dim tensor indexing, used file-wide.
            running_train_losses.append(loss.data[0])

        # Record the mean training loss over the epoch's batches.
        batch_train_loss = np.mean(running_train_losses)
        list_train_loss.append(batch_train_loss)

        time_duration = time.time() - start_time

        # Periodic held-out evaluation + structured logging.
        if epoch % LOG_EVERY_N_EPOCH == 0:
            if dynamics.env.spec.id == 'HalfCheetah-v2':
                eval_mse = plot_train(x_test, y_test, dyn_model=dynamics, pre_process=kwargs['pre_process'], plot=False)
                eval_mse2 = plot_train(x_test2, y_test2, dyn_model=dynamics, pre_process=kwargs['pre_process'],
                                       plot=False)
            else:
                eval_mse = 0
                eval_mse2 = 0
            if logger is not None:
                logger.log({'epoch': epoch,
                            'time_duration': time_duration,
                            'Train loss': batch_train_loss,
                            'Eval loss': eval_mse,
                            'Eval loss_export': eval_mse2,
                            })
                logger.write(display=False)

        # Console progress: first, middle, last and every 5th epoch.
        if epoch == 0 or epoch == epochs // 2 or epoch == epochs - 1 or epoch % 5 == 0:
            log.info(
                '[Epoch # {:3d} ({:.1f} s)] Train loss: {:.8f} Eval loss1: {:.8f} Eval loss2: {:.8f}'.format(
                    epoch + 1, time_duration, batch_train_loss, eval_mse, eval_mse2))

        # Optional live plotting hook.
        if epoch % PLOT_EVERY_N_EPOCH == 0:
            if kwargs['plot_train'] is not None:
                if callable(kwargs['plot_train']):
                    if epoch == 0:
                        plt.ion()
                    kwargs['plot_train'](dynamics)

    if logger is not None:
        logger.close()
    log.info('Finished training dynamics model. ')
    return np.array(list_train_loss)
Exemple #9
0
def train_dynamics_model_pilco(dynamics, dynamics_optimizer, trainset, epochs=1, batch_size=1, eval_fn=None,logger=None ,**kwargs):
    """Train the dynamics model on ``trainset`` with an MSE delta-state loss.

    dynamics: model exposing ``predict_Y``, ``set_sampling``, ``train`` and
        ``update_dataset_statistics``.
    dynamics_optimizer: torch optimizer over the dynamics parameters.
    trainset: dataset with ``.data`` (array-like, first axis = samples)
        and ``.buffer`` (list of rollouts).
    epochs / batch_size: training schedule; batch_size is clamped to the
        dataset size.
    eval_fn: unused; kept for interface compatibility.
    logger: optional experiment logger with ``log``/``write``/``close``.
    kwargs: must provide 'pre_process'.

    Returns: np.array of the mean training loss of each epoch.
    """
    # Deterministic (non-sampling) forward passes during training.
    dynamics.set_sampling(sampling=False)

    log.infov('Dynamics training...')
    # MSE regression on state differences (a Gaussian log-likelihood loss
    # was tried previously; see git history if it needs reviving).
    criterion = nn.MSELoss()

    dynamics.update_dataset_statistics(trainset)

    batch_size = trainset.data.shape[0] if trainset.data.shape[0] < batch_size else batch_size
    # Create Dataloader
    trainloader = Data.DataLoader(trainset, batch_size=batch_size, shuffle=True, drop_last=True)

    log.infov('Num of rollout: {} Data set size: {}'.format(len(trainset.buffer), trainset.data.shape[0]))
    dynamics.train()
    list_train_loss = []
    for epoch in range(epochs):  # Loop over dataset multiple times
        running_train_losses = []

        start_time = time.time()
        for i, data in enumerate(trainloader):  # Loop over batches of data
            # Get input batch
            X, Y = data

            # Wrap data tensors as Variable and send to GPU (legacy
            # pre-0.4 PyTorch API used throughout this file).
            X = Variable(X).cuda()
            Y = Variable(Y).cuda()

            # Zero out the parameter gradients
            dynamics_optimizer.zero_grad()

            # delta_target=True: the model predicts state differences
            # rather than absolute next states.
            outputs = dynamics.predict_Y(X, delta_target=True, pre_prcess=kwargs['pre_process'])

            # Loss (unused M/N/reg locals from an abandoned regularized
            # likelihood loss were removed).
            loss = criterion(outputs, Y)

            # Backward pass
            loss.backward()

            # Update params
            dynamics_optimizer.step()

            # Legacy pre-0.4 PyTorch 0-dim tensor indexing, used file-wide.
            running_train_losses.append(loss.data[0])

        # Record the mean training loss over the epoch's batches.
        batch_train_loss = np.mean(running_train_losses)
        list_train_loss.append(batch_train_loss)

        time_duration = time.time() - start_time

        # Periodic structured logging.
        if epoch % LOG_EVERY_N_EPOCH == 0:
            if logger is not None:
                logger.log({'epoch': epoch,
                            'time_duration': time_duration,
                            'Train loss': batch_train_loss,
                            })
                logger.write(display=False)

        # Console progress: first, middle and last epoch only.
        if epoch == 0 or epoch == epochs // 2 or epoch == epochs - 1:
            log.info('[Epoch # {:3d} ({:.1f} s)] Train loss: {:.8f}  '.format(
                epoch + 1, time_duration, batch_train_loss))

    if logger is not None:
        logger.close()
    log.info('Finished training dynamics model. ')
    return np.array(list_train_loss)
Exemple #10
0
# data = np.concatenate((exp_data.buffer[0], exp_data.buffer[1],exp_data.buffer[2],exp_data.buffer[3],exp_data.buffer[4]), axis=0)
# exp_logger.log_table2csv(data)

# Main model-based RL loop: collect N_MPC rollouts per iteration with the
# MPC controller, then refit the dynamics model on all experience so far.
# NOTE(review): n_iter_algo, N_MPC, env, mpc_controller, dynamics,
# max_timestep, USE_PROB_PREDICT, exp_data and the training
# hyper-parameters are defined earlier in this script (not visible here).
for itr in range(n_iter_algo):
    reward_sums = []
    for n_mpc in range(N_MPC):
        # One MPC-controlled episode; returns the trajectory data and the
        # reward accumulated over it.
        data_MPC, reward_sum = MPC_rollout(env,
                                           mpc_controller,
                                           dynamics,
                                           horizon=max_timestep,
                                           render=False,
                                           use_prob=USE_PROB_PREDICT)
        exp_data.push(data_MPC)

        log.info(
            'itr {} : The num of sampling rollout : {} Accumulated Reward :{:.4f} '
            .format(itr, n_mpc, reward_sum))

        reward_sums.append(reward_sum)
    # Mean accumulated reward over this iteration's N_MPC rollouts.
    log.infov("Itr {}/{} Accumulated Reward: {:.4f}   ".format(
        itr, n_iter_algo,
        sum(reward_sums) / N_MPC))
    # Train dynamics
    train_dynamics_model_pilco(dynamics,
                               dynamics_optimizer,
                               exp_data,
                               epochs=num_itr_dyn,
                               batch_size=dyn_batch_size,
                               plot_train=None,
                               pre_process=pre_process)  # plot_train_ion
    # Save model
Exemple #11
0
from core.utils import log
from flask import Flask, render_template, request, redirect, url_for
from core.auction_house import AuctionHouse

# AuctionHouse instance shared by all endpoints.  NOTE(review): requests
# dereference this, so it must be assigned by the launcher before the
# server handles traffic — confirm the startup order in the caller.
agent = None

# mark new run in logfile
log.info("Defining flask application")

# define application
app = Flask(__name__, template_folder='templates')


# --------------------------------------------- ENDPOINTS ----------------------------------------------------------
# address for game logon
@app.route('/', methods=['GET', 'POST'])
def login():
    """Serve the login form (GET) or register the player and enter (POST)."""

    # Plain page load: just show the login form.
    if request.method == 'GET':
        return render_template('login.html')

    if request.method == 'POST':
        # Players are keyed by their remote address; register on first visit.
        player_id = request.remote_addr
        player_name = request.form['name']
        if not agent.check_if_in(player_id):
            agent.add_player(id_=player_id, name=player_name)
        # Hand the player over to the auction page.
        return redirect(url_for('auction'))
Exemple #12
0
from core.utils.utils import _grad_norm
from core.my_envs.cartpole_swingup import *
from core import utils
from core.utils import log, logging_output

from my_envs.mujoco import *

# NOTE(review): evaluation-only script fragment — loads a trained policy
# checkpoint and measures its episodic cost on HalfCheetahTrack-v2.
torch.set_default_tensor_type('torch.Tensor')

env_name = 'HalfCheetahTrack-v2'
# Episode length (timesteps) used for the evaluation rollouts.
T = 1000

# Set up environment
env = gym.make(env_name)

# Create Policy
#policy = controller.BNNPolicyGRU(env, hidden_size=[64, 64,64], drop_prob=0.1, activation= 'relu') .cuda()
# Recurrent dropout-Bayesian policy; log_std fixes the action noise scale.
policy = controller.BNNPolicyGRU_PPO(env,
                                     hidden_size=[64, 64, 64],
                                     drop_prob=0.1,
                                     log_std=-2.5).cuda()

#policy_optimizer = optim.Adam(policy.parameters(), lr=lr_policy, weight_decay =1e-5 )  # 1e-2, RMSprop
# Restore the weights of a specific training run's 10th checkpoint.
policy.load_state_dict(
    torch.load(
        'log-files/HalfCheetahTrack-v2/Aug-06_21:43:53train._PILCO_lrp0.001_drop0.1-EXP_1_GRU_PPO/policy_9.pkl'
    ))

# Evaluate over 5 rendered episodes and report mean/std of the cost.
cost_mean, cost_std = test_episodic_cost2(env, policy, N=5, T=T, render=True)
log.info('Policy Test :  cost mean {:.5f}  cost std {:.5f} '.format(
    cost_mean, cost_std))