def medicines(self, args):
    log.info("Export medicines")

    # Get filename
    if "filename" not in args:
        log.error("No filename provided")
        exit()

    template_filename = "gui/templates/medicines_report.html"
    html_string = inventory.medicines.utils.export_html(template_filename)

    # Optionally dump the rendered HTML next to the PDF
    if "html" in args and args["html"]:
        try:
            with open(args["filename"].name + ".html", "w") as fdesc:
                fdesc.write(html_string)
        except IOError as error:
            log.error("File not writable: %s", error)

    try:
        with open("gui/templates/report.css", "r") as fdesc:
            css_string = fdesc.read()
    except IOError as error:
        log.error("CSS file not readable: %s", error)
        exit()

    inventory.medicines.utils.export_pdf(
        pdf_filename=args["filename"].name,
        html_string=html_string,
        css_string=css_string)

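# Hedged usage sketch (not part of the original module): the exporter reads
# args["filename"].name, so "filename" is expected to be a file-like object such as
# one produced by argparse.FileType; the argument names and the `exporter` instance
# below are illustrative only.
#
#     import argparse
#     parser = argparse.ArgumentParser()
#     parser.add_argument("filename", type=argparse.FileType("w"))
#     parser.add_argument("--html", action="store_true")
#     args = vars(parser.parse_args(["report", "--html"]))
#     exporter.medicines(args)
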
def add_player(self, id_, name):
    # set first user as admin
    if not self.players:
        self.admin = id_
    # add player to list
    self.players.append([id_, name, 0])
    log.info("joined [%s] as [%s]" % (id_, name))

def add_bid(self, id_, bid):
    for player in self.players:
        if player[0] == id_:
            if player[2] < bid:
                old_bid = player[2]
                player[2] = bid
                log.info("[%s][%s] changed bid from %s to %s"
                         % (player[0], player[1], old_bid, bid))
                yield
            else:
                log.warning(
                    "[%s][%s] tried to bid from %s to %s. Not cool, bro. Not cool."
                    % (player[0], player[1], player[2], bid))
                yield
            # player handled; do not fall through to the "not found" error
            return
    log.error("add_bid: could not find [%s] on list" % id_)

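# Hedged usage sketch (not part of the original class): add_bid() is a generator, so
# a caller has to iterate it; any yielded value means the bidder id was found in the
# player list. The helper name and the `house` argument below are illustrative only.
def _bid_was_registered(house, id_, bid):
    """Return True if add_bid() found the player, whether or not the bid was accepted."""
    return any(True for _ in house.add_bid(id_, bid))
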
log.infov('Policy optimization...')
policy.update_dataset_statistics(exp_data)

for j in range(num_iter_policy):
    _, list_costs, list_moments = learn_policy_pilco(
        env, dynamics, policy, policy_optimizer,
        K=K, T=1000, gamma=0.99, moment_matching=True,
        grad_norm=grad_clip, pre_prcess=True,
        shaping_state_delta=shaping_state_delta)

    # Logging
    if (j + 1) % log_interval_policy == 1 or (j + 1) == args.num_iter_policy:
        loss_mean = torch.sum(torch.cat(list_costs)).data.cpu().numpy()[0]
        grad_norm = _grad_norm(policy)
        log_str = '[Itr #{}/{} policy optim #{}/{}]: loss mean: {:.5f}, grad norm: {:.3f}'
        log.info(log_str.format((i + 1), args.num_iter_algo,
                                (j + 1), args.num_iter_policy,
                                loss_mean, grad_norm))

cost_mean, cost_std = test_episodic_cost2(env, policy, dynamics, N=5, T=T, render=True)
log.info('Policy Test : # {} cost mean {:.5f} cost std {:.5f}'.format((i + 1), cost_mean, cost_std))

# Execute system and record data
for num in range(10):
    exp_data.push(rollout(env, policy, max_steps=T))

# Save model
save_dir = log_dir
utils.save_net_param(policy, save_dir, name='policy_' + str(i))
utils.save_net_param(dynamics, save_dir, name='dynamics_' + str(i))

# Record data

def equipment(self, args):
    log.info("Equipment")
    if "list" in args:
        for item in inventory.models.Equipment.objects.all():
            print(item.pk, item)
        return

def molecule(self, args):
    log.info("Molecule")
    if "list" in args:
        for item in inventory.models.Molecule.objects.all():
            print(item.pk, item)
        return

def allowance(self, args):
    log.info("Allowance")
    if "list" in args:
        for item in inventory.models.Allowance.objects.all():
            print(item.pk, item.name)
        return

def train_dynamics_model_pilco2(dynamics, dynamics_optimizer, trainset, epochs=1,
                                batch_size=1, eval_fn=None, logger=None, **kwargs):
    # Prepare the dynamics model for deterministic training
    dynamics.set_sampling(sampling=False)
    log.infov('Dynamics training...')

    dynamics.update_dataset_statistics(trainset)

    # Never request a batch larger than the dataset
    batch_size = trainset.data.shape[0] if trainset.data.shape[0] < batch_size else batch_size

    # Create Dataloader
    trainloader = Data.DataLoader(trainset, batch_size=batch_size, shuffle=True, drop_last=True)

    (_, _), (x_test, y_test) = load_data()
    (_, _), (x_test2, y_test2) = load_data(
        dir_name='/home/drl/PycharmProjects/DeployedProjects/deepPILCO/MB/data/log-test1.csv',
        data_num=1000)

    log.infov('Num of rollout: {} Data set size: {}'.format(len(trainset.buffer), trainset.data.shape[0]))

    dynamics.train()

    list_train_loss = []
    for epoch in range(epochs):  # Loop over the dataset multiple times
        running_train_losses = []
        start_time = time.time()

        for i, data in enumerate(trainloader):  # Loop over batches of data
            # Get input batch
            X, Y = data

            # Zero out the parameter gradients
            dynamics_optimizer.zero_grad()

            # Loss
            loss = dynamics.get_loss(X, Y, pre_prcess=kwargs['pre_process'])

            # Backward pass
            loss.backward()

            # Update params
            dynamics_optimizer.step()

            # Accumulate running losses
            running_train_losses.append(loss.data[0])  # Take out value from 1D Tensor

        # Record the mean training loss over the epoch
        batch_train_loss = np.mean(running_train_losses)
        list_train_loss.append(batch_train_loss)

        time_duration = time.time() - start_time

        # Logging
        if epoch % LOG_EVERY_N_EPOCH == 0:
            if dynamics.env.spec.id == 'HalfCheetah-v2':
                eval_mse = plot_train(x_test, y_test, dyn_model=dynamics,
                                      pre_process=kwargs['pre_process'], plot=False)
                eval_mse2 = plot_train(x_test2, y_test2, dyn_model=dynamics,
                                       pre_process=kwargs['pre_process'], plot=False)
            else:
                eval_mse = 0
                eval_mse2 = 0

            if logger is not None:
                logger.log({'epoch': epoch,
                            'time_duration': time_duration,
                            'Train loss': batch_train_loss,
                            'Eval loss': eval_mse,
                            'Eval loss_export': eval_mse2,
                            })
                logger.write(display=False)

            if epoch == 0 or epoch == epochs // 2 or epoch == epochs - 1 or epoch % 5 == 0:
                log.info('[Epoch # {:3d} ({:.1f} s)] Train loss: {:.8f} Eval loss1: {:.8f} Eval loss2: {:.8f}'.format(
                    epoch + 1, time_duration, batch_train_loss, eval_mse, eval_mse2))

        if epoch % PLOT_EVERY_N_EPOCH == 0:
            if kwargs['plot_train'] is not None and callable(kwargs['plot_train']):
                if epoch == 0:
                    plt.ion()
                kwargs['plot_train'](dynamics)

    if logger is not None:
        logger.close()

    log.info('Finished training dynamics model.')
    return np.array(list_train_loss)

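# Hedged usage sketch (not part of the original module): this trainer reads
# kwargs['pre_process'] and kwargs['plot_train'] unconditionally, so both keywords
# have to be supplied by the caller; the variable names below are illustrative only.
#
#     losses = train_dynamics_model_pilco2(dynamics, dynamics_optimizer, exp_data,
#                                          epochs=100, batch_size=128, logger=None,
#                                          pre_process=True, plot_train=None)
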
def train_dynamics_model_pilco(dynamics, dynamics_optimizer, trainset, epochs=1,
                               batch_size=1, eval_fn=None, logger=None, **kwargs):
    # Prepare the dynamics model for deterministic training
    dynamics.set_sampling(sampling=False)
    log.infov('Dynamics training...')

    # Loss
    criterion = nn.MSELoss()  # MSE/SmoothL1

    dynamics.update_dataset_statistics(trainset)

    # Never request a batch larger than the dataset
    batch_size = trainset.data.shape[0] if trainset.data.shape[0] < batch_size else batch_size

    # Create Dataloader
    trainloader = Data.DataLoader(trainset, batch_size=batch_size, shuffle=True, drop_last=True)

    log.infov('Num of rollout: {} Data set size: {}'.format(len(trainset.buffer), trainset.data.shape[0]))

    dynamics.train()

    list_train_loss = []
    for epoch in range(epochs):  # Loop over the dataset multiple times
        running_train_losses = []
        start_time = time.time()

        for i, data in enumerate(trainloader):  # Loop over batches of data
            # Get input batch
            X, Y = data

            # Wrap data tensors as Variable and send to GPU
            X = Variable(X).cuda()
            Y = Variable(Y).cuda()

            # Zero out the parameter gradients
            dynamics_optimizer.zero_grad()

            # Forward pass; delta_target returns the state difference for training
            outputs = dynamics.predict_Y(X, delta_target=True, pre_prcess=kwargs['pre_process'])

            # Loss
            loss = criterion(outputs, Y)

            # Backward pass
            loss.backward()

            # Update params
            dynamics_optimizer.step()

            # Accumulate running losses
            running_train_losses.append(loss.data[0])  # Take out value from 1D Tensor

        # Record the mean training loss over the epoch
        batch_train_loss = np.mean(running_train_losses)
        list_train_loss.append(batch_train_loss)

        time_duration = time.time() - start_time

        # Logging: only first, middle and last epochs
        if epoch % LOG_EVERY_N_EPOCH == 0:
            if logger is not None:
                logger.log({'epoch': epoch,
                            'time_duration': time_duration,
                            'Train loss': batch_train_loss,
                            })
                logger.write(display=False)

            if epoch == 0 or epoch == epochs // 2 or epoch == epochs - 1:
                log.info('[Epoch # {:3d} ({:.1f} s)] Train loss: {:.8f}'.format(
                    epoch + 1, time_duration, batch_train_loss))

    if logger is not None:
        logger.close()

    log.info('Finished training dynamics model.')
    return np.array(list_train_loss)

# data = np.concatenate((exp_data.buffer[0], exp_data.buffer[1], exp_data.buffer[2],
#                        exp_data.buffer[3], exp_data.buffer[4]), axis=0)
# exp_logger.log_table2csv(data)

for itr in range(n_iter_algo):
    reward_sums = []
    for n_mpc in range(N_MPC):
        data_MPC, reward_sum = MPC_rollout(env, mpc_controller, dynamics,
                                           horizon=max_timestep, render=False,
                                           use_prob=USE_PROB_PREDICT)
        exp_data.push(data_MPC)
        log.info('itr {} : The num of sampling rollout : {} Accumulated Reward : {:.4f}'
                 .format(itr, n_mpc, reward_sum))
        reward_sums.append(reward_sum)

    log.infov("Itr {}/{} Accumulated Reward: {:.4f}".format(
        itr, n_iter_algo, sum(reward_sums) / N_MPC))

    # Train dynamics
    train_dynamics_model_pilco(dynamics, dynamics_optimizer, exp_data,
                               epochs=num_itr_dyn, batch_size=dyn_batch_size,
                               plot_train=None, pre_process=pre_process)  # plot_train_ion

    # Save model

from core.utils import log
from flask import Flask, render_template, request, redirect, url_for
from core.auction_house import AuctionHouse

agent = None

# mark new run in logfile
log.info("Defining flask application")

# define application
app = Flask(__name__, template_folder='templates')

# --------------------------------------------- ENDPOINTS -----------------------------------------------------------

# address for game logon
@app.route('/', methods=['GET', 'POST'])
def login():
    # check if login attempt or start page get
    if request.method == 'GET':
        return render_template('login.html')
    if request.method == 'POST':
        # check if player has logged in
        id_ = request.remote_addr
        name = request.form['name']
        if not agent.check_if_in(id_):
            # add player
            agent.add_player(id_=id_, name=name)
        # join the server
        return redirect(url_for('auction'))

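# Hedged wiring sketch (not part of the original module): `agent` stays None until an
# entry point assigns it, so a startup block along these lines is assumed elsewhere;
# the AuctionHouse() constructor arguments and the host/port values are illustrative.
#
#     if __name__ == '__main__':
#         agent = AuctionHouse()
#         app.run(host='0.0.0.0', port=5000)
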
from core.utils.utils import _grad_norm
from core.my_envs.cartpole_swingup import *
from core import utils
from core.utils import log, logging_output
from my_envs.mujoco import *

torch.set_default_tensor_type('torch.Tensor')

env_name = 'HalfCheetahTrack-v2'
T = 1000

# Set up environment
env = gym.make(env_name)

# Create Policy
# policy = controller.BNNPolicyGRU(env, hidden_size=[64, 64, 64], drop_prob=0.1, activation='relu').cuda()
policy = controller.BNNPolicyGRU_PPO(env, hidden_size=[64, 64, 64], drop_prob=0.1, log_std=-2.5).cuda()
# policy_optimizer = optim.Adam(policy.parameters(), lr=lr_policy, weight_decay=1e-5)  # 1e-2, RMSprop

policy.load_state_dict(
    torch.load('log-files/HalfCheetahTrack-v2/Aug-06_21:43:53train._PILCO_lrp0.001_drop0.1-EXP_1_GRU_PPO/policy_9.pkl'))

cost_mean, cost_std = test_episodic_cost2(env, policy, N=5, T=T, render=True)
log.info('Policy Test : cost mean {:.5f} cost std {:.5f}'.format(cost_mean, cost_std))