def train(args, n_actors, batch_queue, prios_queue, param_queue):
    """Run the learner loop: consume batches, update the model, publish results.

    Each iteration takes one item from ``batch_queue`` (its last element is
    the sample indices, everything before it is the batch), computes the loss
    with ``utils.compute_loss``, applies it with ``utils.update_parameters``
    and puts ``(idxes, prios)`` on ``prios_queue``. The loop never returns.

    :param args: configuration namespace; reads ``env``, ``seed``, ``device``,
        ``lr``, ``n_steps``, ``gamma``, ``max_norm``, ``soft_target_update``,
        ``tau``, ``target_update_interval``, ``save_interval``,
        ``publish_param_interval`` and ``tb_interval``.
    :param n_actors: forwarded to ``check_connection`` before training starts.
    :param batch_queue: queue yielding ``(*batch, idxes)`` items.
    :param prios_queue: queue receiving ``(idxes, prios)`` tuples.
    :param param_queue: queue receiving ``model.state_dict()`` snapshots.
    """
    env = wrapper.make_atari(args.env)
    env = wrapper.wrap_atari_dqn(env, args)
    utils.set_global_seeds(args.seed, use_torch=True)

    model = DuelingDQN(env, args).to(args.device)
    # model.load_state_dict(torch.load('model_30h.pth'))
    tgt_model = DuelingDQN(env, args).to(args.device)
    tgt_model.load_state_dict(model.state_dict())

    writer = SummaryWriter(comment="-{}-learner".format(args.env))
    optimizer = torch.optim.Adam(model.parameters(), args.lr)
    # optimizer = torch.optim.RMSprop(model.parameters(), args.lr, alpha=0.95, eps=1.5e-7, centered=True)

    check_connection(n_actors)

    param_queue.put(model.state_dict())
    learn_idx = 0
    ts = time.time()
    # Per-step statistics accumulated between two TensorBoard flushes.
    tb_dict = {
        k: []
        for k in ['loss', 'grad_norm', 'max_q', 'mean_q', 'min_q']
    }
    while True:
        *batch, idxes = batch_queue.get()
        loss, prios, q_values = utils.compute_loss(model, tgt_model, batch,
                                                   args.n_steps, args.gamma)
        grad_norm = utils.update_parameters(loss, model, optimizer,
                                            args.max_norm)
        prios_queue.put((idxes, prios))
        # Drop references so the batch can be freed while waiting on the queue.
        batch, idxes, prios = None, None, None
        learn_idx += 1

        tb_dict["loss"].append(float(loss))
        tb_dict["grad_norm"].append(float(grad_norm))
        tb_dict["max_q"].append(float(torch.max(q_values)))
        tb_dict["mean_q"].append(float(torch.mean(q_values)))
        tb_dict["min_q"].append(float(torch.min(q_values)))

        if args.soft_target_update:
            tau = args.tau
            # Blend under no_grad so autograd does not record these in-place
            # ops against the online parameters: tgt <- (1 - tau)*tgt + tau*p.
            with torch.no_grad():
                for p_tgt, p in zip(tgt_model.parameters(),
                                    model.parameters()):
                    p_tgt.mul_(1 - tau).add_(tau * p)
        elif learn_idx % args.target_update_interval == 0:
            print("Updating Target Network..")
            tgt_model.load_state_dict(model.state_dict())

        if learn_idx % args.save_interval == 0:
            print("Saving Model..")
            torch.save(model.state_dict(), "model.pth")

        if learn_idx % args.publish_param_interval == 0:
            param_queue.put(model.state_dict())

        if learn_idx % args.tb_interval == 0:
            # BPS = learner iterations per second over the last tb_interval steps.
            bps = args.tb_interval / (time.time() - ts)
            print("Step: {:8} / BPS: {:.2f}".format(learn_idx, bps))
            writer.add_scalar("learner/BPS", bps, learn_idx)
            for k, v in tb_dict.items():
                writer.add_scalar(f'learner/{k}', np.mean(v), learn_idx)
                v.clear()
            ts = time.time()
def maml_train(raw_phi_u, raw_phi_i, raw_phi_r, u_grad_list, i_grad_list, r_grad_list, global_lr):
    """
    Update the three global parameter groups with their collected gradients.

    Each group is passed to ``update_parameters`` together with its gradient
    list and the shared learning rate, in the order user, item, rating.

    :param raw_phi_u: global user parameter
    :param raw_phi_i: global item parameter
    :param raw_phi_r: global rating parameter
    :param u_grad_list: list of user gradients
    :param i_grad_list: list of item gradients
    :param r_grad_list: list of rating gradients
    :param global_lr: global learning rate
    :return: tuple ``(phi_u, phi_i, phi_r)`` of the updated parameters
    """
    groups = (
        (raw_phi_u, u_grad_list),
        (raw_phi_i, i_grad_list),
        (raw_phi_r, r_grad_list),
    )
    updated = [
        update_parameters(phi, grad_list, global_lr)
        for phi, grad_list in groups
    ]
    return tuple(updated)
def initiate_sced_model(day_idx, data_path, start, valid_id, gen_df, genth_df, bus_df,
                        branch_df, ptdf_dict, wind_generator_names,
                        margcost_df, blockmargcost_df, FlexibleRampFactor, load_scaling_factor, wind_scaling_factor,
                        blockmargcost_dict, blockoutputlimit_dict,
                        load_s_df, hourly_load_df, hourly_load_dict, input_mode):
    """
    Build the SCED model from 'real-time' input data for slot 1 with no shift.

    The ``hourly_load_df`` and ``hourly_load_dict`` arguments are replaced by
    the values returned from ``update_parameters`` before they are used.

    Returns whatever ``build_sced_model`` returns.
    """
    no_shift = 0
    first_slot = 1

    # update_parameters returns nine values; the last three are not needed here.
    (bus_slot_load_dict, slot_load_dict, hourly_load_dict,
     total_hourly_load_dict, hourly_load_df, genforren_dict,
     _horizon, _ramp_up_req, _ramp_dn_req) = update_parameters(
        data_path, day_idx, start + no_shift, FlexibleRampFactor,
        load_scaling_factor, wind_scaling_factor, input_mode, 'real-time')

    # Reduce the refreshed inputs to what the SCED model consumes.
    sced_slot_load = slot_load_dict[1 + no_shift, first_slot]
    sced_bus_slot_load = extract_dictionary_for_sced(
        bus_slot_load_dict, 1, 2, 0, first_slot, start, no_shift)
    sced_genforren = extract_dictionary_for_sced(
        genforren_dict, 1, 2, 0, first_slot, start, no_shift)

    return build_sced_model(
        start, first_slot, valid_id, gen_df, genth_df, bus_df,
        branch_df, ptdf_dict, wind_generator_names,
        margcost_df, blockmargcost_df, FlexibleRampFactor,
        blockmargcost_dict, blockoutputlimit_dict,
        sced_genforren, load_s_df, hourly_load_df,
        hourly_load_dict, total_hourly_load_dict,
        sced_slot_load, sced_bus_slot_load)
def train(args, n_actors, batch_queue, prios_queue, param_queue):
    """Learner loop for the RunTagEnv environment; never returns.

    Pulls ``(*batch, idxes)`` items from ``batch_queue``, updates the model,
    reports ``(idxes, prios)`` on ``prios_queue`` and periodically syncs the
    target network, saves "model.pth", publishes the state dict on
    ``param_queue`` and logs throughput.

    :param args: configuration namespace (``seed``, ``device``, ``env``,
        ``lr``, ``n_steps``, ``gamma``, ``max_norm`` and the ``*_interval``
        fields are read).
    :param n_actors: forwarded to ``check_connection``.
    :param batch_queue: source of training batches.
    :param prios_queue: sink for updated priorities.
    :param param_queue: sink for model state dicts.
    """
    env = RunTagEnv(width=5, height=5, number_of_subordinates=1, max_steps=1000)
    utils.set_global_seeds(args.seed, use_torch=True)

    device = args.device
    model = DuelingDQN(env).to(device)
    tgt_model = DuelingDQN(env).to(device)
    tgt_model.load_state_dict(model.state_dict())

    writer = SummaryWriter(comment="-{}-learner".format(args.env))
    optimizer = torch.optim.RMSprop(model.parameters(), args.lr,
                                    alpha=0.95, eps=1.5e-7, centered=True)

    check_connection(n_actors)
    param_queue.put(model.state_dict())

    step = 0
    window_start = time.time()

    def _is_due(interval):
        # True on every `interval`-th learner step.
        return step % interval == 0

    while True:
        *transitions, sample_ids = batch_queue.get()
        loss, priorities = utils.compute_loss(
            model, tgt_model, transitions, args.n_steps, args.gamma)
        grad_norm = utils.update_parameters(
            loss, model, optimizer, args.max_norm)
        print('Updated parameters!')
        prios_queue.put((sample_ids, priorities))
        # Release the batch before blocking on the queue again.
        del transitions, sample_ids, priorities
        step += 1

        writer.add_scalar("learner/loss", loss, step)
        writer.add_scalar("learner/grad_norm", grad_norm, step)

        if _is_due(args.target_update_interval):
            print("Updating Target Network..")
            tgt_model.load_state_dict(model.state_dict())

        if _is_due(args.save_interval):
            print("Saving Model..")
            torch.save(model.state_dict(), "model.pth")

        if _is_due(args.publish_param_interval):
            param_queue.put(model.state_dict())

        if _is_due(args.bps_interval):
            elapsed = time.time() - window_start
            bps = args.bps_interval / elapsed
            print("Step: {:8} / BPS: {:.2f}".format(step, bps))
            writer.add_scalar("learner/BPS", bps, step)
            window_start = time.time()
def train(self):
    """Train from the prioritized buffer until ``self.max_step`` updates.

    Every iteration samples a batch with the current ``beta``, moves it to
    ``self.device``, computes the loss, updates the model, advances the LR
    scheduler and writes the new priorities back to the buffer. The target
    network sync, checkpoints ("model{step}.pth") and worker weight
    publication happen at their configured intervals. On reaching
    ``self.max_step`` a final checkpoint is written and
    ``self.batch_recorder.cleanup()`` is called.
    """
    utils.set_global_seeds(self.seed, use_torch=True)

    def _to_device(array, tensor_type=torch.FloatTensor):
        # Wrap a sampled array in a tensor of the given type on self.device.
        return tensor_type(array).to(self.device)

    learn_idx = 0
    while True:
        beta = self.beta_by_frame(learn_idx)
        states, actions, rewards, next_states, dones, weights, idxes = \
            self.buffer.sample(self.batch_size, beta)
        batch = (
            _to_device(states),
            _to_device(actions, torch.LongTensor),
            _to_device(rewards),
            _to_device(next_states),
            _to_device(dones),
            _to_device(weights),
        )

        loss, prios = utils.compute_loss(self.model, self.tgt_model, batch,
                                         self.n_step, self.gamma)
        grad_norm = utils.update_parameters(loss, self.model, self.optimizer,
                                            self.max_norm)
        # The scheduler must advance AFTER the optimizer update (PyTorch
        # >= 1.1); stepping it first skips the first value of the schedule.
        # NOTE(review): assumes utils.update_parameters calls
        # optimizer.step() - confirm against utils.
        self.scheduler.step()

        self.buffer.update_priorities(idxes, prios)
        # Drop references so the batch memory can be reclaimed.
        batch, idxes, prios = None, None, None
        learn_idx += 1

        self.writer.add_scalar("learner/loss", loss, learn_idx)
        self.writer.add_scalar("learner/grad_norm", grad_norm, learn_idx)

        if learn_idx % self.target_update_interval == 0:
            print("Updating Target Network..")
            self.tgt_model.load_state_dict(self.model.state_dict())

        if learn_idx % self.save_interval == 0:
            print("Saving Model..")
            torch.save(self.model.state_dict(),
                       "model{}.pth".format(learn_idx))

        if learn_idx % self.publish_param_interval == 0:
            # Workers get a deep copy so later updates do not touch it.
            self.batch_recorder.set_worker_weights(
                copy.deepcopy(self.model))

        if learn_idx >= self.max_step:
            torch.save(self.model.state_dict(),
                       "model{}.pth".format(learn_idx))
            self.batch_recorder.cleanup()
            break
def optimize(X, Y, a_prev, parameters, learning_rate=0.01):
    """
    Run a single optimization step: forward pass, backward pass, gradient
    clipping and parameter update.

    Arguments:
    X -- list of integers, where each integer is a number that maps to a character in the vocabulary.
    Y -- list of integers, exactly the same as X but shifted one index to the left.
    a_prev -- previous hidden state.
    parameters -- python dictionary containing:
        Wax -- Weight matrix multiplying the input, numpy array of shape (n_a, n_x)
        Waa -- Weight matrix multiplying the hidden state, numpy array of shape (n_a, n_a)
        Wya -- Weight matrix relating the hidden-state to the output, numpy array of shape (n_y, n_a)
        b -- Bias, numpy array of shape (n_a, 1)
        by -- Bias relating the hidden-state to the output, numpy array of shape (n_y, 1)
    learning_rate -- learning rate for the model.

    Returns:
    loss -- value of the loss function (cross-entropy)
    gradients -- python dictionary of clipped gradients (dWax, dWaa, dWya, db, dby)
    a[len(X)-1] -- the last hidden state, of shape (n_a, 1)
    """
    # Forward then backward through time.
    loss, forward_cache = rnn_forward(X, Y, a_prev, parameters)
    raw_gradients, hidden_states = rnn_backward(X, Y, parameters, forward_cache)

    # Clip to [-5, 5] before the update.
    gradients = clip(raw_gradients, maxValue=5)

    # The updated dictionary is not returned to the caller.
    # NOTE(review): presumably update_parameters mutates `parameters` in place - confirm.
    update_parameters(parameters, gradients, learning_rate)

    last_step = len(X) - 1
    return loss, gradients, hidden_states[last_step]
def run_sced(day_idx, data_path, previous_dispatch, wind_scaling_factor, input_mode, sced_model, ha_instance,
             valid_id, gen_df, genth_df, bus_df, branch_df, ptdf_dict,
             wind_generator_names, margcost_df, blockmargcost_df, FlexibleRampFactor,
             blockmargcost_dict, blockoutputlimit_dict, load_s_df, slot_load_dict,
             hourly_load_df, hourly_load_dict, total_hourly_load_dict,
             bus_slot_load_dict, genforren_dict, start, slot, shift=0):
    """
    Create, parameterize and solve one SCED instance for (start, slot).

    'real-time' inputs are refreshed through ``update_parameters`` for hour
    ``start + shift`` with a load scaling factor of 1. The refreshed
    ``bus_slot_load_dict``, ``slot_load_dict`` and ``genforren_dict`` replace
    the arguments of the same name. A new instance of ``sced_model`` is reset
    with them and solved with ``lp_solver``. ``previous_dispatch`` is updated
    in place with the dispatch of every thermal generator.

    Returns:
        (sced_instance, sced_results, previous_dispatch)
    """
    unit_load_scaling = 1
    refreshed_inputs = update_parameters(
        data_path, day_idx, start + shift, FlexibleRampFactor,
        unit_load_scaling, wind_scaling_factor, input_mode, 'real-time')

    # Nine values come back; only three are consumed below.
    (bus_slot_load_dict, slot_load_dict, _hourly_load_dict,
     _total_hourly_load_dict, _hourly_load_df, genforren_dict,
     _horizon, _ramp_up_req, _ramp_dn_req) = refreshed_inputs

    fresh_instance = sced_model.create_instance()
    # NOTE(review): `shift` is not forwarded here (always 0) - confirm intended.
    sced_instance = reset_sced_parameters(
        previous_dispatch, ha_instance, fresh_instance,
        bus_slot_load_dict, slot_load_dict, genforren_dict,
        start, slot, shift=0)

    sced_results = lp_solver.solve(sced_instance)

    # Remember thermal dispatch for the next SCED run.
    generated = sced_instance.PowerGenerated
    for unit in sced_instance.ThermalGenerators:
        previous_dispatch[unit] = generated[unit].value

    return sced_instance, sced_results, previous_dispatch
def da_input_data(wind_penetration, day_idx, data_path, FlexibleRampFactor, load_scaling_factor,
                  start=1, input_mode='static', mode='day-ahead'):
    """
    Collect all input data for the day-ahead market simulation.

    Static model data comes from ``get_model_input_data``; the time-dependent
    load, renewable-forecast and ramp-requirement data is then refreshed by
    ``update_parameters`` in 'day-ahead' mode.

    Notes:
    - ``FlexibleRampFactor``, ``load_scaling_factor`` and ``start`` are
      replaced by the values returned from ``get_model_input_data`` before
      ``update_parameters`` is called.
    - ``mode`` is not read; 'day-ahead' is always passed on.

    Returns a 27-tuple:
        wind_scaling_factor, load_scaling_factor, start, valid_id,
        FlexibleRampFactor, ReserveFactor, RegulatingReserveFactor,
        gen_df, genth_df, bus_df, branch_df, ptdf_dict,
        wind_generator_names, margcost_df, blockmargcost_df,
        blockmargcost_dict, blockoutputlimit_dict, genforren_dict,
        load_s_df, hourly_load_df, hourly_load_dict,
        total_hourly_load_dict, slot_load_dict,
        RampUpRequirement_dict, RampDnRequirement_dict,
        bus_slot_load_dict, horizon
    """
    # Underscore-prefixed names are superseded by update_parameters below.
    (wind_scaling_factor, load_scaling_factor, start, valid_id,
     FlexibleRampFactor, ReserveFactor, RegulatingReserveFactor,
     gen_df, genth_df, bus_df, branch_df, ptdf_dict,
     wind_generator_names, margcost_df, blockmargcost_df,
     blockmargcost_dict, blockoutputlimit_dict, _genforren_dict,
     load_s_df, _hourly_load_df, _hourly_load_dict,
     _total_hourly_load_dict, _slot_load_dict,
     _ramp_up_req, _ramp_dn_req) = get_model_input_data(
        start, day_idx, data_path, wind_penetration)

    (bus_slot_load_dict, slot_load_dict, hourly_load_dict,
     total_hourly_load_dict, hourly_load_df, genforren_dict, horizon,
     RampUpRequirement_dict, RampDnRequirement_dict) = update_parameters(
        data_path, day_idx, start, FlexibleRampFactor,
        load_scaling_factor, wind_scaling_factor, input_mode, 'day-ahead')

    return (
        wind_scaling_factor, load_scaling_factor, start, valid_id,
        FlexibleRampFactor, ReserveFactor, RegulatingReserveFactor,
        gen_df, genth_df, bus_df, branch_df, ptdf_dict,
        wind_generator_names, margcost_df, blockmargcost_df,
        blockmargcost_dict, blockoutputlimit_dict, genforren_dict,
        load_s_df, hourly_load_df, hourly_load_dict,
        total_hourly_load_dict, slot_load_dict,
        RampUpRequirement_dict, RampDnRequirement_dict,
        bus_slot_load_dict, horizon,
    )
def train(args):
    """Meta-train a convolutional network on Omniglot and optionally save it.

    For every meta-batch, each task adapts the parameters with
    ``update_parameters`` on its train split (inner step), and the loss of the
    adapted parameters on its test split is accumulated into the outer loss.
    The outer loss and accuracy are divided by ``args.batch_size`` before the
    meta-optimizer step. Exactly ``args.num_batches`` meta-batches are used
    (at least one, because the stop check follows the update).

    :param args: namespace providing ``folder``, ``num_shots``, ``num_ways``,
        ``download``, ``batch_size``, ``num_workers``, ``hidden_size``,
        ``device``, ``step_size``, ``first_order``, ``num_batches`` and
        ``output_folder`` (``None`` disables saving).
    """
    dataset = omniglot(args.folder,
                       shots=args.num_shots,
                       ways=args.num_ways,
                       shuffle=True,
                       test_shots=15,
                       meta_train=True,
                       download=args.download)
    dataloader = BatchMetaDataLoader(dataset,
                                     batch_size=args.batch_size,
                                     shuffle=True,
                                     num_workers=args.num_workers)

    model = ConvolutionalNeuralNetwork(1,
                                       args.num_ways,
                                       hidden_size=args.hidden_size)
    model.to(device=args.device)
    model.train()
    meta_optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    # Training loop
    with tqdm(dataloader, total=args.num_batches) as pbar:
        for batch_idx, batch in enumerate(pbar):
            model.zero_grad()

            train_inputs, train_targets = batch['train']
            train_inputs = train_inputs.to(device=args.device)
            train_targets = train_targets.to(device=args.device)

            test_inputs, test_targets = batch['test']
            test_inputs = test_inputs.to(device=args.device)
            test_targets = test_targets.to(device=args.device)

            outer_loss = torch.tensor(0., device=args.device)
            accuracy = torch.tensor(0., device=args.device)
            tasks = zip(train_inputs, train_targets, test_inputs, test_targets)
            for train_input, train_target, test_input, test_target in tasks:
                train_logit = model(train_input)
                inner_loss = F.cross_entropy(train_logit, train_target)

                model.zero_grad()
                params = update_parameters(model,
                                           inner_loss,
                                           step_size=args.step_size,
                                           first_order=args.first_order)

                test_logit = model(test_input, params=params)
                outer_loss += F.cross_entropy(test_logit, test_target)

                with torch.no_grad():
                    accuracy += get_accuracy(test_logit, test_target)

            outer_loss.div_(args.batch_size)
            accuracy.div_(args.batch_size)

            outer_loss.backward()
            meta_optimizer.step()

            pbar.set_postfix(accuracy='{0:.4f}'.format(accuracy.item()))
            # batch_idx is zero-based: stop once num_batches batches are done
            # (the previous `batch_idx >= num_batches` ran one batch extra).
            if batch_idx + 1 >= args.num_batches:
                break

    # Save model
    if args.output_folder is not None:
        filename = os.path.join(
            args.output_folder, 'maml_omniglot_'
            '{0}shot_{1}way.pt'.format(args.num_shots, args.num_ways))
        with open(filename, 'wb') as f:
            state_dict = model.state_dict()
            torch.save(state_dict, f)
def model(X, Y, learning_rate=0.3, num_iterations=30000, lambd=0, keep_prob=1):
    """
    Train a three-layer network:
    LINEAR->RELU->LINEAR->RELU->LINEAR->SIGMOID.

    First hidden layer: 20 units
    Second hidden layer: 3 units
    Output layer: 1 unit

    L2 regularization (lambd != 0) and dropout (keep_prob < 1) are mutually
    exclusive; using both trips an assertion.

    Returns the trained parameters. The sampled costs are plotted at the end.
    """
    grads = {}
    costs = []
    m = X.shape[1]  # kept from the original; not used below
    layers_dims = [X.shape[0], 20, 3, 1]

    # Initialise the network parameters.
    parameters = initialize_parameters(layers_dims)

    # Gradient-descent loop.
    for step in range(num_iterations):
        # Forward pass, with dropout only when keep_prob < 1.
        if keep_prob < 1:
            a3, cache = forward_propagation_with_dropout(X, parameters, keep_prob)
        elif keep_prob == 1:
            a3, cache = forward_propagation(X, parameters)

        # Cost, with the L2 term only when lambd is non-zero.
        cost = (compute_cost(a3, Y) if lambd == 0
                else compute_cost_with_regularization(a3, Y, parameters, lambd))

        # Only one of L2 regularization and dropout may be active.
        assert (lambd == 0 or keep_prob == 1)

        # Backward pass matching the forward variant.
        if lambd != 0:
            grads = backward_propagation_with_regularization(X, Y, cache, lambd)
        elif keep_prob == 1:
            grads = backward_propagation(X, Y, cache)
        elif keep_prob < 1:
            grads = backward_propagation_with_dropout(X, Y, cache, keep_prob)

        # Parameter update.
        parameters = update_parameters(parameters, grads, learning_rate)

        # Report and record the cost every 10000 iterations.
        if step % 10000 == 0:
            print("迭代次数为 {}: 损失结果大小:{}".format(step, cost))
            costs.append(cost)

    # Plot the recorded costs.
    plt.plot(costs)
    plt.ylabel('损失')
    plt.xlabel('迭代次数')
    plt.title("损失变化图,学习率为" + str(learning_rate))
    plt.show()

    return parameters
def train(args):
    """Meta-train a convolutional network on miniImageNet with TensorBoard logs.

    For every meta-batch, each task adapts the parameters with
    ``update_parameters`` on its train split and the loss of the adapted
    parameters on its test split is accumulated into the outer loss, which is
    divided by ``args.batch_size`` before the meta-optimizer step. Exactly
    ``args.num_batches`` meta-batches are used (at least one, because the stop
    check follows the update).

    ``writer`` is not created here; it must already exist in the enclosing
    scope. It is closed when training ends.

    :param args: namespace providing ``folder``, ``num_shots``, ``num_ways``,
        ``download``, ``batch_size``, ``num_workers``, ``hidden_size``,
        ``device``, ``step_size``, ``first_order``, ``num_batches`` and
        ``output_folder`` (``None`` disables saving).
    """
    dataset = miniimagenet(args.folder,
                           shots=args.num_shots,
                           ways=args.num_ways,
                           shuffle=True,
                           test_shots=15,
                           meta_train=True,
                           download=args.download)
    dataloader = BatchMetaDataLoader(dataset,
                                     batch_size=args.batch_size,
                                     shuffle=True,
                                     num_workers=args.num_workers)

    model = ConvolutionalNeuralNetwork(3,
                                       84,
                                       args.num_ways,
                                       hidden_size=args.hidden_size)
    model.to(device=args.device)
    model.train()
    meta_optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    # Training loop
    with tqdm(dataloader, total=args.num_batches) as pbar:
        for batch_idx, batch in enumerate(pbar):
            model.zero_grad()

            train_inputs, train_targets = batch['train']
            train_inputs = train_inputs.to(device=args.device)
            train_targets = train_targets.to(device=args.device)

            test_inputs, test_targets = batch['test']
            test_inputs = test_inputs.to(device=args.device)
            test_targets = test_targets.to(device=args.device)

            outer_loss = torch.tensor(0., device=args.device)
            accuracy = torch.tensor(0., device=args.device)
            tasks = zip(train_inputs, train_targets, test_inputs, test_targets)
            for train_input, train_target, test_input, test_target in tasks:
                train_logit = model(train_input)
                inner_loss = F.cross_entropy(train_logit, train_target)

                # Each task logs the image grid at step 0 and the model graph.
                grid = torchvision.utils.make_grid(train_input)
                writer.add_image('images', grid, 0)
                writer.add_graph(model, train_input)

                model.zero_grad()
                params = update_parameters(model,
                                           inner_loss,
                                           step_size=args.step_size,
                                           first_order=args.first_order)

                test_logit = model(test_input, params=params)
                outer_loss += F.cross_entropy(test_logit, test_target)

                # Histograms of the meta-parameter values (not gradients).
                last_param = None
                for name, param in model.meta_named_parameters():
                    writer.add_histogram(name, param, batch_idx)
                    last_param = param

                with torch.no_grad():
                    accuracy += get_accuracy(test_logit, test_target)
                # Same data the original logged through its leaked loop
                # variable: the last meta-parameter only.
                writer.add_histogram('meta parameters', last_param, batch_idx)

            outer_loss.div_(args.batch_size)
            accuracy.div_(args.batch_size)

            outer_loss.backward()
            meta_optimizer.step()

            pbar.set_postfix(accuracy='{0:.4f}'.format(accuracy.item()))
            writer.add_scalar('Accuracy/test', accuracy.item(), batch_idx)
            # batch_idx is zero-based: stop once num_batches batches are done
            # (the previous `batch_idx >= num_batches` ran one batch extra).
            if batch_idx + 1 >= args.num_batches:
                break

    writer.close()

    # Save model
    # NOTE(review): the file name says "omniglot" although the dataset is
    # miniimagenet; left unchanged in case other code loads this path.
    if args.output_folder is not None:
        filename = os.path.join(
            args.output_folder, 'maml_omniglot_'
            '{0}shot_{1}way.pt'.format(args.num_shots, args.num_ways))
        with open(filename, 'wb') as f:
            state_dict = model.state_dict()
            torch.save(state_dict, f)
def run_hour_ahead_sequence(day_idx, data_path, result_path, previous_dispatch, input_mode, start, valid_id,
                            FlexibleRampFactor, ReserveFactor,
                            RegulatingReserveFactor, load_scaling_factor, wind_scaling_factor,
                            gen_df, genth_df, bus_df, branch_df, ptdf_dict,
                            wind_generator_names, margcost_df, blockmargcost_df,
                            blockmargcost_dict, blockoutputlimit_dict, genforren_dict,
                            load_s_df, hourly_load_df, hourly_load_dict,
                            total_hourly_load_dict, slot_load_dict,
                            RampUpRequirement_dict, RampDnRequirement_dict, wrp_status):
    """
    Run the hour-ahead / real-time SCED sequence for hours 1..24 of one day.

    The hour-ahead model and the SCED model are built once. For each hour in
    1..22 the inputs are refreshed, a new hour-ahead instance is solved with
    ``mip_solver``, the results of its first period are recorded, and SCED is
    run for slots 1..6 with LMPs written out per hour. Hours 23 and 24 are not
    solved separately: their results are read from periods 2 and 3 of the
    hour-22 instance, and SCED is run for them with shift 1 and 2.

    Notes:
    - ``start`` is used for the initial model build and is then overwritten by
      the hour loop variable.
    - ``LMPs`` is reset after every hourly write-out, so the returned ``LMPs``
      is an empty dict.

    Returns a 29-tuple:
        sced_instance, ha_instance, obj_dict, ha_obj, rt_obj, Demand,
        SlotDemand, WindPowerForecasted, WindPowerGenerated,
        SlotWindPowerGenerated, WindTotalFlexRamp, WindTotalFlexRampDn,
        TotalFlexRampRequired, TotalFlexRampDnRequired, TotalFlexRampProvided,
        Generation_by_fueltype, WindFlexRamp, WindFlexRampDn,
        Hourly_ProductionCost, Hourly_RampingCost, Hourly_FixedCost,
        WindTotalCurtailments, WindCurtailments, LMPs, shadow_prices,
        congestion_prices, Dispatch, rt_demand, rt_load_curtailment
    """
    print('Updating parameters for hour-ahead model ...')
    update_time_init = time.time()
    (bus_slot_load_dict, slot_load_dict, hourly_load_dict, total_hourly_load_dict,
     hourly_load_df, genforren_dict, horizon, RampUpRequirement_dict, RampDnRequirement_dict) = \
        update_parameters(data_path, day_idx, start, FlexibleRampFactor,
                          load_scaling_factor, wind_scaling_factor, input_mode, mode='hour-ahead')
    print('Update done! Time: ', time.time() - update_time_init)

    print('Building hour-ahead model ...')
    ha_model_time_init = time.time()
    model2 = build_scuc_model(start, valid_id, FlexibleRampFactor, ReserveFactor, RegulatingReserveFactor,
                              gen_df, genth_df, bus_df, branch_df, ptdf_dict, wind_generator_names,
                              margcost_df, blockmargcost_df, blockmargcost_dict, blockoutputlimit_dict,
                              genforren_dict, load_s_df, hourly_load_df, hourly_load_dict, total_hourly_load_dict,
                              slot_load_dict, RampUpRequirement_dict, RampDnRequirement_dict, wrp_status['ha'])
    print('Done building hour-ahead model! Time:', time.time() - ha_model_time_init)

    print('Initiating SCED model ...')
    sced_model_time_init = time.time()
    sced_model = initiate_sced_model(day_idx, data_path, start, valid_id, gen_df, genth_df, bus_df,
                                     branch_df, ptdf_dict, wind_generator_names,
                                     margcost_df, blockmargcost_df, FlexibleRampFactor, load_scaling_factor,
                                     wind_scaling_factor,
                                     blockmargcost_dict, blockoutputlimit_dict,
                                     load_s_df, hourly_load_df, hourly_load_dict, input_mode)
    print('Done initiating SCED model! Time: ', time.time() - sced_model_time_init)
    sced_instance = sced_model.create_instance()

    # Result containers: dicts are keyed by hour (or (hour, slot)); lists get
    # one entry appended per hour.
    obj_dict = dict()
    ha_obj = dict()
    rt_obj = dict()
    Hourly_FixedCost = dict()
    Hourly_ProductionCost = dict()
    Hourly_RampingCost = dict()
    Demand = []
    SlotDemand = []
    WindPowerGenerated = []
    SlotWindPowerGenerated = []
    WindPowerForecasted = []
    TotalFlexRampRequired = []
    TotalFlexRampDnRequired = []
    WindTotalFlexRamp = []
    WindTotalFlexRampDn = []
    TotalFlexRampProvided = []
    WindTotalCurtailments = []
    WindCurtailments = []
    WindFlexRamp = []
    WindFlexRampDn = []
    rt_demand = dict()
    rt_load_curtailment = dict()
    FuelType = get_fuel_type_list(list(gen_df.index))
    Generation_by_fueltype = pd.DataFrame(0, index=range(1, 25), columns=FuelType)
    ha_previous_dispatch = dict()
    shadow_prices = dict()
    congestion_prices = dict()
    LMPs = dict()
    Dispatch = dict()

    for start in range(1, 23):
        print('********************************* Hour ', start, '**********************************')
        print('Updating input data for hour-ahead instance ...')
        ha_update_new_time_init = time.time()
        (bus_slot_load_dict, slot_load_dict, hourly_load_dict, total_hourly_load_dict,
         hourly_load_df, genforren_dict, horizon, RampUpRequirement_dict, RampDnRequirement_dict) = \
            update_parameters(data_path, day_idx, start, FlexibleRampFactor, load_scaling_factor,
                              wind_scaling_factor, input_mode, mode='hour-ahead')
        print('Done updating input data for hour-ahead instance! Time: ', time.time() - ha_update_new_time_init)

        print('Creating hour-ahead instance ...')
        ha_instance_time_init = time.time()
        ha_instance = model2.create_instance()
        print('Done creating hour-ahead instance! Time: ', time.time() - ha_instance_time_init)

        print('Resetting hour-ahead instance ...')
        ha_instance_reset_time_init = time.time()
        ha_instance = reset_instance(start, ha_previous_dispatch, ha_instance,
                                     slot_load_dict, hourly_load_dict, total_hourly_load_dict,
                                     hourly_load_df, genforren_dict, RampUpRequirement_dict,
                                     RampDnRequirement_dict)
        print('Done with hour-ahead instance reset! Time: ', time.time() - ha_instance_reset_time_init)

        print('Solving hour-ahead instance ...')
        t0 = time.time()
        results = mip_solver.solve(ha_instance)
        t1 = time.time()
        total_time = t1 - t0
        print('Done solving hour-ahead instance! Time:', total_time)

        print('*** CONDITIONING THE HOUR-AHEAD RESULTS ***')
        misc_time_init = time.time()
        results.write(num=1)
        TotalRampCost = sum(ha_instance.RampingCost[t].value for t in ha_instance.TimePeriods)
        print('Objective: ', ha_instance.TotalProductionCost.value)
        # Only period 1 of the instance belongs to the current hour.
        ha_obj[start] = {
            'fixed_cost': sum(ha_instance.StartupCost[g, 1].value + ha_instance.ShutdownCost[g, 1].value
                              for g in ha_instance.ThermalGenerators),
            'ramp_cost': ha_instance.RampingCost[1].value,
            'prod_cost': sum(ha_instance.ProductionCost[g, 1].value
                             for g in ha_instance.ThermalGenerators)}
        print('')
        print('Total ramp cost: ', TotalRampCost)
        obj_dict[start] = (ha_instance.TotalFixedCost.value
                           + ha_instance.TotalProductionCost.value + TotalRampCost)
        Hourly_ProductionCost[start] = sum(ha_instance.ProductionCost[g, 1].value
                                           for g in ha_instance.ThermalGenerators)
        Hourly_RampingCost[start] = ha_instance.RampingCost[1].value
        Hourly_FixedCost[start] = sum(
            ha_instance.StartupCost[g, 1].value + ha_instance.ShutdownCost[g, 1].value
            for g in ha_instance.ThermalGenerators)
        for g in ha_instance.ThermalGenerators:
            ha_previous_dispatch[g] = ha_instance.PowerGenerated[g, 1].value

        Demand.append(value(ha_instance.Demand[1]))
        SlotDemand.append([
            value(ha_instance.SlotDemand[1, s]) for s in ha_instance.TimeSlots
        ])
        WindPowerGenerated.append(
            sum(ha_instance.PowerGenerated[g, 1].value
                for g in ha_instance.WindGenerators))
        SlotWindPowerGenerated.append([
            sum(ha_instance.PowerGenerated[g, 1].value for g in ha_instance.WindGenerators)
            for s in ha_instance.TimeSlots])
        WindPowerForecasted.append([
            sum(ha_instance.PowerForecast[g, 1, s].value for g in ha_instance.WindGenerators)
            for s in ha_instance.TimeSlots])
        WindTotalFlexRamp.append([
            sum(ha_instance.FlexibleRampUpAvailable[g, 1, s].value
                - ha_instance.WindRpCurtailment[g, 1, s].value
                for g in ha_instance.WindGenerators)
            for s in ha_instance.TimeSlots])
        WindTotalFlexRampDn.append([
            sum(ha_instance.FlexibleRampDnAvailable[g, 1, s].value
                for g in ha_instance.WindGenerators)
            for s in ha_instance.TimeSlots])
        WindFlexRamp.append([
            [ha_instance.FlexibleRampUpAvailable[g, 1, s].value
             - ha_instance.WindRpCurtailment[g, 1, s].value
             for s in ha_instance.TimeSlots]
            for g in ha_instance.WindGenerators])
        WindFlexRampDn.append([
            [ha_instance.FlexibleRampDnAvailable[g, 1, s].value
             for s in ha_instance.TimeSlots]
            for g in ha_instance.WindGenerators])
        WindTotalCurtailments.append([
            sum(ha_instance.WindRpCurtailment[g, 1, s].value
                for g in ha_instance.WindGenerators)
            for s in ha_instance.TimeSlots])
        WindCurtailments.append([
            [ha_instance.WindRpCurtailment[g, 1, s].value
             for s in ha_instance.TimeSlots]
            for g in ha_instance.WindGenerators])
        TotalFlexRampProvided.append([
            sum(ha_instance.FlexibleRampUpAvailable[g, 1, s].value
                for g in ha_instance.ThermalGenerators | ha_instance.WindGenerators)
            for s in ha_instance.TimeSlots])
        TotalFlexRampRequired.append([
            ha_instance.FlexibleRampUpRequirement[1, s].value
            for s in ha_instance.TimeSlots
        ])
        TotalFlexRampDnRequired.append([
            ha_instance.FlexibleRampDnRequirement[1, s].value
            for s in ha_instance.TimeSlots
        ])
        for i in FuelType:
            # Generators are grouped by fuel type via their name prefix.
            iset = [x for x in ha_instance.AllGenerators if x.startswith(i)]
            t = start
            Generation_by_fueltype.loc[t, i] = sum(
                ha_instance.PowerGenerated[g, 1].value for g in iset)

        print('*** End of Conditioning! Time: ', time.time() - misc_time_init, ' ***')
        print('*** Starting Real-time Security Constrained Economic Dispatch ***')
        overall_sced_time_init = time.time()
        for slot in range(1, 7):
            print('Running SCED for slot ', slot, ' ...')
            sced_slot_time_init = time.time()
            sced_instance, sced_results, previous_dispatch = \
                run_sced(day_idx, data_path, previous_dispatch, wind_scaling_factor, input_mode,
                         sced_model, ha_instance, valid_id, gen_df, genth_df, bus_df,
                         branch_df, ptdf_dict, wind_generator_names, margcost_df, blockmargcost_df,
                         FlexibleRampFactor, blockmargcost_dict, blockoutputlimit_dict,
                         load_s_df, slot_load_dict, hourly_load_df, hourly_load_dict,
                         total_hourly_load_dict, bus_slot_load_dict, genforren_dict, start, slot, shift=0)
            LMPs, shadow_prices, congestion_prices = \
                compute_LMPs(sced_instance, ptdf_dict, shadow_prices, congestion_prices,
                             LMPs, start, slot, shift=0)
            rt_demand[start, slot] = [start, slot, value(sced_instance.SlotDemand),
                                      sum(value(sced_instance.PowerForecast[g])
                                          for g in sced_instance.WindGenerators)]
            rt_load_curtailment[start, slot] = [start, slot,
                                                sum(sced_instance.BusCurtailment[b].value
                                                    for b in sced_instance.LoadBuses)]
            rt_obj[start, slot] = {'curtailment_cost': sced_instance.TotalCurtailmentCost.value,
                                   'prod_cost': sced_instance.TotalProductionCost.value}
            for g in sced_instance.AllGenerators:
                Dispatch[g, start, slot] = [
                    g, start, slot, sced_instance.PowerGenerated[g].value
                ]
            print('Done with SCED and LMP computation for slot ', slot,
                  '! Time: ', time.time() - sced_slot_time_init)

        # Write out LMPs for this hour and clear the LMP dictionary
        write_out_lmps_hourly(wrp_status, day_idx, result_path, LMPs, start)
        LMPs = dict()

        if start == 22:
            # Hours 23 and 24 reuse the hour-22 instance: hour 22 + i is read
            # from period 1 + i of that solution.
            for i in range(1, 3):
                hour = start + i
                period = 1 + i
                print('********************************* Hour ', hour,
                      ' **********************************')
                print('*** CONDITIONING THE HOUR-AHEAD RESULTS ***')
                misc_time_init = time.time()
                # Fixed: shutdown cost was read at period 1 (and at "1+1"
                # below) instead of the period that matches this hour.
                ha_obj[hour] = {
                    'fixed_cost': sum(ha_instance.StartupCost[g, period].value
                                      + ha_instance.ShutdownCost[g, period].value
                                      for g in ha_instance.ThermalGenerators),
                    'ramp_cost': ha_instance.RampingCost[period].value,
                    'prod_cost': sum(ha_instance.ProductionCost[g, period].value
                                     for g in ha_instance.ThermalGenerators)}
                Hourly_ProductionCost[hour] = sum(ha_instance.ProductionCost[g, period].value
                                                  for g in ha_instance.ThermalGenerators)
                Hourly_RampingCost[hour] = ha_instance.RampingCost[period].value
                Hourly_FixedCost[hour] = sum(
                    ha_instance.StartupCost[g, period].value
                    + ha_instance.ShutdownCost[g, period].value
                    for g in ha_instance.ThermalGenerators)

                Demand.append(value(ha_instance.Demand[period]))
                SlotDemand.append([value(ha_instance.SlotDemand[period, s])
                                   for s in ha_instance.TimeSlots])
                WindPowerGenerated.append(sum(ha_instance.PowerGenerated[g, period].value
                                              for g in ha_instance.WindGenerators))
                # Fixed: was hard-coded to period "1+1" for both extra hours.
                SlotWindPowerGenerated.append([
                    sum(ha_instance.PowerGenerated[g, period].value
                        for g in ha_instance.WindGenerators)
                    for s in ha_instance.TimeSlots])
                WindPowerForecasted.append([
                    sum(ha_instance.PowerForecast[g, period, s].value
                        for g in ha_instance.WindGenerators)
                    for s in ha_instance.TimeSlots])
                WindTotalFlexRamp.append([
                    sum(ha_instance.FlexibleRampUpAvailable[g, period, s].value
                        - ha_instance.WindRpCurtailment[g, period, s].value
                        for g in ha_instance.WindGenerators)
                    for s in ha_instance.TimeSlots])
                WindTotalFlexRampDn.append([
                    sum(ha_instance.FlexibleRampDnAvailable[g, period, s].value
                        for g in ha_instance.WindGenerators)
                    for s in ha_instance.TimeSlots])
                WindTotalCurtailments.append([
                    sum(ha_instance.WindRpCurtailment[g, period, s].value
                        for g in ha_instance.WindGenerators)
                    for s in ha_instance.TimeSlots])
                WindCurtailments.append([
                    [ha_instance.WindRpCurtailment[g, period, s].value
                     for s in ha_instance.TimeSlots]
                    for g in ha_instance.WindGenerators])
                WindFlexRamp.append([
                    [ha_instance.FlexibleRampUpAvailable[g, period, s].value
                     - ha_instance.WindRpCurtailment[g, period, s].value
                     for s in ha_instance.TimeSlots]
                    for g in ha_instance.WindGenerators])
                WindFlexRampDn.append([
                    [ha_instance.FlexibleRampDnAvailable[g, period, s].value
                     for s in ha_instance.TimeSlots]
                    for g in ha_instance.WindGenerators])
                # NOTE(review): hours 1..22 sum over thermal AND wind units
                # here, this branch over wind only - confirm which is intended.
                TotalFlexRampProvided.append([
                    sum(ha_instance.FlexibleRampUpAvailable[g, period, s].value
                        for g in ha_instance.WindGenerators)
                    for s in ha_instance.TimeSlots])
                TotalFlexRampRequired.append([ha_instance.FlexibleRampUpRequirement[period, s].value
                                              for s in ha_instance.TimeSlots])
                TotalFlexRampDnRequired.append([ha_instance.FlexibleRampDnRequirement[period, s].value
                                                for s in ha_instance.TimeSlots])
                for j in FuelType:
                    iset = [
                        x for x in ha_instance.AllGenerators if x.startswith(j)
                    ]
                    Generation_by_fueltype.loc[hour, j] = sum(
                        ha_instance.PowerGenerated[g, period].value for g in iset)
                for g in ha_instance.ThermalGenerators:
                    ha_previous_dispatch[g] = ha_instance.PowerGenerated[g, period].value
                print('*** End of Conditioning! Time: ', time.time() - misc_time_init, ' ***')

                for slot in range(1, 7):
                    print('Running SCED for slot ', slot, ' ...')
                    sced_slot_time_init = time.time()
                    # `i` is passed positionally as the shift argument.
                    sced_instance, sced_results, previous_dispatch = \
                        run_sced(day_idx, data_path, previous_dispatch, wind_scaling_factor, input_mode,
                                 sced_model, ha_instance, valid_id, gen_df, genth_df, bus_df, branch_df,
                                 ptdf_dict, wind_generator_names, margcost_df, blockmargcost_df,
                                 FlexibleRampFactor, blockmargcost_dict, blockoutputlimit_dict,
                                 load_s_df, slot_load_dict, hourly_load_df, hourly_load_dict,
                                 total_hourly_load_dict, bus_slot_load_dict, genforren_dict,
                                 start, slot, i)
                    LMPs, shadow_prices, congestion_prices = \
                        compute_LMPs(sced_instance, ptdf_dict, shadow_prices,
                                     congestion_prices, LMPs, start, slot, i)
                    rt_demand[hour, slot] = [hour, slot, value(sced_instance.SlotDemand),
                                             sum(value(sced_instance.PowerForecast[g])
                                                 for g in sced_instance.WindGenerators)]
                    rt_load_curtailment[hour, slot] = [hour, slot,
                                                       sum(sced_instance.BusCurtailment[b].value
                                                           for b in sced_instance.LoadBuses)]
                    rt_obj[hour, slot] = {'curtailment_cost': sced_instance.TotalCurtailmentCost.value,
                                          'prod_cost': sced_instance.TotalProductionCost.value}
                    for g in sced_instance.AllGenerators:
                        Dispatch[g, hour, slot] = [
                            g, hour, slot,
                            sced_instance.PowerGenerated[g].value
                        ]
                    print('Done with SCED and LMP computation for slot ', slot,
                          '! Time: ', time.time() - sced_slot_time_init)
                write_out_lmps_hourly(wrp_status, day_idx, result_path, LMPs, start, i)
                LMPs = dict()

        print('SCED done for the hour! Time: ', time.time() - overall_sced_time_init)

    print('Additional conditioning ...')
    additional_misc_time_init = time.time()
    # Per-hour entries are [generator][slot]; swap to [slot][generator] and
    # flatten hours and slots into one leading axis.
    WindFlexRamp_arr = np.array(WindFlexRamp).swapaxes(1, 2)
    WindFlexRampDn_arr = np.array(WindFlexRampDn).swapaxes(1, 2)
    nt, ns, nw = np.shape(WindFlexRamp_arr)
    WindFlexRamp = np.array(
        [WindFlexRamp_arr[t, s, :] for t in range(nt) for s in range(ns)])
    WindFlexRampDn = np.array(
        [WindFlexRampDn_arr[t, s, :] for t in range(nt) for s in range(ns)])
    WindCurtailments_arr = np.array(WindCurtailments).swapaxes(1, 2)
    nt, ns, nw = np.shape(WindCurtailments_arr)
    WindCurtailments = np.array(
        [WindCurtailments_arr[t, s, :] for t in range(nt) for s in range(ns)])
    print('End of conditioning! Time: ', time.time() - additional_misc_time_init)

    return (sced_instance, ha_instance, obj_dict, ha_obj, rt_obj, Demand, SlotDemand, WindPowerForecasted,
            WindPowerGenerated, SlotWindPowerGenerated, WindTotalFlexRamp, WindTotalFlexRampDn,
            TotalFlexRampRequired, TotalFlexRampDnRequired, TotalFlexRampProvided,
            Generation_by_fueltype, WindFlexRamp, WindFlexRampDn, Hourly_ProductionCost,
            Hourly_RampingCost, Hourly_FixedCost, WindTotalCurtailments, WindCurtailments,
            LMPs, shadow_prices, congestion_prices, Dispatch, rt_demand, rt_load_curtailment)
def train(args, n_actors, batch_queue, prios_queue, param_queue):
    """Run the learner loop: consume batches, update the model, publish weights.

    The loop never returns. Each iteration takes one batch from
    ``batch_queue``, computes the loss and new priorities, applies a gradient
    step, and sends ``(idxes, prios)`` back through ``prios_queue``. A
    background thread keeps ``param_queue`` supplied with the current weights
    as a dict of NumPy arrays.

    Args:
        args: Configuration object. Attributes read here: ``env``, ``seed``,
            ``device``, ``lr``, ``n_steps``, ``gamma``, ``max_norm``,
            ``tensorboard_update_interval``, ``target_update_interval``,
            ``save_interval``, ``publish_param_interval``, ``bps_interval``.
        n_actors: Not used inside this function; kept so the call signature
            stays unchanged.
        batch_queue: Queue yielding sequences whose last element is the batch
            indexes and whose leading elements form the training batch.
        prios_queue: Queue that receives ``(idxes, prios)`` tuples.
        param_queue: Queue that receives weight snapshots.
            NOTE(review): presumably bounded, so the filler thread blocks on
            ``put`` when it is full -- confirm against the caller.
    """
    def _snapshot_params():
        """Return the current model weights as independent NumPy arrays."""
        # ``copy=True`` matters when the model already lives on the CPU:
        # ``Tensor.numpy()`` shares memory with its tensor, so without a copy
        # the optimizer would keep mutating a snapshot that was already queued.
        return {
            name: tensor.detach().to("cpu", copy=True).numpy()
            for name, tensor in model.state_dict().items()
        }

    def _fill_param():
        """Thread body: keep pushing fresh weight snapshots to the queue."""
        while True:
            param_queue.put(_snapshot_params())

    env = wrapper.make_atari(args.env)
    env = wrapper.wrap_atari_dqn(env, args)
    utils.set_global_seeds(args.seed, use_torch=True)

    model = DuelingDQN(env).to(args.device)
    tgt_model = DuelingDQN(env).to(args.device)
    tgt_model.load_state_dict(model.state_dict())

    writer = SummaryWriter(comment="-{}-learner".format(args.env))
    # optimizer = torch.optim.Adam(model.parameters(), args.lr)
    optimizer = torch.optim.RMSprop(
        model.parameters(), args.lr, alpha=0.95, eps=1.5e-7, centered=True)

    # Publish the initial weights once before the filler thread takes over.
    param_queue.put(_snapshot_params())
    # Daemon thread: the filler loops forever, so a non-daemon thread would
    # keep the process alive if the learner loop below ever raised.
    threading.Thread(target=_fill_param, daemon=True).start()

    learn_idx = 0
    ts = time.time()
    while True:
        *batch, idxes = batch_queue.get()
        loss, prios = utils.compute_loss(
            model, tgt_model, batch, args.n_steps, args.gamma)
        grad_norm = utils.update_parameters(
            loss, model, optimizer, args.max_norm)
        prios_queue.put((idxes, prios))
        # Drop the references so the batch can be freed before the next get().
        batch, idxes, prios = None, None, None
        learn_idx += 1

        if learn_idx % args.tensorboard_update_interval == 0:
            writer.add_scalar("learner/loss", loss, learn_idx)
            writer.add_scalar("learner/grad_norm", grad_norm, learn_idx)

        if learn_idx % args.target_update_interval == 0:
            print("Updating Target Network..")
            tgt_model.load_state_dict(model.state_dict())

        if learn_idx % args.save_interval == 0:
            print("Saving Model..")
            torch.save(model.state_dict(), "model.pth")

        if learn_idx % args.publish_param_interval == 0:
            # Removes one queued snapshot. NOTE(review): presumably this
            # discards a stale entry so the filler thread can enqueue a fresh
            # one; it blocks if the queue is momentarily empty -- confirm.
            param_queue.get()

        if learn_idx % args.bps_interval == 0:
            bps = args.bps_interval / (time.time() - ts)
            print("Step: {:8} / BPS: {:.2f}".format(learn_idx, bps))
            writer.add_scalar("learner/BPS", bps, learn_idx)
            ts = time.time()