def get_images(self):
    # pdb.set_trace()
    # Look up the depth frame and its timestamp for the current frame id
    depth_image1 = self.depth_images[self.frame_id]
    t = self.task[self.frame_id]
    self.depth_time = utils.get_time_str(t)
    self.thermal_time = utils.get_time_str(t)
    return depth_image1
def test(args, model, test_loader, epoch, start_time, log_file,
         train_epochs, train_losses, train_accuracy,
         valid_epochs, valid_losses, valid_accuracy, lr_change):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        counter = 0
        for data, target in test_loader:
            data, target = data.to(args.device), target.to(args.device)
            output = model(data)
            test_loss += nn.NLLLoss(reduction='sum')(output, target).item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            counter += len(pred)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= counter
    test_accuracy = correct / counter
    valid_epochs.append(epoch)
    valid_losses.append(test_loss)
    valid_accuracy.append(test_accuracy)

    # Get time elapsed
    curr_time = time.time()
    curr_time_str, elapsed_str = utils.get_time_str(start_time, curr_time)

    log = '\n[{}] : Elapsed [{}] : Epoch {}:\tVALIDATION Loss: {:.4f}, Accuracy: {:.4f} ({}/{})\n'.format(
        curr_time_str, elapsed_str, epoch, test_loss, test_accuracy, correct, counter)
    print(log)
    log_file.write(log)
    log_file.flush()

    utils.make_plots(args.out_path, train_epochs, train_losses, train_accuracy,
                     valid_epochs, valid_losses, valid_accuracy, lr_change)
def run_2_agents_easy(num=1):
    """One trained agent, one non-trained agent."""
    from Domains import RCCarBarriers
    # agent_paths = []
    # for i in range(3):
    #     p = train_agent("params/Car/", i)
    #     agent_paths.append(p)
    # print agent_paths
    agent_paths = [
        './Results/Mixed_ActionsB/agent0/Aug16_03-49-898192',
        './Results/Mixed_ActionsB/agent2/Aug16_03-50-036324',
    ]
    result_path = "./Results/CarMixed2/combine/" + get_time_str()
    for i in range(1, 1 + num):
        exp = generate_meta_experiment(i, agent_paths[:1], result_path,
                                       expdomain=RCCarBarriers, unique=False,
                                       max_episode=2000)
        exp.run(visualize_steps=0, visualize_performance=0, debug_on_sigurg=True)
        # exp.plot()
        exp.save()
    print result_path
    dom = exp.domain
    return result_path
def run_2_agents_barriers_3(num=1):
    from Domains import RCCarBarrier_2
    # agent_paths = []
    # for i in range(3):
    #     p = train_agent("params/Car/", i)
    #     agent_paths.append(p)
    # print agent_paths
    agent_paths = [
        './Results/Mixed_ActionsB/agent0/Aug16_03-49-898192',
        './Results/Mixed_ActionsB/agent1/Aug16_03-50-596823',
        './Results/Mixed_ActionsB/agent2/Aug16_03-50-036324',
        './Results/Mixed_ActionsB/agent2/Aug16_03-50-036324',
        './Results/Mixed_ActionsB/agent2/Aug16_03-50-036324',
        './Results/Mixed_ActionsB/agent2/Aug16_03-50-036324'
    ]
    result_path = "./Results/CarMixed2/combine/" + get_time_str()
    for i in range(1, 1 + num):
        exp = generate_meta_experiment(i, agent_paths, result_path,
                                       expdomain=RCCarBarrier_2, unique=False)
        exp.run(visualize_steps=0, visualize_performance=0, debug_on_sigurg=True)
        # exp.plot()
        exp.save()
    print result_path
    dom = exp.domain
    return result_path
def run_4_agents(num=1):
    from Domains import GridWorld4
    # agent_paths = []
    # for i in range(4):
    #     p = train_agent("params/gridworld/", i)
    #     agent_paths.append(p)
    # print agent_paths
    agent_paths = [
        './Results/Mixed_Actions3/agent0/Aug02_11-11-650788',
        './Results/Mixed_Actions3/agent1/Aug02_11-11-866788',
        './Results/Mixed_Actions3/agent2/Aug02_11-11-932882',
        './Results/Mixed_Actions3/agent3/Aug02_11-12-741840'
    ]
    result_path = "./Results/Mixed4/combine/" + get_time_str()
    for i in range(1, 1 + num):
        exp = generate_meta_experiment(i, agent_paths, result_path,
                                       expdomain=GridWorld4, unique=False)
        exp.run(visualize_steps=0, debug_on_sigurg=True)
        exp.save()
    print result_path
def run_2_slideturn_turn(num=1):
    from Domains import RCCarRightTurn  # import added: referenced below but missing in the snippet
    # agent_paths = []
    # for i in range(2):
    #     p = train_agent("params/Car/", i)
    #     agent_paths.append(p)
    # print agent_paths
    agent_paths = [
        './Results/Mixed_ActionsB/agent0/Aug21_11-38-389943',
        './Results/Mixed_ActionsB/agent1/Aug21_11-43-003799'
    ]
    result_path = "./Results/CarSlideTurn/combine/" + get_time_str()
    for i in range(1, 1 + num):
        exp = generate_meta_experiment(i, agent_paths, result_path,
                                       max_episode=1000,
                                       expdomain=RCCarRightTurn, unique=False)
        exp.run(visualize_steps=0, visualize_performance=0, debug_on_sigurg=True)
        # exp.plot()
        exp.save()
    print result_path
def run_2_slideturn_mixed(num=1):
    """One agent trained on RightTurn, other on LeftSlide - comparing on stitched domain"""
    from Domains import RCCarSlideTurn  # , RCCarSlideInvert
    # agent_paths = []
    # for i in range(2):
    #     p = train_agent("params/Car/", i)
    #     agent_paths.append(p)
    # print agent_paths
    agent_paths = [
        './Results/Mixed_ActionsB/agent0/Aug21_11-38-389943',
        './Results/Mixed_ActionsB/agent1/Aug21_11-43-003799'
    ]
    result_path = "./Results/CarSlideTurn/localrbf/increasedres/" + get_time_str()
    for i in range(1, 1 + num):
        exp = generate_meta_experiment(i, agent_paths, result_path,
                                       max_episode=1000,
                                       expdomain=RCCarSlideTurn, unique=False)
        exp.run(visualize_steps=0, visualize_performance=0, debug_on_sigurg=True)
        # exp.plot()
        exp.save()
    print result_path
def get_server_stat(api_url):
    r = requests.get(api_url)
    stats = utils.load_json_str(r.text)
    update_time_unix = utils.str_to_num(stats.get('updated'))
    if not update_time_unix:
        update_time = 'unknown'
    else:
        update_time = utils.get_time_str(update_time_unix) + ' UTC'
    try:
        stat = stats.get('servers')[4]
    except TypeError:
        stat_str = 'unknown'
    else:
        network_rx, rx_measure = utils.select_max_measure(
            utils.str_to_num(stat.get('network_rx')))
        network_tx, tx_measure = utils.select_max_measure(
            utils.str_to_num(stat.get('network_tx')))
        network_in = utils.str_to_num(stat.get('network_in')) // utils.GB
        network_out = utils.str_to_num(stat.get('network_out')) // utils.GB
        memory_total = utils.str_to_num(stat.get('memory_total')) // 1024
        memory_used = utils.str_to_num(stat.get('memory_used')) // 1024
        stat_str = 'mem: %dM / %dM\nnetwork: %.1f%s / %.1f%s\nbandwidth: %dG / %dG\n' % (
            memory_used, memory_total, network_tx, tx_measure,
            network_rx, rx_measure, network_out, network_in)
    return stat_str, update_time
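# Minimal usage sketch for get_server_stat(). The endpoint URL is a
# placeholder, and the JSON shape ('updated', a 'servers' list with at least
# five entries, network/memory fields) is assumed to match whatever status
# API this snippet targets.
if __name__ == '__main__':
    stat_str, update_time = get_server_stat('https://status.example.com/json/stats.json')
    print('updated at: {}'.format(update_time))
    print(stat_str)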
def _evaluate(experiment, model_setup, session, eval_losses, losses_to_record):
    loss_records_eval = {loss: [] for loss in losses_to_record}
    print("\n%s ------- EVALUATING %s -------" % (utils.get_time_str(), model_setup.model_name))
    print("\t\t" + "\t".join([l for l in losses_to_record]))
    eval_iter = 0
    # Loops until the iterator ends
    while True:
        # Record losses every x iterations
        try:
            cur_val_losses = session.run([eval_losses[l_name] for l_name in losses_to_record])
        except tf.errors.OutOfRangeError:
            break
        to_print = [eval_iter] + cur_val_losses
        print_form = "Iter%04d:" + "\t%.2f" * len(losses_to_record)
        print(print_form % tuple(to_print))
        # Store losses for later display
        for i, loss_name in enumerate(losses_to_record):
            loss_records_eval[loss_name].append(cur_val_losses[i])
        eval_iter += 1
    return loss_records_eval
def save_once(step, print_log=True):
    save_path = os.path.join(ckpt_dir, get_time_str())
    saver_all.save(sess=sess, save_path=save_path, global_step=step,
                   write_meta_graph=False)
    if print_log:
        print('save:', save_path)
    return save_path
def generate_meta_experiment(exp_id, agent_paths, path, unique=True, expdomain=None):
    opt = {}
    if unique:
        opt["path"] = os.path.join(path, get_time_str())
    else:
        opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 10000
    opt["num_policy_checks"] = 40
    opt["checks_per_policy"] = 20
    # start_at = np.array([4, 6])

    agents = load_all_agents(agent_paths, pretrained=True, load_confidence=True)
    for a in agents:
        a.policy.epsilon = 0

    if expdomain:
        actual_domain = expdomain(mapname=mapname, terrain_augmentation=False, noise=0.1)
    else:
        actual_domain = GridWorldMixed(mapname=mapname, terrain_augmentation=False, noise=0.1)
    domain = PolicyMixer(actual_domain, agents)
    representation = Tabular(domain)
    policy = eGreedy(representation)  # , tau=.1)
    opt['agent'] = Q_Learning(policy, representation,
                              discount_factor=0.9,
                              initial_learn_rate=0.8,
                              lambda_=0.5,
                              learn_rate_decay_mode='boyan',
                              boyan_N0=2380)
    opt['domain'] = domain
    experiment = Experiment(**opt)

    # Snapshot this script alongside the results for reproducibility
    path_join = lambda s: os.path.join(opt["path"], s)
    if not os.path.exists(opt["path"]):
        os.makedirs(opt["path"])
    shutil.copy(inspect.getsourcefile(inspect.currentframe()),
                path_join("experiment.py"))
    return experiment
def crawl_vtvprograms(start_date, end_date):
    url_fmt = "https://vtv.vn/lich-phat-song-ngay-{}-thang-{}-nam-{}.htm"
    title_prefix = "Xem truyền hình trực tiếp kênh"
    data = {}
    for date in utils.generate_datetime_objs(start_date, end_date):
        data_by_date = {}

        # Crawl single page
        day, month, year = utils.get_specific_fmt_time(date)
        # print("(day={}, month={}, year={})".format(day, month, year))
        url = url_fmt.format(day, month, year)
        root = html.document_fromstring(requests.get(url).content)

        # Find channels
        titles = root.xpath("//ul[@class = 'list-channel']//a/@title")
        channels = [title[len(title_prefix) + 1:] for title in titles]
        # print(channels)

        # Find programs
        program_elms = root.xpath("//div[@id = 'wrapper']/ul[@class = 'programs']")
        for program_elm, channel in zip(program_elms, channels):
            # print("Channel = ", channel)
            li_elms = program_elm.xpath("./li")
            program_list = []
            for li in li_elms:
                duration = li.xpath("@duration")[0] or ''
                start_time = li.cssselect("span.time")[0].text or ''
                program_name = li.cssselect("span.title")[0].text or ''
                program_genre = li.cssselect("a.genre")
                if program_genre is None or len(program_genre) == 0:
                    program_genre = ''
                else:
                    # print(html.tostring(program_genre[0], encoding='utf-8'))
                    # guard against empty <a> elements whose .text is None
                    program_genre = program_genre[0].text or ''
                # print(program_genre)
                # print("Start time = {}, duration = {}, program = {}".format(start_time, duration, program_name))
                program_list.append(DEFAULT_DELIMITER.join([start_time, program_name, program_genre]))
            data_by_date.update({channel: program_list})

        date_str = utils.get_time_str(date)
        data.update({date_str: data_by_date})
        print("Crawl broadcast schedule of date {} done".format(date_str))
        # break
    return data
def train(args, model, train_loader, optimizer, epoch, start_time, log_file,
          train_epochs, train_losses, train_accuracy,
          valid_epochs, valid_losses, valid_accuracy, lr_change):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        # Get data
        data, target = data.to(args.device), target.to(args.device)

        # Get model output
        optimizer.zero_grad()
        output = model(data)

        # Calc loss
        loss = nn.NLLLoss()(output, target)

        # Backprop
        loss.backward()
        optimizer.step()

        # Log, Plot
        if (epoch*len(train_loader) + batch_idx) % args.log_interval == 0:
            # Check loss, accuracy
            train_epochs.append(epoch + batch_idx/len(train_loader))
            train_losses.append(loss.item())
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            train_accuracy.append(pred.eq(target.view_as(pred)).sum().item()/len(pred))

            # Get time elapsed
            curr_time = time.time()
            curr_time_str, elapsed_str = utils.get_time_str(start_time, curr_time)

            # Log
            log = '[{}] : Elapsed [{}]: Epoch: {} [{}/{} ({:.0f}%)]\tTRAIN Loss: {:.6f}\tAccuracy: {:.4f}\n'.format(
                curr_time_str, elapsed_str, epoch, batch_idx, len(train_loader),
                100.*batch_idx/len(train_loader), train_losses[-1], train_accuracy[-1])
            print(log)
            log_file.write(log)
            log_file.flush()
            utils.mem_check()
            utils.make_plots(args.out_path, train_epochs, train_losses, train_accuracy,
                             valid_epochs, valid_losses, valid_accuracy, lr_change)

        # Save models
        if (epoch*len(train_loader) + batch_idx) % args.model_save_interval == 0:
            model_name = os.path.join(args.out_path,
                                      'model_epoch_{:04d}_batch_{:05d}_of_{:05d}.pth'.format(
                                          epoch, batch_idx, len(train_loader)))
            print("Saving model", model_name)
            torch.save(model.state_dict(), model_name)
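# Hypothetical driver loop wiring the train()/test() helpers above together.
# The argparse fields used here (epochs, out_path, plus device/log_interval/
# model_save_interval consumed inside the helpers) and the log-file location
# are assumptions for illustration, not part of the source.
def main(args, model, train_loader, test_loader, optimizer):
    start_time = time.time()
    train_epochs, train_losses, train_accuracy = [], [], []
    valid_epochs, valid_losses, valid_accuracy = [], [], []
    lr_change = []
    with open(os.path.join(args.out_path, 'log.txt'), 'a') as log_file:
        for epoch in range(args.epochs):
            train(args, model, train_loader, optimizer, epoch, start_time, log_file,
                  train_epochs, train_losses, train_accuracy,
                  valid_epochs, valid_losses, valid_accuracy, lr_change)
            test(args, model, test_loader, epoch, start_time, log_file,
                 train_epochs, train_losses, train_accuracy,
                 valid_epochs, valid_losses, valid_accuracy, lr_change)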
def report_label_distribution(name: str, labels: List[Dict[int, int]],
                              label_name_map: Dict[int, str],
                              report_count: int) -> None:
    logname = os.path.join('reports', utils.get_time_str(), 'distr_{}'.format(name))
    report = '{} set label distribution:\n\ntotal: {}\n'.format(
        name, sum(map(len, labels)))
    counter = Counter[int](utils.flatten(l.values() for l in labels))
    most_common = counter.most_common(report_count)
    table_entries = [(count, label_name_map[key]) for (key, count) in most_common]
    report += tabulate(table_entries, headers=['Count', 'Label']) + '\n'
    utils.write(report, logname)
def send(self):
    while True:
        msg_text = self.msg_queue.get()  # renamed so it no longer shadows the MIMEText below
        time_str = get_time_str()
        print('[send email]', time_str, msg_text)
        body = f"<html><body><h1>{time_str}</h1><p>{msg_text}</p></body></html>"
        # ---------------------------------------------------------------
        msg = MIMEText(body, 'html', 'utf-8')
        msg['From'] = self._format_addr('TraderMonitor <%s>' % self.from_addr)
        msg['To'] = self._format_addr('<%s>' % self.to_addr)
        msg['Subject'] = Header('sfl_trader', 'utf-8').encode()

        server = smtplib.SMTP_SSL(self.smtp_server, 465)
        # server.set_debuglevel(1)
        server.login(self.from_addr, self.password)
        server.sendmail(self.from_addr, [self.to_addr], msg.as_string())
        server.quit()
def init_logger(self, log_dir=None, level=logging.INFO):
    """Init the logger.

    Args:
        log_dir (str, optional): Log file directory. If not specified, no
            log file will be used.
        level (int or str): See the built-in python logging module.

    Returns:
        :obj:`~logging.Logger`: Python logger.
    """
    logger = get_logger(level, name=__name__)
    if log_dir:
        filename = '{}.log'.format(get_time_str())
        log_file = osp.join(log_dir, filename)
        logger_file_handler(logger, log_file, level=level)
    return logger
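# Sketch of calling init_logger on its owning object. `runner` is a
# placeholder name for whatever class defines the method above, and the log
# directory is an assumed example path.
logger = runner.init_logger(log_dir='./work_dir', level=logging.INFO)
logger.info('run started at {}'.format(get_time_str()))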
def generate_multinomial_experiment(exp_id, agent_paths, path, unique=True, expdomain=None):
    opt = {}
    if unique:
        opt["path"] = os.path.join(path, get_time_str())
    else:
        opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 8000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 1

    agents = load_all_agents(agent_paths, pretrained=True, load_confidence=True)
    for a in agents:
        assert type(a.policy).__name__ == "DynamicsConfidenceGreedy"
        a.policy.epsilon = 0

    if expdomain:
        domain = expdomain(mapname=mapname, noise=0.1)
    else:
        domain = GridWorldMixed(mapname=mapname, noise=0.1)
    representation = IncrementalTabular(domain)
    policy = MAgentMultinomial(representation, agents)  # , tau=.1)
    print "$" * 10
    print "You are currently running {}".format(policy.__class__)
    opt['agent'] = NoopAgent(representation=representation, policy=policy)
    opt['domain'] = domain
    experiment = Experiment(**opt)

    # Snapshot this script alongside the results for reproducibility
    path_join = lambda s: os.path.join(opt["path"], s)
    if not os.path.exists(opt["path"]):
        os.makedirs(opt["path"])
    shutil.copy(inspect.getsourcefile(inspect.currentframe()),
                path_join("experiment.py"))
    return experiment
def send(self):
    while True:
        jsondata = self.msg_queue.get()
        msg_text = jsondata['msg']  # renamed so it no longer shadows the MIMEText below
        to_addr = jsondata['to']
        time_str = get_time_str()
        print('[send email]', time_str, msg_text)
        body = f"<html><body>{msg_text}</body></html>"
        # ---------------------------------------------------------------
        msg = MIMEText(body, 'html', 'utf-8')
        msg['From'] = self._format_addr("{name} {address}".format(name=self.from_name,
                                                                  address=self.from_addr))
        msg['To'] = ','.join(to_addr)
        msg['Subject'] = Header(self.subject, 'utf-8').encode()
        msg.add_header('reply-to', self.reply_to)

        server = smtplib.SMTP_SSL(self.smtp_server, self.smtp_port)
        server.set_debuglevel(1)
        server.login(self.from_addr, self.password)
        server.sendmail(self.from_addr, to_addr, msg.as_string())
        server.quit()
def init_logger(self, log_dir=None, level=logging.INFO):
    """Init the logger.

    Args:
        log_dir (str, optional): Log file directory. If not specified, no
            log file will be used.
        level (int or str): See the built-in python logging module.

    Returns:
        :obj:`~logging.Logger`: Python logger.
    """
    logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s',
                        level=level)
    logger = logging.getLogger(__name__)
    if log_dir and self.rank == 0:
        filename = '{}.log'.format(get_time_str())
        log_file = osp.join(log_dir, filename)
        self._add_file_handler(logger, log_file, level=level)
    return logger
def login(username, passwd):
    print("Trying to login... " + utils.get_time_str())
    data = dict(username=username, password=passwd)
    s = requests.Session()
    response = s.post(const.login_url, data=data, headers=const.headers, timeout=5)
    reply = response.json()
    reply_code = reply["reply_code"]
    if reply_code == 1:
        print("User {} logged in successfully!".format(reply["userinfo"]["username"]))
    elif reply_code == 6:
        print("User {} is already logged in!".format(reply["userinfo"]["username"]))
    elif reply_code == 3:
        print("Authentication failed!")
    else:
        print(reply)
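# Hypothetical invocation of login(); the credentials are placeholders, and
# const.login_url / const.headers are assumed to be defined in the snippet's
# own `const` module.
if __name__ == '__main__':
    login('my_username', 'my_password')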
def make_experiment(agent_paths=["./"],
                    sublearning=False,
                    exp_id=3,
                    path="./Results/Confidence2/",
                    temp=0.10517212721767522,
                    discount_factor=0.7,
                    lambda_=0.0,
                    init_state=None):
    opt = {}
    opt["path"] = os.path.join(path, get_time_str())
    opt["exp_id"] = exp_id
    opt["max_steps"] = 40000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 20
    # start_at = np.array([4, 6])

    agents = load_all_agents(agent_paths, pretrained=True, load_confidence=True)
    for i, a in enumerate(agents):
        # import ipdb; ipdb.set_trace()
        a.policy.epsilon = i * 0.02 + 0.1
        # a.learn_rate_decay_mode = 'boyan'
        # a.learn_rate = a.initial_learn_rate = 0.9
        # a.boyan_N0 = 3000
        a.learn_rate_decay_mode = 'dabney'

    domain = RCCarModified(noise=0.1, init_state=(-2, 0.8, 0, 2.5))
    representation = IncrementalTabular(domain)
    policy = MultiAgentConfidence(representation, agents, tau=.1)
    print "$" * 10
    print "You are currently running {}".format(policy.__class__)
    opt['agent'] = MetaAgent(representation=representation, policy=policy,
                             learn_rate_decay_mode="const")
    opt['domain'] = domain
    experiment = Experiment(**opt)
    return experiment
def report_rates_on_epoch(self, label: str, epno: int,
                          batch_results: BatchResult,
                          report_params: ReportParameters) -> None:
    report_str = 'Total #preds: {}\n'.format(batch_results.total_weight)
    true_pred = batch_results.weighted_true_preds
    false_miss = batch_results.weighted_n_labs - batch_results.weighted_true_preds
    false_pred = batch_results.weighted_n_preds - batch_results.weighted_true_preds
    # true_miss = (batch_results.total_weight - batch_results.weighted_n_labs) - false_pred
    report_for_i = lambda i: list(
        map(lambda x: x[i], (report_params.label_name_map, true_pred, false_miss, false_pred)))
    report_str += tabulate(
        list(map(report_for_i, range(report_params.top_k))),
        headers=['Label', '#Correct', '#Missed', '#Falsely Predicted'])
    report_str += '\n'
    utils.write(
        report_str,
        os.path.join('reports', utils.get_time_str(), 'epoch_{}_{}'.format(epno, label)))
def inference(model, data_loader, dataset_name, device='cuda',
              output_folder=None, expected_results=(),
              expected_results_sigma_tol=4):
    device = torch.device(device)
    num_devices = get_world_size()
    logger = logging.getLogger("RetinaNet.inference")
    dataset = data_loader.dataset
    logger.info("Start evaluation on {} dataset({} images).".format(dataset_name, len(dataset)))

    total_timer = Timer()
    inference_timer = Timer()
    total_timer.tic()
    predictions = compute_on_dataset(model, data_loader, device, inference_timer)
    # wait for all processes to complete before measuring the time
    synchronize()
    total_time = total_timer.toc()
    total_time_str = get_time_str(total_time)
    logger.info(
        "Total run time: {} ({} s / img per device, on {} devices)".format(
            total_time_str, total_time * num_devices / len(dataset), num_devices))

    predictions = accumulate_predictions_from_multiple_gpus(predictions)
    if not is_main_process():
        return

    if output_folder:
        torch.save(predictions, os.path.join(output_folder, "predictions.pth"))

    extra_args = dict(
        expected_results=expected_results,
        expected_results_sigma_tol=expected_results_sigma_tol,
    )
    return evaluate(dataset=dataset,
                    predictions=predictions,
                    output_folder=output_folder,
                    **extra_args)
def __init__(self,
             final_results_path="./Results/StartTimeExperimentMaha/",
             subagent_path_root='./GeneratedAgents/RCCarConfidenceMaha/',
             seed=1):
    self.seed = seed
    self.logger = logging.getLogger("StartingExperiment")
    self.all_agent_paths = []
    self.delay = 10000  # max_steps after beginning meta experiments
    self.max_start_time = 10000
    self.num_policy_checks = 25  # how many start times we evaluate
    self.meta_iter_per_check = 200  # for the meta agents
    self.results = []
    self.final_results_path = os.path.join(final_results_path, get_time_str())
    try:
        os.makedirs(self.final_results_path)
    except Exception:
        print "[ERR]: Path not created"
    self.subagent_path_root = subagent_path_root
    steps = self.max_start_time / self.num_policy_checks
    self.start_times = np.r_[steps:self.max_start_time + 1:steps]
def generate_meta_experiment(exp_id, agent_paths, path, unique=True,
                             expdomain=None, max_episode=5000):
    opt = {}
    if unique:
        opt["path"] = os.path.join(path, get_time_str())
    else:
        opt["path"] = path
    opt["exp_id"] = exp_id
    # opt["max_steps"] = 50000
    opt["max_episode"] = max_episode
    opt["num_policy_checks"] = 50
    opt["checks_per_policy"] = 1
    # start_at = np.array([4, 6])

    agents = load_all_agents(agent_paths, pretrained=True, load_confidence=False)
    for a in agents:
        a.policy.epsilon = 0
        a.policy.turnOffExploration()

    if expdomain:
        actual_domain = expdomain(noise=0.1)
    else:
        actual_domain = RCCarModified(noise=0.1)
    domain = RCPolicyMixer(actual_domain, agents)
    # representation = MahaRBF(domain,
    #                          num_rbfs=3000,
    #                          # state_dimensions=np.array([0,1,3]),
    #                          const_feature=False,
    #                          resolution_min=21,
    #                          resolution_max=21,
    #                          include_border=True,
    #                          normalize=True,
    #                          seed=exp_id)
    # representation = RandomLocalBases(domain, gaussian_kernel,
    #                                   num=100,
    #                                   normalization=True,
    #                                   resolution_max=20,
    #                                   seed=exp_id)
    representation = NonparametricLocalBases(domain, gaussian_kernel,
                                             normalization=True)
    policy = eGreedy(representation, 0.05)  # , tau=.1)
    # policy = GibbsPolicy(representation)
    opt['agent'] = Q_Learning(policy, representation,
                              discount_factor=0.8,
                              initial_learn_rate=.8,
                              lambda_=0.1,
                              learn_rate_decay_mode='boyan',
                              boyan_N0=500)
    opt['domain'] = domain
    experiment = ExperimentMod(**opt)

    # Snapshot this script alongside the results for reproducibility
    path_join = lambda s: os.path.join(opt["path"], s)
    if not os.path.exists(opt["path"]):
        os.makedirs(opt["path"])
    shutil.copy(inspect.getsourcefile(inspect.currentframe()),
                path_join("experiment.py"))
    return experiment
def save_index_maps(index_maps: graph_construction.IndexMaps) -> None:
    path = os.path.join(utils.DIRNAME, 'index_maps')
    os.makedirs(path, exist_ok=True)
    with open(os.path.join(path, utils.get_time_str()), 'wb') as f:
        pickle.dump(index_maps, f)
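# The source only shows the save side of this pickle round-trip; below is a
# minimal sketch of a matching loader. The function name and the
# "newest file by mtime wins" policy are assumptions, not part of the source.
def load_latest_index_maps() -> 'graph_construction.IndexMaps':
    path = os.path.join(utils.DIRNAME, 'index_maps')
    candidates = [os.path.join(path, name) for name in os.listdir(path)]
    with open(max(candidates, key=os.path.getmtime), 'rb') as f:  # newest dump
        return pickle.load(f)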
def make_experiment(exp_id=1,
                    path="./Results/Temp/MetaHP/",
                    unique=False,
                    max_episode=300,
                    num_rbfs=4000,
                    initial_learn_rate=0.9,
                    lambda_=0.7,
                    resolution=20):
    opt = {}
    if unique:
        opt["path"] = os.path.join(path, get_time_str())
    else:
        opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 30000
    # opt["max_episode"] = max_episode
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 1
    # start_at = np.array([4, 6])

    from Domains import RCCarSlideTurn
    expdomain = Domains.RCCarSlideTurn
    agent_paths = [
        '/home/jarvis/work/clipper/models/rl/Results/Mixed_ActionsB/agent0/Aug21_11-38-389943',
        '/home/jarvis/work/clipper/models/rl/Results/Mixed_ActionsB/agent1/Aug21_11-43-003799'
    ]
    agents = load_all_agents(agent_paths, pretrained=True, load_confidence=False)
    for a in agents:
        a.policy.epsilon = 0
        # a.policy.turnOffExploration()

    if expdomain:
        actual_domain = expdomain(noise=0.)
    else:
        actual_domain = RCCarModified(noise=0.1)
    domain = RCPolicyMixer(actual_domain, agents)
    representation = MahaRBF(domain,
                             num_rbfs=int(num_rbfs),
                             # state_dimensions=np.array([0,1,3]),
                             const_feature=False,
                             resolution_min=resolution,
                             resolution_max=resolution,
                             include_border=True,
                             normalize=True,
                             seed=exp_id)
    policy = eGreedy(representation)  # , epsilon=0.1)  # , tau=.1)
    opt['agent'] = Q_Learning(policy, representation,
                              discount_factor=0.8,
                              initial_learn_rate=initial_learn_rate,
                              lambda_=lambda_,
                              learn_rate_decay_mode='const')
    opt['domain'] = domain
    experiment = Experiment(**opt)
    # path_join = lambda s: os.path.join(opt["path"], s)
    # if not os.path.exists(opt["path"]):
    #     os.makedirs(opt["path"])
    # shutil.copy(inspect.getsourcefile(inspect.currentframe()), path_join("experiment.py"))
    return experiment
if __name__ == '__main__':
    # exp0 = make_experiment("params/gridworld/", yaml_file="agent0.yml",
    #                        result_path="./Results/Mixed/agent0", save=True)  # TODO
    # assert mapname in exp0.domain.mapname, "Not using correct map!"
    # exp0.run(visualize_performance=0)
    # representation = exp0.agent.representation
    # representation.dump_to_directory(exp0.full_path)  # TODO

    # result_path1 = "./Results/Mixed/agent1"
    # exp1 = make_experiment("params/gridworld/", yaml_file="agent1.yml",
    #                        result_path=result_path1, save=True)
    # assert mapname in exp1.domain.mapname, "Not using correct map!"
    # exp1.run(visualize_performance=0)
    # representation = exp1.agent.representation
    # representation.dump_to_directory(exp1.full_path)
    # agent_paths = [exp0.full_path, exp1.full_path]
    # print agent_paths

    # agent_paths = ['./Results/Mixed/agent0/Jul26_05-40-200563', './Results/Mixed/agent1/Jul26_05-40-636595']  # 11x11 1
    # agent_paths = ['./Results/Mixed/agent0/Jul29_02-39-566096', './Results/Mixed/agent1/Jul29_02-40-516981']  # 11x11 2
    agent_paths = [
        "./Results/Mixed/agent0/Jul29_02-48-334415/",
        "./Results/Mixed/agent1/Jul29_02-48-430882/"
    ]
    path = run_exp_trials(generate_meta_experiment, agent_paths,
                          "./Results/Meta/Harder11x11maze/" + get_time_str())
    print path
    # exp.run(visualize_performance=0, debug_on_sigurg=True)
    # exp.save()
    # exp.plot()
    # import joblib