def run(save_loc="ER_40spin/eco",
        graph_save_loc="_graphs/validation/ER_40spin_p15_100graphs.pkl",
        batched=True,
        max_batch_size=None):
    """Test a pre-trained MPNN on a saved graph set and pickle the results.

    The network weights are read from ``<save_loc>/network/network_best.pth``
    and three result objects (summary, raw, history) are written to
    ``<save_loc>/data``.

    Args:
        save_loc: Root folder holding the ``network`` and ``data``
            sub-folders.
        graph_save_loc: Path handed to ``load_graph_set``; its file name
            (without extension) is embedded in the output file names.
        batched: Forwarded unchanged to ``test_network``.
        max_batch_size: Forwarded unchanged to ``test_network``.
    """
    print("\n----- Running {} -----\n".format(os.path.basename(__file__)))

    ####################################################
    # NETWORK LOCATION
    ####################################################

    data_folder = os.path.join(save_loc, 'data')
    network_folder = os.path.join(save_loc, 'network')

    print("data folder :", data_folder)
    print("network folder :", network_folder)

    network_save_path = os.path.join(network_folder, 'network_best.pth')

    print("network params :", network_save_path)

    ####################################################
    # NETWORK SETUP
    ####################################################

    network_fn = MPNN
    network_args = {
        'n_layers': 3,
        'n_features': 64,
        'n_hid_readout': [],
        'tied_weights': False
    }

    ####################################################
    # SET UP ENVIRONMENTAL AND VARIABLES
    ####################################################

    # Number of environment steps per episode, as a multiple of the number
    # of spins in the graph.
    step_factor = 2

    env_args = {
        'observables': DEFAULT_OBSERVABLES,
        'reward_signal': RewardSignal.BLS,
        'extra_action': ExtraAction.NONE,
        'optimisation_target': OptimisationTarget.CUT,
        'spin_basis': SpinBasis.BINARY,
        'norm_rewards': True,
        'memory_length': None,
        'horizon_length': None,
        'stag_punishment': None,
        'basin_reward': 1. / 40,
        'reversible_spins': True
    }

    ####################################################
    # LOAD VALIDATION GRAPHS
    ####################################################

    graphs_test = load_graph_set(graph_save_loc)

    ####################################################
    # SETUP NETWORK TO TEST
    ####################################################

    # This environment is only built to read the observation width that the
    # network's input layer must match.
    test_env = ising_env.make("SpinSystem",
                              SingleGraphGenerator(graphs_test[0]),
                              graphs_test[0].shape[0] * step_factor,
                              **env_args)

    # The network is moved to this device explicitly via ``.to(device)``;
    # no global torch default is changed here.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print("Set torch default device to {}.".format(device))

    network = network_fn(n_obs_in=test_env.observation_space.shape[1],
                         **network_args).to(device)

    network.load_state_dict(torch.load(network_save_path, map_location=device))
    # Inference only: freeze the weights and switch to evaluation mode.
    for param in network.parameters():
        param.requires_grad = False
    network.eval()

    print(
        "Sucessfully created agent with pre-trained MPNN.\nMPNN architecture\n\n{}"
        .format(repr(network)))

    ####################################################
    # TEST NETWORK ON VALIDATION GRAPHS
    ####################################################

    # Create the output folder before the test run so that a missing or
    # unwritable directory is reported up front instead of after all the
    # results have been computed.
    os.makedirs(data_folder, exist_ok=True)

    results, results_raw, history = test_network(network,
                                                 env_args,
                                                 graphs_test,
                                                 device,
                                                 step_factor,
                                                 return_raw=True,
                                                 return_history=True,
                                                 batched=batched,
                                                 max_batch_size=max_batch_size)

    # Output names share the graph file's base name, e.g.
    # "results_<graphs>.pkl", "results_<graphs>_raw.pkl", ...
    graph_stem = os.path.splitext(os.path.split(graph_save_loc)[-1])[0]
    results_fname = "results_" + graph_stem + ".pkl"
    results_raw_fname = "results_" + graph_stem + "_raw.pkl"
    history_fname = "results_" + graph_stem + "_history.pkl"

    for res, fname, label in zip(
            [results, results_raw, history],
            [results_fname, results_raw_fname, history_fname],
            ["results", "results_raw", "history"]):
        save_path = os.path.join(data_folder, fname)
        # Each result object is expected to expose ``to_pickle``
        # (presumably a pandas DataFrame -- confirm against test_network).
        res.to_pickle(save_path)
        print("{} saved to {}".format(label, save_path))
def run(save_loc="ER_100spin/s2v"):
    """Train a DQN agent with an MPNN network and plot its training curves.

    Networks, test scores and losses are written under
    ``<save_loc>/network``; the ``<save_loc>/data`` folder is created as
    well. After training, the test-score and loss pickles are read back and
    rendered to ``training_curve`` and ``loss`` figures (PNG and PDF) in the
    network folder.

    Args:
        save_loc: Root folder for everything this run writes.
    """
    print("\n----- Running {} -----\n".format(os.path.basename(__file__)))

    ####################################################
    # SET UP ENVIRONMENTAL AND VARIABLES
    ####################################################

    gamma = 1
    # Episode length as a multiple of the number of spins.
    step_fact = 1

    env_args = {
        'observables': [Observable.SPIN_STATE],
        'reward_signal': RewardSignal.DENSE,
        'extra_action': ExtraAction.NONE,
        'optimisation_target': OptimisationTarget.CUT,
        'spin_basis': SpinBasis.BINARY,
        'norm_rewards': True,
        'memory_length': None,
        'horizon_length': None,
        'stag_punishment': None,
        'basin_reward': None,
        'reversible_spins': False
    }

    ####################################################
    # SET UP TRAINING AND TEST GRAPHS
    ####################################################

    n_spins_train = 100

    train_graph_generator = RandomErdosRenyiGraphGenerator(
        n_spins=n_spins_train, p_connection=0.15, edge_type=EdgeType.DISCRETE)

    ####
    # Pre-generated test graphs
    ####
    graph_save_loc = "_graphs/testing/ER_100spin_p15_50graphs.pkl"
    graphs_test = load_graph_set(graph_save_loc)
    n_tests = len(graphs_test)

    test_graph_generator = SetGraphGenerator(graphs_test, ordered=True)

    ####################################################
    # SET UP TRAINING AND TEST ENVIRONMENTS
    ####################################################

    train_envs = [
        ising_env.make("SpinSystem", train_graph_generator,
                       int(n_spins_train * step_fact), **env_args)
    ]

    # NOTE(review): the test episode length is derived from a graph drawn
    # from the *training* generator, not from the loaded test graphs. This
    # only matches when both sets have the same number of spins -- confirm
    # that this is intended.
    n_spins_test = train_graph_generator.get().shape[0]
    test_envs = [
        ising_env.make("SpinSystem", test_graph_generator,
                       int(n_spins_test * step_fact), **env_args)
    ]

    ####################################################
    # SET UP FOLDERS FOR SAVING DATA
    ####################################################

    data_folder = os.path.join(save_loc, 'data')
    network_folder = os.path.join(save_loc, 'network')

    mk_dir(data_folder)
    mk_dir(network_folder)

    network_save_path = os.path.join(network_folder, 'network.pth')
    test_save_path = os.path.join(network_folder, 'test_scores.pkl')
    loss_save_path = os.path.join(network_folder, 'losses.pkl')

    ####################################################
    # SET UP AGENT
    ####################################################

    nb_steps = 8000000

    def network_fn():
        """Build a fresh MPNN sized to the training env's observations."""
        return MPNN(n_obs_in=train_envs[0].observation_space.shape[1],
                    n_layers=3,
                    n_features=64,
                    n_hid_readout=[],
                    tied_weights=False)

    agent = DQN(
        train_envs,
        network_fn,
        init_network_params=None,
        init_weight_std=0.01,
        double_dqn=True,
        clip_Q_targets=True,
        replay_start_size=1500,
        replay_buffer_size=10000,  # 20000
        gamma=gamma,  # 1
        update_target_frequency=2500,  # 500
        update_learning_rate=False,
        initial_learning_rate=1e-4,
        peak_learning_rate=1e-4,
        peak_learning_rate_step=20000,
        final_learning_rate=1e-4,
        final_learning_rate_step=200000,
        update_frequency=32,  # 1
        minibatch_size=64,  # 128
        max_grad_norm=None,
        weight_decay=0,
        update_exploration=True,
        initial_exploration_rate=1,
        final_exploration_rate=0.05,  # 0.05
        final_exploration_step=800000,  # 40000
        adam_epsilon=1e-8,
        logging=False,
        loss="mse",
        save_network_frequency=400000,
        network_save_path=network_save_path,
        evaluate=True,
        test_envs=test_envs,
        test_episodes=n_tests,
        test_frequency=50000,  # 10000
        test_save_path=test_save_path,
        test_metric=TestMetric.MAX_CUT,
        seed=None)

    print("\n Created DQN agent with network:\n\n", agent.network)

    #############
    # TRAIN AGENT
    #############
    start = time.time()
    agent.learn(timesteps=nb_steps, verbose=True)
    # Wall-clock training time in seconds.
    print(time.time() - start)

    agent.save()

    ############
    # PLOT - learning curve
    ############
    # Use a context manager so the file handle is closed deterministically.
    with open(test_save_path, 'rb') as scores_file:
        data = pickle.load(scores_file)
    data = np.array(data)

    fig_fname = os.path.join(network_folder, "training_curve")

    # Column 0 is plotted on the x axis, column 1 on the y axis.
    plt.plot(data[:, 0], data[:, 1])
    plt.xlabel("Training run")
    # Default label; overridden below for the recognised test metrics.
    plt.ylabel("Mean reward")
    if agent.test_metric == TestMetric.ENERGY_ERROR:
        plt.ylabel("Energy Error")
    elif agent.test_metric == TestMetric.BEST_ENERGY:
        plt.ylabel("Best Energy")
    elif agent.test_metric == TestMetric.CUMULATIVE_REWARD:
        plt.ylabel("Cumulative Reward")
    elif agent.test_metric == TestMetric.MAX_CUT:
        plt.ylabel("Max Cut")
    elif agent.test_metric == TestMetric.FINAL_CUT:
        plt.ylabel("Final Cut")

    plt.savefig(fig_fname + ".png", bbox_inches='tight')
    plt.savefig(fig_fname + ".pdf", bbox_inches='tight')

    # Reset the figure so the loss plot starts from a clean canvas.
    plt.clf()

    ############
    # PLOT - losses
    ############
    with open(loss_save_path, 'rb') as losses_file:
        data = pickle.load(losses_file)
    data = np.array(data)

    fig_fname = os.path.join(network_folder, "loss")

    # Smooth both columns with an N-point moving average; mode='valid'
    # keeps only fully-overlapping windows so x and y stay aligned.
    N = 50
    window = np.ones((N, )) / N
    data_x = np.convolve(data[:, 0], window, mode='valid')
    data_y = np.convolve(data[:, 1], window, mode='valid')

    plt.plot(data_x, data_y)
    plt.xlabel("Timestep")
    plt.ylabel("Loss")

    plt.yscale("log")
    plt.grid(True)

    plt.savefig(fig_fname + ".png", bbox_inches='tight')
    plt.savefig(fig_fname + ".pdf", bbox_inches='tight')
def run(save_loc="pretrained_agent/s2v",
        network_save_loc="experiments_new/pretrained_agent/networks/s2v/network_best_ER_200spin.pth",
        graph_save_loc="_graphs/benchmarks/ising_125spin_graphs.pkl",
        batched=True,
        max_batch_size=5):
    """Evaluate a stored MPNN on a graph set and pickle the outcome.

    Loads weights from ``network_save_loc``, runs ``test_network`` with 50
    attempts on the graphs loaded from ``graph_save_loc``, and writes the
    summary, raw and history results into ``save_loc``.

    Args:
        save_loc: Folder the result pickles are written to (created via
            ``mk_dir``).
        network_save_loc: Path of the saved network state dict.
        graph_save_loc: Path handed to ``load_graph_set``; its file name
            without extension becomes part of each output file name.
        batched: Passed through to ``test_network``.
        max_batch_size: Passed through to ``test_network``.
    """
    print("\n----- Running {} -----\n".format(os.path.basename(__file__)))

    ####################################################
    # FOLDER LOCATIONS
    ####################################################

    print("save location :", save_loc)
    print("network params :", network_save_loc)

    mk_dir(save_loc)

    ####################################################
    # NETWORK SETUP
    ####################################################

    mpnn_kwargs = dict(n_layers=3,
                       n_features=64,
                       n_hid_readout=[],
                       tied_weights=False)

    ####################################################
    # SET UP ENVIRONMENTAL AND VARIABLES
    ####################################################

    # Episode length expressed as a multiple of the spin count.
    step_factor = 1

    env_args = {
        'observables': [Observable.SPIN_STATE],
        'reward_signal': RewardSignal.DENSE,
        'extra_action': ExtraAction.NONE,
        'optimisation_target': OptimisationTarget.CUT,
        'spin_basis': SpinBasis.BINARY,
        'norm_rewards': True,
        'memory_length': None,
        'horizon_length': None,
        'stag_punishment': None,
        'basin_reward': None,
        'reversible_spins': False,
    }

    ####################################################
    # LOAD VALIDATION GRAPHS
    ####################################################

    graphs_test = load_graph_set(graph_save_loc)

    ####################################################
    # SETUP NETWORK TO TEST
    ####################################################

    # A throw-away environment on the first graph, used to discover the
    # observation width expected by the network input.
    first_graph = graphs_test[0]
    test_env = ising_env.make("SpinSystem",
                              SingleGraphGenerator(first_graph),
                              first_graph.shape[0] * step_factor,
                              **env_args)

    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"
    # NOTE: this only constructs a device object and discards it; the
    # network is placed on the device by the explicit ``.to(device)`` below.
    torch.device(device)
    print("Set torch default device to {}.".format(device))

    obs_width = test_env.observation_space.shape[1]
    network = MPNN(n_obs_in=obs_width, **mpnn_kwargs).to(device)

    state_dict = torch.load(network_save_loc, map_location=device)
    network.load_state_dict(state_dict)
    # Evaluation only: no gradients are needed for any weight.
    for weight in network.parameters():
        weight.requires_grad = False
    network.eval()

    print("Sucessfully created agent with pre-trained MPNN.\nMPNN architecture\n\n{}".format(repr(network)))

    ####################################################
    # TEST NETWORK ON VALIDATION GRAPHS
    ####################################################

    results, results_raw, history = test_network(
        network,
        env_args,
        graphs_test,
        device,
        step_factor,
        return_raw=True,
        return_history=True,
        n_attempts=50,
        batched=batched,
        max_batch_size=max_batch_size)

    # All three output files share the prefix "results_<graph file stem>".
    prefix = "results_" + os.path.splitext(os.path.basename(graph_save_loc))[0]

    outputs = (
        ("results", results, prefix + ".pkl"),
        ("results_raw", results_raw, prefix + "_raw.pkl"),
        ("history", history, prefix + "_history.pkl"),
    )
    for label, res, fname in outputs:
        save_path = os.path.join(save_loc, fname)
        res.to_pickle(save_path)
        print("{} saved to {}".format(label, save_path))