Code Example #1
File: train_s2v.py  Project: tomdbar/eco-dqn
import os
import pickle
import time

import matplotlib.pyplot as plt
import numpy as np

# Project-level imports, following the module layout of the eco-dqn
# repository (paths assumed from the project structure).
import src.envs.core as ising_env
from experiments.utils import load_graph_set, mk_dir
from src.agents.dqn.dqn import DQN
from src.agents.dqn.utils import TestMetric
from src.envs.utils import (EdgeType, ExtraAction, Observable,
                            OptimisationTarget, RandomErdosRenyiGraphGenerator,
                            RewardSignal, SetGraphGenerator, SpinBasis)
from src.networks.mpnn import MPNN

def run(save_loc="ER_100spin/s2v"):

    print("\n----- Running {} -----\n".format(os.path.basename(__file__)))

    ####################################################
    # SET UP ENVIRONMENT AND VARIABLES
    ####################################################

    gamma = 1
    step_fact = 1

    # S2V-style configuration: the agent observes only the raw spin state,
    # receives a dense reward signal, and spins are irreversible (each can be
    # flipped at most once per episode).
    env_args = {
        'observables': [Observable.SPIN_STATE],
        'reward_signal': RewardSignal.DENSE,
        'extra_action': ExtraAction.NONE,
        'optimisation_target': OptimisationTarget.CUT,
        'spin_basis': SpinBasis.BINARY,
        'norm_rewards': True,
        'memory_length': None,
        'horizon_length': None,
        'stag_punishment': None,
        'basin_reward': None,
        'reversible_spins': False
    }

    ####################################################
    # SET UP TRAINING AND TEST GRAPHS
    ####################################################

    n_spins_train = 100

    train_graph_generator = RandomErdosRenyiGraphGenerator(
        n_spins=n_spins_train, p_connection=0.15, edge_type=EdgeType.DISCRETE)

    ####
    # Pre-generated test graphs
    ####
    graph_save_loc = "_graphs/testing/ER_100spin_p15_50graphs.pkl"
    graphs_test = load_graph_set(graph_save_loc)
    n_tests = len(graphs_test)

    test_graph_generator = SetGraphGenerator(graphs_test, ordered=True)

    ####################################################
    # SET UP TRAINING AND TEST ENVIRONMENTS
    ####################################################

    train_envs = [
        ising_env.make("SpinSystem", train_graph_generator,
                       int(n_spins_train * step_fact), **env_args)
    ]

    n_spins_test = train_graph_generator.get().shape[0]
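    # Test environments get the same per-episode step budget as training:
    # one step per spin, scaled by step_fact.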
    test_envs = [
        ising_env.make("SpinSystem", test_graph_generator,
                       int(n_spins_test * step_fact), **env_args)
    ]

    ####################################################
    # SET UP FOLDERS FOR SAVING DATA
    ####################################################

    data_folder = os.path.join(save_loc, 'data')
    network_folder = os.path.join(save_loc, 'network')

    mk_dir(data_folder)
    mk_dir(network_folder)
    # print(data_folder)
    network_save_path = os.path.join(network_folder, 'network.pth')
    test_save_path = os.path.join(network_folder, 'test_scores.pkl')
    loss_save_path = os.path.join(network_folder, 'losses.pkl')

    ####################################################
    # SET UP AGENT
    ####################################################

    nb_steps = 8000000

    # Q-network: a message-passing neural network whose input size matches the
    # per-spin observation dimension of the training environments.
    network_fn = lambda: MPNN(n_obs_in=train_envs[0].observation_space.shape[1],
                              n_layers=3,
                              n_features=64,
                              n_hid_readout=[],
                              tied_weights=False)

    agent = DQN(
        train_envs,
        network_fn,
        init_network_params=None,
        init_weight_std=0.01,
        double_dqn=True,
        clip_Q_targets=True,
        replay_start_size=1500,
        replay_buffer_size=10000,  # 20000
        gamma=gamma,  # 1
        update_target_frequency=2500,  # 500
        update_learning_rate=False,
        initial_learning_rate=1e-4,
        peak_learning_rate=1e-4,
        peak_learning_rate_step=20000,
        final_learning_rate=1e-4,
        final_learning_rate_step=200000,
        update_frequency=32,  # 1
        minibatch_size=64,  # 128
        max_grad_norm=None,
        weight_decay=0,
        update_exploration=True,
        initial_exploration_rate=1,
        final_exploration_rate=0.05,  # 0.05
        final_exploration_step=800000,  # 40000
        adam_epsilon=1e-8,
        logging=False,
        loss="mse",
        save_network_frequency=400000,
        network_save_path=network_save_path,
        evaluate=True,
        test_envs=test_envs,
        test_episodes=n_tests,
        test_frequency=50000,  # 10000
        test_save_path=test_save_path,
        test_metric=TestMetric.MAX_CUT,
        seed=None)

    print("\n Created DQN agent with network:\n\n", agent.network)

    #############
    # TRAIN AGENT
    #############
    start = time.time()
    agent.learn(timesteps=nb_steps, verbose=True)
    print(time.time() - start)

    agent.save()

    ############
    # PLOT - learning curve
    ############
    with open(test_save_path, 'rb') as f:
        data = np.array(pickle.load(f))

    fig_fname = os.path.join(network_folder, "training_curve")

    plt.plot(data[:, 0], data[:, 1])
    plt.xlabel("Timestep")
    plt.ylabel("Mean reward")
    if agent.test_metric == TestMetric.ENERGY_ERROR:
        plt.ylabel("Energy Error")
    elif agent.test_metric == TestMetric.BEST_ENERGY:
        plt.ylabel("Best Energy")
    elif agent.test_metric == TestMetric.CUMULATIVE_REWARD:
        plt.ylabel("Cumulative Reward")
    elif agent.test_metric == TestMetric.MAX_CUT:
        plt.ylabel("Max Cut")
    elif agent.test_metric == TestMetric.FINAL_CUT:
        plt.ylabel("Final Cut")

    plt.savefig(fig_fname + ".png", bbox_inches='tight')
    plt.savefig(fig_fname + ".pdf", bbox_inches='tight')

    plt.clf()

    ############
    # PLOT - losses
    ############
    with open(loss_save_path, 'rb') as f:
        data = np.array(pickle.load(f))

    fig_fname = os.path.join(network_folder, "loss")

    # Smooth the loss curve with a 50-step moving average before plotting.
    N = 50
    data_x = np.convolve(data[:, 0], np.ones((N,)) / N, mode='valid')
    data_y = np.convolve(data[:, 1], np.ones((N,)) / N, mode='valid')

    plt.plot(data_x, data_y)
    plt.xlabel("Timestep")
    plt.ylabel("Loss")

    plt.yscale("log")
    plt.grid(True)

    plt.savefig(fig_fname + ".png", bbox_inches='tight')
    plt.savefig(fig_fname + ".pdf", bbox_inches='tight')
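
The function above is only defined, never invoked. A minimal entry point for running the script directly (hypothetical; not shown in the original listing) would be:

if __name__ == "__main__":
    run()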
Code Example #2
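(No file or project metadata accompanies this example. Judging by the hard-coded paths, it appears to be an adapted variant of the script above, targeting mutation data (.m2 files) for what look like TCGA cancer cohorts; it assumes the same imports as Code Example #1.)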
def run(save_loc="GBMLGG_100/s2v"):

    print("\n----- Running {} -----\n".format(os.path.basename(__file__)))

    ####################################################
    # SET UP ENVIRONMENT AND VARIABLES
    ####################################################

    gamma = 1
    step_fact = 1

    # Same S2V-style setup as Code Example #1, but optimising a p-value target
    # with unnormalised rewards.
    env_args = {
        'observables': [Observable.SPIN_STATE],
        'reward_signal': RewardSignal.DENSE,
        'extra_action': ExtraAction.NONE,
        'optimisation_target': OptimisationTarget.PVALUE,
        'spin_basis': SpinBasis.BINARY,
        'norm_rewards': False,
        'memory_length': None,
        'horizon_length': None,
        'stag_punishment': None,
        'basin_reward': None,
        'reversible_spins': False
    }

    ####################################################
    # SET UP TRAINING AND TEST GRAPHS
    ####################################################

    k = 4
    n_spins_train = k

    # train_graph_generator = RandomErdosRenyiGraphGenerator(n_spins=n_spins_train,p_connection=0.15,edge_type=EdgeType.DISCRETE)

    ####
    # Pre-generated test graphs
    ####
    # graph_save_loc = "/home2/wsdm/gyy/eco-dqn_v1/_graphs/testing/ER_200spin_p15_50graphs.pkl"
    # graphs_test = load_graph_set(graph_save_loc)
    # n_tests = len(graphs_test)
    n_tests = 2

    # test_graph_generator = SetGraphGenerator(graphs_test, ordered=True)

    ####################################################
    # SET UP TRAINING AND TEST ENVIRONMENTS
    ####################################################
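    # Each tuple appears to be (cancer type, patient count, minimum mutation
    # frequency); this matches the unpacking in the commented-out test_envs
    # construction further down.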
    train_list = [('COADREAD', 100, 15), ('GBMLGG', 100, 6), ('STAD', 100, 15)]

    test_list = [
        'HNSC', 'ACC', 'LGG', 'KIPAN', 'UVM', 'CESC', 'BRCA', 'UCEC', 'OV',
        'DLBC', 'STAD', 'UCS', 'PRAD', 'CHOL', 'PAAD', 'TGCT', 'LUAD', 'STES',
        'GBMLGG', 'LIHC', 'BLCA', 'KIRC', 'KIRP', 'COAD', 'GBM', 'THCA',
        'READ', 'PCPG', 'COADREAD', 'LUSC', 'KICH', 'SARC'
    ]
    # test_list = [('GBMLGG', 400, 5), ('GBMLGG', 500, 5), ('GBMLGG', 600, 5), ('GBMLGG', 700, 5)]
    # test_list = ['STAD', 'GBMLGG', 'COADREAD']

    # mut_file_path = '/home2/wsdm/gyy/comet_v1/example_datasets/temp/{}_our_pnum={}.m2'
    mut_file_path = '/home2/wsdm/gyy/comet_v1/example_datasets/our/{}_our.m2'

    test_envs = [
        ising_env.make("SpinSystem",
                       mut_file_path.format(cancer_name),
                       int(n_spins_train * step_fact),
                       minFreq=5,
                       **env_args) for cancer_name in test_list
    ]
    '''
    test_envs = [ising_env.make("SpinSystem",
                                      mut_file_path.format(cancer_name, str(pnum)),
                                      int(n_spins_train*step_fact),
                                      minFreq=minfreq,
                                      **env_args) for cancer_name, pnum, minfreq in test_list]
    '''
    '''
    n_spins_test = train_graph_generator.get().shape[0]
    test_envs = [ising_env.make("SpinSystem",
                                mut_file_path,
                                int(n_spins_test*step_fact),
                                **env_args)]
    '''

    ####################################################
    # SET UP FOLDERS FOR SAVING DATA
    ####################################################

    data_folder = os.path.join(save_loc, 'data')
    network_folder = os.path.join(save_loc, 'network')

    mk_dir(data_folder)
    mk_dir(network_folder)
    # print(data_folder)
    network_save_path = os.path.join(network_folder, 'network.pth')
    test_save_path = os.path.join(network_folder, 'test_scores.pkl')
    loss_save_path = os.path.join(network_folder, 'losses.pkl')

    ####################################################
    # SET UP AGENT
    ####################################################

    nb_steps = 10000000

    # Q-network: as in Code Example #1 but smaller (2 message-passing layers,
    # 32 features) and with one extra input feature per node. Note this fork
    # uses the keyword n_obs_in_g rather than n_obs_in.
    network_fn = lambda: MPNN(n_obs_in_g=test_envs[0].observation_space.shape[1] + 1,
                              n_layers=2,
                              n_features=32,
                              n_hid_readout=[],
                              tied_weights=False)

    agent = DQN(
        test_envs,
        network_fn,
        init_network_params=None,
        init_weight_std=0.5,
        double_dqn=False,
        clip_Q_targets=True,
        replay_start_size=200,
        replay_buffer_size=3200,  # 20000
        gamma=gamma,  # 1
        update_target_frequency=10,  # 500
        update_learning_rate=True,
        initial_learning_rate=1e-2,
        peak_learning_rate=1e-2,
        peak_learning_rate_step=2000,
        final_learning_rate=1e-3,
        final_learning_rate_step=4000,
        update_frequency=4,  # 1
        minibatch_size=64,  # 128
        max_grad_norm=None,
        weight_decay=0,
        update_exploration=True,
        initial_exploration_rate=1,
        final_exploration_rate=0.1,  # 0.05
        final_exploration_step=10000,  # 40000
        adam_epsilon=1e-8,
        logging=False,
        loss="mse",
        save_network_frequency=4000,
        network_save_path=network_save_path,
        evaluate=True,
        test_envs=test_envs,
        test_episodes=n_tests,
        test_frequency=500,  # 10000
        test_save_path=test_save_path,
        test_metric=TestMetric.CUMULATIVE_REWARD,
        seed=None)

    print("\n Created DQN agent with network:\n\n", agent.network)

    #############
    # EVAL AGENT
    #############

    agent.load(
        '/home2/wsdm/gyy/eco-dqn_v2/experiments/GBMLGG_100/train/GBMLGG_100/s2v/network/network32000.pth'
    )
    agent.evaluate_agent()
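
Unlike Code Example #1, this variant performs no training: after constructing the agent it loads a pre-trained checkpoint (network32000.pth) and runs the evaluation loop via agent.evaluate_agent().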