Code Example #1
File: ACCovContWorker.py  Project: xcegin/RL-ADT-DP
    def __init__(self, name, globalAC, sess, global_rewards, global_episodes,
                 model_path):
        self.number = str(name)
        self.summary_writer = tf.summary.FileWriter("logs/train_" + str(name))
        self.name = "worker_" + str(name)
        self.global_rewards = global_rewards
        self.global_episodes = global_episodes
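        # TF op that advances the shared global episode counter by one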
        self.increment = self.global_episodes.assign_add(1)

        self.model_path = model_path

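        # Load the pickled embeddings and their lookup table; num_feats is the length of one embedding vector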
        with open('vectors_cov.pkl', 'rb') as fh:
            self.embeddings, self.embed_lookup = pickle.load(fh)
            self.num_feats = len(self.embeddings[0])

        self.AC = ACNet(self.name, sess, self.num_feats,
                        globalAC)  # create ACNet for each worker
        self.sess = sess
        self.episode_rewards = []
        self.episode_coverages = []
        self.episode_lengths = []
        self.a_loss = []
        self.c_loss = []

        self.batcher = Batcher()
        self.env = Enviroment()
        self.avgFunctions = {}
Code Example #2
    def __init__(self, name, trainer, model_path, global_episodes, global_rewards):
        self.name = "worker_" + str(name)
        self.number = name
        self.model_path = model_path
        self.trainer = trainer
        self.global_rewards = global_rewards
        self.global_episodes = global_episodes
        self.increment = self.global_episodes.assign_add(1)
        self.episode_rewards = []
        self.episode_coverages = []
        self.episode_lengths = []
        self.episode_mean_values = []
        self.batcher = Batcher()
        self.summary_writer = tf.summary.FileWriter("logs/train_" + str(self.number))

        with open('vectors_cov.pkl', 'rb') as fh:
            self.embeddings, self.embed_lookup = pickle.load(fh)
            self.num_feats = len(self.embeddings[0])

        # Create the local copy of the network and the TensorFlow op that copies the global parameters into the local network
        self.local_AC = AConv_Network(self.name, trainer, self.num_feats)
        self.update_local_ops = update_target_graph('global', self.name)
        self.avgFunctions = {}

        self.sleep_time = 0.028
        self.env = Enviroment()
Code Example #3
File: sampe_all.py  Project: xcegin/RL-ADT-DP
import pickle

from Environment.enviroment import Enviroment
from Environment.enviromentWalkerRedLabel import enviromentWalkerContext
from Vectorizer.Sample import SampleVisitor
from Vectorizer.train import learn_vectors

# Create the embeddings for the dataset specified in the Enviroment class

env = Enviroment()
m = 0
Samples = []
while m < len(env.listOfFiles):
    env.prepareNextFile()
    for table in env.listOfTables:
        tableRows = []
        # For each row, merge the per-column values into a single dictionary
        for row in table:
            dictForRow = env.mergeDictsInRow(row)
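            # Traverse the ADT tree for this row and collect the non-empty sample lists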
            sampleVisitor = SampleVisitor(enviromentWalkerContext(),
                                          dictForRow, env.expressions,
                                          env.rootTreeAdtNode.name)
            lists = sampleVisitor.traverse_tree(env.rootTreeAdtNode)
            lists = [x for x in lists if x != []]
            Samples += lists
    m += 1
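# Learn embedding vectors from the collected samples and pickle them to disk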
lists = learn_vectors(Samples, 'Vectorizer/logs')
with open('vectors_nextDate.pkl', 'wb') as embed_file:
    pickle.dump(lists, embed_file)
Code Example #4
File: ACWorker.py  Project: xcegin/RL-ADT-DP
    def work(self, max_episode_length, gamma, global_AC, sess, coord, saver):
        episode_count = sess.run(self.global_episodes)
        total_steps = 0
        print("Starting worker " + str(self.number))
        with sess.as_default(), sess.graph.as_default():
            while not coord.should_stop():
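                # Copy the latest global network parameters into this worker's local network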
                sess.run(self.update_local_ops)
                self.env = Enviroment()
                episode_buffer = []
                episode_values = []
                episode_frames = []
                episode_reward = 0
                episode_coverage = 0
                max_c = 0
                max_r = 0
                episode_step_count = 0
                d = False
                m = 0
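                # d: done flag returned by the environment; m: index of the current input file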
                # self.env.new_episode()

                while m < len(self.env.listOfFiles):
                    m += 1
                    self.env.prepareNextFileConv()
                    self.env.currentNumOfTable = 0
                    rnn_state = self.local_AC.state_init
                    self.batch_rnn_state = rnn_state
                    while self.env.currentNumOfTable < len(
                            self.env.listOfTables):
                        self.env.startTable()
                        self.env.currentNumOfRow = 0
                        for currentRow in self.env.listOfTableVectors[
                                self.env.currentNumOfTable - 1]:
                            nodes, children = traverse_tree_in_dfs_inorder(
                                currentRow, self.embeddings, self.embed_lookup,
                                0, [], [], -1)
                            gen = yield_some_stuff(nodes, children)
                            batches = list(enumerate(batch_samples(gen, 1)))
                            iterator = iter(batches)
                            batch = next(iterator, None)
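                            # enumerate() wraps each batch as (index, (nodes, children)); unwrap the index when present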
                            if isinstance(batch[0], int):
                                num, batch = batch
                            nodes, children = batch
                            nodes = nodes[0]
                            complexity = getNumOfReasonableNodes(currentRow)
                            # complexity = int(complexity ** (1/3) * (len(self.env.listOfTables[0]) ** (1/3)))
                            complexity = int(complexity**(1 / 2))
                            if complexity == 0:
                                complexity = 1
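                            # Duplicate the node sequence (it doubles on every pass), apparently to scale the number of steps with the row's complexity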
                            for i in range(complexity):
                                toBeAppended = deepcopy(nodes)
                                nodes = nodes + toBeAppended
                            epF = 0
                            while epF < len(nodes):
                                s = nodes[epF]
                                #vectorMatrixWithCov = self.getCovVector()
                                #s = numpy.append(s, vectorMatrixWithCov)
                                # Take an action using probabilities from policy network output.
                                a_dist, v, rnn_state = sess.run(
                                    [
                                        self.local_AC.policy,
                                        self.local_AC.value,
                                        self.local_AC.state_out
                                    ],
                                    feed_dict={
                                        self.local_AC.inputs: [s],
                                        self.local_AC.state_in[0]:
                                        rnn_state[0],
                                        self.local_AC.state_in[1]: rnn_state[1]
                                    })
                                a = np.random.choice(a_dist[0], p=a_dist[0])
                                a = np.argmax(a_dist == a)

                                r, d, c = self.env.step(a, m - 1)

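                                # Buffer entry: [state, action, reward, next_state, done, value]; s is reused as the next state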
                                episode_buffer.append([s, a, r, s, d, v[0, 0]])
                                episode_values.append(v[0, 0])

                                total_steps += 1
                                episode_step_count += 1

                                if d or epF + 1 == len(nodes):
                                    # Since we don't know what the true final return is, we "bootstrap" from our current
                                    # value estimation.
                                    v1 = sess.run(
                                        self.local_AC.value,
                                        feed_dict={
                                            self.local_AC.inputs: [s],
                                            self.local_AC.state_in[0]:
                                            rnn_state[0],
                                            self.local_AC.state_in[1]:
                                            rnn_state[1]
                                        })[0, 0]
                                    v_l, p_l, e_l, g_n, v_n, adv = self.train(
                                        global_AC, episode_buffer, sess, gamma,
                                        v1)
                                    episode_buffer = []
                                    sess.run(self.update_local_ops)
                                if epF + 1 == len(nodes) or d:
                                    episode_reward += r
                                    episode_coverage += c
                                    max_r += 1
                                    max_c += 1
                                    break
                                epF += 1

                self.episode_rewards.append(episode_reward)
                self.episode_coverages.append(episode_coverage)
                self.episode_lengths.append(episode_step_count)
                self.episode_mean_values.append(np.mean(episode_values))

                # Update the network using the experience buffer at the end of the episode.
                if len(episode_buffer) != 0:
                    v_l, p_l, e_l, g_n, v_n, adv = self.train(
                        global_AC, episode_buffer, sess, gamma, 0.0)

                # Periodically save model parameters and write summary statistics.
                if episode_count % 2 == 0 and episode_count != 0:
                    if episode_count % 100 == 0 and self.name == 'worker_0':
                        saver.save(
                            sess, self.model_path + '/model-' +
                            str(episode_count) + '.cptk')
                        print("Saved Model")

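                    # Average the statistics over the two most recent episodes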
                    mean_reward = np.mean(self.episode_rewards[-2:])
                    mean_coverage = np.mean(self.episode_coverages[-2:])
                    mean_length = np.mean(self.episode_lengths[-2:])
                    mean_value = np.mean(self.episode_mean_values[-2:])

                    summary = tf.Summary()
                    summary.value.add(tag='Perf/Reward',
                                      simple_value=float(mean_reward))
                    summary.value.add(tag='Perf/Coverage',
                                      simple_value=float(mean_coverage))
                    summary.value.add(tag='Perf/Length',
                                      simple_value=float(mean_length))
                    summary.value.add(tag='Perf/Value',
                                      simple_value=float(mean_value))
                    summary.value.add(tag='Losses/Value Loss',
                                      simple_value=float(v_l))
                    summary.value.add(tag='Losses/Policy Loss',
                                      simple_value=float(p_l))
                    summary.value.add(tag='Losses/Entropy',
                                      simple_value=float(e_l))
                    summary.value.add(tag='Losses/Advantage',
                                      simple_value=float(adv))
                    summary.value.add(tag='Losses/Grad Norm',
                                      simple_value=float(g_n))
                    summary.value.add(tag='Losses/Var Norm',
                                      simple_value=float(v_n))
                    summary.value.add(tag='Perf/Max Coverage',
                                      simple_value=float(max_c))
                    summary.value.add(tag='Perf/Max Reward',
                                      simple_value=float(max_r))
                    self.summary_writer.add_summary(summary, episode_count)

                    self.summary_writer.flush()

                # if self.name == 'worker_0':
                sess.run(self.increment)
                episode_count += 1