Example #1
    def genDAG(cls, alpha=0.2, taskCount=30):
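        """Generate 100 Cybershake environments for the given alpha and pickle them to disk."""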
        ENVs = []
        DAG = CybershakeDAGGen.getDAG()
        for i in range(100):
            CybershakeWorkflow = Workflow(taskCount=taskCount,
                                          alpha=alpha,
                                          DAG=DAG)
            env = Environment(taskCount=taskCount,
                              save=False,
                              alpha=alpha,
                              workflow=CybershakeWorkflow)
            ENVs.append(env)

        with open('Cybershake-' + str(alpha) + '-ENVs', 'wb') as dbfile:
            pickle.dump(ENVs, dbfile)
Example #2
from Env.Workflow import Workflow
from Env.Environment import Environment
import numpy as np
import pickle

taskCount = 30
alpha = 0.8
ENVs = []

with open('./LIGO-Origin', 'rb') as file:
    OriginWorkflow = pickle.load(file)

for i in range(100):
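    # each OriginWorkflow entry carries a pre-generated workflow; rewrap it in a fresh Environment with the current alpha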
    # used for other
    env = Environment(taskCount=taskCount,
                      save=False,
                      alpha=alpha,
                      workflow=OriginWorkflow[i].workflow)

    # used for LIGO
    # env = Environment(taskCount=taskCount, save=False, alpha=alpha, workflow=OriginWorkflow[i])
    ENVs.append(env)

with open('LIGO-' + str(alpha) + '-ENVs', 'wb') as dbfile:
    pickle.dump(ENVs, dbfile)

Example #3
from Env.Environment import Environment
import numpy as np
import time

env = Environment(taskCount=10, alpha=0.5)
env.workflow.print()

cpt = env.workflow.CPTime

print('cptime:', cpt)
print('forwardCP:', env.workflow.forwardCP)
print('DL: ', env.workflowbak.DeadLine)
print('\n\n')

t = time.time()
actions = []
while True:
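    # Advance simulated time until new tasks are released; stop once the workflow reports completion.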

    taskNos = env.getNewTasks()
    if len(taskNos) == 0:
        env.spanTimeProcess()
        done, reward = env.isDone()
        if done:
            ob = env.getObservation()

            print('Actions: ', actions, 'Reward: ', reward)
            print('DeadLine:', env.workflow.DeadLine, ' Makespan: ',
                  env.currentTime)
            print('Cost: ', env.totalCost)
            break
    else:
        pass  # scheduling of the newly released tasks is omitted in this excerpt
Example #4
    def run(self):
        POPULATE_FLAG = True
        #MEMORY_SIZE = self.shared['memory'].capacity
        MEMORY_SIZE = self.MEMORY_SIZE
        #POPULATE_MAX = MEMORY_SIZE
        POPULATE_MAX = 1
        populate_num = 0
        #PLOT_FREQ = int(MEMORY_SIZE/32)
        PLOT_FREQ = 20
        R = 0. # for plotting
        step = 0
        Repi = 0
        self.env = Environment()
        self.env.set_all()
        self.env.reset()
        last_time = time.time()
        prev_plot = self.steps_done
        pretrain = True
        #PRETRAIN_MAX = self.MEMORY_SIZE // 2
        PRETRAIN_MAX = 2000


        if os.path.isfile('save/checkpoint_improver.pth.tar'):
            print("=> loading checkpoint '{}'".format('save/checkpoint_improver.pth.tar'))
            checkpoint = torch.load('save/checkpoint_improver.pth.tar')
            self.net.load_state_dict(checkpoint['state_dict'])  # restore the saved network weights
            print("=> loaded checkpoint '{}'"
                  .format('save/checkpoint_improver.pth.tar'))
        else:
            print("=> no checkpoint found at '{}'".format('save/checkpoint_improver.pth.tar'))

        while True:
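            # Alternate between filling the replay memory and waiting for the evaluator to send updated weights back.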
            if POPULATE_FLAG:
                if pretrain:
                    if populate_num == PRETRAIN_MAX:
                        pretrain = False
                        self.shared['SENT_FLAG'] = 0
                        POPULATE_FLAG = False
                else:
                    if populate_num == POPULATE_MAX:
                        # reset populate num
                        self.shared['SENT_FLAG'] = 0
                        POPULATE_FLAG = False
            else:
                if self.shared['SENT_FLAG']:
                    # evaluator sent the weights
                    print('copying...')
                    self.net.load_state_dict(self.shared['weights'])
                    POPULATE_FLAG = True
                    populate_num = 0
                    # after the evaluator finishes a round, advance steps_done so epsilon decays further
                    next_step = self.steps_done + 1
                    if next_step != 0:  # guard against the counter wrapping back to 0
                        # a larger steps_done gives a smaller eps_threshold in behavior_policy
                        self.steps_done = next_step
            self.save_checkpoint({
                'state_dict': self.net.state_dict(),
            })
            #print('loop took {0} seconds' . format(time.time()-last_time))

            state = self.env.get_state()  # this is to avoid time delay
            last_time = time.time()
            # 0.003s
            state_torch = torch.from_numpy(state).type(FloatTensor)  # convert the numpy state to a torch tensor
            state_torch = state_torch.unsqueeze(0)  # add a batch dimension
            action = self.behavior_policy(state_torch)

            next_state, r, done = self.env.step(action)  # 0.03s

            if len(self.memory) == MEMORY_SIZE:
                self.memory.pop(0)  # drop the oldest transition once the buffer is full
            self.memory.append((state, [action], [r], next_state, [1 - done]))
            #print(len(self.memory))

            # 0.001s
            if POPULATE_FLAG:
                populate_num += 1
            # accumulate episode rewards for the periodic average-reward plot
            Repi += r
            if done:
                step += 1
                R += Repi
                Repi = 0. # reset episode reward
            if step and self.steps_done > prev_plot:
                print('average reward over the last {} training steps: {}'.format(self.steps_done - prev_plot, R / step))
                self.plotter.plot_train_rewards(R/step)
                # 2-3s
                R = 0.
                step = 0
                prev_plot = self.steps_done

        self.plotter.terminate()
Example #5
class Improver(object):
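    """Experience collector: acts epsilon-greedily on the Environment, fills the replay memory, and pulls updated weights from the evaluator via `shared`."""
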
    def __init__(self, net, memory_size, memory, shared):
        self.net = net  # Deep Net
        self.memory = memory
        self.shared = shared  # shared resources, {'memory', 'SENT_FLAG', 'weights'}
        #self.env = env
        self.plotter = Plotter(folder='DQN/plot/cartpole_simple/exp4')
        self.steps_done = 0

        # hyperparameters:
        self.EPS_START = 1.
        self.EPS_END = 0.05
        self.EPS_DECAY = 50   # DECAY larger: slower
        self.MEMORY_SIZE = memory_size

    def behavior_policy(self, state):
        # We can add epsilon here to decay the randomness
        # We store the tensor of size 1x1
        sample = random.random()
        eps_threshold = self.EPS_END + (self.EPS_START - self.EPS_END) * \
                        math.exp(-1. * self.steps_done / self.EPS_DECAY)
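        # eps_threshold decays exponentially from EPS_START toward EPS_END as steps_done grows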
        #print('threshold: {}' . format(eps_threshold))

        if sample > eps_threshold:
            return self.policy(state)
        else:
            #return LongTensor([[self.env.action_space.sample()]])
            return self.env.action_space.sample()

    def policy(self, state):
        # return tensor of size 1x1
        res = self.net(Variable(state, volatile=True).type(FloatTensor)).data
        #print('policy values: {}, {}' . format(res[0][0], res[0][1]))
        #print('policy value: {}' . format(res.max(1)[0][0]))
        return res.max(1)[1][0]
        #return res.max(1)[1].view(1,1)
        #return self.net(Variable(state, volatile=True).type(FloatTensor))\
        #                .data.max(1)[1].view(1,1)

    def save_checkpoint(self, state, filename='save/checkpoint_improver.pth.tar'):
        torch.save(state, filename)

    def run(self):
        POPULATE_FLAG = True
        #MEMORY_SIZE = self.shared['memory'].capacity
        MEMORY_SIZE = self.MEMORY_SIZE
        #POPULATE_MAX = MEMORY_SIZE
        POPULATE_MAX = 1
        populate_num = 0
        #PLOT_FREQ = int(MEMORY_SIZE/32)
        PLOT_FREQ = 20
        R = 0. # for plotting
        step = 0
        Repi = 0
        self.env = Environment()
        self.env.set_all()
        self.env.reset()
        last_time = time.time()
        prev_plot = self.steps_done
        pretrain = True
        #PRETRAIN_MAX = self.MEMORY_SIZE // 2
        PRETRAIN_MAX = 2000


        if os.path.isfile('save/checkpoint_improver.pth.tar'):
            print("=> loading checkpoint '{}'".format('save/checkpoint_improver.pth.tar'))
            checkpoint = torch.load('save/checkpoint_improver.pth.tar')
            self.net.load_state_dict(checkpoint['state_dict'])  # restore the saved network weights
            print("=> loaded checkpoint '{}'"
                  .format('save/checkpoint_improver.pth.tar'))
        else:
            print("=> no checkpoint found at '{}'".format('save/checkpoint_improver.pth.tar'))

        while True:
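            # Alternate between filling the replay memory and waiting for the evaluator to send updated weights back.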
            if POPULATE_FLAG:
                if pretrain:
                    if populate_num == PRETRAIN_MAX:
                        pretrain = False
                        self.shared['SENT_FLAG'] = 0
                        POPULATE_FLAG = False
                else:
                    if populate_num == POPULATE_MAX:
                        # reset populate num
                        self.shared['SENT_FLAG'] = 0
                        POPULATE_FLAG = False
            else:
                if self.shared['SENT_FLAG']:
                    # evaluator sent the weights
                    print('copying...')
                    self.net.load_state_dict(self.shared['weights'])
                    POPULATE_FLAG = True
                    populate_num = 0
                    # after the evaluator finishes a round, advance steps_done so epsilon decays further
                    next_step = self.steps_done + 1
                    if next_step != 0:  # guard against the counter wrapping back to 0
                        # a larger steps_done gives a smaller eps_threshold in behavior_policy
                        self.steps_done = next_step
            self.save_checkpoint({
                'state_dict': self.net.state_dict(),
            })
            #print('loop took {0} seconds' . format(time.time()-last_time))

            state = self.env.get_state()  # this is to avoid time delay
            last_time = time.time()
            # 0.003s
            state_torch = torch.from_numpy(state).type(FloatTensor)  # convert the numpy state to a torch tensor
            state_torch = state_torch.unsqueeze(0)  # add a batch dimension
            action = self.behavior_policy(state_torch)

            next_state, r, done = self.env.step(action)  # 0.03s

            if len(self.memory) == MEMORY_SIZE:
                self.memory.pop(0)  # drop the oldest transition once the buffer is full
            self.memory.append((state, [action], [r], next_state, [1 - done]))
            #print(len(self.memory))

            # 0.001s
            if POPULATE_FLAG:
                populate_num += 1
            # accumulate episode rewards for the periodic average-reward plot
            Repi += r
            if done:
                step += 1
                R += Repi
                Repi = 0. # reset episode reward
            if step and self.steps_done > prev_plot:
                print('average reward over the last {} training steps: {}'.format(self.steps_done - prev_plot, R / step))
                self.plotter.plot_train_rewards(R/step)
                # 2-3s
                R = 0.
                step = 0
                prev_plot = self.steps_done

        self.plotter.terminate()
Example #6
from learn_Scientific import RLagent_Scientific
from Env.Environment import Environment, Workflow
from ScientificWorkflow.XMLProcess import XMLtoDAG
import copy

taskCount = 24
alpha = 0.4
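# Parse the 24-task Epigenomics DAG from its XML description and wrap it in a Workflow/Environment pair.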
DAG = XMLtoDAG("../ScientificWorkflow/Epigenomics_24.xml",
               taskCount=taskCount).getDAG()
epigenomicsWorkflow = Workflow(taskCount=taskCount, alpha=alpha, DAG=DAG)
env = Environment(taskCount=taskCount, alpha=alpha, workflow=epigenomicsWorkflow)

mt = RLagent_Scientific(env, taskCount, alpha, hiddenSize=60, perfix='Epig')
mt.epsilon = 0.3
mt.epsilon_end = 0.05
mt.epsilon_decay = 200
mt.train()
Example #7
from model import DQN
from Env.Environment import Environment
import torch
import torch.autograd as autograd
from utils import sample_action
import numpy as np
from ICPCP import ICPCP

the_model = torch.load('./tmp/100-0.8.pth', map_location='cpu')

dqn = DQN()
dqn.load_state_dict(the_model)

env = Environment(taskCount=100, alpha=0.8)
done = False

var_phi = autograd.Variable(torch.Tensor(6), volatile=True)

RL_fail = 0
RL_cost = []
Random_fail = 0
Random_cost = []
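# Evaluate over 100 workflow instances, tracking failure counts and costs for the RL policy and the baselines.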
for i in range(100):
    RL_actions = []
    He_actions = []
    while True:
        taskNos = env.getNewTasks()
        if len(taskNos) == 0:
            env.spanTimeProcess()
            done, r = env.isDone()
        else:
            pass  # action selection for the ready tasks is omitted in this excerpt
Example #8
from learn_Scientific import RLagent_Scientific
from Env.Environment import Environment, Workflow
from ScientificWorkflow import generateCybershakeENV
from ScientificWorkflow.XMLProcess import XMLtoDAG
import copy
import numpy as np

taskCount = 30
alpha = 0.6
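# Build a 30-task CyberShake workflow from the generated DAG and train the RL agent on it.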
DAG = generateCybershakeENV.CybershakeDAGGen.getDAG()
wf = Workflow(taskCount=taskCount, alpha=alpha, DAG=DAG)
env = Environment(taskCount=taskCount, alpha=alpha, workflow=wf)

mt = RLagent_Scientific(env,
                        taskCount,
                        alpha,
                        hiddenSize=80,
                        perfix='CyberShake')
mt.epsilon = 0.3
mt.epsilon_end = 0.05
mt.epsilon_decay = 200
mt.train()