Ejemplo n.º 1
0
def extract_feat(resource, work_packages):
    """Build the pairwise GNN feature tensor for one resource and its packages.

    Returns a float tensor of shape (1, package_num + 1, package_num + 1, 4);
    the extra last row/column carries travel times between the resource's
    current position and each package.
    """
    # Flatten each package into [x, y, urgency, working_time, urgency, id].
    # (Index 4 duplicates the urgency at index 2 and is unused here; kept to
    # mirror the layout used elsewhere in the project.)
    pkgs = [[wp.getX(), wp.getY(), wp.getUrgency(), wp.getWorkingTime(),
             wp.getUrgency(), wp.getId()] for wp in work_packages]
    rx, ry, rid = resource.getPosition()
    side = define.get_value('package_num') + 1
    gnn_feature = np.zeros((side, side, 4))

    for src in pkgs:
        for dst in pkgs:
            i, j = src[5], dst[5]
            gnn_feature[i, j, 0] = src[2]  # urgency of the source package
            gnn_feature[i, j, 1] = dst[2]  # urgency of the target package
            # Travel time from src to dst plus dst's working time.
            gnn_feature[i, j, 2] = (
                define.dis(src[0], dst[0], src[1], dst[1]) /
                define.get_value('speed') + dst[3])
            gnn_feature[i, j, 3] = 1
    for pkg in pkgs:
        idx = pkg[5]
        # Resource -> package: include the package's working time.
        gnn_feature[-1, idx, 2] = (define.dis(pkg[0], rx, pkg[1], ry) /
                                   define.get_value('speed') + pkg[3])
        # Package -> resource: travel time only.
        gnn_feature[idx, -1, 2] = (define.dis(pkg[0], rx, pkg[1], ry) /
                                   define.get_value('speed'))
    return torch.from_numpy(gnn_feature).unsqueeze(0).float()
Ejemplo n.º 2
0
 def __init__(self, id=None, x=None, y=None, timeLimit=None, speed=None):
     """Initialize a resource located at (x, y).

     ``timeLimit`` and ``speed`` fall back to the globally configured
     'time_limit' / 'speed' values when omitted.
     """
     self.__id = id
     # Current position tracks movement during scheduling; the initial
     # position is stored separately so the starting point is recoverable.
     self.__currentX = x
     self.__currentY = y
     self.__initialX = x
     self.__initialY = y
     self.__workingTime = 0
     self.__timeLimit = define.get_value(
         'time_limit') if timeLimit is None else timeLimit
     self.__speed = define.get_value('speed') if speed is None else speed
     # -1 marks "not positioned at any package yet".
     self.__pid = -1
Ejemplo n.º 3
0
def wrapper(idx):
    """Wrap the ``idx``-th raw sample from the module-level ``data`` into
    project objects.

    Returns ``(packages, resource)`` where ``resource`` is a one-element
    list holding the single Resource.
    """
    r_pos, locs, ps = data[idx]
    resource = [
        Resource("resource",
                 r_pos[0],
                 r_pos[1],
                 timeLimit=define.get_value('time_limit'),
                 speed=define.get_value('speed'))
    ]
    # One WorkPackage per (location, urgency) pair; working time fixed at 0.
    packages = [
        WorkPackage(i, loc[0], loc[1], p, 0)
        for i, (loc, p) in enumerate(zip(locs, ps))
    ]
    return packages, resource
Ejemplo n.º 4
0
def data_wrapper(package_num, seed):
    """Generate an instance with the configured generator and wrap it into
    project objects.

    Returns ``(ret_packages, ret_resource)`` with ``ret_resource`` a
    one-element list holding the single Resource.
    """
    # Dispatch to the generator selected by the global 'func_type' setting.
    generator = func_dict[define.get_value('func_type')]
    resource, packages = generator(package_num, seed)
    ret_resource = [
        Resource("resource",
                 resource[0],
                 resource[1],
                 timeLimit=define.get_value('time_limit'),
                 speed=define.get_value('speed'))
    ]
    # Raw package rows are (x, y, urgency, working_time).
    ret_packages = [
        WorkPackage(i, p[0], p[1], p[2], p[3]) for i, p in enumerate(packages)
    ]
    return ret_packages, ret_resource
Ejemplo n.º 5
0
    def __init__(self,
                 resource,
                 packages,
                 net=None,
                 is_dqn=None,
                 device='cpu',
                 base=None,
                 time_limit=None,
                 dis_matrix=None,
                 feature=None):
        """Initialize scheduling state for one resource over ``packages``.

        When ``net`` is given but ``dis_matrix`` is not, precompute the
        pairwise travel-time matrix and the GNN feature for the instance.

        NOTE(review): the ``time_limit`` parameter is accepted but never
        used — ``self.time_limit`` always comes from the global config;
        confirm whether callers expect it to take effect.
        """
        self.__resource = copy.deepcopy(resource)
        self.__workPackages = []
        self.__totalUrgency = 0
        self.__existWorkPackages = {}
        self.__is_dqn = is_dqn
        self.__net = net
        self.__packages = packages
        self.device = device
        # Fix: the original assigned dis_matrix (None, then the argument)
        # and q/score/base twice in a row; one assignment each has the same
        # net effect.
        self.dis_matrix = dis_matrix
        self.q = None
        self.score = None
        self.base = base
        self.feature = feature
        self.all_urgency = np.array([wk.getUrgency() for wk in packages])
        self.package_num = define.get_value('package_num')
        self.time_limit = define.get_value('time_limit')
        self.speed = define.get_value('speed')
        if self.__net is not None and dis_matrix is None:
            # Broadcast package coordinates into (N, N, 2) grids so that
            # tmp_X[i, j] holds package i's position and tmp_Y[i, j] holds
            # package j's.
            tmp_X = np.zeros((self.package_num, self.package_num, 2))
            tmp_Y = np.zeros((self.package_num, self.package_num, 2))
            tmp_time = np.zeros((self.package_num, self.package_num))

            for wk in packages:
                tmp_X[wk.getId(), :, 0] = wk.getX()
                tmp_X[wk.getId(), :, 1] = wk.getY()
                tmp_Y[:, wk.getId(), 0] = wk.getX()
                tmp_Y[:, wk.getId(), 1] = wk.getY()
                tmp_time[:, wk.getId()] = wk.getWorkingTime()
            # dis_matrix[i, j] = travel time from i to j plus j's working time.
            self.dis_matrix = np.sqrt(np.sum(np.square(tmp_X - tmp_Y),
                                             axis=2)) / self.speed + tmp_time
            self.feature = feature_extractor.feature_extractor2(
                [[resource.getInitialX(),
                  resource.getInitialY()], packages, self.time_limit,
                 [resource.getInitialX(),
                  resource.getInitialY()]])
Ejemplo n.º 6
0
    def step(self, action):
        """Apply ``action`` (a package index) to the environment.

        Returns ``(reward, done, mask)`` as tensors on ``self.device``.
        """
        # Repeating an already-taken action terminates with zero reward.
        # NOTE(review): print('seed') looks like leftover debug output —
        # confirm it can be removed.
        if self.mask[0, action] == 1:
            done = 1
            reward = 0
            print('seed')
        else:
            package = self.packages[action]
            # Elapsed time if this package is served next: travel + work so far.
            self.times = self.path.getResourceNeedTime(
                package) + self.path.getResourceWorkingTime()
            # Terminal when already done, or when serving this package plus
            # the return trip would exceed the configured time limit.
            if self.done == 1 or self.times + self.path.getReturnTime(
                    package) > define.get_value('time_limit'):
                done = 1
                self.done = 1
                reward = 0
            else:
                done = 0
                self.mask[0, action] = 1  # mark package as visited
                reward = package.getUrgency()
                self.path.addWorkPackage(package)
                self.path.setResourceWorkingTime(self.times)
                self.path.setResourcePosition(package.getX(), package.getY(),
                                              package.getId())

        return torch.FloatTensor([reward]).to(self.device), torch.FloatTensor(
            [done]).to(self.device), self.mask.to(self.device)
Ejemplo n.º 7
0
def gen_random_data(package_num, seed):
    """Generate ``package_num`` random work packages plus one resource.

    Seeded with ``seed`` for reproducibility.  Positions are uniform in
    [0, 1), urgency in {0.01..1.00}, working time in [0, 0.1).
    Returns ``(workpackages, resources)``.
    """
    random.seed(seed)
    # Single resource at a random position; limits come from global config.
    # (RNG call order matches the original: resource x, y first.)
    resources = [
        Resource("resource",
                 random.random(),
                 random.random(),
                 timeLimit=define.get_value('time_limit'),
                 speed=define.get_value('speed'))
    ]
    workpackages = []
    for idx in range(package_num):
        px = random.random()
        py = random.random()
        urgency = random.randint(1, 100) / 100
        work_time = random.random() * 0.1
        workpackages.append(WorkPackage(idx, px, py, urgency, work_time))
    return workpackages, resources
Ejemplo n.º 8
0
 def reset(self, i):
     """Reset the environment to the instance derived from index ``i``.

     ``i`` is folded into this env's seed range so instances cycle.
     """
     self.seed = self.range_start + i % (self.range_end - self.range_start)
     self.count = (self.count + 1) % (self.range_end - self.range_start)
     self.packages, self.resources = gen_data.wrapper(self.seed)
     # One mask slot per package plus one extra final slot.
     self.mask = torch.zeros(1, define.get_value('package_num') + 1)
     # Last slot is pre-set to 1 — presumably marks the extra slot as
     # already taken/unavailable; confirm against the step() semantics.
     self.mask[0, -1] =1
     self.path = path_obj.Path(self.resources[0], self.packages, None, False, self.device)
     self.reward = 0
     self.times = 0
Ejemplo n.º 9
0
def beam_search(encoder, decoder, beam_size):
    """Run beam search with an encoder/decoder pair over 10000 instances.

    Each beam entry is ``[env, cumulative_prob, last_action, cumulative_reward]``.
    Prints, per instance, the index, the running average of best rewards, and
    the average wall-clock time per instance.
    """
    import copy
    total_baselines = 0
    time_start = time.time()
    for _index in range(0, 10000):
        env = Env(_index, _index + 1)
        beam_list = [[env, 1, 0, 0]]
        max_rewards = 0
        state = env.reset(0)
        envs = Envs([env for env, prob, action, reward in beam_list])
        # Encode once per instance; the embedding is reused for every step.
        emb = encoder(state)
        first = True
        while True:

            # First step has no previous action to condition on.
            if first:
                first = False
                prob = decoder(emb.expand(len(envs.envs), -1, -1),
                               envs.masks().to(device),
                               envs.times().to(device), None)
            else:
                prob = decoder(
                    emb.expand(len(envs.envs), -1, -1),
                    envs.masks().to(device),
                    envs.times().to(device),
                    torch.LongTensor([a
                                      for e, p, a, r in beam_list]).to(device))

            prob = prob.cpu().detach().numpy()
            tmp_list = []
            # Expand every beam entry by every feasible action; entries with
            # negligible probability are pruned outright.
            for i in range(len(beam_list)):
                for j in range(define.get_value('package_num')):
                    if prob[i, j] < 1e-10:
                        continue
                    tmp_list.append([
                        beam_list[i][0], beam_list[i][1] * prob[i, j], j,
                        beam_list[i][3]
                    ])
            # Keep the beam_size most probable candidates.
            tmp_list.sort(key=lambda x: x[1], reverse=True)
            tmp_list = tmp_list[:beam_size]
            # Deep-copy envs so each surviving candidate steps independently.
            envs = Envs(
                [copy.deepcopy(env) for env, prob, action, r in tmp_list])
            action = [a for env, prob, a, r in tmp_list]
            rewards, dones, masks, all_done = envs.step(action)

            beam_list.clear()
            rewards = rewards.cpu().numpy()
            for i, (env, prob, a, r) in enumerate(tmp_list):
                r = r + rewards[i]
                max_rewards = max(max_rewards, r)
                beam_list.append([copy.deepcopy(envs.envs[i]), prob, a, r])
            # Stop once every beam entry has terminated.
            if np.sum(1 - dones.cpu().numpy()) == 0:
                break
        total_baselines += max_rewards
        print('{} {} {}'.format(_index, total_baselines / (_index + 1),
                                (time.time() - time_start) / (_index + 1)))
Ejemplo n.º 10
0
def feature_extractor2(sample):
    """Build an 8-channel pairwise feature grid plus a validity mask.

    ``sample`` is ``[resource_pos, work_packages, total_time, end_axis]``.
    Returns ``(features, mask)``, each with a leading batch axis of 1:
    features is (1, N, N, 8) and mask is (1, N, N, 1), N = package_num.
    """
    resource, work_packages, total_time, end_axis = sample
    # Flatten each package into [x, y, urgency, working_time, urgency, id].
    # Index 4 duplicates index 2 and is never read in this function.
    work_packages = [[
        p.getX(),
        p.getY(),
        p.getUrgency(),
        p.getWorkingTime(),
        p.getUrgency(),
        p.getId()
    ] for p in work_packages]
    gnn_feature = np.zeros(
        (define.get_value('package_num'), define.get_value('package_num'), 8))
    mask = np.zeros(
        (define.get_value('package_num'), define.get_value('package_num'), 1))

    for p_i in work_packages:
        i = p_i[5]
        for p_j in work_packages:
            j = p_j[5]
            # Channels 0-1: urgencies of the (i, j) pair.
            gnn_feature[i, j, 0] = p_i[2]
            gnn_feature[i, j, 1] = p_j[2]
            # Channels 2-3: travel time from the end position to each package.
            gnn_feature[i, j,
                        2] = define.dis(end_axis[0], p_i[0], end_axis[1],
                                        p_i[1]) / define.get_value('speed')
            gnn_feature[i, j,
                        3] = define.dis(end_axis[0], p_j[0], end_axis[1],
                                        p_j[1]) / define.get_value('speed')
            # Channel 4: travel time i -> j plus j's working time.
            gnn_feature[
                i, j,
                4] = define.dis(p_i[0], p_j[0], p_i[1],
                                p_j[1]) / define.get_value('speed') + p_j[3]
            # Channels 5-6: time from the resource to each package, including
            # that package's working time.
            gnn_feature[
                i, j,
                5] = define.dis(resource[0], p_i[0], resource[1],
                                p_i[1]) / define.get_value('speed') + p_i[3]
            gnn_feature[
                i, j,
                6] = define.dis(resource[0], p_j[0], resource[1],
                                p_j[1]) / define.get_value('speed') + p_j[3]
            # Channel 7: the instance-wide time budget, broadcast everywhere.
            gnn_feature[i, j, 7] = total_time
            mask[i, j] = 1
    return np.expand_dims(gnn_feature, axis=0), np.expand_dims(mask, axis=0)
Ejemplo n.º 11
0
 def step(self, action, reset=True):
     """Apply ``action`` (a package index); optionally auto-reset on episode end.

     Returns ``(state, reward, done)`` with reward/done as tensors on
     ``self.device``.
     """
     package = self.packages[action]
     # Elapsed time if this package is served next: travel + work so far.
     times = self.path.getResourceNeedTime(package) + self.path.getResourceWorkingTime()
     # Terminal when already done, or serving this package plus the return
     # trip would exceed the configured time limit.
     if self.done==1 or times + self.path.getReturnTime(package) > define.get_value('time_limit'):
         self.done = 1
         done = 1
         reward = 0
         if reset:
             self.reset()
     else:
         self.done = 0
         done = 0
         reward = package.getUrgency()
         self.path.addWorkPackage(package)
         self.path.setResourceWorkingTime(times)
         self.path.setResourcePosition(package.getX(), package.getY(), package.getId())
         # All packages scheduled: episode is complete.
         # NOTE(review): this branch resets unconditionally, ignoring the
         # ``reset`` flag, unlike the time-limit branch — confirm intent.
         if self.path.getWorkPackageSize() >= define.get_value('package_num'):
             self.done = 1
             done = 1
             self.reset()
     return self.path.to_state(), torch.FloatTensor([reward]).to(self.device), torch.FloatTensor([done]).to(self.device)
Ejemplo n.º 12
0
def dqn_schedule(model,
                 data,
                 device,
                 plan_limit=None,
                 time_limit=None,
                 time_interval=None):
    """Run the DQN-guided greedy scheduler on one instance and log progress.

    ``data`` is ``(data0, data1)`` where data0 unpacks to
    ``(workpackages, resources)``.  Limits fall back to global config when
    omitted.  Returns ``(total_urgency, path_idx)``.
    """
    plan_limit = define.get_value(
        'plan_limit') if plan_limit is None else plan_limit
    time_limit = define.get_value(
        'time_limit') if time_limit is None else time_limit
    time_interval = define.get_value(
        'time_interval') if time_interval is None else time_interval
    data0, data1 = data
    workpackages, resources = data0
    time0 = time.time()
    schedule = SchedulePolicy(resources,
                              workpackages,
                              None,
                              model,
                              is_dqn=True,
                              time_limit=time_limit,
                              plan_limit=plan_limit,
                              time_interval=time_interval,
                              device=device,
                              batch_size=args.batch_size,
                              data=data1)
    schedule.greedySchedule()
    ret = schedule.get_urgency()
    # Shared multiprocessing state: accumulate timing/score under the lock.
    global lock, counter, timer, values
    with lock:
        timer.value += time.time() - time0
        values.value += ret
        counter.value += 1
        # NOTE(review): `% 1 == 0` is always true, so this prints on every
        # call — presumably a leftover configurable print frequency.
        if counter.value % 1 == 0:
            sys.stdout.write('\r{} {:.4f} {:.4f}'.format(
                counter.value, values.value / counter.value,
                timer.value / counter.value))
            sys.stdout.flush()

    return ret, schedule.path_idx
Ejemplo n.º 13
0
def no_dqn_schedule(data,
                    plan_limit=None,
                    time_limit=None,
                    time_interval=None):
    """Run the plain greedy scheduler (no DQN) on one instance.

    ``data`` unpacks to ``(workpackages, resources)``.  Limits fall back to
    the global config when omitted.  Returns ``(total_urgency, path_idx)``.
    """
    # Resolve defaults from global config only when not supplied.
    if plan_limit is None:
        plan_limit = define.get_value('plan_limit')
    if time_limit is None:
        time_limit = define.get_value('time_limit')
    if time_interval is None:
        time_interval = define.get_value('time_interval')
    workpackages, resources = data
    schedule = Schedule(resources,
                        workpackages,
                        replay_memory=None,
                        net=None,
                        is_dqn=False,
                        time_limit=time_limit,
                        plan_limit=plan_limit,
                        time_interval=time_interval)
    schedule.greedySchedule()
    return schedule.get_urgency(), schedule.path_idx
Ejemplo n.º 14
0
def beam_search(model, beam_size):
    """Run beam search with a single policy model over 10000 instances.

    Each beam entry is ``[env, cumulative_prob, last_action, cumulative_reward]``.
    Prints, per instance, the index, the running average of best rewards, and
    the average wall-clock time per instance.
    """
    import copy
    total_baselines = 0
    time_start = time.time()
    for _index in range(0,10000):
        env = Env(_index, _index+1)
        beam_list = [[env, 1, 0, 0]]
        max_rewards = 0
        state = env.reset()
        envs = Envs([env for env, prob, action, reward in beam_list])
        while True:
            state = envs.to_state()
            prob, value = model(*state)

            prob = prob.cpu().detach().numpy()
            tmp_list = []
            # Expand every beam entry by every feasible action; entries with
            # negligible probability are pruned outright.
            for i in range(len(beam_list)):
                for j in range(define.get_value('package_num')):
                    if prob[i, j] < 1e-10:
                        continue
                    tmp_list.append([beam_list[i][0], beam_list[i][1] * prob[i, j], j, beam_list[i][3]])
            # Keep the beam_size most probable candidates.
            tmp_list.sort(key = lambda x:x[1], reverse=True)
            tmp_list = tmp_list[:beam_size]
            # Deep-copy envs so each surviving candidate steps independently;
            # reset=False so terminated envs keep their final state.
            envs = Envs([copy.deepcopy(env) for env, prob, action, r in tmp_list])
            action = [a for env, prob, a, r in tmp_list]
            next_state, reward, done = envs.step(action, False)

            beam_list.clear()
            reward = reward.cpu().numpy()
            for i, (env, prob, a, r) in enumerate(tmp_list):
                r = r + reward[i]
                max_rewards = max(max_rewards, r)
                beam_list.append([copy.deepcopy(envs.envs[i]), prob, a, r])
            # Stop once every beam entry has terminated.
            if np.sum(1 - done.cpu().numpy()) == 0:
                break
        total_baselines += max_rewards
        print('{} {} {}'.format(_index, total_baselines/(_index+1), (time.time()-time_start)/(_index+1)))
Ejemplo n.º 15
0
def evaluate(model, seed):
    """Greedily roll out ``model`` on the instance generated from ``seed``.

    At each step the highest-probability package is scheduled; the rollout
    stops (and the accumulated urgency is returned) as soon as serving the
    chosen package plus the return trip would exceed the global time limit.

    NOTE(review): nothing masks already-scheduled packages here — presumably
    working time accumulates until the limit triggers; confirm against the
    Path implementation.
    """
    returns = 0
    packages, resources = gen_data.wrapper(seed)
    path = path_obj.Path(resources[0], packages, lambda x: 0, True, device)
    # Fix: the original had an unreachable `return returns` after this
    # `while True` loop (the loop only exits via the in-loop return).
    while True:
        state = path.to_state()
        prob, value = model(*state)
        # Greedy action: the highest-probability package.
        action = torch.argmax(prob, dim=1)
        action = action.cpu().numpy()[0]
        package = packages[action]
        times = path.getResourceNeedTime(package) + path.getResourceWorkingTime()

        if times + path.getReturnTime(package) > define.get_value('time_limit'):
            return returns
        reward = package.getUrgency()
        path.addWorkPackage(package)
        path.setResourceWorkingTime(times)
        path.setResourcePosition(package.getX(), package.getY(), package.getId())
        returns += reward
Ejemplo n.º 16
0
    # Remaining CLI flags (parser construction starts earlier in the file).
    # NOTE(review): both help strings say 'generate RL sample or IL sample' —
    # the --beam one is presumably a copy-paste leftover; confirm.
    parser.add_argument("--beam", action='store_true', help='generate RL sample or IL sample')
    parser.add_argument("--func-type", type=str, help='generate RL sample or IL sample')
    args = parser.parse_args()

    # Publish CLI settings into the global config used throughout the project.
    define.init()
    define.set_value('package_num', args.package_num)
    define.set_value('time_limit', args.time_limit)
    define.set_value('func_type', args.func_type)
    # Pre-generate the training instance pool.
    gen_data.generate_data(100000, args.package_num, args.func_type)

    device  = torch.device(args.device)
    encoder = GraphNet( hidden_size=args.hidden_size, n_head=args.nhead, nlayers=args.nlayer).to(device)
    decoder = GraphNetDecoder(hidden_size=args.hidden_size).to(device)

    if args.beam:
        # Load pretrained encoder/decoder checkpoints keyed by the instance
        # size and generator type, then evaluate with beam search.
        encoder.load_state_dict(torch.load('model/model_encoder_pn_{}_{}.ckpt'.format(define.get_value('package_num'), args.func_type)
                                         ,map_location=device))
        decoder.load_state_dict(torch.load('model/model_decoder_pn_{}_{}.ckpt'.format(define.get_value('package_num'), args.func_type)
                                         ,map_location=device))
        encoder.eval()
        decoder.eval()
        print('load successfully')
        # Smaller, separate pool for evaluation.
        gen_data.generate_data(10000, args.package_num, args.func_type)

        beam_search(encoder, decoder, 100)

    if args.test:
        encoder.load_state_dict(torch.load('model/model_encoder_pn_{}_{}.ckpt'.format(define.get_value('package_num'), args.func_type)
                                         ,map_location=device))
        decoder.load_state_dict(torch.load('model/model_decoder_pn_{}_{}.ckpt'.format(define.get_value('package_num'), args.func_type)
                                         ,map_location=device))
Ejemplo n.º 17
0
    # Remaining CLI flags (parser construction starts earlier in the file).
    parser.add_argument('--beam', action='store_true', help='beam search or not')
    args = parser.parse_args()

    # Publish CLI settings into the global config used throughout the project.
    define.init()
    define.set_value('package_num', args.package_num)
    define.set_value('time_limit', args.time_limit)
    define.set_value('func_type', args.func_type)
    # NOTE(review): 58 looks machine-specific (CPU core count?) — confirm.
    if args.device == 'cpu':
        torch.set_num_threads(58)
    else:
        torch.set_num_threads(1)

    if args.beam:
        device  = torch.device(args.device)
        model = GraphNet( hidden_size=args.hidden_size, n_head=args.nhead, nlayers=args.nlayer, duel_dqn=args.duel_dqn)
        # Checkpoint name encodes every hyperparameter that shaped training
        # (instance size, generator, env count, net size, DQN variant).
        model.load_state_dict(torch.load('model/model_dqn{}_{}_{}_{}_{}_{}_{}_{}_{}.ckpt'.format(args.fn, define.get_value('package_num'), args.func_type, args.num_env, args.hidden_size, args.nhead, args.nlayer, 'double' if args.double_dqn else 'vanilla', 'duel' if args.duel_dqn else 'vanilla')
                                         ,map_location=torch.device('cpu')))
        model = model.to(device)

        model.eval()
        # Smaller, separate pool for evaluation.
        gen_data.generate_data(10000, args.package_num, args.func_type)

        beam_search(model, 100)
        exit()

    if args.path:

        device  = torch.device(args.device)
        model = GraphNet( hidden_size=args.hidden_size, n_head=args.nhead, nlayers=args.nlayer, duel_dqn=args.duel_dqn)
        model.load_state_dict(torch.load('model/model_dqn{}_{}_{}_{}_{}_{}_{}_{}_{}.ckpt'.format(args.fn, define.get_value('package_num'), args.func_type, args.num_env, args.hidden_size, args.nhead, args.nlayer, 'double' if args.double_dqn else 'vanilla', 'duel' if args.duel_dqn else 'vanilla')
                                         ,map_location=torch.device('cpu')))
Ejemplo n.º 18
0
    # Publish CLI settings into the global config used throughout the project.
    define.init()
    define.set_value('package_num', args.package_num)
    define.set_value('time_limit', args.time_limit)
    define.set_value('func_type', args.func_type)
    # Pre-generate the training instance pool.
    gen_data.generate_data(100000, args.package_num, args.func_type)

    device = torch.device(args.device)
    encoder = GraphNet(hidden_size=args.hidden_size,
                       n_head=args.nhead,
                       nlayers=args.nlayer).to(device)
    decoder = GraphNetDecoder(hidden_size=args.hidden_size).to(device)

    if args.beam:
        # Load pretrained attention encoder/decoder checkpoints keyed by the
        # instance size and generator type, then evaluate with beam search.
        encoder.load_state_dict(
            torch.load('model/model_att_encoder_{}_{}.ckpt'.format(
                define.get_value('package_num'), args.func_type),
                       map_location=device))
        decoder.load_state_dict(
            torch.load('model/model_att_decoder_{}_{}.ckpt'.format(
                define.get_value('package_num'), args.func_type),
                       map_location=device))
        encoder.eval()
        decoder.eval()
        # Smaller, separate pool for evaluation.
        gen_data.generate_data(10000, args.package_num, args.func_type)

        beam_search(encoder, decoder, 100)

    if args.test:
        encoder.load_state_dict(
            torch.load('model/model_att_encoder_{}_{}.ckpt'.format(
                define.get_value('package_num'), args.func_type),