Esempio n. 1
0
 def cb(outputs):
     """Sample an action from the predicted distribution and answer the client.

     ``outputs`` is a future whose ``result()`` yields ``(distrib, value)``.
     The sampled action and the value estimate are appended to the client's
     memory.  When ``FEATURE`` is truthy, a feature taken from
     ``self.offline_predictor`` is sent back alongside the action.
     """
     probs, value = outputs.result()
     assert np.all(np.isfinite(probs)), probs
     picked = np.random.choice(len(probs), p=probs)
     self.clients[ident].memory.append(
         TransitionExperience(state, picked, None, value=value))
     if FEATURE:
         # Feature is computed for this single state (batch of one).
         feature = self.offline_predictor([[state]])[0][0]
         reply = [picked, feature]
     else:
         reply = picked
     self.send_queue.put([ident, dumps(reply)])
Esempio n. 2
0
        def cb(outputs):
            """Use the predicted policy as the action and send it to the client.

            ``outputs`` is a future whose ``result()`` yields
            ``(policy, value)``.  A cancelled future is logged and ignored.
            The experience is stored with ``action=None``; the reply carries
            both the policy output and the value estimate.
            """
            try:
                policy, value = outputs.result()
            except CancelledError:
                logger.info("Client {} cancelled.".format(ident))
                return
            assert np.all(np.isfinite(policy)), policy

            # NOTE: an earlier, now disabled, version injected priors here
            # because getting a decent reward early on was found to determine
            # how fast training converges ("novice driver": steer
            # conservatively, keep some throttle, never brake).  It clipped /
            # randomized action[0] and action[1] during the first epochs and
            # occasionally explored according to client._explore.
            client.memory.append(
                TransitionExperience(
                    state, action=None, reward=None, value=value))
            reply = dumps((policy, value))
            self.send_queue.put([ident, reply])
Esempio n. 3
0
 def cb(outputs):
     """Pick one of two predicted distributions at random and act from it.

     ``outputs`` is a future whose ``result()`` yields
     ``(distrib1, distrib2, value1, value2)``.  With probability 0.5 the
     action is sampled from ``distrib1`` (update weights 1.0 / 0.0),
     otherwise from ``distrib2`` (update weights 0.0 / 1.0).  The
     probability of the chosen action under both distributions is stored.
     """
     try:
         distrib1, distrib2, value1, value2 = outputs.result()
     except CancelledError:
         logger.info("Client {} cancelled.".format(ident))
         return
     for probs in (distrib1, distrib2):
         assert np.all(np.isfinite(probs)), probs

     use_first = np.random.rand() < 0.5
     behaviour = distrib1 if use_first else distrib2
     action = np.random.choice(len(behaviour), p=behaviour)
     updateweight1, updateweight2 = (1.0, 0.0) if use_first else (0.0, 1.0)

     self.clients[ident].memory.append(
         TransitionExperience(
             state, action, reward=None,
             value1=value1, value2=value2,
             updateweight1=updateweight1, updateweight2=updateweight2,
             prob1=distrib1[action], prob2=distrib2[action]))
     self.send_queue.put([ident, dumps(action)])
def perf_from_log(log_fn):
    """
    Return the performance tuple for one trial log, using an on-disk cache.

    Args:
        log_fn : a stdout file xxx/stdout/triali/stdout.txt

    Returns:
        ``(min_ve, multi_add * 2. * 1e-9, min_ve_epoch)`` parsed from the log
        (or read back from the cache), or ``(2.0, -1.0, -1)`` when the log
        file does not exist.
    """
    dn = os.path.dirname(log_fn)
    # The cache file name is the log directory path with '/' flattened.
    cache_fn = os.path.join(cache_dir, dn.replace('/', '__'))

    if os.path.exists(cache_fn):
        with open(cache_fn, 'rb') as fin:
            ss = fin.read()
        # ``ret`` must be bound even if decoding fails; previously a corrupt
        # cache file led to an UnboundLocalError on the check below.
        ret = None
        try:
            ret = loads(ss)
        except Exception:
            # Unreadable cache entry: fall through and recompute from the log.
            ret = None
        if ret and not FORCE_LOAD:
            return ret

    if not os.path.exists(log_fn):
        return 2.0, -1.0, -1

    min_ve, min_ve_epoch = val_err_from_log(log_fn)
    # Second element (parameter count) is not part of the returned tuple.
    multi_add, _ = multi_add_from_log(log_fn)
    ret = (min_ve, multi_add * 2. * 1e-9, min_ve_epoch)
    with open(cache_fn, 'wb') as fout:
        fout.write(dumps(ret))
    return ret
Esempio n. 5
0
    def run(self):
        """Run the simulator loop: push transitions, receive actions.

        A PUSH socket (HWM 2) connected to ``self.c2s`` carries
        ``(identity, state, reward, isOver)`` tuples; a DEALER socket
        connected to ``self.s2c`` delivers the action to take next.  The loop
        never returns.
        """
        enable_death_signal()
        player = self._build_player()

        ctx = zmq.Context()
        push_sock = ctx.socket(zmq.PUSH)
        push_sock.setsockopt(zmq.IDENTITY, self.identity)
        push_sock.set_hwm(2)
        push_sock.connect(self.c2s)

        reply_sock = ctx.socket(zmq.DEALER)
        reply_sock.setsockopt(zmq.IDENTITY, self.identity)
        reply_sock.connect(self.s2c)

        obs = player.reset()
        last_reward = 0
        done = False
        while True:
            # The message describes the outcome of the previous action: the
            # state reached plus the reward/isOver obtained.  When the episode
            # ended, the state is already the first one of the next episode,
            # so this tuple differs from what goes into the memory buffer.
            message = dumps((self.identity, obs, last_reward, done))
            push_sock.send(message, copy=False)
            action = loads(reply_sock.recv(copy=False))
            obs, last_reward, done, _ = player.step(action)
            if done:
                obs = player.reset()
Esempio n. 6
0
        def cb(outputs):
            """Use the predicted policy as the action and send it to the client.

            ``outputs`` is a future whose ``result()`` yields
            ``(policy, value)``.  A cancelled future is logged and ignored.
            The experience is stored with ``action=None``; the reply carries
            both the policy output and the value estimate.
            """
            try:
                policy, value = outputs.result()
            except CancelledError:
                logger.info("Client {} cancelled.".format(ident))
                return
            assert np.all(np.isfinite(policy)), policy

            # NOTE: an earlier, now disabled, version injected priors here
            # because getting a decent reward early on was found to determine
            # how fast training converges ("novice driver": steer
            # conservatively, keep some throttle, never brake).  It clipped /
            # randomized action[0] and action[1] during the first epochs and
            # occasionally explored according to client._explore.
            experience = TransitionExperience(
                state, action=None, reward=None, value=value)
            client.memory.append(experience)
            self.send_queue.put([ident, dumps((policy, value))])
Esempio n. 7
0
    def run(self):
        """Run the simulator loop: push transitions, receive actions.

        A PUSH socket (HWM 2) connected to ``self.c2s`` carries
        ``(identity, state, reward, isOver)`` tuples; a DEALER socket
        connected to ``self.s2c`` delivers the serialized action.  The loop
        never returns.
        """
        player = self._build_player()

        ctx = zmq.Context()
        outgoing = ctx.socket(zmq.PUSH)
        outgoing.setsockopt(zmq.IDENTITY, self.identity)
        outgoing.set_hwm(2)
        outgoing.connect(self.c2s)

        incoming = ctx.socket(zmq.DEALER)
        incoming.setsockopt(zmq.IDENTITY, self.identity)
        incoming.connect(self.s2c)

        obs = player.reset()
        last_reward = 0
        done = False
        while True:
            # The message describes the outcome of the previous action: the
            # state reached plus the reward/isOver obtained.  When the episode
            # ended, the state is already the first one of the next episode,
            # so this tuple differs from what goes into the memory buffer.
            packet = dumps((self.identity, obs, last_reward, done))
            outgoing.send(packet, copy=False)
            frame = incoming.recv(copy=False)
            action = loads(frame.bytes)
            obs, last_reward, done, _ = player.step(action)
            if done:
                obs = player.reset()
Esempio n. 8
0
 def _process_msg(self, client, role_id, prob_state, all_state,
                  last_cards_onehot, first_st, mask, minor_type, mode,
                  reward, isOver):
     """
     Process a message sent from some client.

     When an episode has just ended (``isOver`` together with ``first_st``),
     the final reward is written back into the trailing zero-reward
     experiences of all three per-role memories and the memory is flushed
     through ``self._parse_memory``.  Afterwards a uniformly random action
     among the entries allowed by ``mask`` is sent to the client and
     recorded with ``reward=0``.
     """
     # In the first message only the state is valid; reward and isOver
     # should be discarded.
     if isOver and first_st:
         # Should clear the client's memory and put it to the queue.
         assert reward != 0
         for role in range(3):
             # The reward arrives from the farmer's perspective (per the
             # original note about the C++ side); role index 1 gets the
             # opposite sign so each agent sees its own perspective.
             signed_reward = -reward if role == 1 else reward
             history = client.memory[role]
             pos = -1
             while history[pos].reward == 0:
                 history[pos].reward = signed_reward
                 if history[pos].first_st:
                     break
                 pos -= 1
         self._parse_memory(0, client)

     # Feed state and return action: random scores, zeroed where masked out;
     # the small epsilon keeps every allowed entry strictly positive.
     scores = (np.random.rand(mask.shape[0]) + 1e-6) * mask
     chosen = np.argmax(scores)
     self.send_queue.put([client.ident, dumps(chosen)])
     client.memory[role_id - 1].append(
         TransitionExperience(
             prob_state, all_state, chosen,
             reward=0, first_st=first_st, mode=mode))
Esempio n. 9
0
def mark_stopped(log_dir, is_interrupted=False, msg_func=None):
    """Write the stop-marker file for ``log_dir``.

    The marker path comes from ``stop_mark_fn(log_dir, is_interrupted)``.
    Its content is ``msg_func()`` when a callable is given, otherwise the
    serialized string ``'meow'``.
    """
    final_path = stop_mark_fn(log_dir, is_interrupted)
    staging_path = final_path + '.tmp'
    with open(staging_path, 'wb') as fout:
        if msg_func is None:
            payload = dumps('meow')
        else:
            payload = msg_func()
        fout.write(payload)
    # Write to a temporary name first and rename afterwards, so a watcher
    # never finds the marker before its content is completely written.
    os.rename(staging_path, final_path)
Esempio n. 10
0
 def request_click(bbox):
     """Ask the manager to click the centre of ``bbox`` and return its reply.

     ``bbox`` is indexed as ``[0]..[3]``; the click point is the midpoint of
     (bbox[0], bbox[2]) and (bbox[1], bbox[3]), shifted by the window origin
     ``self.window_rect[0:2]`` plus fixed offsets of 6 and 46.
     NOTE(review): the offsets presumably account for window border and
     title bar -- confirm.
     """
     click_x = (bbox[0] + bbox[2]) // 2 + self.window_rect[0] + 6
     click_y = (bbox[1] + bbox[3]) // 2 + self.window_rect[1] + 46
     request = [self.name, SimulatorManager.MSG_TYPE.CLICK, [click_x, click_y]]
     sim2mgr_socket.send(dumps(request))
     reply = mgr2sim_socket.recv(copy=False)
     return loads(reply.bytes)
Esempio n. 11
0
def mark_failed(log_dir):
    """Write the (non-interrupted) stop marker with a failure payload.

    The marker path comes from ``stop_mark_fn(log_dir, is_interrupted=False)``
    and its content is the serialized string ``'failed_meow'``.
    """
    target = stop_mark_fn(log_dir, is_interrupted=False)
    scratch = target + '.tmp'
    with open(scratch, 'wb') as fout:
        fout.write(dumps('failed_meow'))
    # Write to a temporary name first and rename afterwards, so a watcher
    # never finds the marker before its content is completely written.
    os.rename(scratch, target)
Esempio n. 12
0
 def spawn(self, job_type, entry_func, stop_func, msg_func, sleep_time):
     """Create, register and start a new ``WhileSleepWorker``.

     Args:
         job_type: key of the pool queue the worker id is enqueued under.
         entry_func, stop_func, sleep_time: forwarded to the worker.
         msg_func: callable returning a list; the worker's id is appended
             to that list and the result is serialized with ``dumps``.
     """
     qid = job_type
     self.worker_id += 1
     # Capture the id now.  Reading ``self.worker_id`` inside the message
     # callback would late-bind it, so any later ``spawn`` call would change
     # the id reported by workers created earlier.
     worker_id = self.worker_id

     def msg_func2():
         return dumps(msg_func() + [worker_id])

     proc = WhileSleepWorker(self.pipename, self.hwm, entry_func, stop_func,
                             msg_func2, sleep_time)
     self.pools.enqueue(qid, worker_id)
     self.worker_id_to_proc[worker_id] = proc
     start_proc_mask_signal([proc])
Esempio n. 13
0
 def cb(outputs):
     """Send the first predictor output back to the simulator ``sim_name``.

     ``outputs`` is a future; a cancelled future is logged and ignored.
     """
     try:
         output = outputs.result()
     except CancelledError:
         logger.info("{} cancelled.".format(sim_name))
         return
     target = sim_name.encode('utf-8')
     first = output[0]
     print('coordinator sending', target, first.shape)
     # Multipart message: [simulator identity, serialized payload].
     self.coord2sim_socket.send_multipart([target, dumps(first)])
Esempio n. 14
0
 def cb(outputs):
     """Sample an action from the predicted distribution and reply.

     ``outputs`` is a future whose ``result()`` yields ``(distrib, value)``.
     A cancelled future is logged and ignored.  The experience stored in the
     client's memory includes the probability of the sampled action.
     """
     try:
         distrib, value = outputs.result()
     except CancelledError:
         logger.info("Client {} cancelled.".format(client.ident))
         return
     assert np.all(np.isfinite(distrib)), distrib
     n_actions = len(distrib)
     action = np.random.choice(n_actions, p=distrib)
     experience = TransitionExperience(
         state, action, reward=None, value=value, prob=distrib[action])
     client.memory.append(experience)
     self.send_queue.put([client.ident, dumps(action)])
Esempio n. 15
0
def compute_mean_std(db, fname):
    """Accumulate online moments over an LMDB dataset and save mean/std.

    Every element of the first component of each datapoint is fed to an
    ``OnlineMoments`` accumulator; ``[mean, std]`` is then serialized into
    ``fname``.
    """
    dataset = LMDBSerializer.load(db, shuffle=False)
    dataset.reset_state()
    moments = OnlineMoments()
    for datapoint in get_tqdm(dataset):
        # First component is annotated upstream as "len x dim".
        for row in datapoint[0]:
            moments.feed(row)
    logger.info("Writing to {} ...".format(fname))
    with open(fname, 'wb') as out:
        out.write(serialize.dumps([moments.mean, moments.std]))
Esempio n. 16
0
 def cb(outputs):
     """Sample an action from the predicted distribution and reply.

     ``outputs`` is a future whose ``result()`` yields ``(distrib, value)``.
     A cancelled future is logged and ignored.  The experience stored in the
     client's memory includes the probability of the sampled action.
     """
     try:
         distrib, value = outputs.result()
     except CancelledError:
         logger.info("Client {} cancelled.".format(client.ident))
         return
     assert np.all(np.isfinite(distrib)), distrib
     action = np.random.choice(len(distrib), p=distrib)
     action_prob = distrib[action]
     client.memory.append(
         TransitionExperience(
             state, action, reward=None, value=value, prob=action_prob))
     reply = dumps(action)
     self.send_queue.put([client.ident, reply])
Esempio n. 17
0
def compute_mean_std(db, fname):
    """Accumulate online moments over an LMDB dataset and save mean/std.

    Every element of the first component of each datapoint is fed to an
    ``OnlineMoments`` accumulator; ``[mean, std]`` is then serialized into
    ``fname``.
    """
    dataset = LMDBSerializer.load(db, shuffle=False)
    dataset.reset_state()
    accumulator = OnlineMoments()
    for sample in get_tqdm(dataset):
        features = sample[0]  # annotated upstream as "len x dim"
        for vec in features:
            accumulator.feed(vec)
    logger.info("Writing to {} ...".format(fname))
    with open(fname, 'wb') as sink:
        sink.write(serialize.dumps([accumulator.mean, accumulator.std]))
Esempio n. 18
0
    def run(self):
        """Build the player, connect a PUSH socket and stream datapoints.

        Each datapoint produced by ``self.get_data()`` is serialized and sent
        over a PUSH socket (HWM 5) connected to ``self.pipe_c2s``.
        """
        self.player = self._build_player()

        self.ctx = zmq.Context()
        sock = self.ctx.socket(zmq.PUSH)
        self.c2s_socket = sock
        sock.setsockopt(zmq.IDENTITY, self.identity)
        sock.set_hwm(5)
        sock.connect(self.pipe_c2s)

        self._prepare()
        for dp in self.get_data():
            payload = dumps(dp)
            self.c2s_socket.send(payload, copy=False)
Esempio n. 19
0
    def run(self):
        """Build the player, connect a PUSH socket and stream datapoints.

        Each datapoint produced by ``self.get_data()`` is serialized and sent
        over a PUSH socket (HWM 5) connected to ``self.pipe_c2s``.
        """
        self.player = self._build_player()

        self.ctx = zmq.Context()
        push_sock = self.ctx.socket(zmq.PUSH)
        self.c2s_socket = push_sock
        push_sock.setsockopt(zmq.IDENTITY, self.identity)
        push_sock.set_hwm(5)
        push_sock.connect(self.pipe_c2s)

        self._prepare()
        for datapoint in self.get_data():
            self.c2s_socket.send(dumps(datapoint), copy=False)
Esempio n. 20
0
def compute_mean_std(db, fname):
    """Accumulate online moments over an LMDB dataset and save mean/std.

    Every element of the first component of each datapoint is fed to an
    ``OnlineMoments`` accumulator while a progress bar sized by
    ``ds.size()`` advances once per datapoint; ``[mean, std]`` is then
    serialized into ``fname``.
    """
    dataset = LMDBDataPoint(db, shuffle=False)
    dataset.reset_state()
    moments = OnlineMoments()
    with get_tqdm(total=dataset.size()) as progress:
        for datapoint in dataset.get_data():
            # First component is annotated upstream as "len x dim".
            for row in datapoint[0]:
                moments.feed(row)
            progress.update()
    logger.info("Writing to {} ...".format(fname))
    with open(fname, 'wb') as out:
        out.write(serialize.dumps([moments.mean, moments.std]))
Esempio n. 21
0
def compute_mean_std(db, fname):
    """Accumulate online moments over an LMDB dataset and save mean/std.

    Every element of the first component of each datapoint is fed to an
    ``OnlineMoments`` accumulator while a progress bar sized by
    ``ds.size()`` advances once per datapoint; ``[mean, std]`` is then
    serialized into ``fname``.
    """
    dataset = LMDBDataPoint(db, shuffle=False)
    dataset.reset_state()
    accumulator = OnlineMoments()
    with get_tqdm(total=dataset.size()) as pbar:
        for sample in dataset.get_data():
            features = sample[0]  # annotated upstream as "len x dim"
            for vec in features:
                accumulator.feed(vec)
            pbar.update()
    logger.info("Writing to {} ...".format(fname))
    with open(fname, 'wb') as sink:
        sink.write(serialize.dumps([accumulator.mean, accumulator.std]))
def compute_mean_std(ds, fname):
    """
    Compute mean and std over a dataset and write them to a file.

    Every element of the first component of each datapoint in ``ds`` is fed
    to a ``stats.OnlineMoments`` accumulator; ``[mean, std]`` is serialized
    into ``fname``.

    Usage: compute_mean_std(ds, 'mean_std.txt')
    """
    moments = stats.OnlineMoments()
    for datapoint in get_tqdm(ds):
        # First component is annotated upstream as "len x dim".
        for row in datapoint[0]:
            moments.feed(row)
    logger.info("Writing to {} ...".format(fname))
    with open(fname, 'wb') as out:
        out.write(serialize.dumps([moments.mean, moments.std]))
Esempio n. 23
0
        def f():
            """Handle one queued simulator request (lock/unlock/screen/click).

            While some simulator holds the lock, requests from any other
            simulator are put back on the queue after a short sleep.  Every
            handled request is answered on ``self.mgr2sim_socket`` with a
            multipart message ``[simulator name, serialized payload]``.
            """
            msg = self.queue.get()
            sim_name, request = msg[0], msg[1]

            def reply(payload):
                # Address the answer to the requesting simulator.
                self.mgr2sim_socket.send_multipart(
                    [sim_name.encode('utf-8'), dumps(payload)])

            if request == SimulatorManager.MSG_TYPE.LOCK and self.locked_sim is None:
                self.locked_sim = sim_name
                reply('lock')
                time.sleep(0.2)
                return

            if self.locked_sim is not None:
                if sim_name != self.locked_sim:
                    # Someone else owns the lock: retry this message later.
                    time.sleep(0.2)
                    self.queue.put(msg)
                    return
                if request == SimulatorManager.MSG_TYPE.UNLOCK:
                    self.locked_sim = None
                    reply('unlock')
                    time.sleep(0.2)
                    return

            self.cxt_switch(sim_name)
            if request == SimulatorManager.MSG_TYPE.SCREEN:
                reply(grab_screen())
            elif request == SimulatorManager.MSG_TYPE.CLICK:
                # msg[2] holds the two click coordinates.
                click(msg[2][0], msg[2][1])
                reply('click')
Esempio n. 24
0
 def cb(outputs):
     """Record the predicted action and send it to the client.

     ``outputs`` is a future whose ``result()`` yields
     ``(action, prob, value)``.  A cancelled future is logged and ignored.
     """
     try:
         action, prob, value = outputs.result()
     except CancelledError:
         logger.info("Client {} cancelled.".format(client.ident))
         return
     assert np.all(np.isfinite(prob)), prob
     record = TransitionExperience(
         state, action, reward=None, value=value, prob=prob)
     client.memory.append(record)
     self.send_queue.put([client.ident, dumps(action)])
Esempio n. 25
0
 def cb(outputs):
     """Sample a masked action from the head selected by ``mode`` and reply.

     The last element of the predictor output is the mode; it indexes into
     the remaining elements to pick the distribution.  That distribution is
     smoothed by 1e-6, multiplied by ``mask`` and renormalized for sampling.
     NOTE(review): the stored ``prob`` is the masked value before
     normalization -- confirm this is intended.
     """
     try:
         output = outputs.result()
     except CancelledError:
         logger.info("Client {} cancelled.".format(client.ident))
         return
     mode = output[-1]
     heads = output[:-1]
     distrib = (heads[mode] + 1e-6) * mask
     assert np.all(np.isfinite(distrib)), distrib
     sampling_probs = distrib / distrib.sum()
     action = np.random.choice(len(distrib), p=sampling_probs)
     client.memory[role_id - 1].append(
         TransitionExperience(
             prob_state, all_state, action,
             reward=0,
             minor_type=minor_type,
             first_st=first_st,
             last_cards_onehot=last_cards_onehot,
             mode=mode,
             prob=distrib[action]))
     self.send_queue.put([client.ident, dumps(action)])
Esempio n. 26
0
    def run(self):
        """Run the simulator loop: push transitions, receive actions.

        A PUSH socket (HWM 2) connected to ``self.c2s`` carries
        ``(identity, state, reward, isOver)`` tuples; a DEALER socket
        connected to ``self.s2c`` (no HWM configured) delivers the serialized
        action.  The loop never returns.
        """
        player = self._build_player()

        ctx = zmq.Context()
        push_sock = ctx.socket(zmq.PUSH)
        push_sock.setsockopt(zmq.IDENTITY, self.identity)
        push_sock.set_hwm(2)
        push_sock.connect(self.c2s)

        reply_sock = ctx.socket(zmq.DEALER)
        reply_sock.setsockopt(zmq.IDENTITY, self.identity)
        reply_sock.connect(self.s2c)

        obs = player.current_state()
        last_reward = 0
        done = False
        while True:
            message = dumps((self.identity, obs, last_reward, done))
            push_sock.send(message, copy=False)
            frame = reply_sock.recv(copy=False)
            last_reward, done = player.action(loads(frame.bytes))
            obs = player.current_state()
Esempio n. 27
0
    def run(self):
        """Run the simulator loop: push transitions, receive actions.

        A PUSH socket (HWM 2) connected to ``self.c2s`` carries
        ``(identity, state, reward, isOver)`` tuples; a DEALER socket
        connected to ``self.s2c`` (no HWM configured) delivers the serialized
        action.  The loop never returns.
        """
        player = self._build_player()

        ctx = zmq.Context()
        outgoing = ctx.socket(zmq.PUSH)
        outgoing.setsockopt(zmq.IDENTITY, self.identity)
        outgoing.set_hwm(2)
        outgoing.connect(self.c2s)

        incoming = ctx.socket(zmq.DEALER)
        incoming.setsockopt(zmq.IDENTITY, self.identity)
        incoming.connect(self.s2c)

        current = player.current_state()
        reward = 0
        finished = False
        while True:
            packet = dumps((self.identity, current, reward, finished))
            outgoing.send(packet, copy=False)
            action = loads(incoming.recv(copy=False).bytes)
            reward, finished = player.action(action)
            current = player.current_state()
Esempio n. 28
0
    def run(self):
        """Run the simulator loop: push transitions, receive actions.

        A PUSH socket (HWM 10) connected to ``self.c2s`` carries
        ``(identity, state, reward, is_over)`` tuples; a DEALER socket
        connected to ``self.s2c`` delivers the serialized action.  The loop
        never returns.
        """
        player = self._build_player()
        context = zmq.Context()
        c2s_socket = context.socket(zmq.PUSH)
        c2s_socket.setsockopt(zmq.IDENTITY, self.identity)
        c2s_socket.set_hwm(10)
        c2s_socket.connect(self.c2s)

        s2c_socket = context.socket(zmq.DEALER)
        s2c_socket.setsockopt(zmq.IDENTITY, self.identity)
        s2c_socket.connect(self.s2c)

        st = player.reset()
        r, is_over = 0, False

        while True:
            c2s_socket.send(dumps((self.identity, st, r, is_over)), copy=False)
            action = loads(s2c_socket.recv(copy=False).bytes)
            st, r, is_over, _ = player.step(action)
            if is_over:
                # Keep the observation returned by reset(); previously it was
                # discarded, so the stale terminal state was sent as the
                # state from which the next action is chosen.
                st = player.reset()
Esempio n. 29
0
    def _eval(self):
        """Run evaluation, dump the results as JSON and log the scores.

        With the 'replicated' trainer every (dataflow, predictor) pair is
        evaluated in its own thread.  Otherwise local results are serialized
        and exchanged through the ``self.string_lens`` /
        ``self.concat_results`` tensors, and only rank 0 continues past the
        gathering step.
        """
        if cfg.TRAINER == 'replicated':
            with ThreadPoolExecutor(max_workers=self.num_predictor,
                                    thread_name_prefix='EvalWorker') as executor, \
                    tqdm.tqdm(total=sum(df.size() for df in self.dataflows)) as pbar:
                futures = [
                    executor.submit(eval_coco, dataflow, pred, pbar)
                    for dataflow, pred in zip(self.dataflows, self.predictors)
                ]
                all_results = list(itertools.chain.from_iterable(
                    fut.result() for fut in futures))
        else:
            local_results = eval_coco(self.dataflow, self.predictor)
            # Ship the serialized results through the graph as a uint8 array.
            serialized = np.frombuffer(dumps(local_results), dtype=np.uint8)
            sizes, concat_arrs = tf.get_default_session().run(
                [self.string_lens, self.concat_results],
                feed_dict={self.local_result_tensor: serialized})
            if hvd.rank() > 0:
                return
            # The concatenated buffer holds one serialized blob per entry of
            # ``sizes``, back to back.
            all_results = []
            offset = 0
            for size in sizes:
                end = offset + size
                all_results.extend(loads(concat_arrs[offset:end].tobytes()))
                offset = end

        output_file = os.path.join(
            logger.get_logger_dir(),
            'outputs{}.json'.format(self.global_step))
        with open(output_file, 'w') as f:
            json.dump(all_results, f)
        try:
            scores = print_evaluation_scores(output_file)
            for name, score in scores.items():
                self.trainer.monitors.put_scalar(name, score)
        except Exception:
            # Scoring problems are logged but must not abort training.
            logger.exception("Exception in COCO evaluation.")
Esempio n. 30
0
 def request_screen():
     """Send a SCREEN request to the manager and return its decoded reply."""
     request = [self.name, SimulatorManager.MSG_TYPE.SCREEN, []]
     sim2mgr_socket.send(dumps(request))
     frame = mgr2sim_socket.recv(copy=False)
     return loads(frame.bytes)
Esempio n. 31
0
def final():
    """Capture stereo frames forever, match keypoints and push 3D points.

    Each iteration grabs one frame from camera 0 and camera 1, resizes both
    to 368x368, feeds them to the runner loaded from '../data/cpm.npy',
    matches the results with the C++ ``PatchMatch`` matcher, averages the
    last (at most two) match results, triangulates 14 points and sends the
    resulting 14x3 array over a ZMQ PUSH socket to
    ``tcp://<args.host>:8888``.

    NOTE: this function uses Python 2 ``print`` statements and relies on the
    module-level ``args``.
    """
    camera = libcpm.Camera()
    camera.setup()

    # cpp matcher:
    pmatcher = libcpm.PatchMatch()
    pmatcher.init(camera, 20)

    # python matcher:
    #bgs0, bgs1 = [], []
    #for k in range(20):
    #m1 = camera.get_for_py(0)
    #m1 = np.array(m1, copy=True)
    #m2 = camera.get_for_py(1)
    #m2 = np.array(m2, copy=True)
    #bgs0.append(m1)
    #bgs1.append(m2)
    #matcher = Matcher(BackgroundSegmentor(bgs0), BackgroundSegmentor(bgs1))

    runner = get_parallel_runner('../data/cpm.npy')

    viewer = libcpm.StereoCameraViewer(camera)
    viewer.start()

    # Only C1 and C0 are used below (for triangulation); d1 and d0 are unused.
    C1, C0, d1, d0 = load_camera_from_calibr(
        '../calibr-1211/camchain-homeyihuaDesktopCPM3D_kalibrfinal3.yaml')
    # Holds the two most recent match results; their mean is triangulated.
    queue = deque(maxlen=2)

    ctx = zmq.Context()
    sok = ctx.socket(zmq.PUSH)
    global args
    sok.connect('tcp://{}:8888'.format(args.host))

    def cpp_matcher(m1, m2, o1, o2):
        # Wrap the runner outputs for the C++ side, then reshape the match
        # result into one row per point.
        o1 = libcpm.Mat(o1)
        o2 = libcpm.Mat(o2)
        out = pmatcher.match_with_hm(m1, m2, o1, o2)
        return np.asarray(out).reshape(14, 4)  #14 x 2image x (x,y)

    # NOTE(review): pts3ds is never used below.
    pts3ds = []
    cnt = 0
    while True:
        cnt += 1
        print 'begin---', time.time()
        m1 = camera.get_for_py(0)
        m1r = np.array(m1, copy=False)
        m2 = camera.get_for_py(1)
        m2r = np.array(m2, copy=False)

        m1s = cv2.resize(m1r, (368, 368))
        m2s = cv2.resize(m2r, (368, 368))
        print 'after resize---', time.time()

        o1, o2 = runner(m1s, m2s)
        print 'after cpm---', time.time()

        #pts14x4 = matcher.match(m1r, m2r, o1, o2)
        # The matcher receives the original camera objects, not the resized
        # arrays.
        pts14x4 = cpp_matcher(m1, m2, o1, o2)

        #to_save = (m1s, m2s, o1, o2, pts14x4)
        #fout = open('full-recording/{:04d}.dat'.format(cnt), 'wb')
        #fout.write(dumps(to_save))
        #fout.close()

        print 'after match---', time.time()
        queue.append(pts14x4)
        # Average over the queued results (at most two).
        p2d = np.mean(queue, axis=0)
        p3ds = np.zeros((14, 3))
        for c in range(14):
            # Columns 0:2 and 2:4 of each row are passed as the two 2D points.
            p3d = triangulate(C0, C1, p2d[c, :2], p2d[c, 2:])
            p3ds[c, :] = p3d
        sok.send(dumps(p3ds))
        print p3ds
        print 'after send---', time.time()
        print '-----------------'
Esempio n. 32
0
                args.do_validation = False
                args.compute_hallu_stats = False
                test_ret = eval_child(model_cls, args,
                    args.log_dir, args.model_dir, collect_hallu_stats=False)
                te = test_ret[0]
            else:
                te = ve
            # form stopping message for main.
            json_ret = dict()
            json_ret['ve'] = ve
            json_ret['te'] = te
            json_ret['fp'] = fp
            json_ret['l_stats'] = l_stats
            json_ret['l_op_indices'] = l_op_indices
            json_ret['l_op_omega'] = l_op_omega
            ret_str = dumps(json_ret)
            msg_func = lambda : ret_str
            mark_stopped(args.log_dir, msg_func=msg_func)
            # Go to parse_remote_stop_file for how this msg is parsed.
        except Exception as e:
            mi = os.path.basename(os.path.normpath(args.model_dir))
            logger.info("mi={} failed: {}".format(mi, e))
            # TODO differentiate OOM and Unknown:
            # tensorflow.python.framework.errors_impl.UnknownError
            # tensorflow.python.framework.errors_impl.ResourceExhaustedError
            mark_failed(args.log_dir)
            traceback.print_exc()
            raise
        # pack the info for the stop file, see parse_remote_stop_file for unpacking

Esempio n. 33
0
    def run(self):
        """Run the card-game simulator loop for the manually controlled role.

        A PUSH socket (HWM 10) connected to ``self.c2s`` carries the
        per-decision messages and a DEALER socket connected to ``self.s2c``
        delivers the serialized actions.  When ``role_id == 2`` the move is
        built step by step through a ``SubState`` driven by the server;
        every other role is played with ``player.step_auto()``.  The loop
        never returns.
        """
        player = self._build_player()
        context = zmq.Context()
        c2s_socket = context.socket(zmq.PUSH)
        c2s_socket.setsockopt(zmq.IDENTITY, self.identity)
        c2s_socket.set_hwm(10)
        c2s_socket.connect(self.c2s)

        s2c_socket = context.socket(zmq.DEALER)
        s2c_socket.setsockopt(zmq.IDENTITY, self.identity)
        s2c_socket.connect(self.s2c)

        player.reset()
        # Every episode is started from the same fixed card list.
        init_cards = np.arange(21)
        # init_cards = np.append(init_cards[::4], init_cards[1::4])
        player.prepare_manual(init_cards)
        r, is_over = 0, False
        while True:
            all_state, role_id, curr_handcards_value, last_cards_value, last_category = \
                player.get_state_all_cards(), player.get_role_ID(), player.get_curr_handcards(), player.get_last_outcards(), player.get_last_outcategory_idx()
            # after taking the last action, get to this state and get this reward/isOver.
            # If isOver, get to the next-episode state immediately.
            # This tuple is not the same as the one put into the memory buffer
            # "Active" means there are no last-played cards to respond to.
            is_active = (last_cards_value.size == 0)
            # One action mask per 60-wide slice of all_state (three slices),
            # flattened into a single vector.
            all_state = np.stack([
                get_mask(
                    Card.onehot2char(all_state[i * 60:(i + 1) * 60]),
                    action_space,
                    None if is_active else to_char(last_cards_value)).astype(
                        np.float32) for i in range(3)
            ]).reshape(-1)
            last_state = get_mask(to_char(last_cards_value), action_space,
                                  None).astype(np.float32)

            if role_id == 2:
                st = SubState(
                    ACT_TYPE.PASSIVE if last_cards_value.size > 0
                    else ACT_TYPE.ACTIVE, all_state,
                    to_char(curr_handcards_value), last_cards_value,
                    last_category)
                if last_cards_value.size > 0:
                    assert last_category > 0
                # Only the first message of a sub-state sequence is flagged.
                first_st = True
                while not st.finished:
                    c2s_socket.send(dumps(
                        (self.identity, role_id,
                         st.state, st.all_state, last_state, first_st,
                         st.get_mask(), st.minor_type, st.mode, r, is_over)),
                                    copy=False)
                    first_st = False
                    action = loads(s2c_socket.recv(copy=False).bytes)
                    # logger.info('received action {}'.format(action))
                    # print(action)
                    st.step(action)

                # print(st.intention)
                assert st.card_type != -1
                # NOTE(review): category_idx is not used afterwards.
                r, is_over, category_idx = player.step_manual(st.intention)
            else:
                _, r, _ = player.step_auto()
                # For automatic moves a non-zero reward marks the episode end.
                is_over = (r != 0)
            if is_over:
                # print('{} over with reward {}'.format(self.identity, r))
                # logger.info('{} over with reward {}'.format(self.identity, r))
                # sys.stdout.flush()
                player.reset()
                player.prepare_manual(init_cards)
Esempio n. 34
0
    def run(self):
        """Run the card-game simulator loop with a recurrent-state round trip.

        A PUSH socket (HWM 10) connected to ``self.c2s`` carries the
        per-decision messages and a DEALER socket connected to ``self.s2c``
        delivers ``(action index, lstm state)`` replies.  Roles listed in
        ``ROLE_IDS_TO_TRAIN`` are driven by the server; every other role is
        played with ``player.step_auto()``.  The loop never returns.
        """
        player = self._build_player()
        ctx = zmq.Context()

        push_sock = ctx.socket(zmq.PUSH)
        push_sock.setsockopt(zmq.IDENTITY, self.identity)
        push_sock.set_hwm(10)
        push_sock.connect(self.c2s)

        reply_sock = ctx.socket(zmq.DEALER)
        reply_sock.setsockopt(zmq.IDENTITY, self.identity)
        reply_sock.connect(self.s2c)

        player.reset()
        player.prepare()
        r, is_over = 0, False
        lstm_state = np.zeros([1024 * 2])
        while True:
            role_id = player.get_role_ID()
            if role_id not in ROLE_IDS_TO_TRAIN:
                _, r, _ = player.step_auto()
                # For automatic moves a non-zero reward marks the episode end.
                is_over = (r != 0)
            else:
                prob_state = player.get_state_prob()
                all_state = player.get_state_all_cards()
                hand_values = player.get_curr_handcards()
                last_values = player.get_last_outcards()
                # Result is not used, but the query is kept.
                player.get_last_outcategory_idx()
                prob_state = np.concatenate(
                    [Card.val2onehot60(hand_values), prob_state])

                # The message describes the outcome of the previous action:
                # the state reached plus the reward/isOver obtained.  It is
                # not the tuple that ends up in the memory buffer.
                # "Active" means there are no last-played cards to respond to.
                is_active = not (last_values.size > 0)
                mask = get_mask(
                    to_char(hand_values), action_space,
                    None if is_active else to_char(last_values))
                if is_active:
                    # Action index 0 is disallowed for an active player.
                    mask[0] = 0
                last_two_cards = player.get_last_two_cards()
                last_two_cards_onehot = np.concatenate([
                    Card.val2onehot60(last_two_cards[0]),
                    Card.val2onehot60(last_two_cards[1]),
                ])
                message = (self.identity, role_id, prob_state, all_state,
                           last_two_cards_onehot, mask,
                           0 if is_active else 1,
                           lstm_state, r, is_over)
                push_sock.send(dumps(message), copy=False)
                frame = reply_sock.recv(copy=False)
                action_idx, lstm_state = loads(frame.bytes)

                r, is_over, _ = player.step_manual(
                    to_value(action_space[action_idx]))
            if is_over:
                # New episode: restart the game and clear the recurrent state.
                player.reset()
                player.prepare()
                lstm_state = np.zeros([1024 * 2])
Esempio n. 35
0
    def run(self):
        """Main loop of the simulator: watch the game window and play it.

        Sets up ZeroMQ sockets to the coordinator, the experience receivers
        and the simulator manager, then loops forever:

        * while ``self.toggle.value == 0`` the loop idles;
        * otherwise it asks the manager for a screenshot, waits until a
          clickable button is recognised, and reacts according to
          ``self.state`` (``CALLING`` or ``PLAYING``).

        During ``PLAYING`` the transitions produced by ``self.predictor`` are
        pushed to ``sim2exp_sockets[self.current_lord_pos]`` and win rates are
        accumulated in ``self.win_rates``.

        This method never returns.
        """
        logger.info('simulator main loop')
        context = zmq.Context()

        # Every socket below uses this simulator's name as its ZMQ identity
        # and a high-water mark of 2.
        # PUSH/DEALER pair towards the coordinator; both are handed to
        # self.predictor.predict() further down.
        sim2coord_socket = context.socket(zmq.PUSH)
        sim2coord_socket.setsockopt(zmq.IDENTITY, self.name.encode('utf-8'))
        sim2coord_socket.set_hwm(2)
        sim2coord_socket.connect(self.sim2coord)

        coord2sim_socket = context.socket(zmq.DEALER)
        coord2sim_socket.setsockopt(zmq.IDENTITY, self.name.encode('utf-8'))
        coord2sim_socket.set_hwm(2)
        coord2sim_socket.connect(self.coord2sim)

        # One PUSH socket per endpoint in self.sim2exps; later indexed by
        # self.current_lord_pos.
        sim2exp_sockets = []
        for sim2exp in self.sim2exps:
            sim2exp_socket = context.socket(zmq.PUSH)
            sim2exp_socket.setsockopt(zmq.IDENTITY, self.name.encode('utf-8'))
            sim2exp_socket.set_hwm(2)
            sim2exp_socket.connect(sim2exp)
            sim2exp_sockets.append(sim2exp_socket)

        # PUSH/DEALER pair towards the simulator manager (screenshots,
        # clicks, lock/unlock requests).
        sim2mgr_socket = context.socket(zmq.PUSH)
        sim2mgr_socket.setsockopt(zmq.IDENTITY, self.name.encode('utf-8'))
        sim2mgr_socket.set_hwm(2)
        sim2mgr_socket.connect(self.sim2mgr)

        mgr2sim_socket = context.socket(zmq.DEALER)
        mgr2sim_socket.setsockopt(zmq.IDENTITY, self.name.encode('utf-8'))
        mgr2sim_socket.set_hwm(2)
        mgr2sim_socket.connect(self.mgr2sim)

        # Leftover debugging snippets, kept for reference:
        # while True:
        #     time.sleep(0.3)
        #     print(self.name)
        #     sim2exp_sockets[1].send(dumps([self.name, 'haha']))

        # print('main loop')
        # while True:
        #     time.sleep(0.3)
        #     msg = loads(coord2sim_socket.recv(copy=False).bytes)
        #     print(msg)
            # sim2coord_socket.send(dumps([self.name, self.agent_names[0], np.arange(10)]))

        def request_screen():
            """Send a SCREEN request to the manager and return the decoded reply.

            The reply is used by the callers as an image (it is passed to
            ``cv2.imwrite`` and to the template-matching helpers).
            """
            sim2mgr_socket.send(dumps([self.name, SimulatorManager.MSG_TYPE.SCREEN, []]))
            return loads(mgr2sim_socket.recv(copy=False).bytes)

        def request_click(bbox):
            """Send a CLICK request for the centre of ``bbox`` and return the decoded reply.

            ``bbox`` is indexed as ``[x0, y0, x1, y1]``. The centre is shifted by
            ``self.window_rect[0:2]`` plus a fixed (6, 46) offset
            (presumably window border / title bar -- TODO confirm).
            """
            sim2mgr_socket.send(dumps([self.name, SimulatorManager.MSG_TYPE.CLICK, [(bbox[0] + bbox[2]) // 2 + self.window_rect[0] + 6, (bbox[1] + bbox[3]) // 2 + self.window_rect[1] + 46]]))
            return loads(mgr2sim_socket.recv(copy=False).bytes)

        def request_lock():
            """Send a LOCK request to the manager and return the decoded reply."""
            sim2mgr_socket.send(dumps([self.name, SimulatorManager.MSG_TYPE.LOCK, []]))
            return loads(mgr2sim_socket.recv(copy=False).bytes)

        def request_unlock():
            """Send an UNLOCK request to the manager and return the decoded reply."""
            sim2mgr_socket.send(dumps([self.name, SimulatorManager.MSG_TYPE.UNLOCK, []]))
            return loads(mgr2sim_socket.recv(copy=False).bytes)

        def spin_lock_on_button():
            """Poll screenshots until a button action is detected.

            Returns the (truthy) result of ``get_current_button_action``, or an
            empty/falsy value if ``self.toggle.value`` became 0 while polling.
            Side effects: updates ``self.current_screen`` and overwrites
            ``debug.png`` on every iteration.
            """
            act = dict()
            while not act:
                self.current_screen = request_screen()
                cv2.imwrite('debug.png', self.current_screen)
                act = get_current_button_action(self.current_screen)
                # Simulator was switched off while waiting: give up.
                if self.toggle.value == 0:
                    break

            return act

        # NOTE(review): `discard` is defined but not called anywhere in this
        # method as shown.
        def discard(act, bboxes, idxs):
            """Click cards until the on-screen state matches ``idxs``, then click a play button.

            The clicking is wrapped in a LOCK/UNLOCK request pair to the manager.
            """
            def diff(idxs, cards):
                """Return the positions that still need a click.

                A position ``i`` is returned when ``cards[i]`` is not None and
                ``i`` is in ``idxs``, or when ``cards[i]`` is None and ``i`` is
                not in ``idxs``. (Presumably None means the card was not
                recognised at its resting position, i.e. it is already
                raised -- TODO confirm.)
                """
                res = []
                for i in range(len(cards)):
                    if cards[i] is not None:
                        if i in idxs:
                            res.append(i)
                    else:
                        if i not in idxs:
                            res.append(i)
                return res

            differences = diff(idxs, get_cards_bboxes(request_screen(), self.templates, bboxes=bboxes)[0])
            print(differences)
            request_lock()
            # Keep clicking and re-reading the screen until nothing differs.
            while len(differences) > 0:
                for d in differences:
                    request_click(bboxes[d])
                # request_click(bboxes[differences[0]])
                # time.sleep(0.3)
                differences = diff(idxs, get_cards_bboxes(request_screen(), self.templates, bboxes=bboxes)[0])
                print(differences)
            # Click whichever play button is available; if none of the three
            # keys is present nothing is clicked.
            if 'chupai' in act:
                request_click(act['chupai'])
            elif 'alone_chupai' in act:
                request_click(act['alone_chupai'])
            elif 'ming_chupai' in act:
                request_click(act['ming_chupai'])
            request_unlock()

        # Number of finished games in which a cached transition existed;
        # drives the periodic win-rate log below.
        game_cnt = 0
        while True:
            # NOTE(review): psutil is only referenced by the commented-out
            # print below.
            import psutil
            # print('memory usage is: ', psutil.virtual_memory())
            # Idle while the simulator is toggled off.
            if self.toggle.value == 0:
                time.sleep(0.2)
                continue
            print('new round')
            self.current_screen = request_screen()

            act = spin_lock_on_button()
            # Empty result means the toggle was switched off while polling.
            if not act:
                continue
            print(act)
            if 'start' in act:
                request_click(act['start'])
                continue
            if self.state == Simulator.State.CALLING:
                # state has changed
                if 'reverse' in act:
                    self.state = Simulator.State.PLAYING
                    self.current_lord_pos = who_is_lord(self.current_screen)
                    # A negative position is treated as "not detected yet":
                    # keep polling unless the simulator is toggled off.
                    while self.current_lord_pos < 0:
                        self.current_screen = request_screen()
                        self.current_lord_pos = who_is_lord(self.current_screen)
                        print('current lord pos ', self.current_lord_pos)
                        if self.toggle.value == 0:
                            break
                    continue
                if 'continuous defeat' in act:
                    request_click(act['continuous defeat'])
                    continue
                print('calling', act)
                handcards, _ = get_cards_bboxes(self.current_screen, self.templates, 0)
                cards_value, _ = CEnv.get_cards_value(Card.char2color(handcards))
                print('cards value: ', cards_value)
                # assert 'jiaodizhu' in act
                # Click 'bujiao' when the hand value is below 10, otherwise
                # 'jiaodizhu' (presumably pinyin for "don't bid" / "bid for
                # landlord" -- TODO confirm).
                request_click(act['bujiao']) if cards_value < 10 else request_click(act['jiaodizhu'])
            elif self.state == Simulator.State.PLAYING:
                if 'defeat' in act or 'victory' in act:
                    request_click(act['defeat'] if 'defeat' in act else act['victory'])
                    # No transition was cached, so there is nothing to report
                    # for this game.
                    if self.cached_msg is None:
                        print('other player wins in one step!!!')
                        continue
                    win = is_win(self.current_screen)
                    state, action, fine_mask = self.cached_msg
                    # Send the final transition for the cached step with
                    # reward +1 (win) or -1 (loss). The message layout appears
                    # to be [[state, next_state], action, reward, is_over,
                    # flag, [mask, next_mask]] -- TODO confirm with receiver.
                    if win:
                        sim2exp_sockets[self.current_lord_pos].send(dumps([[state, state], action, 1, True, False, [fine_mask, fine_mask]]))
                        self.win_rates[self.agent_names[self.current_lord_pos]].feed(1.)
                    else:
                        sim2exp_sockets[self.current_lord_pos].send(dumps([[state, state], action, -1, True, False, [fine_mask, fine_mask]]))
                        self.win_rates[self.agent_names[self.current_lord_pos]].feed(0.)

                    game_cnt += 1
                    # Every 100 counted games, log and reset each agent's
                    # win rate (only for agents that recorded something).
                    if game_cnt % 100 == 0:
                        for agent in self.agent_names:
                            if self.win_rates[agent].count > 0:
                                logger.info('[last-100]{} win rate: {}'.format(agent, self.win_rates[agent].average))
                                self.win_rates[agent].reset()

                    self.reset_episode()

                    continue
                # test if we have cached msg not sent

                print('playing', act)
                # Cards read with the mini templates at positions 1 and 2;
                # by the variable names these are the left and right
                # opponents' most recent plays.
                left_cards, _ = get_cards_bboxes(self.current_screen, self.mini_templates, 1)
                right_cards, _ = get_cards_bboxes(self.current_screen, self.mini_templates, 2)
                # If any card could not be recognised, click 'buchu', wait
                # and retry on the next iteration.
                if None in left_cards or None in right_cards:
                    request_click(act['buchu'])
                    time.sleep(1.)
                    continue
                # NOTE(review): these two asserts cannot fail after the
                # check above.
                assert None not in left_cards
                assert None not in right_cards
                # history[1] accumulates the right player's cards and
                # history[2] the left player's; history[0] (below) this
                # player's own.
                self.history[1].extend(right_cards)
                self.history[2].extend(left_cards)
                # last_cards = left_cards
                # if not left_cards:
                #     last_cards = right_cards
                # print('last cards', last_cards)
                # 60-slot card vector with slots 53-55 and 57-59 cleared
                # (presumably the slots that do not correspond to real cards
                # in the 60-wide encoding -- TODO confirm).
                total_cards = np.ones([60])
                total_cards[53:56] = 0
                total_cards[57:60] = 0
                handcards, bboxes = get_cards_bboxes(self.current_screen, self.templates, 0)
                handcards = [card for card in handcards if card is not None]
                # Cards not in this hand and not yet seen played by anyone.
                remain_cards = total_cards - Card.char2onehot60(handcards + self.history[0] + self.history[1] + self.history[2])
                print('current handcards: ', handcards)
                # left_cnt, right_cnt = get_opponent_cnts(self.current_screen, self.tiny_templates)
                # print('left cnt: ', left_cnt, 'right cnt: ', right_cnt)
                # Opponent hand sizes derived from the play history: 17 cards
                # each, plus 3 for whichever opponent is at the lord position.
                left_cnt = 17 - len(self.history[2])
                right_cnt = 17 - len(self.history[1])
                if self.current_lord_pos == 1:
                    left_cnt += 3
                if self.current_lord_pos == 2:
                    right_cnt += 3
                # assert left_cnt > 0 and right_cnt > 0
                # to be the same as C++ side, right comes before left

                # Split the unseen cards between the opponents in proportion
                # to their remaining card counts.
                right_prob_state = remain_cards * (right_cnt / (left_cnt + right_cnt))
                left_prob_state = remain_cards * (left_cnt / (left_cnt + right_cnt))
                prob_state = np.concatenate([right_prob_state, left_prob_state])
                # assert prob_state.size == 120
                # assert np.all(prob_state < 1.) and np.all(prob_state >= 0.)
                # print(prob_state)
                intention, buffer_comb, buffer_fine = self.predictor.predict(handcards, [left_cards, right_cards], prob_state, self, sim2coord_socket, coord2sim_socket)
                # With the new prediction available, emit the transition for
                # the previously cached step and the one from buffer_comb to
                # buffer_fine; the newest buffer_fine is cached for the next
                # round (or for the terminal message above).
                if self.cached_msg is not None:
                    state, action, fine_mask = self.cached_msg
                    sim2exp_sockets[self.current_lord_pos].send(
                                               dumps([[state, buffer_comb[0]], action, 0, False, False,
                                                      [fine_mask, buffer_comb[2]]]))

                    sim2exp_sockets[self.current_lord_pos].send(
                                           dumps([[buffer_comb[0], buffer_fine[0]], buffer_comb[1], 0, False, True,

                                                  [buffer_comb[2], buffer_fine[2]]]))
                self.cached_msg = buffer_fine

                self.history[0].extend(intention)
                print('intention is: ', intention)
                # Sort the chosen cards by value so they can be matched
                # against handcards with a single forward scan below
                # (assumes handcards is in the same order -- TODO confirm).
                intention.sort(key=lambda k: Card.cards_to_value[k])
                if len(intention) == 0:
                    request_click(act['buchu'])
                else:
                    # Two-pointer scan: i walks handcards, j walks intention.
                    # NOTE(review): if an intended card is not found in
                    # handcards, i runs past the end and raises IndexError.
                    i = 0
                    j = 0
                    to_click = []
                    # NOTE(review): to_click_idxs is filled but never read.
                    to_click_idxs = []
                    while j < len(intention):
                        if handcards[i] == intention[j]:
                            to_click_idxs.append(i)
                            to_click.append(bboxes[i])
                            i += 1
                            j += 1
                        else:
                            i += 1
                    for bbox in to_click:
                        request_click(bbox)
                    time.sleep(0.5)
                    # Hard-coded click location (presumably the play button
                    # -- TODO confirm).
                    request_click([1310, 760, 1310, 760])
            time.sleep(1.)
Esempio n. 36
0
    def save(df, paths, N, write_frequency=1000):
        """
        Serialize a DataFlow into ``N`` LMDB files, round-robin.

        Datapoint number ``idx`` is written to ``paths[idx % N]`` under the key
        ``'{:08}'.format(idx)`` (ASCII-encoded). After all datapoints are
        written, each file receives a ``__keys__`` entry holding the serialized
        list of the keys stored in that file.

        Args:
            df (DataFlow): the DataFlow to serialize.
            paths (list[str]): output paths, one per slice. Each must be an
                lmdb file that does not exist yet.
            N (int): number of slices to distribute the datapoints over.
            write_frequency (int): the frequency to write back data to disk,
                counted per slice. A smaller value reduces memory usage.

        Raises:
            ValueError: if fewer than ``N`` paths are given.
        """
        assert isinstance(df, DataFlow), type(df)
        if len(paths) < N:
            # Fail before any file is created instead of with an IndexError
            # half-way through writing.
            raise ValueError(
                "Need at least N={} output paths, got {}".format(N, len(paths)))

        # It's OK to use super large map_size on Linux, but not on other platforms
        # See: https://github.com/NVIDIA/DIGITS/issues/206
        map_size = 1099511627776 * 2 if platform.system() == 'Linux' else 128 * 10**6
        size = _reset_df_and_get_size(df)
        all_slice_keys = [[] for _ in range(N)]
        slice_sizes = [0] * N
        dbs = []
        txns = []

        # put data into lmdb, and doubling the size if full.
        # Ref: https://github.com/NVIDIA/DIGITS/pull/209/files
        def put_or_grow(db, txn, key, value):
            """Put ``key``/``value`` and return the transaction to keep using.

            On ``lmdb.MapFullError`` the transaction is aborted, the map size is
            doubled and the put is retried in a fresh write transaction, which
            is the one returned.
            """
            while True:
                try:
                    txn.put(key, value)
                    return txn
                except lmdb.MapFullError:
                    pass
                # NOTE(review): abort() also discards any earlier puts made in
                # this transaction since its last commit; those are not
                # replayed here -- confirm whether that matters for callers.
                txn.abort()
                new_size = db.info()['map_size'] * 2
                print("Doubling LMDB map_size to {:.2f}GB".format(new_size / 10**9))
                db.set_mapsize(new_size)
                txn = db.begin(write=True)

        try:
            for path in paths:
                assert not os.path.isfile(path), "LMDB file {} exists!".format(path)
                db = lmdb.open(path, subdir=False,
                               map_size=map_size, readonly=False,
                               meminit=False, map_async=True)    # need sync() at the end
                dbs.append(db)
                # LMDB transaction is not exception-safe!
                # although it has a context manager interface
                txns.append(db.begin(write=True))

            with tqdm.tqdm(total=size) as pbar:
                idx = -1
                for idx, dp in enumerate(df):
                    slice_idx = idx % N
                    db = dbs[slice_idx]
                    key = u'{:08}'.format(idx).encode('ascii')

                    txn = put_or_grow(db, txns[slice_idx], key, dumps(dp))
                    slice_sizes[slice_idx] += 1
                    all_slice_keys[slice_idx].append(key)

                    pbar.set_postfix(s=str(slice_sizes))
                    pbar.update()
                    # slice_sizes already counts this datapoint, so commit
                    # exactly every `write_frequency` items per slice.
                    if slice_sizes[slice_idx] % write_frequency == 0:
                        txn.commit()
                        txn = db.begin(write=True)
                    txns[slice_idx] = txn

                print("Finished reading %d data points" %(idx+1))

                for i in range(N):
                    db = dbs[i]
                    txns[i].commit()

                    slice_keys = all_slice_keys[i]
                    # Managed explicitly rather than with `with db.begin(...)`:
                    # put_or_grow may hand back a *different* transaction, and
                    # the context manager would only ever commit the first one.
                    txn = put_or_grow(db, db.begin(write=True),
                                      b'__keys__', dumps(slice_keys))
                    txn.commit()

                    print("Flushing '%s' (%d keys) ..." %((paths[i]), len(slice_keys)) )
                    db.sync()
        finally:
            # Always release the environments, even if writing failed.
            for db in dbs:
                db.close()
Esempio n. 37
0
 def request_unlock():
     """Send an UNLOCK message to the simulator manager and return its decoded reply."""
     payload = dumps([self.name, SimulatorManager.MSG_TYPE.UNLOCK, []])
     sim2mgr_socket.send(payload)
     # Block until the manager answers on the DEALER socket.
     reply_frame = mgr2sim_socket.recv(copy=False)
     return loads(reply_frame.bytes)