def gen_Ball(info):
    seed, data_dir, data_names = info['seed'], info['data_dir'], info[
        'data_names']
    time_step, dt, n_ball = info['time_step'], info['dt'], info['n_ball']
    file_name = info['file_name']

    os.system('mkdir -p ' + data_dir)

    np.random.seed(seed)

    attr_dim = 1
    state_dim = 4
    action_dim = 2

    engine = BallEngine(dt, state_dim, action_dim=action_dim)
    engine.init(n_ball)

    n_obj = engine.num_obj
    attrs_all = np.zeros((time_step, n_obj, attr_dim))
    states_all = np.zeros((time_step, n_obj, state_dim))
    actions_all = np.zeros((time_step, n_obj, action_dim))
    rel_attrs_all = np.zeros((time_step, engine.param_dim, 2))

    act = np.zeros((n_obj, 2))
    for j in range(time_step):
        state = engine.get_state()

        vel_dim = state_dim // 2
        pos = state[:, :vel_dim]
        vel = state[:, vel_dim:]

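        # recompute velocity by finite differences so it stays consistent
        # with the positions actually stored for the previous step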
        if j > 0:
            vel = (pos - states_all[j - 1, :, :vel_dim]) / dt

        attrs = np.zeros((n_obj, attr_dim))
        attrs[:] = engine.radius

        attrs_all[j] = attrs
        states_all[j, :, :vel_dim] = pos
        states_all[j, :, vel_dim:] = vel
        rel_attrs_all[j] = engine.param

        # apply zero action
        engine.step(act)

        actions_all[j] = act.copy()

    datas = [attrs_all, states_all, actions_all, rel_attrs_all]
    store_data(data_names, datas, os.path.join(data_dir, '%s.h5' % file_name))
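
Note: the store_data helper itself never appears in these examples. The physics-simulation snippets call it as store_data(data_names, datas, path) on .h5 files, so it plausibly wraps h5py. A minimal sketch under that assumption (not the authors' actual implementation):

import h5py
import numpy as np

def store_data(data_names, data, path):
    # write one dataset per name into a single HDF5 file
    with h5py.File(path, 'w') as hf:
        for name, array in zip(data_names, data):
            hf.create_dataset(name, data=np.asarray(array))

def load_data(data_names, path):
    # read the arrays back in the order their names are given
    with h5py.File(path, 'r') as hf:
        return [hf[name][()] for name in data_names]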
Example #2
def main():
    print('Provide data about your employee:\n')

    data_frame, data_list, column_name = u.store_data(
        os.path.dirname(os.path.realpath(__file__)) + '/Dane.csv')

    probability_list, probability_dictionary = u.specify_list_of_probabilities(
        data_frame, data_list, column_name)

    probability_cleared = u.count_final_probability(probability_list)

    u.give_recommendations(probability_cleared, probability_dictionary)
Example #3
def test_params(data_dir, nrows, low, high, num_points):
    # num_points: grid resolution for np.linspace (avoids shadowing the builtin len)
    frequency = np.random.choice(np.linspace(low, high, num_points), nrows)
    ufrequency = np.unique(frequency)

    store_data(os.path.join(data_dir, 'frequency.dat'), frequency)
    store_data(os.path.join(data_dir, 'ufrequency.dat'), ufrequency)

    start = time.time()
    vmap = np.array(get_rowmap(frequency, ufrequency))
    stop = time.time()
    print('Original Time:  {:.2f}s'.format(stop - start))

    store_data(os.path.join(data_dir, 'vmap.dat'), vmap)
Example #4
def gen_Cloth(info):
    env, env_idx = info['env'], info['env_idx']
    thread_idx, data_dir, data_names = info['thread_idx'], info['data_dir'], info['data_names']
    n_rollout, time_step = info['n_rollout'], info['time_step']
    dt, args, phase = info['dt'], info['args'], info['phase']
    vis_width, vis_height = info['vis_width'], info['vis_height']

    state_dim = args.state_dim
    action_dim = args.action_dim
    dt = 1. / 60.  # note: overrides the dt unpacked from info above

    np.random.seed(round(time.time() * 1000 + thread_idx) % 2 ** 32)

    stats = [init_stat(state_dim), init_stat(action_dim)]

    engine = ClothEngine(dt, state_dim, action_dim)

    import pyflex
    pyflex.init()

    # bar = ProgressBar()
    for i in range(n_rollout):
        rollout_idx = thread_idx * n_rollout + i
        rollout_dir = os.path.join(data_dir, str(rollout_idx))
        os.system('mkdir -p ' + rollout_dir)

        engine.init(pyflex)

        scene_params = engine.scene_params

        action = np.zeros(4)
        states_all = np.zeros((time_step, engine.n_particles, state_dim))
        actions_all = np.zeros((time_step, 1, action_dim))

        # drop the cloth down
        engine.set_action(action)
        engine.step()

        for j in range(time_step):
            positions = pyflex.get_positions().reshape(-1, 4)[:, :3]

            # sample the action
            if j % 5 == 0:
                ctrl_pts = rand_int(0, 8)

                act_lim = 0.05
                dx = rand_float(-act_lim, act_lim)
                dz = rand_float(-act_lim, act_lim)
                dy = 0.05

                action = np.array([ctrl_pts, dx, dy, dz])

            else:
                action[2] = 0.

            # store the rollout information
            state = engine.get_state()
            states_all[j] = state

            tga_path = os.path.join(rollout_dir, '%d.tga' % j)
            pyflex.render(capture=True, path=tga_path)
            tga = Image.open(tga_path)
            tga = np.array(tga)[:, 60:780, :3][:, :, ::-1]
            tga = cv2.resize(tga, (vis_width, vis_height), interpolation=cv2.INTER_AREA)
            os.system('rm ' + tga_path)

            jpg_path = os.path.join(rollout_dir, 'fig_%d.jpg' % j)
            cv2.imwrite(jpg_path, tga)

            actions_all[j, 0] = action.copy()

            engine.set_action(action)
            engine.step()

        datas = [states_all, actions_all, scene_params]
        store_data(data_names, datas, rollout_dir + '.h5')

        datas = [datas[j].astype(np.float64) for j in range(len(datas))]

        for j in range(len(stats)):
            stat = init_stat(stats[j].shape[0])
            stat[:, 0] = np.mean(datas[j], axis=(0, 1))[:]
            stat[:, 1] = np.std(datas[j], axis=(0, 1))[:]
            stat[:, 2] = datas[j].shape[0]
            stats[j] = combine_stat(stats[j], stat)

    pyflex.clean()

    return stats
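
The per-rollout statistics loop above builds a (dim, 3) array of [mean, std, count] rows and folds it into a running total with combine_stat. Neither init_stat nor combine_stat is shown; the calls imply the standard pooled-moments update, sketched here as an assumption:

import numpy as np

def init_stat(dim):
    # one row per dimension: [mean, std, count]
    return np.zeros((dim, 3))

def combine_stat(stat_0, stat_1):
    # pool two [mean, std, count] summaries (at least one count must be > 0)
    mean_0, std_0, n_0 = stat_0[:, 0], stat_0[:, 1], stat_0[:, 2]
    mean_1, std_1, n_1 = stat_1[:, 0], stat_1[:, 1], stat_1[:, 2]

    n = n_0 + n_1
    mean = (mean_0 * n_0 + mean_1 * n_1) / n
    # combined variance = weighted within-group variance plus the
    # between-group shift of each mean from the pooled mean
    std = np.sqrt((n_0 * (std_0 ** 2 + (mean_0 - mean) ** 2) +
                   n_1 * (std_1 ** 2 + (mean_1 - mean) ** 2)) / n)
    return np.stack([mean, std, n], axis=-1)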
Example #5
    def gen_data(self):
        # if the data hasn't been generated, generate the data
        n_rollout, time_step, dt = self.n_rollout, self.args.time_step, self.args.dt
        assert n_rollout % self.args.num_workers == 0

        print("Generating data ... n_rollout=%d, time_step=%d" % (n_rollout, time_step))

        infos = []
        for i in range(self.args.num_workers):
            info = {'thread_idx': i,
                    'data_dir': self.data_dir,
                    'data_names': self.data_names,
                    'n_rollout': n_rollout // self.args.num_workers,
                    'time_step': time_step,
                    'dt': dt,
                    'video': self.args.video,
                    'image': self.args.image,
                    'draw_edge': self.args.draw_edge,
                    'phase': self.phase,
                    'args': self.args,
                    'vis_height': self.args.height_raw,
                    'vis_width': self.args.width_raw,
                    'save_type': self.args.h5}

            if self.args.env in ['Ball']:
                info['env'] = 'Ball'
                info['n_ball'] = self.args.n_ball
            elif self.args.env in ['Cloth']:
                info['env'] = 'Cloth'
                info['env_idx'] = 15

            infos.append(info)

        cores = self.args.num_workers
        pool = mp.Pool(processes=cores)

        env = self.args.env

        if env in ['Ball']:
            data = pool.map(gen_Ball, infos)

        elif env in ['Cloth']:
            data = pool.map(gen_Cloth, infos)
        else:
            raise AssertionError("Unknown env")

        print("Training data generated, warpping up stats ...")
   
        if self.phase == 'train':
            if env in ['Ball']:
                self.stat = [init_stat(self.args.attr_dim),
                             init_stat(self.args.state_dim),
                             init_stat(self.args.action_dim)]
            elif env in ['Cloth']:
                self.stat = [init_stat(self.args.state_dim),
                             init_stat(self.args.action_dim)]

            if self.args.h5 != 0:
                data_ = [core[0] for core in data]
                graph = [core[1] for core in data]
                trajectories = [core[2] for core in data]
            else:
                data_ = data

            for i in range(len(data_)):
                for j in range(len(self.stat)):
                    self.stat[j] = combine_stat(self.stat[j], data_[i][j])
            if self.args.h5 != 0:
                store_trajectories(trajectories, self.args.dataf)
                store_graph(graph, self.args.dataf)
            store_data(self.data_names[:len(self.stat)], self.stat, self.stat_path)

        else:
            print("Loading stat from %s ..." % self.stat_path)
            self.stat = load_data(self.data_names, self.stat_path)
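
store_trajectories and store_graph (used when args.h5 != 0) are also external. Judging only from how their inputs are assembled above — per-worker lists of rollout state arrays, and one relation-attribute array per worker — hypothetical bodies might look like:

import os
import numpy as np

def store_trajectories(trajectories, path):
    # hypothetical: flatten the per-worker lists of (time_step, n_obj,
    # state_dim) rollouts into one array and save it
    rollouts = [traj for worker in trajectories for traj in worker]
    np.save(os.path.join(path, 'trajectories.npy'), np.stack(rollouts))

def store_graph(graphs, path):
    # hypothetical: one relation-attribute array per worker
    np.save(os.path.join(path, 'graph.npy'), np.stack(graphs))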
Example #6
                # if record_exist(date):
                if os.path.isfile(os.path.join(DUMP_DIRECTORY,
                                               f'{date}.json')):
                    logger.debug(f'"{str(date)}.json" exist, continue')
                    continue

                entries = dom.find('div.themeform').find('p')
                total, new = parse_confirmed_total(entries)

                persons = []
                entries = dom.find('div.themeform').find('li')
                checker = new
                for entry in entries:
                    person = parse_infected_info(entry)
                    if person:
                        persons.append(person)
                        checker -= 1
                    if checker == 0:
                        break
                store_data(
                    os.path.join(DUMP_DIRECTORY, f'{date}.json'), {
                        'day': date,
                        'total': total,
                        'diff': checker,
                        'new': new,
                        'persons': persons
                    })

            except IndexError:
                logger.debug('An index error has been found')
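
This scraper's store_data takes (path, record) rather than the (names, arrays, path) signature of the simulation examples, so it is a different helper that presumably serializes the record to JSON. A minimal sketch, assuming date objects are rendered via str:

import json

def store_data(path, record):
    # hypothetical JSON-writing variant; default=str handles date objects
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(record, f, ensure_ascii=False, indent=2, default=str)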
Example #7
def evaluate(roll_idx, video=True, image=True):

    eval_path = os.path.join(args.evalf, str(roll_idx))

    n_split = 4
    split = 4

    if image:
        os.system('mkdir -p ' + eval_path)
        print('Save images to %s' % eval_path)

    if video:
        video_path = eval_path + '.avi'
        fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
        print('Save video as %s' % video_path)
        frame_rate = 25 if args.env in ['Ball'] else 60
        out = cv2.VideoWriter(video_path, fourcc, frame_rate, (
            400 * n_split + split * (n_split - 1), 400))

    # load images
    imgs = []
    suffix = '.png' if args.env in ['Ball'] else '.jpg'
    for i in range(args.eval_st_idx, args.eval_ed_idx):
        img_path = os.path.join(data_dir, str(roll_idx), 'fig_%d%s' % (i, suffix))
        img = loader(img_path)

        img = resize_and_crop('valid', img, args.scale_size, args.crop_size)
        img = trans_to_tensor(img).unsqueeze(0).cuda()
        imgs.append(img)

    imgs = torch.cat(imgs, 0)


    '''
    model prediction
    '''

    loss_rec_acc = 0.
    loss_kp_acc = 0.
    for i in range(args.eval_ed_idx - args.eval_st_idx):

        if args.stage == 'kp':
            img = imgs[i:i+1]

            if i == 0:
                src = img.clone()

            with torch.set_grad_enabled(False):
                # reconstruct the target image using the source image
                img_pred, _, _ = model_kp(src, img)
                # predict the position of the keypoints
                keypoint = model_kp.predict_keypoint(img)
                # transform the keypoints to the heatmap
                heatmap = model_kp.keypoint_to_heatmap(keypoint, inv_std=args.inv_std)

            if args.store_result == 1:
                timesteps = args.eval_ed_idx - args.eval_st_idx
                if i == 0:
                    store_kp_result = np.zeros((timesteps, args.n_kp, 2))

                store_kp_result[i] = to_np(keypoint[0])

                if i == timesteps - 1:
                    store_data(['keypoints'], [store_kp_result], os.path.join(data_store_dir, '%d.h5' % roll_idx))

        if args.store_demo == 1:
            # transform the numpy
            img_pred = to_np(torch.clamp(img_pred, -1., 1.))[0].transpose(1, 2, 0)[:, :, ::-1]
            img_pred = (img_pred * 0.5 + 0.5) * 255.
            img_pred = cv2.resize(img_pred, (400, 400))

            lim = args.lim
            keypoint = to_np(keypoint)[0] - [lim[0], lim[2]]
            keypoint *= 400 / 2.
            keypoint = np.round(keypoint).astype(int)  # np.int is removed in recent NumPy

            heatmap = to_np(heatmap)[0].transpose((1, 2, 0))
            heatmap = np.sum(heatmap, 2)

            # cv2.imshow('heatmap', heatmap)
            # cv2.waitKey(0)

            heatmap = np.clip(heatmap * 255., 0., 255.)
            heatmap = cv2.resize(heatmap, (400, 400), interpolation=cv2.INTER_NEAREST)
            heatmap = np.expand_dims(heatmap, -1)

            # generate the visualization
            img_path = os.path.join(data_dir, str(roll_idx), 'fig_%d%s' % (i + args.eval_st_idx, suffix))
            img = cv2.imread(img_path)
            img = cv2.resize(img, (400, 400)).astype(np.float64)  # np.float is removed in recent NumPy
            img_overlay = img.copy()
            kp_map = np.zeros((img.shape[0], img.shape[1], 3))

            c = [(255, 105, 65), (0, 69, 255), (50, 205, 50), (0, 165, 255), (238, 130, 238),
                 (128, 128, 128), (30, 105, 210), (147, 20, 255), (205, 90, 106), (0, 215, 255)]

            if args.env in ['Ball']:
                for j in range(keypoint.shape[0]):
                    cv2.circle(kp_map, (keypoint[j, 0], keypoint[j, 1]), 12, c[j], -1)
                    cv2.circle(kp_map, (keypoint[j, 0], keypoint[j, 1]), 12, (255, 255, 255), 1)
                    cv2.circle(img_overlay, (keypoint[j, 0], keypoint[j, 1]), 12, c[j], -1)
                    cv2.circle(img_overlay, (keypoint[j, 0], keypoint[j, 1]), 12, (255, 255, 255), 1)
            elif args.env in ['Cloth']:
                for j in range(keypoint.shape[0]):
                    cv2.circle(kp_map, (keypoint[j, 0], keypoint[j, 1]), 8, c[j], -1)
                    cv2.circle(kp_map, (keypoint[j, 0], keypoint[j, 1]), 8, (255, 255, 255), 1)
                    cv2.circle(img_overlay, (keypoint[j, 0], keypoint[j, 1]), 8, c[j], -1)
                    cv2.circle(img_overlay, (keypoint[j, 0], keypoint[j, 1]), 8, (255, 255, 255), 1)

            merge = np.zeros((img.shape[0], img.shape[1] * n_split + split * (n_split - 1), 3))  # black canvas for the side-by-side panels

            if args.stage == 'kp':
                merge[:, :img.shape[1]] = img
                merge[:, img.shape[1] + 4 : img.shape[1] * 2 + 4] = img_overlay
                merge[:, img.shape[1] * 2 + 8 : img.shape[1] * 3 + 8] = heatmap
                merge[:, img.shape[1] * 3 + 12 : img.shape[1] * 4 + 12] = img_pred

            merge = merge.astype(np.uint8)

            if image:
                cv2.imwrite(os.path.join(eval_path, 'fig_%d.png' % i), merge)

            if video:
                out.write(merge)

    if video:
        out.release()
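
The demo branch above maps keypoints to pixels by shifting with (lim[0], lim[2]) and scaling by 400 / 2, which only fills a 400-pixel canvas if each axis of lim spans exactly 2 units (e.g. [-1, 1, -1, 1]). The same arithmetic as a standalone helper (hypothetical name):

import numpy as np

def keypoint_to_pixel(kp, lim, canvas=400):
    # kp: (n_kp, 2) world coordinates; lim = [x_min, x_max, y_min, y_max];
    # assumes x_max - x_min == y_max - y_min == 2, matching the code above
    px = (kp - np.array([lim[0], lim[2]])) * canvas / 2.
    return np.round(px).astype(int)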
Example #8
    def gen_data(self):
        # if the data hasn't been generated, generate the data
        n_rollout, time_step, dt = self.n_rollout, self.args.time_step, self.args.dt
        assert n_rollout % self.args.num_workers == 0

        print("Generating data ... n_rollout=%d, time_step=%d" %
              (n_rollout, time_step))

        infos = []
        for i in range(self.args.num_workers):
            info = {
                'thread_idx': i,
                'data_dir': self.data_dir,
                'data_names': self.data_names,
                'n_rollout': n_rollout // self.args.num_workers,
                'time_step': time_step,
                'dt': dt,
                'video': False,
                'phase': self.phase,
                'args': self.args
            }

            infos.append(info)

        cores = self.args.num_workers
        pool = mp.Pool(processes=cores)

        env = self.args.env

        if env == 'Rope':
            data = pool.map(gen_Rope, infos)
        elif env == 'Soft':
            data = pool.map(gen_Soft, infos)
        elif env == 'Swim':
            data = pool.map(gen_Swim, infos)
        else:
            raise AssertionError("Unknown env")

        print("Training data generated, warpping up stats ...")

        if self.phase == 'train':
            # states [x, y, angle, xdot, ydot, angledot], action [x, xdot]
            if env in ['Rope', 'Soft', 'Swim']:
                self.stat = [
                    init_stat(self.args.attr_dim),
                    init_stat(self.args.state_dim),
                    init_stat(self.args.action_dim)
                ]

            for i in range(len(data)):
                for j in range(len(self.stat)):
                    self.stat[j] = combine_stat(self.stat[j], data[i][j])

            if self.args.gen_stat:
                print("Storing stat to %s" % self.stat_path)
                store_data(self.data_names, self.stat, self.stat_path)
            else:
                print("stat will be discarded")
        else:
            print("Loading stat from %s ..." % self.stat_path)

            if env in ['Rope', 'Soft', 'Swim']:
                self.stat = load_data(self.data_names, self.stat_path)
Example #9
def gen_Cradle(info):
    thread_idx, data_dir, data_names = info['thread_idx'], info[
        'data_dir'], info['data_names']
    n_particle, n_rollout, time_step = info['n_particle'], info[
        'n_rollout'], info['time_step']
    dt, args = info['dt'], info['args']

    np.random.seed(round(time.time() * 1000 + thread_idx) % 2**32)

    attr_dim = args.attr_dim  # ball, anchor
    state_dim = args.state_dim  # x, y, xdot, ydot
    assert attr_dim == 2
    assert state_dim == 4

    lim = 300
    attr_dim = 2
    state_dim = 4
    relation_dim = 4

    stats = [init_stat(attr_dim), init_stat(state_dim)]

    engine = CradleEngine(dt)

    n_objects = n_particle * 2  # add the same number of anchor points
    attrs = np.zeros((n_rollout, time_step, n_objects, attr_dim))
    states = np.zeros((n_rollout, time_step, n_objects, state_dim))

    bar = ProgressBar()
    for i in bar(range(n_rollout)):
        rollout_idx = thread_idx * n_rollout + i
        rollout_dir = os.path.join(data_dir, str(rollout_idx))
        os.system('mkdir -p ' + rollout_dir)

        theta = rand_float(0, 90)
        engine.reset_scene(n_particle, theta)

        for j in range(time_step):
            states[i, j] = engine.get_state()
            if j > 0:
                states[i, j, :, 2:] = (states[i, j, :, :2] -
                                       states[i, j - 1, :, :2]) / dt

            attrs[i, j, :n_particle, 0] = 1  # balls
            attrs[i, j, n_particle:, 1] = 1  # anchors

            data = [attrs[i, j], states[i, j]]
            store_data(data_names, data,
                       os.path.join(rollout_dir,
                                    str(j) + '.h5'))

            engine.step()

        datas = [attrs[i].astype(np.float64), states[i].astype(np.float64)]

        for j in range(len(stats)):
            stat = init_stat(stats[j].shape[0])
            stat[:, 0] = np.mean(datas[j], axis=(0, 1))[:]
            stat[:, 1] = np.std(datas[j], axis=(0, 1))[:]
            stat[:, 2] = datas[j].shape[0]
            stats[j] = combine_stat(stats[j], stat)

    return stats
Example #10
    def recommendTo(self, to, model_path, topK):
        user_id = get_user_id_by_email(to)
        keywords_data = get_data('rec_user_keywords', 'keywords', user_id,
                                 'user_id = %s')
        if not keywords_data:
            return

        sc = SimilarityCalculator(model_path)

        index, num = get_index_and_num(user_id)
        data = get_data('rec_arxiv_paper',
                        ['id', 'arxiv', 'title', 'abstract'], [index, user_id],
                        "rec_arxiv_paper.id >= %s AND \
                (SELECT COUNT(1) FROM rec_user_arxiv_preference WHERE \
                user_id = %s AND \
                rec_arxiv_paper.id = rec_user_arxiv_preference.paper_id) = 0",
                        limit=num + 100)

        paper_data = pd.DataFrame(data,
                                  columns=['id', 'arxiv', 'title', 'abstract'])
        # paper_data['contain_keywords'] = ''

        # idx_contain_kw = set()

        keywords = [' ' + kw[0] for kw in keywords_data]
        # for kw in keywords:
        #     title_cont = paper_data['title'].str.lower().str.contains(kw)
        #     abstract_cont = paper_data['abstract'].str.lower().str.contains(kw)
        #     cont = title_cont | abstract_cont
        #     paper_data.loc[cont, 'contain_keywords'] = paper_data.loc[cont, 'contain_keywords'] + kw + ';'
        #     idx_contain_kw = idx_contain_kw | set(title_cont[title_cont == True].index.tolist())
        #     idx_contain_kw = idx_contain_kw | set(abstract_cont[abstract_cont == True].index.tolist())
        # idx_not_contain_kw = set(paper_data.index.tolist()) - idx_contain_kw

        # paper_data_kw = paper_data.loc[list(idx_contain_kw)].reset_index(drop=True) # Don't insert index column to the df.
        # paper_data_no_kw = paper_data.loc[list(idx_not_contain_kw)].reset_index(drop=True)

        try:
            template = EmailTemplate()
            data = get_data(['rec_user_field_paper', 'rec_field_paper'],
                            ['title', 'abstract'],
                            user_id,
                            'rec_user_field_paper.user_id = %s AND \
                    rec_user_field_paper.field_paper_id = rec_field_paper.id',
                            option='all')
            if data:
                field_data = pd.DataFrame(data, columns=['title', 'abstract'])
                # kw_result = sc.get_top_k(paper_data_kw, field_data, int(topK * 0.5), None)
                # no_kw_result = sc.get_top_k(paper_data_no_kw, field_data, int(topK * 0.5), None)
                no_kw_result = sc.get_top_k(paper_data, field_data, 20, None)
                # results = pd.concat([kw_result, no_kw_result], axis=0)
                results = no_kw_result
                template.fill_paper(user_id, results, keywords)
            else:
                return
        except ValueError as e:
            print(e)
            template = 'Unknown Message.'

        error_times = 0
        while error_times < 5:
            try:
                self._send_email(to, template)
                break
            except Exception:
                error_times += 1
                time.sleep(10)

        results = results[['id']]
        results['user_id'] = user_id
        results['islike'] = -1
        store_data('rec_user_arxiv_preference',
                   ['paper_id', 'user_id', 'islike'], results.values.tolist())
Example #11
def gen_Box(info):
    thread_idx, data_dir, data_names = info['thread_idx'], info[
        'data_dir'], info['data_names']
    n_rollout, n_particle, time_step = info['n_rollout'], info[
        'n_particle'], info['time_step']
    dt, args = info['dt'], info['args']

    np.random.seed(round(time.time() * 1000 + thread_idx) % 2**32)

    state_dim = args.state_dim  # x, y, angle, xdot, ydot, angledot
    action_dim = args.action_dim  # x, xdot
    assert state_dim == 6
    assert action_dim == 2

    stats = [init_stat(state_dim), init_stat(action_dim)]

    engine = BoxEngine(dt, state_dim, action_dim)

    states = np.zeros((n_rollout, time_step, n_particle, state_dim))
    actions = np.zeros((n_rollout, time_step, 1, action_dim))
    viss = np.zeros((n_rollout, time_step, n_particle))

    bar = ProgressBar()
    for i in bar(range(n_rollout)):
        rollout_idx = thread_idx * n_rollout + i
        rollout_dir = os.path.join(data_dir, str(rollout_idx))
        os.system('mkdir -p ' + rollout_dir)

        engine.reset_scene(n_particle)

        for j in range(time_step):
            engine.set_action(rand_float(-600., 100.))

            states[i, j] = engine.get_state()
            actions[i, j] = engine.get_action()
            viss[i, j] = engine.get_vis(states[i, j])

            if j > 0:
                states[i, j, :, 3:] = (states[i, j, :, :3] -
                                       states[i, j - 1, :, :3]) / dt
                actions[i, j, :, 1] = (actions[i, j, :, 0] -
                                       actions[i, j - 1, :, 0]) / dt

            data = [states[i, j], actions[i, j], viss[i, j]]

            store_data(data_names, data,
                       os.path.join(rollout_dir,
                                    str(j) + '.h5'))

            engine.step()

        datas = [states[i].astype(np.float64), actions[i].astype(np.float64)]

        for j in range(len(stats)):
            stat = init_stat(stats[j].shape[0])
            stat[:, 0] = np.mean(datas[j], axis=(0, 1))[:]
            stat[:, 1] = np.std(datas[j], axis=(0, 1))[:]
            stat[:, 2] = datas[j].shape[0]
            stats[j] = combine_stat(stats[j], stat)

    return stats
Example #12
def scrape_links(time_before_new_changed, title=None, links=None,
        unscraped_links_filename=os.path.join(
            '..', 'data', 'links', 'links_unscraped.txt'), 
        done_links_filename=os.path.join(
            '..', 'data', 'links', 'done_links.txt')):
    """Scrape links from pages on candidate URLs and retrieve any synonyms."""
    start_time = time.time()
    done_links = get_done_links(done_links_filename)
    if links is None:
        links = get_unscraped_links(unscraped_links_filename, done_links)
    syn_count = len(os.listdir(os.path.join('..', 'data', 'synonyms_new')))
    print('Found {} synonym-files at start of while-loop.\n'.format(syn_count))
    while links:
        if time.time() > start_time + time_before_new_changed:
            print('Time {} seconds exceeded; getting new changed links.'.
                    format(time_before_new_changed))
            links = get_recent_changes(links, done_links)
            start_time = time.time()
        title = links.pop()
        try:
            page, _, synonyms, new_links = S.main(title)
        except KeyboardInterrupt:
            print('''\nWe met with KeyboardInterrupt; title: {}. '''.
                    format(title))
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_exception(exc_type, exc_value, exc_traceback)
            return links, done_links
        except TypeError:
            # TypeError: 'NoneType' object is not iterable
            # Usually because "HTTP Error 404: Not Found", so restore title.
            # But temporarily we are leaving titles unrestored, as we think
            # some of these were previously unfiltered "redlink=1" cases.
            links.add(title)
            try:
                done_links.remove(title)
            except KeyError:
                pass
            print('    {}'.format(title))
            continue
        except Exception:
            print('\nWe met with Exception; title: {}.'.
                    format(title))
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_exception(exc_type, exc_value, exc_traceback)
            print('\n')
            continue
        # Do not examine whether title in done_links; 
        # would prevent utility of "get_recent_changes()".
        # Assume new links are checked only when received from S.main().
        with open(done_links_filename, 'a') as f:
            f.write('\n' + title)
        if synonyms:
            _ = U.store_data(
                    json.dumps(synonyms).encode(), title, 
                    target_dir='synonyms_new', tar=False)
            syn_count = len(
                    os.listdir(os.path.join('..', 'data', 'synonyms_new')))
        links, new_links, done_links = update_links(
                links, new_links, done_links, title)
        print('''T: {}; links: + {:>3} => {:>}; done: {} ({}%); '''
              '''syn: + {} => {} ({}%);\n    {}'''.
                format(int(time.time() - start_time), len(new_links), 
                    len(links), len(done_links), 
                    round(
                        100 * len(done_links) / 
                        (len(done_links) + len(links)), 1), 
                    len(synonyms), syn_count, 
                    round(100 * syn_count / len(done_links), 1), 
                    title))
        # Uncomment the following line to save whole pages (compressed).
        # _ = U.store_data(page, title, target_dir='html_new', tar=True)
#         # Write the whole of "links": "title" removed, "new_links" added.
#         try:
#             with open(unscraped_links_filename, 'w') as f:
#                 f.write('\n'.join(links))
#         except KeyboardInterrupt:
#             print('''\nWe met with KeyboardInterrupt; title: {}. '''.
#                     format(title))
#             exc_type, exc_value, exc_traceback = sys.exc_info()
#             traceback.print_exception(exc_type, exc_value, exc_traceback)
#             return links, done_links
    return links, done_links
Example #13
def scrape_links(time_before_new_changed,
                 title=None,
                 links=None,
                 unscraped_links_filename=os.path.join('..', 'data', 'links',
                                                       'links_unscraped.txt'),
                 done_links_filename=os.path.join('..', 'data', 'links',
                                                  'done_links.txt')):
    """Scrape links from pages on candidate URLs and retrieve any synonyms."""
    start_time = time.time()
    done_links = get_done_links(done_links_filename)
    if links is None:
        links = get_unscraped_links(unscraped_links_filename, done_links)
    syn_count = len(os.listdir(os.path.join('..', 'data', 'synonyms_new')))
    print('Found {} synonym-files at start of while-loop.\n'.format(syn_count))
    while links:
        if time.time() > start_time + time_before_new_changed:
            print(
                'Time {} seconds exceeded; getting new changed links.'.format(
                    time_before_new_changed))
            links = get_recent_changes(links, done_links)
            start_time = time.time()
        title = links.pop()
        try:
            page, _, synonyms, new_links = S.main(title)
        except KeyboardInterrupt:
            print('''\nWe met with KeyboardInterrupt; title: {}. '''.format(
                title))
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_exception(exc_type, exc_value, exc_traceback)
            return links, done_links
        except TypeError:
            # TypeError: 'NoneType' object is not iterable
            # Usually because "HTTP Error 404: Not Found", so restore title.
            # But temporarily we are leaving titles unrestored, as we think
            # some of these were previously unfiltered "redlink=1" cases.
            links.add(title)
            try:
                done_links.remove(title)
            except KeyError:
                pass
            print('    {}'.format(title))
            continue
        except Exception:
            print('\nWe met with Exception; title: {}.'.format(title))
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_exception(exc_type, exc_value, exc_traceback)
            print('\n')
            continue
        # Do not examine whether title in done_links;
        # would prevent utility of "get_recent_changes()".
        # Assume new links are checked only when received from S.main().
        with open(done_links_filename, 'a') as f:
            f.write('\n' + title)
        if synonyms:
            _ = U.store_data(json.dumps(synonyms).encode(),
                             title,
                             target_dir='synonyms_new',
                             tar=False)
            syn_count = len(
                os.listdir(os.path.join('..', 'data', 'synonyms_new')))
        links, new_links, done_links = update_links(links, new_links,
                                                    done_links, title)
        print('''T: {}; links: + {:>3} => {:>}; done: {} ({}%); '''
              '''syn: + {} => {} ({}%);\n    {}'''.format(
                  int(time.time() - start_time), len(new_links), len(links),
                  len(done_links),
                  round(100 * len(done_links) / (len(done_links) + len(links)),
                        1), len(synonyms), syn_count,
                  round(100 * syn_count / len(done_links), 1), title))
        # Uncomment the following line to save whole pages (compressed).
        # _ = U.store_data(page, title, target_dir='html_new', tar=True)


#         # Write the whole of "links": "title" removed, "new_links" added.
#         try:
#             with open(unscraped_links_filename, 'w') as f:
#                 f.write('\n'.join(links))
#         except KeyboardInterrupt:
#             print('''\nWe met with KeyboardInterrupt; title: {}. '''.
#                     format(title))
#             exc_type, exc_value, exc_traceback = sys.exc_info()
#             traceback.print_exception(exc_type, exc_value, exc_traceback)
#             return links, done_links
    return links, done_links
Example #14
    def quit_game(self):
        utils.store_data(self)
        utils.log('Quit the game.')
        pg.quit()
        exit()
Example #15
                    date = parse_date(content)

                    # If the file already exists, the report has already been recorded
                    if record_exist(date):
                        logger.debug(f'"{str(date)}.json" exist, continue')
                        continue

                    entries = dom.find('div.themeform').find('p')
                    total, new = parse_confirmed_total(entries)

                    persons = []
                    entries = dom.find('div.themeform').find('li')
                    checker = new
                    for entry in entries:
                        person = parse_infected_info(entry)
                        if person:
                            persons.append(person)
                        checker -= 1
                        if checker == 0:
                            break
                    # print({'day': date, 'total': total, 'new': new, 'persons': persons})
                    # open(f'{str(date)}.json', 'w').write(str({'day': date, 'total': total, 'new': new, 'persons': persons})
                    store_data(f'{str(date)}.json', {
                        'day': date,
                        'total': total,
                        'new': new,
                        'persons': persons
                    })
            except IndexError:
                logger.debug('An index error has been found')
Example #16
    def gen_data(self):
        # if the data hasn't been generated, generate the data
        n_rollout, n_particle = self.n_rollout, self.args.n_particle
        time_step, dt = self.args.time_step, self.args.dt

        print("Generating data ... n_rollout=%d, time_step=%d" %
              (n_rollout, time_step))

        infos = []
        for i in range(self.args.num_workers):
            info = {
                'thread_idx': i,
                'data_dir': self.data_dir,
                'data_names': self.data_names,
                'n_particle': n_particle,
                'n_rollout': n_rollout // self.args.num_workers,
                'time_step': time_step,
                'dt': dt,
                'args': self.args
            }

            infos.append(info)

        cores = self.args.num_workers
        pool = mp.Pool(processes=cores)

        env = self.args.env

        if env == 'Cradle':
            data = pool.map(gen_Cradle, infos)
        elif env == 'Rope':
            data = pool.map(gen_Rope, infos)
        elif env == 'Box':
            data = pool.map(gen_Box, infos)
        else:
            raise AssertionError("Unknown env")

        print("Training data generated, warpping up stats ...")

        if self.phase == 'train' and self.args.gen_stat:
            if env in ['Cradle']:
                self.stat = [
                    init_stat(self.args.attr_dim),
                    init_stat(self.args.state_dim)
                ]
            elif env in ['Rope']:
                self.stat = [
                    init_stat(self.args.attr_dim),
                    init_stat(self.args.state_dim),
                    init_stat(self.args.action_dim)
                ]
            elif env in ['Box']:
                self.stat = [
                    init_stat(self.args.state_dim),
                    init_stat(self.args.action_dim)
                ]

            for i in range(len(data)):
                for j in range(len(self.stat)):
                    self.stat[j] = combine_stat(self.stat[j], data[i][j])

            store_data(self.data_names[:len(self.stat)], self.stat,
                       self.stat_path)

        else:
            print("Loading stat from %s ..." % self.stat_path)

            if env in ['Cradle', 'Rope']:
                self.stat = load_data(self.data_names, self.stat_path)
            elif env in ['Box']:
                self.stat = load_data(self.data_names[:2], self.stat_path)
Example #17
def gen_Ball(info):
    thread_idx, data_dir, data_names = info['thread_idx'], info['data_dir'], info['data_names']
    n_rollout, time_step = info['n_rollout'], info['time_step']
    dt, video, image, draw_edge, args, phase = info['dt'], info['video'], info['image'], info['draw_edge'], info['args'], info['phase']
    n_ball = info['n_ball']
    save_type = info['save_type']

    np.random.seed(round(time.time() * 1000 + thread_idx) % 2 ** 32)

    attr_dim = args.attr_dim    # radius
    state_dim = args.state_dim  # x, y, xdot, ydot
    action_dim = 2              # ddx, ddy

    stats = [init_stat(attr_dim), init_stat(state_dim), init_stat(action_dim)]
    traj = []
    graph = None
    engine = BallEngine(dt, state_dim, action_dim=2)

    # bar = ProgressBar()
    for i in range(n_rollout):
        rollout_idx = thread_idx * n_rollout + i
        rollout_dir = os.path.join(data_dir, str(rollout_idx))
        if save_type == 0:
            os.system('mkdir -p ' + rollout_dir)

        engine.init(n_ball, param_load=args.load_rels)  # changed this
        n_obj = engine.num_obj
        attrs_all = np.zeros((time_step, n_obj, attr_dim))
        states_all = np.zeros((time_step, n_obj, state_dim))
        actions_all = np.zeros((time_step, n_obj, action_dim))
        rel_attrs_all = np.zeros((time_step, engine.param_dim, 2))

        act = np.zeros((n_obj, 2))
        for j in range(time_step):
            state = engine.get_state()
            vel_dim = state_dim // 2
            pos = state[:, :vel_dim]
            vel = state[:, vel_dim:]

            if j > 0:
                vel = (pos - states_all[j - 1, :, :vel_dim]) / dt
            attrs = np.zeros((n_obj, attr_dim))
            attrs[:] = engine.radius

            attrs_all[j] = attrs
            states_all[j, :, :vel_dim] = pos
            states_all[j, :, vel_dim:] = vel
            rel_attrs_all[j] = engine.param

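            # random driving force, damped by the previous action and the
            # current velocity (state[:, 2:] holds [xdot, ydot])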
            act += (np.random.rand(n_obj, 2) - 0.5) * 600 - act * 0.1 - state[:, 2:] * 0.1
            act = np.clip(act, -1000, 1000)
            engine.step(act)

            actions_all[j] = act.copy()

        
        datas = [attrs_all, states_all, actions_all, rel_attrs_all]
        traj.append(states_all.astype(np.float64))
        graph = rel_attrs_all
        if save_type == 0:
            store_data(data_names, datas, rollout_dir + '.h5')   # stores the rollout
            engine.render(states_all, actions_all, engine.get_param(), video=video, image=image,
                          path=rollout_dir, draw_edge=draw_edge, verbose=True)

        datas = [datas[i].astype(np.float64) for i in range(len(datas))]
        
        for j in range(len(stats)):
            stat = init_stat(stats[j].shape[0])
            stat[:, 0] = np.mean(datas[j], axis=(0, 1))[:]
            stat[:, 1] = np.std(datas[j], axis=(0, 1))[:]
            stat[:, 2] = datas[j].shape[0]
            stats[j] = combine_stat(stats[j], stat)
    if save_type == 0:
        return stats
    else:
        return stats, graph, traj
Example #18
def gen_Swim(info):
    thread_idx, data_dir, data_names = info['thread_idx'], info[
        'data_dir'], info['data_names']
    n_rollout, time_step = info['n_rollout'], info['time_step']
    dt, video, args, phase = info['dt'], info['video'], info['args'], info[
        'phase']

    np.random.seed(round(time.time() * 1000 + thread_idx) % 2**32)

    attr_dim = args.attr_dim  # actuated, soft, rigid
    state_dim = args.state_dim  # x, y, xdot, ydot
    action_dim = args.action_dim
    param_dim = args.param_dim  # n_box, k, damping, init_p

    act_scale = 500.
    act_delta = 250.

    # attr, state, action
    stats = [init_stat(attr_dim), init_stat(state_dim), init_stat(action_dim)]

    engine = SwimEngine(dt, state_dim, action_dim, param_dim)

    group_size = args.group_size
    sub_dataset_size = n_rollout * args.num_workers // args.n_splits
    print('group size', group_size, 'sub_dataset_size', sub_dataset_size)
    assert n_rollout % group_size == 0
    assert args.n_rollout % args.n_splits == 0

    bar = ProgressBar()
    for i in bar(range(n_rollout)):
        rollout_idx = thread_idx * n_rollout + i
        group_idx = rollout_idx // group_size
        sub_idx = rollout_idx // sub_dataset_size

        num_obj_range = args.num_obj_range if phase in {
            'train', 'valid'
        } else args.extra_num_obj_range
        num_obj = num_obj_range[sub_idx]

        rollout_dir = os.path.join(data_dir, str(rollout_idx))
        param_file = os.path.join(data_dir, str(group_idx) + '.param')
        os.system('mkdir -p ' + rollout_dir)

        if rollout_idx % group_size == 0:
            init_p = None if not args.regular_data else sample_init_p_flight(
                n_box=num_obj, aug=True, train=phase == 'train')
            engine.init(param=(num_obj, None, None, init_p))
            torch.save(engine.get_param(), param_file)
        else:
            while not os.path.isfile(param_file):
                time.sleep(0.5)
            param = torch.load(param_file)
            engine.init(param=param)

        act_t_param = np.zeros((engine.n_box, 3))

        for j in range(time_step):
            box_type = engine.init_p[:, 2]
            act_t = np.zeros((engine.n_box, action_dim))

            for k in range(engine.n_box):
                if box_type[k] == 0:
                    # if this is an actuated box
                    if j == 0:
                        act_t_param[k] = np.array([
                            rand_float(0., 1.),
                            rand_float(1., 2.5),
                            rand_float(0, np.pi * 2)
                        ])

                    if act_t_param[k, 0] < 0.3:
                        # using smooth action
                        if j == 0:
                            act_t[k] = rand_float(-act_delta, act_delta)
                        else:
                            lo = max(actions_all[j - 1, k] - act_delta,
                                     -act_scale - 20)
                            hi = min(actions_all[j - 1, k] + act_delta,
                                     act_scale + 20)
                            act_t[k] = rand_float(lo, hi)
                            act_t[k] = np.clip(act_t[k], -act_scale, act_scale)

                    elif act_t_param[k, 0] < 0.6:
                        # using random action
                        act_t[k] = rand_float(-act_scale, act_scale)

                    else:
                        # using sin action
                        act_t[k] = np.sin(j / act_t_param[k, 1] + act_t_param[k, 2]) * \
                                rand_float(act_scale / 2., act_scale)

            engine.set_action(act_t)

            states = engine.get_state()
            actions = engine.get_action()

            pos = states[:, :8].copy()
            vec = states[:, 8:].copy()
            '''reset velocity'''
            if j > 0:
                vec = (pos - states_all[j - 1, :, :8]) / dt

            if j == 0:
                attrs_all = np.zeros((time_step, num_obj, attr_dim))
                states_all = np.zeros((time_step, num_obj, state_dim))
                actions_all = np.zeros((time_step, num_obj, action_dim))
            '''attrs: actuated/soft/rigid'''
            assert attr_dim == 3
            attrs = np.zeros((num_obj, attr_dim))

            for k in range(engine.n_box):
                attrs[k, int(engine.init_p[k, 2])] = 1

            assert np.sum(attrs[:, 0]) == np.sum(engine.init_p[:, 2] == 0)
            assert np.sum(attrs[:, 1]) == np.sum(engine.init_p[:, 2] == 1)
            assert np.sum(attrs[:, 2]) == np.sum(engine.init_p[:, 2] == 2)

            attrs_all[j] = attrs
            states_all[j, :, :8] = pos
            states_all[j, :, 8:] = vec
            actions_all[j] = actions

            data = [attrs, states_all[j], actions_all[j]]

            store_data(data_names, data,
                       os.path.join(rollout_dir,
                                    str(j) + '.h5'))

            engine.step()

        datas = [
            attrs_all.astype(np.float64),
            states_all.astype(np.float64),
            actions_all.astype(np.float64)
        ]

        for j in range(len(stats)):
            stat = init_stat(stats[j].shape[0])
            stat[:, 0] = np.mean(datas[j], axis=(0, 1))[:]
            stat[:, 1] = np.std(datas[j], axis=(0, 1))[:]
            stat[:, 2] = datas[j].shape[0]
            stats[j] = combine_stat(stats[j], stat)

    return stats
Example #19
def gen_Rope(info):
    thread_idx, data_dir, data_names = info['thread_idx'], info[
        'data_dir'], info['data_names']
    n_rollout, time_step = info['n_rollout'], info['time_step']
    dt, video, args, phase = info['dt'], info['video'], info['args'], info[
        'phase']

    np.random.seed(round(time.time() * 1000 + thread_idx) % 2**32)

    attr_dim = args.attr_dim  # root, child
    state_dim = args.state_dim  # x, y, xdot, ydot
    action_dim = args.action_dim
    param_dim = args.param_dim  # n_ball, init_x, k, damping, gravity

    act_scale = 2.
    ret_scale = 1.

    # attr, state, action
    stats = [init_stat(attr_dim), init_stat(state_dim), init_stat(action_dim)]

    engine = RopeEngine(dt, state_dim, action_dim, param_dim)

    group_size = args.group_size
    sub_dataset_size = n_rollout * args.num_workers // args.n_splits
    print('group size', group_size, 'sub_dataset_size', sub_dataset_size)
    assert n_rollout % group_size == 0
    assert args.n_rollout % args.n_splits == 0

    bar = ProgressBar()
    for i in bar(range(n_rollout)):
        rollout_idx = thread_idx * n_rollout + i
        group_idx = rollout_idx // group_size
        sub_idx = rollout_idx // sub_dataset_size

        num_obj_range = args.num_obj_range if phase in {
            'train', 'valid'
        } else args.extra_num_obj_range
        num_obj = num_obj_range[sub_idx]

        rollout_dir = os.path.join(data_dir, str(rollout_idx))

        param_file = os.path.join(data_dir, str(group_idx) + '.param')

        os.system('mkdir -p ' + rollout_dir)

        if rollout_idx % group_size == 0:
            engine.init(param=(num_obj, None, None, None, None))
            torch.save(engine.get_param(), param_file)
        else:
            while not os.path.isfile(param_file):
                time.sleep(0.5)
            param = torch.load(param_file)
            engine.init(param=param)

        for j in range(time_step):
            states_ctl = engine.get_state()[0]
            act_t = np.zeros((engine.num_obj, action_dim))
            act_t[0, 0] = (np.random.rand() * 2 -
                           1.) * act_scale - states_ctl[0] * ret_scale

            engine.set_action(action=act_t)

            states = engine.get_state()
            actions = engine.get_action()

            n_obj = engine.num_obj

            pos = states[:, :2].copy()
            vec = states[:, 2:].copy()
            '''reset velocity'''
            if j > 0:
                vec = (pos - states_all[j - 1, :, :2]) / dt

            if j == 0:
                attrs_all = np.zeros((time_step, n_obj, attr_dim))
                states_all = np.zeros((time_step, n_obj, state_dim))
                actions_all = np.zeros((time_step, n_obj, action_dim))
            '''attrs: [1, 0] => root; [0, 1] => child'''
            assert attr_dim == 2
            attrs = np.zeros((n_obj, attr_dim))
            # category: the first ball is fixed
            attrs[0, 0] = 1
            attrs[1:, 1] = 1

            assert np.sum(attrs[:, 0]) == 1
            assert np.sum(attrs[:, 1]) == engine.num_obj - 1

            attrs_all[j] = attrs
            states_all[j, :, :2] = pos
            states_all[j, :, 2:] = vec
            actions_all[j] = actions

            data = [attrs, states_all[j], actions_all[j]]

            store_data(data_names, data,
                       os.path.join(rollout_dir,
                                    str(j) + '.h5'))

            engine.step()

        datas = [
            attrs_all.astype(np.float64),
            states_all.astype(np.float64),
            actions_all.astype(np.float64)
        ]

        for j in range(len(stats)):
            stat = init_stat(stats[j].shape[0])
            stat[:, 0] = np.mean(datas[j], axis=(0, 1))[:]
            stat[:, 1] = np.std(datas[j], axis=(0, 1))[:]
            stat[:, 2] = datas[j].shape[0]
            stats[j] = combine_stat(stats[j], stat)

    return stats
Example #20
def gen_Rope(info):
    thread_idx, data_dir, data_names = info['thread_idx'], info[
        'data_dir'], info['data_names']
    n_rollout, n_particle, time_step = info['n_rollout'], info[
        'n_particle'], info['time_step']
    dt, args = info['dt'], info['args']

    np.random.seed(round(time.time() * 1000 + thread_idx) % 2**32)

    attr_dim = args.attr_dim  # fixed, moving, radius
    state_dim = args.state_dim  # x, y, xdot, ydot
    action_dim = args.action_dim  # xddot, yddot
    assert attr_dim == 3
    assert state_dim == 4
    assert action_dim == 2

    act_scale = 15

    # attr, state, action
    stats = [init_stat(attr_dim), init_stat(state_dim), init_stat(action_dim)]

    engine = RopeEngine(dt, state_dim, action_dim)

    attrs = np.zeros((n_rollout, time_step, n_particle + 2, attr_dim))
    states = np.zeros((n_rollout, time_step, n_particle + 2, state_dim))
    actions = np.zeros((n_rollout, time_step, n_particle + 2, action_dim))

    bar = ProgressBar()
    for i in bar(range(n_rollout)):
        rollout_idx = thread_idx * n_rollout + i
        rollout_dir = os.path.join(data_dir, str(rollout_idx))
        os.system('mkdir -p ' + rollout_dir)

        engine.reset_scene(n_particle)

        act = np.zeros((n_particle, action_dim))
        for j in range(time_step):

            f = np.zeros(action_dim)
            for k in range(n_particle):
                f += (np.random.rand(action_dim) * 2 - 1) * act_scale
                act[k] = f

            engine.set_action(action=act)

            state = engine.get_state()
            action = engine.get_action()

            states[i, j, :n_particle] = state
            states[i, j, n_particle:, :2] = engine.c_positions
            actions[i, j, :n_particle] = action

            # reset velocity
            if j > 0:
                states[i, j, :, 2:] = (states[i, j, :, :2] -
                                       states[i, j - 1, :, :2]) / dt

            # attrs: [1, 0] => moving; [0, 1] => fixed
            n_obj = attrs.shape[2]
            attr = np.zeros((n_obj, attr_dim))
            attr[0, 1] = 1  # the first ball is fixed
            attr[1:n_particle, 0] = 1  # the rest of the balls are free to move
            attr[n_particle:, 1] = 1  # the cylinders are fixed
            attr[:n_particle, 2] = engine.radius
            attr[n_particle:, 2] = engine.c_radius
            # assert np.sum(attr[:, 0]) == 14
            assert np.sum(attr[:, 1]) == 3
            attrs[i, j] = attr

            data = [attr, states[i, j], actions[i, j]]

            store_data(data_names, data,
                       os.path.join(rollout_dir,
                                    str(j) + '.h5'))

            engine.step()

        datas = [
            attrs[i].astype(np.float64), states[i].astype(np.float64),
            actions[i].astype(np.float64)
        ]

        for j in range(len(stats)):
            stat = init_stat(stats[j].shape[0])
            stat[:, 0] = np.mean(datas[j], axis=(0, 1))[:]
            stat[:, 1] = np.std(datas[j], axis=(0, 1))[:]
            stat[:, 2] = datas[j].shape[0]
            stats[j] = combine_stat(stats[j], stat)

    return stats