def random_trim_length(self, length):
    assert length < len(self)
    logger.info("Randomly trim the dataset: #samples = {}.".format(length))
    indices = list(random.choice(len(self), size=length, replace=False))
    return type(self)(self, indices=indices, filter_name="randomtrim[{}]".format(length))
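# A minimal, self-contained sketch of the trimming idea in random_trim_length,
# using plain numpy and an ordinary Python list in place of the repo's dataset
# classes (the helper name random_trim below is hypothetical): sample `length`
# distinct indices without replacement and keep only those samples.
import numpy as np


def random_trim(samples, length, seed=None):
    """Return a random subset of `samples` with exactly `length` elements."""
    assert length <= len(samples)
    rng = np.random.default_rng(seed)
    indices = rng.choice(len(samples), size=length, replace=False)
    return [samples[i] for i in indices]


# Example: keep 3 of 10 samples.
# random_trim(list(range(10)), 3, seed=0)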
def main():
    initialize_dataset(args.dataset)
    build_symbolic_dataset = get_symbolic_dataset_builder(args.dataset)
    dataset = build_symbolic_dataset(args)

    if args.nr_vis is None:
        args.nr_vis = min(100, len(dataset))

    if args.random:
        indices = random.choice(len(dataset), size=args.nr_vis, replace=False)
    else:
        indices = list(range(args.nr_vis))

    vis = HTMLTableVisualizer(args.data_vis_dir, 'Dataset: ' + args.dataset.upper())
    vis.begin_html()

    with vis.table('Metainfo', [
        HTMLTableColumnDesc('k', 'Key', 'text', {}, None),
        HTMLTableColumnDesc('v', 'Value', 'code', {}, None)
    ]):
        for k, v in args.__dict__.items():
            vis.row(k=k, v=v)

    with vis.table('Visualize', [
        HTMLTableColumnDesc('id', 'QuestionID', 'text', {}, None),
        HTMLTableColumnDesc('image', 'Image', 'figure', {'width': '100%'}, None),
        HTMLTableColumnDesc('qa', 'QA', 'text', css=None, td_css={'width': '30%'}),
        HTMLTableColumnDesc('p', 'Program', 'code', css=None, td_css={'width': '30%'})
    ]):
        for i in tqdm(indices):
            feed_dict = GView(dataset[i])
            image_filename = osp.join(args.data_image_root, feed_dict.image_filename)
            image = Image.open(image_filename)

            if 'objects' in feed_dict:
                fig, ax = vis_bboxes(image, feed_dict.objects, 'object', add_text=False)
            else:
                fig, ax = vis_bboxes(image, [], 'object', add_text=False)
            _ = ax.set_title('object bounding box annotations')

            QA_string = """
                <p><b>Q</b>: {}</p>
                <p><b>A</b>: {}</p>
            """.format(feed_dict.question_raw, feed_dict.answer)
            P_string = '\n'.join([repr(x) for x in feed_dict.program_seq])

            vis.row(id=i, image=fig, qa=QA_string, p=P_string)
            plt.close()

    vis.end_html()
    logger.info(
        'Happy Holiday! You can find your result at "http://monday.csail.mit.edu/xiuming'
        + osp.realpath(args.data_vis_dir) + '".')
def restart(self, obstacles=None, start_point=None, final_point=None):
    assert obstacles is None, 'Can not provide obstacles to CustomLavaWorldEnv'
    # CAUTION: this method ignores the obstacles parameter.
    super().restart()

    if start_point is None:
        i = random.choice(len(self.lv_starts))
        start_point = self.lv_starts[i]
    start_point = tuple(start_point)

    if final_point is None:
        while True:
            j = random.choice(len(self.lv_finals))
            final_point = self.lv_finals[j]
            if start_point != final_point:
                break
    final_point = tuple(final_point)
    assert start_point != final_point, 'Invalid start and final point: {} {}'.format(
        start_point, final_point)

    if self._empty_canvas is None:
        super().restart(self.lv_obstacles, start_point, final_point)
        self._empty_canvas = self._canvas.copy()
        self._fill_canvas(self._empty_canvas, *self._start_point, v=0)
        self._fill_canvas(self._empty_canvas, *self._final_point, v=0)
    else:
        # Do a partial reload: reuse the cached empty canvas.
        self._start_point = start_point
        self._final_point = final_point
        self._current_point = start_point
        self._canvas = self._empty_canvas.copy()
        self._fill_canvas(self._canvas, *self._start_point, v=2)
        self._fill_canvas(self._canvas, *self._final_point, v=3)
        self._origin_canvas = self._canvas.copy()
        self._refresh_view()
    self._clear_distance_info()
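# A standalone sketch of the "partial reload" optimization used in restart above,
# assuming a plain numpy grid as the canvas (the class name and cell values below
# are illustrative; the real environment uses self._canvas / self._empty_canvas):
# build the obstacle map once, cache an "empty" copy, and on every restart copy
# the cache and paint only the start (v=2) and final (v=3) cells.
import numpy as np


class TinyLavaWorld:
    def __init__(self, size, obstacles):
        self._empty_canvas = np.zeros((size, size), dtype=np.int32)
        for x, y in obstacles:
            self._empty_canvas[x, y] = 1  # 1 marks an obstacle cell.

    def restart(self, start_point, final_point):
        assert start_point != final_point
        # Partial reload: reuse the cached obstacle map instead of rebuilding it.
        self._canvas = self._empty_canvas.copy()
        self._canvas[start_point] = 2  # start marker
        self._canvas[final_point] = 3  # final marker
        self._current_point = start_point
        return self._canvas


# world = TinyLavaWorld(5, obstacles=[(2, 2), (3, 1)])
# world.restart((0, 0), (4, 4))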
def random_size_crop(img, target_shape, area_range, aspect_ratio=None, contiguous_ar=False, *, nr_trial=10):
    """Random-size crop used for Facebook's ImageNet data augmentation.

    See https://github.com/facebook/fb.resnet.torch/blob/master/datasets/imagenet.lua
    """
    target_shape = get_2dshape(target_shape)
    h, w = img.shape[:2]
    area = h * w
    # Allow area_range to be either a scalar lower bound or an (lo, hi) pair.
    area_range = area_range if isinstance(area_range, collections.Iterable) else (area_range, 1)

    if aspect_ratio is None:
        assert not contiguous_ar
        aspect_ratio = [h / w]

    for i in range(nr_trial):
        target_area = random.uniform(area_range[0], area_range[1]) * area
        target_ar = random.choice(aspect_ratio)
        nw = int(round((target_area * target_ar) ** 0.5))
        nh = int(round((target_area / target_ar) ** 0.5))

        if random.rand() < 0.5:
            nh, nw = nw, nh

        if nh <= h and nw <= w:
            sx, sy = random.randint(w - nw + 1), random.randint(h - nh + 1)
            img = img[sy:sy + nh, sx:sx + nw]
            return imgproc.resize(img, target_shape)

    scale = min(*target_shape) / min(h, w)
    return imgproc.center_crop(imgproc.resize_scale(img, scale), target_shape)
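# A simplified, numpy-only sketch of the random-size crop above. The repo's
# get_2dshape/imgproc helpers are replaced by plain slicing and the final resize
# is omitted, so this returns the raw crop; the default area_range and aspect
# ratios below are illustrative values, not the repo's defaults.
import numpy as np


def random_size_crop_sketch(img, area_range=(0.08, 1.0),
                            aspect_ratios=(3 / 4, 1.0, 4 / 3), nr_trial=10, rng=None):
    rng = rng or np.random.default_rng()
    h, w = img.shape[:2]
    area = h * w
    for _ in range(nr_trial):
        # Sample a target area and aspect ratio, then derive the crop size.
        target_area = rng.uniform(area_range[0], area_range[1]) * area
        ar = rng.choice(aspect_ratios)
        nw = int(round((target_area * ar) ** 0.5))
        nh = int(round((target_area / ar) ** 0.5))
        if rng.random() < 0.5:
            nh, nw = nw, nh
        if nh <= h and nw <= w:
            sy = int(rng.integers(h - nh + 1))
            sx = int(rng.integers(w - nw + 1))
            return img[sy:sy + nh, sx:sx + nw]
    # Fallback after nr_trial failures: central square crop.
    s = min(h, w)
    sy, sx = (h - s) // 2, (w - s) // 2
    return img[sy:sy + s, sx:sx + s]


# crop = random_size_crop_sketch(np.zeros((480, 640, 3), dtype=np.uint8))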
def _sample_number(self, mode):
    """Sample an integer argument from choices defined by an array."""
    if mode == 'test':
        return self.test_number
    # Review (sample training data) from recently studied lessons.
    return random.choice(self.sample_array)
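# A small sketch of the sampling rule in _sample_number, written as a free
# function: `test_number` and `sample_array` mirror the attributes the method
# reads, and the example curriculum values below are purely illustrative.
import numpy as np


def sample_number(mode, test_number, sample_array, rng=np.random):
    if mode == 'test':
        return test_number
    # Training: review a problem size drawn from the recently studied lessons.
    return int(rng.choice(sample_array))


# sample_number('train', 10, [2, 3, 4, 5])  -> one of 2, 3, 4, 5
# sample_number('test', 10, [2, 3, 4, 5])   -> always 10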
def run_episode(env, model, mode, number, play_name='', dump=False, dataset=None,
                eval_only=False, use_argmax=False, need_restart=False, entropy_beta=0.0):
    """Run one episode using the model with $number blocks."""
    is_over = False
    traj = collections.defaultdict(list)
    score = 0
    if need_restart:
        env.restart()

    optimal = None
    if args.task == 'path':
        optimal = env.unwrapped.dist
        relation = env.unwrapped.graph.get_edges()
        relation = np.stack([relation, relation.T], axis=-1).astype(dtype=np.float32)
        st, ed = env.current_state
        nodes_trajectory = [int(st)]
        destination = int(ed)
        policies = []
    elif args.task == 'sort':
        optimal = env.unwrapped.optimal
        array = [str(i) for i in env.unwrapped.array]

    # If dump_play=True, store the states and actions in a json file for visualization.
    dump_play = args.dump_play and dump
    if dump_play:
        nr_objects = number + 1
        array = env.unwrapped.current_state
        moves, new_pos, policies = [], [], []

    if args.model == 'dlm':
        # By default the network isn't in training mode during data collection,
        # but with DLM we don't want to use argmax only, except in 2 cases (testing
        # the interpretability, or the last mining phase to get an interpretable policy):
        if ('inter' in mode) or (('mining' in mode) or ('inherit' in mode) and number == args.curriculum_graduate):
            model.lowernoise()
        else:
            model.train(True)

            if args.dlm_noise == 1 and (('mining' in mode) or ('inherit' in mode) or ('test' in mode)):
                model.lowernoise()
            elif args.dlm_noise == 2:
                model.lowernoise()

    step = 0
    while not is_over:
        if args.task == 'path':
            st, ed = env.current_state
            state = np.zeros((relation.shape[0], 2), dtype=np.float32)
            state[st, 0] = 1
            state[ed, 1] = 1
            feed_dict = dict(states=[np.array([state]), np.array([relation])])
        else:
            state = env.current_state
            if 'nlrl' not in args.task or args.task == 'sort':
                feed_dict = dict(states=np.array([state]))
            else:
                feed_dict = dict(states=state)
        feed_dict['entropy_beta'] = as_tensor(entropy_beta).float()
        feed_dict['training'] = as_tensor(False)
        feed_dict = as_tensor(feed_dict)

        with torch.set_grad_enabled(False):
            output_dict = model(feed_dict)

        policy = output_dict['policy']
        p = as_numpy(policy.data[0])
        action = p.argmax() if use_argmax else random.choice(len(p), p=p)
        if args.pred_weight != 0.0:
            # Need to ensure that the env.utils.MapActionProxy is the outermost class.
            mapped_x, mapped_y = env.mapping[action]
            # env.unwrapped to get the innermost Env class.
            valid = env.unwrapped.world.moveable(mapped_x, mapped_y)
        reward, is_over = env.action(action)
        step += 1

        if dump_play:
            moves.append([mapped_x, mapped_y])
            res = tuple(env.current_state[mapped_x][2:])
            new_pos.append((int(res[0]), int(res[1])))

            logits = as_numpy(output_dict['logits'].data[0])
            tops = np.argsort(p)[-10:][::-1]
            tops = list(
                map(lambda x: (env.mapping[x], float(p[x]), float(logits[x])), tops))
            policies.append(tops)

        # For now, assume reward=1 only when succeed, otherwise reward=0.
        # Manipulate the reward and get success information according to reward.
        if reward == 0 and args.penalty is not None:
            reward = args.penalty
        succ = 1 if is_over and reward > 0.99 else 0

        score += reward
        if type(feed_dict['states']) is list:
            traj['states'].append([f for f in feed_dict['states']])
        else:
            traj['states'].append(state)
        traj['rewards'].append(reward)
        traj['actions'].append(action)
        if args.pred_weight != 0.0:
            if not eval_only and dataset is not None and mapped_x != mapped_y:
                dataset.append(nr_objects, state, action, valid)

    # Dump a json file as the record of the play.
    if dump_play and not (args.dump_fail_only and succ):
        array = array[:, 2:].astype('int32').tolist()
        array = [array[:nr_objects], array[nr_objects:]]
        json_str = json.dumps(
            # Set indent=True for an indented view of the json file.
            dict(array=array, moves=moves, new_pos=new_pos, policies=policies))
        dump_file = os.path.join(
            args.current_dump_dir,
            '{}_blocks{}.json'.format(play_name, env.unwrapped.nr_blocks))
        with open(dump_file, 'w') as f:
            f.write(json_str)

    length = step
    if args.model == 'dlm':
        model.restorenoise()

    return succ, score, traj, length, optimal
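# The action-selection convention shared by the run_episode variants in this
# section, reduced to plain numpy (the code above goes through the framework's
# random wrapper): either take the greedy argmax action, or sample an action
# index with probabilities p. The function name is illustrative.
import numpy as np


def select_action(p, use_argmax=False, rng=None):
    """p: 1-D array of action probabilities (non-negative, summing to ~1)."""
    rng = rng or np.random.default_rng()
    p = np.asarray(p, dtype=np.float64)
    p = p / p.sum()  # guard against small numerical drift in the policy output
    if use_argmax:
        return int(p.argmax())
    return int(rng.choice(len(p), p=p))


# select_action([0.1, 0.7, 0.2])        -> stochastic, most often 1
# select_action([0.1, 0.7, 0.2], True)  -> always 1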
def run_episode(env, model, number, play_name='', dump=False, dataset=None,
                eval_only=False, use_argmax=False, need_restart=False, entropy_beta=0.0):
    """Run one episode using the model with $number blocks."""
    is_over = False
    traj = collections.defaultdict(list)
    score = 0
    if need_restart:
        env.restart()

    nr_objects = number + 1
    # If dump_play=True, store the states and actions in a json file for visualization.
    dump_play = args.dump_play and dump
    if dump_play:
        array = env.unwrapped.current_state
        moves, new_pos, policies = [], [], []

    while not is_over:
        state = env.current_state
        feed_dict = dict(states=np.array([state]))
        feed_dict['entropy_beta'] = as_tensor(entropy_beta).float()
        feed_dict = as_tensor(feed_dict)
        if args.use_gpu:
            feed_dict = as_cuda(feed_dict)

        with torch.set_grad_enabled(not eval_only):
            output_dict = model(feed_dict)

        policy = output_dict['policy']
        p = as_numpy(policy.data[0])
        action = p.argmax() if use_argmax else random.choice(len(p), p=p)
        # Need to ensure that the env.utils.MapActionProxy is the outermost class.
        mapped_x, mapped_y = env.mapping[action]
        # env.unwrapped to get the innermost Env class.
        valid = env.unwrapped.world.moveable(mapped_x, mapped_y)
        reward, is_over = env.action(action)

        if dump_play:
            moves.append([mapped_x, mapped_y])
            res = tuple(env.current_state[mapped_x][2:])
            new_pos.append((int(res[0]), int(res[1])))

            logits = as_numpy(output_dict['logits'].data[0])
            tops = np.argsort(p)[-10:][::-1]
            tops = list(
                map(lambda x: (env.mapping[x], float(p[x]), float(logits[x])), tops))
            policies.append(tops)

        # For now, assume reward=1 only when succeed, otherwise reward=0.
        # Manipulate the reward and get success information according to reward.
        if reward == 0 and args.penalty is not None:
            reward = args.penalty
        succ = 1 if is_over and reward > 0.99 else 0

        score += reward
        traj['states'].append(state)
        traj['rewards'].append(reward)
        traj['actions'].append(action)
        if not eval_only and dataset is not None and mapped_x != mapped_y:
            dataset.append(nr_objects, state, action, valid)

    # Dump a json file as the record of the play.
    if dump_play and not (args.dump_fail_only and succ):
        array = array[:, 2:].astype('int32').tolist()
        array = [array[:nr_objects], array[nr_objects:]]
        json_str = json.dumps(
            # Set indent=True for an indented view of the json file.
            dict(array=array, moves=moves, new_pos=new_pos, policies=policies))
        dump_file = os.path.join(
            args.current_dump_dir,
            '{}_blocks{}.json'.format(play_name, env.unwrapped.nr_blocks))
        with open(dump_file, 'w') as f:
            f.write(json_str)

    length = len(traj['rewards'])
    return succ, score, traj, length
def randomly_generate_family(n, p_marriage=0.8, verbose=False):
    """Randomly generate family trees.

    Mimic the process of families growing using a timeline. Each time a new person
    is created, randomly sample the gender and parents (could be none, indicating
    not included in the family tree) of the person. Also maintain lists of singles
    of each gender. With probability $p_marriage, randomly pick two from the lists
    to be married. Finally, randomly permute the order of people.

    Args:
      n: The number of people in the family tree.
      p_marriage: The probability that a marriage happens each time.
      verbose: print the marriage and child-birth process if verbose=True.

    Returns:
      A family tree instance of $n people.
    """
    assert n > 0
    ids = list(random.permutation(n))

    single_m = []
    single_w = []
    couples = [None]
    # The relations are: husband, wife, father, mother, son, daughter.
    rel = np.zeros((n, n, 6))
    fathers = [None for i in range(n)]
    mothers = [None for i in range(n)]

    def add_couple(man, woman):
        """Add a couple relation among (man, woman)."""
        couples.append((man, woman))
        rel[woman, man, 0] = 1  # husband
        rel[man, woman, 1] = 1  # wife
        if verbose:
            print('couple', man, woman)

    def add_child(parents, child, gender):
        """Add a child relation between parents and the child according to gender."""
        father, mother = parents
        fathers[child] = father
        mothers[child] = mother
        rel[child, father, 2] = 1  # father
        rel[child, mother, 3] = 1  # mother
        if gender == 0:  # son
            rel[father, child, 4] = 1
            rel[mother, child, 4] = 1
        else:  # daughter
            rel[father, child, 5] = 1
            rel[mother, child, 5] = 1
        if verbose:
            print('child', father, mother, child, gender)

    def check_relations(man, woman):
        """Disable marriage between cousins."""
        if fathers[man] is None or fathers[woman] is None:
            return True
        if fathers[man] == fathers[woman]:
            return False

        def same_parent(x, y):
            return fathers[x] is not None and fathers[y] is not None and fathers[x] == fathers[y]

        for x in [fathers[man], mothers[man]]:
            for y in [fathers[woman], mothers[woman]]:
                if same_parent(man, y) or same_parent(woman, x) or same_parent(x, y):
                    return False
        return True

    while ids:
        x = ids.pop()
        gender = random.randint(2)
        parents = random.choice(couples)
        if gender == 0:
            single_m.append(x)
        else:
            single_w.append(x)
        if parents is not None:
            add_child(parents, x, gender)

        if random.rand() < p_marriage and len(single_m) > 0 and len(single_w) > 0:
            mi = random.randint(len(single_m))
            wi = random.randint(len(single_w))
            man = single_m[mi]
            woman = single_w[wi]
            if check_relations(man, woman):
                add_couple(man, woman)
                del single_m[mi]
                del single_w[wi]

    return Family(n, rel)
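# A small consistency check for the relation tensor built above, based only on
# the index convention documented in the code (last axis: 0=husband, 1=wife,
# 2=father, 3=mother, 4=son, 5=daughter). It operates on a raw (n, n, 6) numpy
# array rather than the Family wrapper, whose attributes are not shown here.
import numpy as np


def check_family_relations(rel):
    """Sanity-check symmetry between couple and parent/child relations."""
    # Every husband edge (woman -> man) must be mirrored by a wife edge (man -> woman).
    assert np.array_equal(rel[:, :, 0], rel[:, :, 1].T)
    # A child's father/mother edge implies the parent's son-or-daughter edge back.
    child_of = (rel[:, :, 2] + rel[:, :, 3]) > 0   # child -> parent
    parent_of = (rel[:, :, 4] + rel[:, :, 5]) > 0  # parent -> child
    assert np.all(child_of <= parent_of.T)
    return True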
def run_episode(env, model, number, play_name='', dump=False, eval_only=False,
                use_argmax=False, need_restart=False, entropy_beta=0.0):
    """Run one episode using the model with $number nodes/numbers."""
    is_over = False
    traj = collections.defaultdict(list)
    score = 0
    moves = []
    # If dump_play=True, store the states and actions in a json file for visualization.
    dump_play = args.dump_play and dump

    if need_restart:
        env.restart()

    if args.is_path_task:
        optimal = env.unwrapped.dist
        relation = env.unwrapped.graph.get_edges()
        relation = np.stack([relation, relation.T], axis=-1)
        st, ed = env.current_state
        nodes_trajectory = [int(st)]
        destination = int(ed)
        policies = []
    elif args.is_sort_task:
        optimal = env.unwrapped.optimal
        array = [str(i) for i in env.unwrapped.array]

    while not is_over:
        if args.is_path_task:
            st, ed = env.current_state
            state = np.zeros((relation.shape[0], 2))
            state[st, 0] = 1
            state[ed, 1] = 1
            feed_dict = dict(states=np.array([state]), relations=np.array([relation]))
        elif args.is_sort_task:
            state = env.current_state
            feed_dict = dict(states=np.array([state]))
        feed_dict['entropy_beta'] = as_tensor(entropy_beta).float()
        feed_dict = as_tensor(feed_dict)
        if args.use_gpu:
            feed_dict = as_cuda(feed_dict)

        with torch.set_grad_enabled(not eval_only):
            output_dict = model(feed_dict)

        policy = output_dict['policy']
        p = as_numpy(policy.data[0])
        action = p.argmax() if use_argmax else random.choice(len(p), p=p)
        reward, is_over = env.action(action)

        # Collect moves information for visualization.
        if dump_play:
            if args.is_path_task:
                moves.append(int(action))
                nodes_trajectory.append(int(env.current_state[0]))
                logits = as_numpy(output_dict['logits'].data[0])
                tops = np.argsort(p)[-10:][::-1]
                tops = list(
                    map(lambda x: (int(x), float(p[x]), float(logits[x])), tops))
                policies.append(tops)
            if args.is_sort_task:
                # Need to ensure that env.utils.MapActionProxy is the outermost class.
                mapped_x, mapped_y = env.mapping[action]
                moves.append([mapped_x, mapped_y])

        # For now, assume reward=1 only when succeed, otherwise reward=0.
        # Manipulate the reward and get success information according to reward.
        if reward == 0 and args.penalty is not None:
            reward = args.penalty
        succ = 1 if is_over and reward > 0.99 else 0

        score += reward
        traj['states'].append(state)
        if args.is_path_task:
            traj['relations'].append(relation)
        traj['rewards'].append(reward)
        traj['actions'].append(action)

    # Dump a json file storing information of the play.
    if dump_play and not (args.dump_fail_only and succ):
        if args.is_path_task:
            num = env.unwrapped.nr_nodes
            graph = relation[:, :, 0].tolist()
            coordinates = env.unwrapped.graph.get_coordinates().tolist()
            json_str = json.dumps(
                dict(graph=graph,
                     coordinates=coordinates,
                     policies=policies,
                     destination=destination,
                     current=nodes_trajectory,
                     moves=moves))
        if args.is_sort_task:
            num = env.unwrapped.nr_numbers
            json_str = json.dumps(dict(array=array, moves=moves))
        dump_file = os.path.join(args.current_dump_dir,
                                 '{}_size{}.json'.format(play_name, num))
        with open(dump_file, 'w') as f:
            f.write(json_str)

    length = len(traj['rewards'])
    return succ, score, traj, length, optimal
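# A minimal sketch of the path-task input encoding used above: the state is an
# (n, 2) one-hot matrix (column 0 marks the current node, column 1 the target),
# and the relation tensor stacks the adjacency matrix with its transpose so the
# model sees both edge directions. `adjacency` is a plain (n, n) numpy array.
import numpy as np


def encode_path_state(adjacency, current, target):
    n = adjacency.shape[0]
    state = np.zeros((n, 2), dtype=np.float32)
    state[current, 0] = 1
    state[target, 1] = 1
    relation = np.stack([adjacency, adjacency.T], axis=-1).astype(np.float32)
    return state, relation


# adj = np.array([[0, 1, 0], [0, 0, 1], [0, 0, 0]], dtype=np.float32)
# state, relation = encode_path_state(adj, current=0, target=2)
# state.shape == (3, 2); relation.shape == (3, 3, 2)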