def action(self, state):
    """ Returns an action for a given state. Public handle to function. """
    # save state
    if self._logging_dir is not None:
        policy_id = utils.gen_experiment_id()
        self._policy_dir = os.path.join(self._logging_dir,
                                        'policy_output_%s' % (policy_id))
        while os.path.exists(self._policy_dir):
            policy_id = utils.gen_experiment_id()
            self._policy_dir = os.path.join(self._logging_dir,
                                            'policy_output_%s' % (policy_id))
        os.mkdir(self._policy_dir)
        state_dir = os.path.join(self._policy_dir, 'state')
        state.save(state_dir)

    # plan action
    action = self._action(state)

    # save action
    if self._logging_dir is not None:
        action_dir = os.path.join(self._policy_dir, 'action')
        action.save(action_dir)

    return action
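
# --- Illustrative sketch (not part of the original class) ----------------------
# The logging branch above retries utils.gen_experiment_id() until it finds an
# unused 'policy_output_<id>' directory, creates it, and saves the state and the
# planned action inside it. Below is a standalone, stdlib-only version of the
# same pattern; uuid4 stands in for utils.gen_experiment_id, which is not
# reproduced here.
import os
import uuid


def make_unique_output_dir(logging_dir, prefix='policy_output'):
    """Create and return a fresh '<prefix>_<id>' directory under logging_dir."""
    out_dir = os.path.join(logging_dir, '%s_%s' % (prefix, uuid.uuid4().hex[:8]))
    while os.path.exists(out_dir):
        out_dir = os.path.join(logging_dir, '%s_%s' % (prefix, uuid.uuid4().hex[:8]))
    os.mkdir(out_dir)
    return out_dir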
def benchmark_bin_picking_policy(policy,
                                 # input_dataset_path,
                                 # heap_ids,
                                 # timesteps,
                                 # output_dataset_path,
                                 config,
                                 # excluded_heaps_file):
                                 ):
    """ Benchmark a bin picking policy.

    Parameters
    ----------
    policy : :obj:`Policy`
        policy to roll out
    input_dataset_path : str
        path to the input dataset
    heap_ids : list
        integer identifiers for the heaps to re-run
    timesteps : list
        integer timesteps to seed the simulation from
    output_dataset_path : str
        path to store the results
    config : dict
        dictionary-like object containing parameters of the simulator and visualization
    """
    # read subconfigs
    vis_config = config['vis']
    dataset_config = config['dataset']

    # read parameters
    fully_observed = config['fully_observed']
    steps_per_test_case = config['steps_per_test_case']
    rollouts_per_garbage_collect = config['rollouts_per_garbage_collect']
    debug = config['debug']
    im_height = config['state_space']['camera']['im_height']
    im_width = config['state_space']['camera']['im_width']
    max_obj_per_pile = config['state_space']['object']['max_obj_per_pile']

    if debug:
        random.seed(SEED)
        np.random.seed(SEED)

    # read ids
    # if len(heap_ids) != len(timesteps):
    #     raise ValueError('Must provide same number of heap ids and timesteps')
    # num_rollouts = len(heap_ids)
    num_rollouts = 1

    # set dataset params
    tensor_config = dataset_config['tensors']
    fields_config = tensor_config['fields']
    # fields_config['color_ims']['height'] = im_height
    # fields_config['color_ims']['width'] = im_width
    # fields_config['depth_ims']['height'] = im_height
    # fields_config['depth_ims']['width'] = im_width
    fields_config['obj_poses']['height'] = POSE_DIM * max_obj_per_pile
    fields_config['obj_coms']['height'] = POINT_DIM * max_obj_per_pile
    fields_config['obj_ids']['height'] = max_obj_per_pile
    fields_config['bin_distances']['height'] = max_obj_per_pile
    # matrix has (n choose 2) elements in it
    max_distance_matrix_length = int(comb(max_obj_per_pile, 2))
    fields_config['distance_matrix']['height'] = max_distance_matrix_length

    # sample a process id
    proc_id = utils.gen_experiment_id()
    # if not os.path.exists(output_dataset_path):
    #     try:
    #         os.mkdir(output_dataset_path)
    #     except:
    #         logging.warning('Failed to create %s. The dataset path may have been created simultaneously by another process' % (dataset_path))
    # proc_id = 'clustering_2'
    # output_dataset_path = os.path.join(output_dataset_path, 'dataset_%s' % (proc_id))

    # open input dataset
    # logging.info('Opening input dataset: %s' % input_dataset_path)
    # input_dataset = TensorDataset.open(input_dataset_path)

    # open output_dataset
    # logging.info('Opening output dataset: %s' % output_dataset_path)
    # dataset = TensorDataset(output_dataset_path, tensor_config)
    # datapoint = dataset.datapoint_template

    # setup logging
    # experiment_log_filename = os.path.join(output_dataset_path, 'dataset_generation.log')
    # formatter = logging.Formatter('%(asctime)s %(levelname)s: %(message)s')
    # hdlr = logging.FileHandler(experiment_log_filename)
    # hdlr.setFormatter(formatter)
    # logging.getLogger().addHandler(hdlr)
    # config.save(os.path.join(output_dataset_path, 'dataset_generation_params.yaml'))

    # key mappings
    # the empty string is added as a mapping for the case where dexnet is not
    # evaluated on the 'before' state of the push
    obj_id = 1
    obj_ids = {'': 0}
    action_ids = {
        'ParallelJawGraspAction': 0,
        'SuctionGraspAction': 1,
        'LinearPushAction': 2
    }

    # add action ids
    reverse_action_ids = utils.reverse_dictionary(action_ids)
    # dataset.add_metadata('action_ids', reverse_action_ids)

    # perform rollouts
    n = 0
    rollout_start = time.time()
    current_heap_id = None
    while n < num_rollouts:
        # create env
        create_start = time.time()
        bin_picking_env = GraspingEnv(config, vis_config)
        create_stop = time.time()
        logging.info('Creating env took %.3f sec' % (create_stop - create_start))

        # perform rollouts
        rollouts_remaining = num_rollouts - n
        for i in range(min(rollouts_per_garbage_collect, rollouts_remaining)):
            # log current rollout
            logging.info('\n')
            if n % vis_config['log_rate'] == 0:
                logging.info('Rollout: %03d' % (n))

            try:
                # mark rollout status
                data_saved = False
                num_steps = 0

                # read heap id
                # heap_id = heap_ids[n]
                # timestep = timesteps[n]
                # while heap_id == current_heap_id:  # or heap_id < 81:  # [226, 287, 325, 453, 469, 577, 601, 894, 921]: 26
                #     n += 1
                #     heap_id = heap_ids[n]
                #     timestep = timesteps[n]
                push_logger = logging.getLogger('push')
                # push_logger.info('~')
                # push_logger.info('Heap ID %d' % heap_id)
                # current_heap_id = heap_id

                # reset env
                reset_start = time.time()
                # bin_picking_env.reset_from_dataset(input_dataset,
                #                                    heap_id,
                #                                    timestep)
                bin_picking_env.reset()
                state = bin_picking_env.state
                environment = bin_picking_env.environment
                if fully_observed:
                    observation = None
                else:
                    observation = bin_picking_env.observation
                policy.set_environment(environment)
                reset_stop = time.time()

                # add objects to mapping
                for obj_key in state.obj_keys:
                    if obj_key not in obj_ids.keys():
                        obj_ids[obj_key] = obj_id
                        obj_id += 1
                    push_logger.info(obj_key)

                # save id mappings
                reverse_obj_ids = utils.reverse_dictionary(obj_ids)
                # dataset.add_metadata('obj_ids', reverse_obj_ids)

                # store datapoint env params
                # datapoint['heap_ids'] = current_heap_id
                # datapoint['camera_poses'] = environment.camera.T_camera_world.vec
                # datapoint['camera_intrs'] = environment.camera.intrinsics.vec
                # datapoint['robot_poses'] = environment.robot.T_robot_world.vec

                # render
                if vis_config['initial_state']:
                    vis3d.figure()
                    bin_picking_env.render_3d_scene()
                    vis3d.pose(environment.robot.T_robot_world)
                    vis3d.show(starting_camera_pose=CAMERA_POSE)

                # observe
                if vis_config['initial_obs']:
                    vis2d.figure()
                    vis2d.imshow(observation, auto_subplot=True)
                    vis2d.show()

                # rollout on current state
                done = False
                failed = False
                # if isinstance(policy, SingulationFullRolloutPolicy):
                #     policy.reset_num_failed_grasps()

                while not done:
                    if vis_config['step_stats']:
                        # NOTE: heap_id is only defined when resetting from a dataset (see commented block above)
                        logging.info('Heap ID: %s' % heap_id)
                        logging.info('Timestep: %s' % bin_picking_env.timestep)

                    # get action
                    policy_start = time.time()
                    if fully_observed:
                        action = policy.action(state)
                    else:
                        action = policy.action(observation)
                    policy_stop = time.time()
                    logging.info('Composite Policy took %.3f sec' % (policy_stop - policy_start))

                    # render scene before
                    if vis_config['action']:
                        # gripper = bin_picking_env.gripper(action)
                        vis3d.figure()
                        # GRASPINGENV
                        # bin_picking_env.render_3d_scene(render_camera=False, workspace_objs_wireframe=False)
                        bin_picking_env.render_3d_scene()
                        if isinstance(action, GraspAction):
                            # NOTE: 'gripper' must be defined (see commented line above) for this branch
                            vis3d.gripper(gripper, action.grasp(gripper))
                        # if isinstance(action, LinearPushAction):
                        else:
                            # # T_start_world = action.T_begin_world * gripper.T_mesh_grasp
                            # # T_end_world = action.T_end_world * gripper.T_mesh_grasp
                            # # start_point = action.T_begin_world.translation
                            # start_point = action['start']
                            # # end_point = action.T_end_world.translation
                            # end_point = action['end']
                            # vec = (end_point - start_point) / np.linalg.norm(end_point - start_point) if np.linalg.norm(end_point - start_point) > 0 else end_point - start_point
                            # # h1 = np.array([[0.7071,-0.7071,0],[0.7071,0.7071,0],[0,0,1]]).dot(vec)
                            # # h2 = np.array([[0.7071,0.7071,0],[-0.7071,0.7071,0],[0,0,1]]).dot(vec)
                            # arrow_len = np.linalg.norm(start_point - end_point)
                            # h1 = (end_point - start_point + np.array([0,0,arrow_len])) / (arrow_len*math.sqrt(2))
                            # h2 = (end_point - start_point - np.array([0,0,arrow_len])) / (arrow_len*math.sqrt(2))
                            # shaft_points = [start_point, end_point]
                            # head_points = [end_point - 0.03*h2, end_point, end_point - 0.03*h1]
                            # # vis3d.plot3d(shaft_points, color=[0,0,1])
                            # # vis3d.plot3d(head_points, color=[0,0,1])

                            # Displaying all potential topple points
                            for vertex, prob in zip(action['vertices'], action['probabilities']):
                                color = np.array([min(1, 2*(1-prob)), min(2*prob, 1), 0])
                                vis3d.points(Point(vertex, 'world'), scale=.0005, color=color)
                            for vertex in action['bottom_points']:
                                color = np.array([0, 0, 1])
                                vis3d.points(Point(vertex, 'world'), scale=.0005, color=color)
                            vis3d.points(Point(action['com'], 'world'), scale=.005, color=np.array([0, 0, 1]))
                            vis3d.points(Point(np.array([0, 0, 0]), 'world'), scale=.005, color=np.array([0, 1, 0]))
                            # set_of_lines = action['set_of_lines']
                            # for i, line in enumerate(set_of_lines):
                            #     color = str(bin(i+1))[2:].zfill(3)
                            #     color = np.array([color[2], color[1], color[0]])
                            #     vis3d.plot3d(line, color=color)
                            vis3d.show(starting_camera_pose=CAMERA_POSE)

                            # Show
                            vis3d.figure()
                            bin_picking_env.render_3d_scene()
                            final_pose_ind = action['final_pose_ind'] / np.amax(action['final_pose_ind'])
                            for vertex, final_pose_ind in zip(action['vertices'], final_pose_ind):
                                color = np.array([0, min(1, 2*(1-prob)), min(2*prob, 1)])
                                vis3d.points(Point(vertex, 'world'), scale=.0005, color=color)
                            vis3d.show(starting_camera_pose=CAMERA_POSE)

                            color = np.array([0, 0, 1])
                            original_pose = state.obj.T_obj_world
                            pose_num = 0
                            for pose, edge_point1, edge_point2 in zip(action['final_poses'],
                                                                      action['bottom_points'],
                                                                      np.roll(action['bottom_points'], -1, axis=0)):
                                print('Pose:', pose_num)
                                pose_num += 1
                                pose = pose.T_obj_table

                                vis3d.figure()
                                state.obj.T_obj_world = original_pose
                                bin_picking_env.render_3d_scene()
                                vis3d.points(Point(edge_point1, 'world'), scale=.0005, color=color)
                                vis3d.points(Point(edge_point2, 'world'), scale=.0005, color=color)
                                vis3d.show(starting_camera_pose=CAMERA_POSE)

                                vis3d.figure()
                                state.obj.T_obj_world = pose
                                bin_picking_env.render_3d_scene()
                                vis3d.points(Point(edge_point1, 'world'), scale=.0005, color=color)
                                vis3d.points(Point(edge_point2, 'world'), scale=.0005, color=color)
                                vis3d.show(starting_camera_pose=CAMERA_POSE)
                    # vis3d.save('/home/mjd3/Pictures/weird_pics/%d_%d_before.png' % (heap_id, bin_picking_env.timestep), starting_camera_pose=CAMERA_POSE)

                    # store datapoint pre-step data
                    j = 0
                    obj_poses = np.zeros(fields_config['obj_poses']['height'])
                    obj_coms = np.zeros(fields_config['obj_coms']['height'])
                    obj_ids_vec = np.iinfo(np.uint32).max * np.ones(fields_config['obj_ids']['height'])
                    for obj_state in state.obj_states:
                        obj_poses[j*POSE_DIM:(j+1)*POSE_DIM] = obj_state.T_obj_world.vec
                        obj_coms[j*POINT_DIM:(j+1)*POINT_DIM] = obj_state.center_of_mass
                        obj_ids_vec[j] = obj_ids[obj_state.key]
                        j += 1
                    action_poses = np.zeros(fields_config['action_poses']['height'])
                    # if isinstance(action, GraspAction):
                    #     action_poses[:7] = action.T_grasp_world.vec
                    # else:
                    #     action_poses[:7] = action.T_begin_world.vec
                    #     action_poses[7:] = action.T_end_world.vec

                    # if isinstance(policy, SingulationMetricsCompositePolicy):
                    #     actual_distance_matrix_length = int(comb(len(state.objs), 2))
                    #     bin_distances = np.append(action.metadata['bin_distances'],
                    #                               np.zeros(max_obj_per_pile - len(state.objs)))
                    #     distance_matrix = np.append(action.metadata['distance_matrix'],
                    #                                 np.zeros(max_distance_matrix_length - actual_distance_matrix_length))
                    #     datapoint['bin_distances'] = bin_distances
                    #     datapoint['distance_matrix'] = distance_matrix
                    #     datapoint['T_begin_world'] = action.T_begin_world.matrix
                    #     datapoint['T_end_world'] = action.T_end_world.matrix
                    #     datapoint['parallel_jaw_best_q_value'] = action.metadata['parallel_jaw_best_q_value']
                    #     # datapoint['parallel_jaw_mean_q_value'] = action.metadata['parallel_jaw_mean_q_value']
                    #     # datapoint['parallel_jaw_num_grasps'] = action.metadata['parallel_jaw_num_grasps']
                    #     datapoint['suction_best_q_value'] = action.metadata['suction_best_q_value']
                    #     # datapoint['suction_mean_q_value'] = action.metadata['suction_mean_q_value']
                    #     # datapoint['suction_num_grasps'] = action.metadata['suction_num_grasps']
                    #     # logging.info('Suction Q: %f, PJ Q: %f' % (action.metadata['suction_q_value'], action.metadata['parallel_jaw_q_value']))
                    #     # datapoint['obj_index'] = action.metadata['obj_index']
                    #     # datapoint['parallel_jaw_best_q_value_single'] = action.metadata['parallel_jaw_best_q_value_single']
                    #     # datapoint['suction_best_q_value_single'] = action.metadata['suction_best_q_value_single']
                    #     datapoint['singulated_obj_index'] = action.metadata['singulated_obj_index']
                    #     datapoint['parallel_jaw_grasped_obj_index'] = obj_ids[action.metadata['parallel_jaw_grasped_obj_key']]
                    #     datapoint['suction_grasped_obj_index'] = obj_ids[action.metadata['suction_grasped_obj_key']]
                    # else:
                    #     datapoint['bin_distances'] = np.zeros(max_obj_per_pile)
                    #     datapoint['distance_matrix'] = np.zeros(max_distance_matrix_length)
                    #     datapoint['T_begin_world'] = np.zeros((4,4))
                    #     datapoint['T_end_world'] = np.zeros((4,4))
                    #     datapoint['parallel_jaw_best_q_value'] = -1
                    #     datapoint['suction_best_q_value'] = -1
                    #     datapoint['singulated_obj_index'] = -1
                    #     datapoint['parallel_jaw_grasped_obj_index'] = -1
                    #     datapoint['suction_grasped_obj_index'] = -1

                    # policy_id = 0
                    # if 'policy_id' in action.metadata.keys():
                    #     policy_id = action.metadata['policy_id']
                    # greedy_q_value = 0
                    # if 'greedy_q_value' in action.metadata.keys():
                    #     greedy_q_value = action.metadata['greedy_q_value']

                    # datapoint['timesteps'] = bin_picking_env.timestep
                    # datapoint['obj_poses'] = obj_poses
                    # datapoint['obj_coms'] = obj_coms
                    # datapoint['obj_ids'] = obj_ids_vec
                    # # if bin_picking_env.render_mode == RenderMode.RGBD:
                    # #     color_data = observation.color.raw_data
                    # #     depth_data = observation.depth.raw_data
                    # # elif bin_picking_env.render_mode == RenderMode.DEPTH:
                    # #     color_data = np.zeros(observation.shape).astype(np.uint8)
                    # #     depth_data = observation.raw_data
                    # # elif bin_picking_env.render_mode == RenderMode.COLOR:
                    # #     color_data = observation.raw_data
                    # #     depth_data = np.zeros(observation.shape)
                    # # datapoint['color_ims'] = color_data
                    # # datapoint['depth_ims'] = depth_data
                    # datapoint['action_ids'] = action_ids[type(action).__name__]
                    # datapoint['action_poses'] = action_poses
                    # datapoint['policy_ids'] = policy_id
                    # datapoint['greedy_q_values'] = greedy_q_value
                    # datapoint['pred_q_values'] = action.q_value

                    # step the policy
                    # observation, reward, done, info = bin_picking_env.step(action)
                    # state = bin_picking_env.state
                    state.objs[0].T_obj_world = action['final_state']

                    # if isinstance(policy, SingulationFullRolloutPolicy):
                    #     policy.grasp_succeeds(info['grasp_succeeds'])

                    # debugging info
                    # NOTE: reward and info are only defined when the env step above is re-enabled
                    if vis_config['step_stats']:
                        logging.info('Action type: %s' % (type(action).__name__))
                        logging.info('Action Q-value: %.3f' % (action.q_value))
                        logging.info('Reward: %d' % (reward))
                        logging.info('Policy took %.3f sec' % (policy_stop - policy_start))
                        logging.info('Num objects remaining: %d' % (bin_picking_env.num_objects))
                        if info['cleared_pile']:
                            logging.info('Cleared pile!')

                    # # store datapoint post-step data
                    # datapoint['rewards'] = reward
                    # datapoint['grasp_metrics'] = info['grasp_metric']
                    # datapoint['collisions'] = 1 * info['collides']
                    # datapoint['collisions_with_env'] = 1 * info['collides_with_static_obstacles']
                    # datapoint['grasped_obj_ids'] = obj_ids[info['grasped_obj_key']]
                    # datapoint['cleared_pile'] = 1 * info['cleared_pile']

                    # # store datapoint
                    # # dataset.add(datapoint)
                    # data_saved = True

                    # render observation
                    if vis_config['obs']:
                        vis2d.figure()
                        vis2d.imshow(observation, auto_subplot=True)
                        vis2d.show()

                    # render scene after
                    if vis_config['state']:
                        vis3d.figure()
                        bin_picking_env.render_3d_scene(render_camera=False)
                        vis3d.show(starting_camera_pose=CAMERA_POSE)
                        # vis3d.save('/home/mjd3/Pictures/weird_pics/%d_%d_after.png' % (heap_id, bin_picking_env.timestep), starting_camera_pose=CAMERA_POSE)
                        state.objs[0].T_obj_world = action['tmpR']
                        vis3d.figure()
                        bin_picking_env.render_3d_scene(render_camera=False)
                        vis3d.show(starting_camera_pose=CAMERA_POSE)
                        state.objs[0].T_obj_world = action['final_state']

                    # increment the number of steps
                    num_steps += 1
                    if num_steps >= steps_per_test_case:
                        done = True

            except NoActionFoundException as e:
                logging.warning('The policy failed to plan an action!')
                done = True

            except Exception as e:
                # log an error
                logging.warning('Rollout failed!')
                logging.warning('%s' % (str(e)))
                logging.warning(traceback.print_exc())

                # if debug:
                #     raise

                # reset env
                del bin_picking_env
                gc.collect()
                bin_picking_env = BinPickingEnv(config, vis_config)

                # terminate current rollout
                failed = True
                done = True

            # update test case id
            n += 1
            # dataset.flush()
            # logging.info("\n\nflushing")
            # logging.info("exiting")
            # sys.exit()

        # garbage collect
        del bin_picking_env
        gc.collect()

    # return the dataset
    # dataset.flush()

    # log time
    rollout_stop = time.time()
    logging.info('Rollouts took %.3f sec' % (rollout_stop - rollout_start))

    # NOTE: dataset is only defined when the TensorDataset output above is re-enabled
    return dataset
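
# --- Illustrative sketch (not part of the original script) ---------------------
# The fields_config patching at the top of benchmark_bin_picking_policy sizes each
# per-datapoint vector from the maximum pile size, and sizes 'distance_matrix' as
# the number of unique object pairs, (n choose 2). The standalone check below
# walks through that arithmetic; the pose_dim=7 (translation + quaternion) and
# point_dim=3 defaults are assumptions, not constants taken from the repository.
from scipy.special import comb


def _field_length_sketch(max_obj_per_pile=10, pose_dim=7, point_dim=3):
    """Return (obj_poses, obj_coms, distance_matrix) vector lengths."""
    obj_poses_len = pose_dim * max_obj_per_pile           # e.g. 70 for a 10-object pile
    obj_coms_len = point_dim * max_obj_per_pile           # e.g. 30
    distance_matrix_len = int(comb(max_obj_per_pile, 2))  # e.g. 45 == 10 * 9 / 2 unique pairs
    return obj_poses_len, obj_coms_len, distance_matrix_len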
def test_single_read_write(self):
    # seed
    np.random.seed(SEED)
    random.seed(SEED)

    # open dataset
    create_successful = True
    try:
        dataset = TensorDataset(TEST_TENSOR_DATASET_NAME, TENSOR_CONFIG)
    except:
        create_successful = False
    self.assertTrue(create_successful)

    # check field names
    write_datapoint = dataset.datapoint_template
    for field_name in write_datapoint.keys():
        self.assertTrue(field_name in dataset.field_names)

    # add the datapoint
    write_datapoint['float_value'] = np.random.rand()
    write_datapoint['int_value'] = int(100 * np.random.rand())
    write_datapoint['str_value'] = utils.gen_experiment_id()
    write_datapoint['vector_value'] = np.random.rand(HEIGHT)
    write_datapoint['matrix_value'] = np.random.rand(HEIGHT, WIDTH)
    write_datapoint['image_value'] = np.random.rand(HEIGHT, WIDTH, CHANNELS)
    dataset.add(write_datapoint)

    # check num datapoints
    self.assertTrue(dataset.num_datapoints == 1)

    # add metadata
    metadata_num = np.random.rand()
    dataset.add_metadata('test', metadata_num)

    # check written arrays
    dataset.flush()
    for field_name in dataset.field_names:
        filename = os.path.join(TEST_TENSOR_DATASET_NAME, 'tensors',
                                '%s_00000.npz' % (field_name))
        value = np.load(filename)['arr_0']
        if isinstance(value[0], str):
            self.assertTrue(value[0] == write_datapoint[field_name])
        else:
            self.assertTrue(np.allclose(value[0], write_datapoint[field_name]))

    # re-open the dataset
    del dataset
    dataset = TensorDataset.open(TEST_TENSOR_DATASET_NAME)

    # read metadata
    self.assertTrue(np.allclose(dataset.metadata['test'], metadata_num))

    # read datapoint
    read_datapoint = dataset.datapoint(0)
    for field_name in dataset.field_names:
        if isinstance(read_datapoint[field_name], str):
            self.assertTrue(read_datapoint[field_name] == write_datapoint[field_name])
        else:
            self.assertTrue(np.allclose(read_datapoint[field_name],
                                        write_datapoint[field_name]))

    # check iterator
    for read_datapoint in dataset:
        for field_name in dataset.field_names:
            if isinstance(read_datapoint[field_name], str):
                self.assertTrue(read_datapoint[field_name] == write_datapoint[field_name])
            else:
                self.assertTrue(np.allclose(read_datapoint[field_name],
                                            write_datapoint[field_name]))

    # read individual fields
    for field_name in dataset.field_names:
        read_datapoint = dataset.datapoint(0, field_names=[field_name])
        if isinstance(read_datapoint[field_name], str):
            self.assertTrue(read_datapoint[field_name] == write_datapoint[field_name])
        else:
            self.assertTrue(np.allclose(read_datapoint[field_name],
                                        write_datapoint[field_name]))

    # re-open the dataset in read-write mode
    del dataset
    dataset = TensorDataset.open(TEST_TENSOR_DATASET_NAME,
                                 access_mode=READ_WRITE_ACCESS)

    # delete datapoint
    dataset.delete_last()

    # check that the dataset is correct
    self.assertTrue(dataset.num_datapoints == 0)
    self.assertTrue(dataset.num_tensors == 0)
    for field_name in dataset.field_names:
        filename = os.path.join(TEST_TENSOR_DATASET_NAME, 'tensors',
                                '%s_00000.npz' % (field_name))
        self.assertFalse(os.path.exists(filename))

    # remove dataset
    if os.path.exists(TEST_TENSOR_DATASET_NAME):
        shutil.rmtree(TEST_TENSOR_DATASET_NAME)
def finetune_classification_cnn(config):
    """ Main function. """
    # read params
    dataset = config['dataset']
    x_names = config['x_names']
    y_name = config['y_name']
    model_dir = config['model_dir']
    debug = config['debug']

    num_classes = None
    if 'num_classes' in config.keys():
        num_classes = config['num_classes']

    batch_size = config['training']['batch_size']
    train_pct = config['training']['train_pct']
    model_save_period = config['training']['model_save_period']

    data_aug_config = config['data_augmentation']
    preproc_config = config['preprocessing']
    iterator_config = config['data_iteration']
    model_config = config['model']
    base_model_config = model_config['base']
    optimization_config = config['optimization']
    train_config = config['training']

    generator_image_shape = None
    if 'image_shape' in data_aug_config.keys():
        generator_image_shape = data_aug_config['image_shape']

    optimizer_name = optimization_config['optimizer']

    model_params = {}
    if 'params' in model_config.keys():
        model_params = model_config['params']
    base_model_params = {}
    if 'params' in base_model_config.keys():
        base_model_params = base_model_config['params']

    if debug:
        seed = 108
        random.seed(seed)
        np.random.seed(seed)

    # generate model dir
    if not os.path.exists(model_dir):
        os.mkdir(model_dir)
    model_id = utils.gen_experiment_id()
    model_dir = os.path.join(model_dir, 'model_%s' % (model_id))
    if not os.path.exists(model_dir):
        os.mkdir(model_dir)
    logging.info('Saving model to %s' % (model_dir))
    latest_model_filename = os.path.join(model_dir, 'weights_{epoch:05d}.h5')
    best_model_filename = os.path.join(model_dir, 'weights.h5')

    # save config
    training_config_filename = os.path.join(model_dir, 'training_config.yaml')
    config.save(training_config_filename)

    # open dataset
    dataset = TensorDataset.open(dataset)

    # split dataset
    indices_filename = os.path.join(model_dir, 'splits.npz')
    if os.path.exists(indices_filename):
        indices = np.load(indices_filename)['arr_0'].tolist()
        train_indices = indices['train']
        val_indices = indices['val']
    else:
        train_indices, val_indices = dataset.split(train_pct)
        indices = np.array({'train': train_indices, 'val': val_indices})
        np.savez_compressed(indices_filename, indices)
    num_train = train_indices.shape[0]
    num_val = val_indices.shape[0]
    val_steps = int(np.ceil(float(num_val) / batch_size))

    # init generator
    train_generator_filename = os.path.join(model_dir, 'train_preprocessor.pkl')
    val_generator_filename = os.path.join(model_dir, 'val_preprocessor.pkl')
    if os.path.exists(train_generator_filename):
        logging.info('Loading generators')
        train_generator = pkl.load(open(train_generator_filename, 'rb'))
        val_generator = pkl.load(open(val_generator_filename, 'rb'))
    else:
        logging.info('Fitting generator')
        train_generator = TensorDataGenerator(num_classes=num_classes,
                                              **data_aug_config)
        val_generator = TensorDataGenerator(
            featurewise_center=data_aug_config['featurewise_center'],
            featurewise_std_normalization=data_aug_config['featurewise_std_normalization'],
            image_shape=generator_image_shape,
            num_classes=num_classes)
        fit_start = time.time()
        train_generator.fit(dataset, x_names, y_name,
                            indices=train_indices, **preproc_config)
        val_generator.mean = train_generator.mean
        val_generator.std = train_generator.std
        val_generator.min_output = train_generator.min_output
        val_generator.max_output = train_generator.max_output
        val_generator.num_classes = train_generator.num_classes
        fit_stop = time.time()
        logging.info('Generator fit took %.3f sec' % (fit_stop - fit_start))
        pkl.dump(train_generator, open(train_generator_filename, 'wb'))
        pkl.dump(val_generator, open(val_generator_filename, 'wb'))
    if num_classes is None:
        num_classes = int(train_generator.num_classes)

    # init iterator
    train_iterator = train_generator.flow_from_dataset(dataset, x_names, y_name,
                                                       indices=train_indices,
                                                       batch_size=batch_size,
                                                       **iterator_config)
    val_iterator = val_generator.flow_from_dataset(dataset, x_names, y_name,
                                                   indices=val_indices,
                                                   batch_size=batch_size,
                                                   **iterator_config)

    # setup model
    base_cnn = ClassificationCNN.open(base_model_config['model'],
                                      base_model_config['type'],
                                      input_name=x_names[0],
                                      **base_model_params)
    cnn = FinetunedClassificationCNN(base_cnn=base_cnn,
                                     name='dexresnet',
                                     num_classes=num_classes,
                                     output_name=y_name,
                                     im_preprocessor=val_generator,
                                     **model_params)

    # setup training
    cnn.freeze_base_cnn()
    if optimizer_name == 'sgd':
        optimizer = SGD(lr=optimization_config['lr'],
                        momentum=optimization_config['momentum'])
    elif optimizer_name == 'adam':
        optimizer = Adam(lr=optimization_config['lr'])
    else:
        raise ValueError('Optimizer %s not supported!' % (optimizer_name))
    model = cnn.model
    model.compile(optimizer=optimizer,
                  loss=optimization_config['loss'],
                  metrics=optimization_config['metrics'])

    # train
    steps_per_epoch = int(np.ceil(float(num_train) / batch_size))
    latest_model_ckpt = ModelCheckpoint(latest_model_filename,
                                        period=model_save_period)
    best_model_ckpt = ModelCheckpoint(best_model_filename,
                                      save_best_only=True,
                                      period=model_save_period)
    train_history_cb = TrainHistory(model_dir)
    callbacks = [latest_model_ckpt, best_model_ckpt, train_history_cb]
    history = model.fit_generator(
        train_iterator,
        steps_per_epoch=steps_per_epoch,
        epochs=train_config['epochs'],
        callbacks=callbacks,
        validation_data=val_iterator,
        validation_steps=val_steps,
        class_weight=train_config['class_weight'],
        use_multiprocessing=train_config['use_multiprocessing'])

    # save model
    cnn.save(model_dir)

    # save history
    history_filename = os.path.join(model_dir, 'history.pkl')
    pkl.dump(history.history, open(history_filename, 'wb'))
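
# --- Illustrative sketch (not part of the original script) ---------------------
# finetune_classification_cnn persists its train/val split by wrapping a dict in a
# 0-d NumPy object array and reading it back with ['arr_0'].tolist(). The round
# trip below shows why .tolist() recovers the dict; note that recent NumPy
# releases require allow_pickle=True when loading object arrays, which the older
# code above does not pass.
import numpy as np


def _split_round_trip(path='splits_demo.npz'):
    indices = np.array({'train': np.arange(8), 'val': np.arange(8, 10)})
    np.savez_compressed(path, indices)  # stored under the default key 'arr_0'
    loaded = np.load(path, allow_pickle=True)['arr_0'].tolist()  # .tolist() on a 0-d object array returns the dict
    return loaded['train'], loaded['val']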
def test_multi_tensor_read_write(self):
    # seed
    np.random.seed(SEED)
    random.seed(SEED)

    # open dataset
    dataset = TensorDataset(TEST_TENSOR_DATASET_NAME, TENSOR_CONFIG)
    write_datapoints = []
    for i in range(DATAPOINTS_PER_FILE + 1):
        write_datapoint = {}
        write_datapoint['float_value'] = np.random.rand()
        write_datapoint['int_value'] = int(100 * np.random.rand())
        write_datapoint['str_value'] = utils.gen_experiment_id()
        write_datapoint['vector_value'] = np.random.rand(HEIGHT)
        write_datapoint['matrix_value'] = np.random.rand(HEIGHT, WIDTH)
        write_datapoint['image_value'] = np.random.rand(HEIGHT, WIDTH, CHANNELS)
        dataset.add(write_datapoint)
        write_datapoints.append(write_datapoint)

    # check num datapoints
    self.assertTrue(dataset.num_datapoints == DATAPOINTS_PER_FILE + 1)
    self.assertTrue(dataset.num_tensors == 2)

    # check read
    dataset.flush()
    del dataset
    dataset = TensorDataset.open(TEST_TENSOR_DATASET_NAME,
                                 access_mode=READ_WRITE_ACCESS)
    for i, read_datapoint in enumerate(dataset):
        write_datapoint = write_datapoints[i]
        for field_name in dataset.field_names:
            if isinstance(read_datapoint[field_name], str):
                self.assertTrue(read_datapoint[field_name] == write_datapoint[field_name])
            else:
                self.assertTrue(np.allclose(read_datapoint[field_name],
                                            write_datapoint[field_name]))

    for i, read_datapoint in enumerate(dataset):
        # check iterator item
        write_datapoint = write_datapoints[i]
        for field_name in dataset.field_names:
            if isinstance(read_datapoint[field_name], str):
                self.assertTrue(read_datapoint[field_name] == write_datapoint[field_name])
            else:
                self.assertTrue(np.allclose(read_datapoint[field_name],
                                            write_datapoint[field_name]))

    # check random item
    ind = np.random.choice(dataset.num_datapoints)
    write_datapoint = write_datapoints[ind]
    read_datapoint = dataset.datapoint(ind)
    for field_name in dataset.field_names:
        if isinstance(read_datapoint[field_name], str):
            self.assertTrue(read_datapoint[field_name] == write_datapoint[field_name])
        else:
            self.assertTrue(np.allclose(read_datapoint[field_name],
                                        write_datapoint[field_name]))

    # check deletion
    dataset.delete_last()
    self.assertTrue(dataset.num_datapoints == DATAPOINTS_PER_FILE)
    self.assertTrue(dataset.num_tensors == 1)
    for field_name in dataset.field_names:
        filename = os.path.join(TEST_TENSOR_DATASET_NAME, 'tensors',
                                '%s_00001.npz' % (field_name))
        self.assertFalse(os.path.exists(filename))

    dataset.add(write_datapoints[-1])
    for write_datapoint in write_datapoints:
        dataset.add(write_datapoint)
    self.assertTrue(dataset.num_datapoints == 2 * (DATAPOINTS_PER_FILE + 1))
    self.assertTrue(dataset.num_tensors == 3)

    # check valid
    for i in range(dataset.num_datapoints):
        read_datapoint = dataset.datapoint(i)
        write_datapoint = write_datapoints[i % (len(write_datapoints))]
        for field_name in dataset.field_names:
            if isinstance(read_datapoint[field_name], str):
                self.assertTrue(read_datapoint[field_name] == write_datapoint[field_name])
            else:
                self.assertTrue(np.allclose(read_datapoint[field_name],
                                            write_datapoint[field_name]))

    # check read then write out of order
    ind = np.random.choice(DATAPOINTS_PER_FILE)
    write_datapoint = write_datapoints[ind]
    read_datapoint = dataset.datapoint(ind)
    for field_name in dataset.field_names:
        if isinstance(read_datapoint[field_name], str):
            self.assertTrue(read_datapoint[field_name] == write_datapoint[field_name])
        else:
            self.assertTrue(np.allclose(read_datapoint[field_name],
                                        write_datapoint[field_name]))

    write_datapoint = write_datapoints[0]
    dataset.add(write_datapoint)
    read_datapoint = dataset.datapoint(dataset.num_datapoints - 1)
    for field_name in dataset.field_names:
        if isinstance(read_datapoint[field_name], str):
            self.assertTrue(read_datapoint[field_name] == write_datapoint[field_name])
        else:
            self.assertTrue(np.allclose(read_datapoint[field_name],
                                        write_datapoint[field_name]))
    dataset.delete_last()

    # check data integrity
    for i, read_datapoint in enumerate(dataset):
        write_datapoint = write_datapoints[i % len(write_datapoints)]
        for field_name in dataset.field_names:
            if isinstance(read_datapoint[field_name], str):
                self.assertTrue(read_datapoint[field_name] == write_datapoint[field_name])
            else:
                self.assertTrue(np.allclose(read_datapoint[field_name],
                                            write_datapoint[field_name]))

    # delete last
    dataset.delete_last(len(write_datapoints))
    self.assertTrue(dataset.num_datapoints == DATAPOINTS_PER_FILE + 1)
    self.assertTrue(dataset.num_tensors == 2)
    for i, read_datapoint in enumerate(dataset):
        write_datapoint = write_datapoints[i]
        for field_name in dataset.field_names:
            if isinstance(read_datapoint[field_name], str):
                self.assertTrue(read_datapoint[field_name] == write_datapoint[field_name])
            else:
                self.assertTrue(np.allclose(read_datapoint[field_name],
                                            write_datapoint[field_name]))

    # remove dataset
    if os.path.exists(TEST_TENSOR_DATASET_NAME):
        shutil.rmtree(TEST_TENSOR_DATASET_NAME)
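
# --- Illustrative sketch (not part of the original tests) ----------------------
# Both tests above rely on a module-level TENSOR_CONFIG. Below is a plausible
# shape for such a config, assuming the autolab_core TensorDataset convention of
# a 'datapoints_per_file' count plus per-field dtype/shape entries. The dtypes
# and the HEIGHT/WIDTH/CHANNELS/DATAPOINTS_PER_FILE stand-ins are illustrative,
# not the test module's real constants.
EXAMPLE_HEIGHT, EXAMPLE_WIDTH, EXAMPLE_CHANNELS = 3, 3, 3
EXAMPLE_TENSOR_CONFIG = {
    'datapoints_per_file': 10,
    'fields': {
        'float_value': {'dtype': 'float32'},
        'int_value': {'dtype': 'uint16'},
        'str_value': {'dtype': 'str'},
        'vector_value': {'dtype': 'float32', 'height': EXAMPLE_HEIGHT},
        'matrix_value': {'dtype': 'float32', 'height': EXAMPLE_HEIGHT,
                         'width': EXAMPLE_WIDTH},
        'image_value': {'dtype': 'float32', 'height': EXAMPLE_HEIGHT,
                        'width': EXAMPLE_WIDTH, 'channels': EXAMPLE_CHANNELS},
    }
}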