def process_segment(self, segment_id):
    """Generate body and face crop images for every detected face in a segment.

    Args:
        segment_id: identifier of the segment to process.
    """
    self._log.info(
        'Generation of body-face images in segment {}'.format(segment_id))
    # Fail fast if the required directories are missing.
    check_directory(self._input_dir, 'Input')
    check_directory(self._detection_dir, 'Detection')

    for faces in self._load_faces_from_segment(segment_id):
        # All faces in a batch come from the same original image.
        image_basename = faces[0].image_name
        # Segments may contain extra nested folders; build the path in parts.
        path_parts = [self._input_dir, segment_id]
        path_parts.extend(self.segment_input_extra)
        path_parts.append(image_basename)
        image = self._load_input(os.path.join(*path_parts))

        body_list, face_list = self._process_batch(image, faces)
        # Store bodies and faces under separate per-segment output trees.
        for im_type, images in (('body', body_list), ('face', face_list)):
            segm_output_dir = os.path.join(self._output_dir, im_type,
                                           segment_id)
            create_directory(segm_output_dir, 'Segment')
            self._store(images, segm_output_dir, image_basename)
def process_segment(self, segment_id):
    """Copy each frame of a segment into the directory of its face group.

    Args:
        segment_id: identifier of the segment to process.
    """
    self._log.info(
        'Synchronizing groups for segment {}'.format(segment_id))
    # Validate the per-segment input and groups directories up front.
    segm_input_dir = os.path.join(self._input_dir, segment_id)
    check_directory(segm_input_dir, 'Input Segment')
    segm_groups_dir = os.path.join(self._groups_dir, segment_id)
    check_directory(segm_groups_dir, 'Groups Segment')

    # Mapping image_name -> group, inverted from the stored groups file.
    groups_inverse_map = self._load_groups_inverse_map(segm_groups_dir)

    for frame_path in self._segment_sequence(segm_input_dir):
        image_name = os.path.basename(frame_path)
        group_id = self._get_group(image_name, groups_inverse_map)
        # Create the cluster directory on demand (the segment directory is
        # created implicitly the first time around).
        cluster_output_dir = os.path.join(self._output_dir, segment_id,
                                          group_id)
        create_directory(cluster_output_dir, 'Cluster')
        # Copy the frame from the original location into its cluster.
        output_path = self._get_output_path(output_dir=cluster_output_dir,
                                            image_name=image_name)
        self._store(frame_path, output_path)
def init_callbacks(output_mode, plot_stats=True, save_model=False,
                   save_stats=False, stop_early=False, plot_step=1,
                   reduce_lr=False, figsize=None):
    """Build the list of Keras training callbacks selected by the flags.

    Args:
        output_mode: training output mode; 'both_splitted' gets a taller
            default figure size for the (currently disabled) plotting.
        plot_stats: compute the plotting figure size (plot callback disabled).
        save_model: periodically checkpoint the best model by val_loss.
        save_stats: log per-epoch metrics to a CSV file.
        stop_early: stop training when val_loss stops improving.
        plot_step: update step for the (disabled) plotting callback.
        reduce_lr: reduce learning rate when val_loss plateaus.
        figsize: explicit figure size; a default is chosen when falsy.

    Returns:
        List of configured Keras callbacks.
    """
    result = []

    training_dir = os.path.join(egosocial.config.TMP_DIR, 'training')
    create_directory(training_dir, 'Training')

    if save_model:
        checkpoint_path = os.path.join(
            training_dir, 'weights.{epoch:02d}-{val_loss:.2f}.h5')
        result.append(ModelCheckpoint(
            filepath=checkpoint_path,
            monitor='val_loss',
            save_best_only=True,
            period=5,
        ))

    if save_stats:
        metrics_path = os.path.join(training_dir, 'metrics.csv')
        result.append(CSVLogger(metrics_path))

    if reduce_lr:
        result.append(ReduceLROnPlateau(monitor='val_loss', factor=0.5,
                                        patience=5, min_lr=0.00001))

    if plot_stats:
        # More plots need more vertical space.
        if not figsize:
            figsize = (25, 13) if output_mode == 'both_splitted' else (25, 5)
        # Plotting callback intentionally disabled:
        # plot_metrics = PlotLearning(update_step=plot_step, figsize=figsize)
        # result.append(plot_metrics)

    if stop_early:
        result.append(EarlyStopping(monitor='val_loss', min_delta=0,
                                    patience=10, mode='auto'))

    return result
def process_segment(self, segment_id):
    """Compute the face clustering for a segment and store it as groups file.

    Args:
        segment_id: identifier of the segment to process.
    """
    # NOTE(review): log message says "Synchronizing groups" but this method
    # stores a face clustering — possibly a copy-paste; kept as-is.
    self._log.info(
        'Synchronizing groups for segment {}'.format(segment_id))
    # Validate input and prepare output directory for this segment.
    segm_input_dir = os.path.join(self._input_dir, segment_id)
    check_directory(segm_input_dir, 'Input Segment')
    segm_output_dir = os.path.join(self._output_dir, segment_id)
    create_directory(segm_output_dir, 'Output Segment', warn_if_exists=True)

    face_clustering = self._get_face_clustering(segm_input_dir)
    # Persist the clustering under the configured groups file name.
    output_path = self._get_output_path(output_dir=segm_output_dir,
                                        file_name=self._groups_file_name)
    self._store(face_clustering, output_path)
def process_segment(self, segment_id):
    """Run face detection on every image of a segment and save the results.

    Args:
        segment_id: identifier of the segment to process.
    """
    self._log.info('Face detection for segment {}'.format(segment_id))
    # Sanity check on the top-level input directory.
    check_directory(self._input_dir, 'Input')
    # Per-segment output directory.
    segm_output_dir = os.path.join(self._output_dir, segment_id)
    create_directory(segm_output_dir, 'Output Segment', warn_if_exists=True)

    for image_path in self._get_images(segment_id):
        detected_faces = self.process_image(image_path)
        # One JSON detection file per input image.
        output_path = self._get_output_path(output_dir=segm_output_dir,
                                            input_path=image_path,
                                            ext='.json')
        self._store(detected_faces, output_path)
def process_segment(self, segment_id):
    """Create face pairs for a segment and store them in the pairs file.

    Args:
        segment_id: identifier of the segment to process.
    """
    self._log.info('Create face pairs for segment {}'.format(segment_id))
    # Validate the per-segment input directory.
    segm_input_dir = os.path.join(self._input_dir, segment_id)
    check_directory(segm_input_dir, 'Input Segment')
    # Prepare the per-segment output directory.
    segm_output_dir = os.path.join(self._output_dir, segment_id)
    create_directory(segm_output_dir, 'Output Segment', warn_if_exists=True)

    iface_pairs = self.get_face_pairs(segment_id)
    # Persist the pairs under the configured pairs file name.
    output_path = self._get_output_path(output_dir=segm_output_dir,
                                        file_name=self._pairs_file_name)
    self._store(iface_pairs, output_path)
def process_segment(self, segment_id):
    """Group faces in social segments.

    Args:
        :param segment_id: segment id.
    """
    self._log.info('Face clustering for segment {}'.format(segment_id))
    # Sanity check on the top-level input directory.
    check_directory(self._input_dir, 'Input')
    # Per-segment output directory.
    segm_output_dir = os.path.join(self._output_dir, segment_id)
    create_directory(segm_output_dir, 'Output Segment', warn_if_exists=True)

    # Load detected faces and cluster them into groups.
    ifaces = self._load_ifaces_from_segment(segment_id)
    clusters = self._face_clustering(ifaces)

    # Persist the clustering under the configured groups file name.
    output_path = self._get_output_path(output_dir=segm_output_dir,
                                        file_name=self._groups_file_name)
    self._store(clusters, output_path)
def run(conf):
    """Run grid-search cross-validation for the social relation classifier.

    Loads precomputed features/labels, builds a preprocessing + Keras
    classifier pipeline, grid-searches the hyper-parameters given in ``conf``
    over the helper's train/val folds, and pickles ``cv_results_`` to a
    timestamped file under the training directory.

    Args:
        conf: configuration namespace with dataset paths, hyper-parameter
            lists (drop_rate, lr, epochs, units, l2_reg, batch_size,
            lr_decay), pca_components, Q, output_mode, model_strategy and
            schedule_lr.
    """
    # Loading precomputed features and labels.
    helper = SocialClassifierWithPreComputedFeatures(
        conf.dataset_path, conf.features_dir,
        test_size=0.2,
        k_fold_splits=3,
        val_size=0.2,  # relative to training size
        seed=SHARED_SEED)
    helper.load_data()

    # Prepare (train_idx, val_idx) splits, one pair per fold.
    train_val_splits = [(helper.get_split_idx('train', k_fold=k),
                         helper.get_split_idx('val', k_fold=k))
                        for k in range(helper.k_fold_splits)]

    # Parameters.
    n_components, Q = conf.pca_components, conf.Q
    # 9 visual-attribute embeddings reduced to n_components each, plus
    # 6 + 2 + 1 extra features — TODO confirm the exact feature layout.
    n_features = n_components * 9 + 6 + 2 + 1
    max_timestep = helper.max_sequence_len()
    # NOTE(review): uses helper's private _log / _labels attributes; a public
    # accessor would be preferable, kept for compatibility.
    helper._log.info(
        'Number of pca components per attribute (for visual embeddings): {}'.
        format(n_components))
    helper._log.info('Q: {}'.format(Q))
    helper._log.info('Length of the largest sequence: {}'.format(max_timestep))
    helper._log.info('Total number of features: {}'.format(n_features))

    # 'both_splitted': multi-loss domain-relation;
    # 'domain': domain only; 'relation': relation only.
    output_mode = conf.output_mode
    helper._log.info('Output mode: {}'.format(output_mode))
    model_strategy = conf.model_strategy
    helper._log.info('Model strategy: {}'.format(model_strategy))

    # Grid search CV.
    reduce_dim = Preprocessing(
        features_range=helper.features_range,
        create_transformation_cbk=TransformationFactory(
            n_components=n_components, Q=Q, seed=SHARED_SEED))

    generator_builder = TimeSeriesDataGeneratorBuilder(
        maxlen=max_timestep,
        output_cbk=LabelExpander(mode=output_mode),
        seed=SHARED_SEED,
    )

    single_output = 'domain' if output_mode == 'domain' else 'relation'
    # Used only if GridSearchCV scoring attribute is set to None.
    metric_suffix = 'fmeasure'
    if output_mode in ('domain', 'relation'):
        metric_score = metric_suffix
    else:
        metric_score = single_output + '_' + metric_suffix

    clf = KerasGeneratorClassifier(
        build_fn=build_model,
        build_generator=generator_builder,
        output_mode=output_mode,
        metric_score=metric_score,
        single_output=single_output,
        balanced=True,  # compute class_weights internally
        max_seq_len=max_timestep,
        feature_vector_size=n_features,
        recurrent_type='LSTM',
        hidden_fc=1,
        mode=output_mode,
        model_strategy=model_strategy,
        metrics=[fmeasure],
        verbose=1,
        workers=2,
    )

    pipeline = Pipeline([('reduce_dim', reduce_dim), ('clf', clf)])

    scoring = [
        'accuracy', 'recall_weighted', 'precision_weighted', 'f1_weighted',
        'recall_macro', 'precision_macro', 'f1_macro'
    ]
    # Multi-output modes get extra domain-specific scorers.
    if output_mode == 'both_splitted':
        dom_scoring = [
            dom_accuracy, dom_recall_weighted, dom_precision_weighted,
            dom_f1_weighted, dom_recall_macro, dom_precision_macro,
            dom_f1_macro
        ]
        scoring.extend(dom_scoring)

    common_search_params = dict(
        estimator=pipeline,
        cv=train_val_splits,
        scoring=scoring,
        refit=False,
        return_train_score=True,
        # NOTE(review): `iid` is deprecated (removed in sklearn >= 0.24);
        # drop it when upgrading scikit-learn.
        iid=False,
        verbose=4,
        n_jobs=1,
    )

    param_grid = dict(
        clf__drop_rate=conf.drop_rate,
        clf__learning_rate=conf.lr,
        clf__epochs=conf.epochs,
        clf__units=conf.units,
        clf__l2_reg=conf.l2_reg,
        clf__batch_size=conf.batch_size,
        clf__decay=conf.lr_decay,
    )

    do_search = 'grid'
    search_cv = GridSearchCV(
        param_grid=param_grid,
        **common_search_params,
    )

    callbacks = []
    if conf.schedule_lr:
        learning_rate = conf.lr[0]
        lr_scheduler = LearningRateScheduler(
            StepDecay(initial_lr=learning_rate, drop_rate=0.5,
                      epochs_drop=10))
        callbacks.append(lr_scheduler)

    X = helper.features
    if output_mode == 'domain':
        # Domain-specific labels derived from relation labels.
        y = relation_to_domain_vec(helper._labels)
    else:
        y = helper._labels

    fit_params = dict(
        clf__verbose=1,
        clf__callbacks=callbacks,
    )
    search_result = search_cv.fit(X, y, **fit_params)

    # Persist CV results to a timestamped pickle.
    training_dir = os.path.join(egosocial.config.TMP_DIR, 'training')
    create_directory(training_dir, 'Training')
    # FIX: previous format '%Y-%m-%d %H:%M:%S' put spaces and colons in the
    # file name, which is invalid on Windows; use a filesystem-safe format.
    date_str = datetime.datetime.utcnow().strftime('%Y-%m-%d_%H-%M-%S')
    file_name = '{}_{}_results_{}_{}.pkl'.format(date_str, do_search,
                                                 output_mode, model_strategy)
    results_path = os.path.join(training_dir, file_name)
    with open(results_path, 'wb') as file:
        pickle.dump(search_result.cv_results_, file,
                    protocol=pickle.HIGHEST_PROTOCOL)