Esempio n. 1
0
    def process_segment(self, segment_id):
        """ Generate and store body and face images for one segment.

        For each group of faces loaded for the segment, the image they
        belong to is loaded from the input directory, handed to
        ``_process_batch`` together with the faces, and the two returned
        lists are stored under ``<output_dir>/body/<segment_id>`` and
        ``<output_dir>/face/<segment_id>``.

        Args:
            :param segment_id: segment id.
        """
        log_message = 'Generation of body-face images in segment {}'
        self._log.info(log_message.format(segment_id))

        # both the input and the detection directories must be usable
        check_directory(self._input_dir, 'Input')
        check_directory(self._detection_dir, 'Detection')

        for faces in self._load_faces_from_segment(segment_id):
            # every face in the group refers to the same original image;
            # the name is taken from the first one
            image_basename = faces[0].image_name

            # segments may nest the images inside extra inner folders
            path_parts = [
                self._input_dir,
                segment_id,
                *self.segment_input_extra,
                image_basename,
            ]
            image = self._load_input(os.path.join(*path_parts))
            body_list, face_list = self._process_batch(image, faces)

            # one output directory per image type for the given segment
            outputs = zip(('body', 'face'), (body_list, face_list))
            for im_type, images in outputs:
                segm_output_dir = os.path.join(
                    self._output_dir, im_type, segment_id)
                create_directory(segm_output_dir, 'Segment')
                self._store(images, segm_output_dir, image_basename)
Esempio n. 2
0
    def process_segment(self, segment_id):
        """ Distribute the frames of a segment into per-group directories.

        Each frame of the segment is looked up in the inverse map
        (image name -> group) loaded from the groups directory and stored
        under ``<output_dir>/<segment_id>/<group_id>``.

        Args:
            :param segment_id: segment id.
        """
        log_message = 'Synchronizing groups for segment {}'
        self._log.info(log_message.format(segment_id))

        # the segment must exist in the input directory ...
        segm_input_dir = os.path.join(self._input_dir, segment_id)
        check_directory(segm_input_dir, 'Input Segment')
        # ... and in the groups directory
        segm_groups_dir = os.path.join(self._groups_dir, segment_id)
        check_directory(segm_groups_dir, 'Groups Segment')

        # mapping image_name -> group
        inverse_map = self._load_groups_inverse_map(segm_groups_dir)

        for frame_path in self._segment_sequence(segm_input_dir):
            image_name = os.path.basename(frame_path)
            group_id = self._get_group(image_name, inverse_map)

            # requested for every frame; the first request for a segment
            # also covers the segment directory itself
            cluster_output_dir = os.path.join(
                self._output_dir, segment_id, group_id)
            create_directory(cluster_output_dir, 'Cluster')

            # store the frame inside its cluster directory
            destination = self._get_output_path(
                output_dir=cluster_output_dir, image_name=image_name)
            self._store(frame_path, destination)
Esempio n. 3
0
def init_callbacks(output_mode,
                   plot_stats=True,
                   save_model=False,
                   save_stats=False,
                   stop_early=False,
                   plot_step=1,
                   reduce_lr=False,
                   figsize=None):
    """ Build the list of Keras callbacks used during training.

    ``create_directory`` is called on ``<TMP_DIR>/training`` on every call,
    whichever callbacks are requested.

    Args:
        :param output_mode: output mode name; only compared against
            'both_splitted' to choose a default figure size.
        :param plot_stats: if true and no figsize is given, a default
            figure size is picked (the plotting callback is disabled).
        :param save_model: add a ModelCheckpoint monitoring 'val_loss'.
        :param save_stats: add a CSVLogger writing 'metrics.csv'.
        :param stop_early: add an EarlyStopping monitoring 'val_loss'.
        :param plot_step: currently unused (plotting callback disabled).
        :param reduce_lr: add a ReduceLROnPlateau monitoring 'val_loss'.
        :param figsize: figure size; currently unused, see plot_stats.

    Returns:
        :return: list of callbacks, ordered as checkpoint, CSV logger,
            learning-rate reducer, early stopper (only those requested).
    """
    training_dir = os.path.join(egosocial.config.TMP_DIR, 'training')
    create_directory(training_dir, 'Training')

    callbacks = []

    if save_model:
        # file name is filled in by Keras with the epoch and val_loss
        weights_template = 'weights.{epoch:02d}-{val_loss:.2f}.h5'
        callbacks.append(
            ModelCheckpoint(
                filepath=os.path.join(training_dir, weights_template),
                monitor='val_loss',
                save_best_only=True,
                period=5,
            ))

    if save_stats:
        callbacks.append(
            CSVLogger(os.path.join(training_dir, 'metrics.csv')))

    if reduce_lr:
        callbacks.append(
            ReduceLROnPlateau(monitor='val_loss',
                              factor=0.5,
                              patience=5,
                              min_lr=0.00001))

    if plot_stats and not figsize:
        # more plots need more space
        figsize = (25, 13) if output_mode == 'both_splitted' else (25, 5)

    # NOTE: the PlotLearning(update_step=plot_step, figsize=figsize)
    # callback is disabled, so plot_step and the figsize chosen above are
    # not consumed by anything at the moment.

    if stop_early:
        callbacks.append(
            EarlyStopping(monitor='val_loss',
                          min_delta=0,
                          patience=10,
                          mode='auto'))

    return callbacks
Esempio n. 4
0
    def process_segment(self, segment_id):
        """ Compute the face clustering of a segment and store it.

        The clustering is obtained from the segment's input directory via
        ``_get_face_clustering`` and stored in the segment's output
        directory under the groups file name.

        Args:
            :param segment_id: segment id.
        """
        # NOTE(review): this message matches the one used by the group
        # synchronisation step; confirm it is the intended text.
        log_message = 'Synchronizing groups for segment {}'
        self._log.info(log_message.format(segment_id))

        # the segment must exist in the input directory
        segm_input_dir = os.path.join(self._input_dir, segment_id)
        check_directory(segm_input_dir, 'Input Segment')

        # output directory for the segment (warns if it already exists)
        segm_output_dir = os.path.join(self._output_dir, segment_id)
        create_directory(
            segm_output_dir, 'Output Segment', warn_if_exists=True)

        # clustering is computed before the output path is resolved
        self._store(
            self._get_face_clustering(segm_input_dir),
            self._get_output_path(output_dir=segm_output_dir,
                                  file_name=self._groups_file_name),
        )
Esempio n. 5
0
    def process_segment(self, segment_id):
        """ Run face detection on every image of a segment.

        The result of ``process_image`` for each image is stored in the
        segment's output directory, at a path derived from the image path
        with a '.json' extension.

        Args:
            :param segment_id: segment id.
        """
        log_message = 'Face detection for segment {}'
        self._log.info(log_message.format(segment_id))

        # sanity check for the input directory
        check_directory(self._input_dir, 'Input')

        # output directory for the segment (warns if it already exists)
        segm_output_dir = os.path.join(self._output_dir, segment_id)
        create_directory(
            segm_output_dir, 'Output Segment', warn_if_exists=True)

        for image_path in self._get_images(segment_id):
            # detection runs first, then the output path is resolved
            self._store(
                self.process_image(image_path),
                self._get_output_path(output_dir=segm_output_dir,
                                      input_path=image_path,
                                      ext='.json'),
            )
    def process_segment(self, segment_id):
        """ Create the face pairs of a segment and store them.

        The pairs returned by ``get_face_pairs`` are stored in the
        segment's output directory under the pairs file name.

        Args:
            :param segment_id: segment id.
        """
        log_message = 'Create face pairs for segment {}'
        self._log.info(log_message.format(segment_id))

        # the segment must exist in the input directory
        segm_input_dir = os.path.join(self._input_dir, segment_id)
        check_directory(segm_input_dir, 'Input Segment')

        # output directory for the segment (warns if it already exists)
        segm_output_dir = os.path.join(self._output_dir, segment_id)
        create_directory(
            segm_output_dir, 'Output Segment', warn_if_exists=True)

        # pairs are computed before the output path is resolved
        self._store(
            self.get_face_pairs(segment_id),
            self._get_output_path(output_dir=segm_output_dir,
                                  file_name=self._pairs_file_name),
        )
    def process_segment(self, segment_id):
        """ Group the detected faces of a social segment.

        Faces loaded for the segment are passed to ``_face_clustering`` and
        the resulting clusters are stored in the segment's output directory
        under the groups file name.

        Args:
            :param segment_id: segment id.
        """
        log_message = 'Face clustering for segment {}'
        self._log.info(log_message.format(segment_id))

        # sanity check for the input directory
        check_directory(self._input_dir, 'Input')

        # output directory for the segment (warns if it already exists)
        segm_output_dir = os.path.join(self._output_dir, segment_id)
        create_directory(
            segm_output_dir, 'Output Segment', warn_if_exists=True)

        # load the detected faces and build the face groups
        clusters = self._face_clustering(
            self._load_ifaces_from_segment(segment_id))

        # save results
        destination = self._get_output_path(
            output_dir=segm_output_dir, file_name=self._groups_file_name)
        self._store(clusters, destination)
Esempio n. 8
0
def run(conf):
    """ Run a grid-search cross-validation and pickle its results.

    Loads precomputed features and labels, builds a scikit-learn pipeline
    (preprocessing + Keras classifier wrapper), fits a GridSearchCV over
    the hyper-parameter values found in ``conf`` and dumps
    ``cv_results_`` to a timestamped pickle file inside
    ``<TMP_DIR>/training``.

    Args:
        :param conf: configuration object. Attributes read here:
            dataset_path, features_dir, pca_components, Q, output_mode,
            model_strategy, drop_rate, lr, epochs, units, l2_reg,
            batch_size, lr_decay, schedule_lr.

    Returns:
        :return: None; the only output is the pickle file.
    """
    # Load precomputed features and labels
    helper = SocialClassifierWithPreComputedFeatures(
        conf.dataset_path,
        conf.features_dir,
        test_size=0.2,
        k_fold_splits=3,
        val_size=0.2,  # relative to training size
        seed=SHARED_SEED)

    helper.load_data()

    # Prepare splits: one (train indices, validation indices) pair per fold,
    # passed to GridSearchCV as its `cv` argument below
    train_val_splits = [(helper.get_split_idx('train', k_fold=k),
                         helper.get_split_idx('val', k_fold=k))
                        for k in range(helper.k_fold_splits)]

    # Parameters
    n_components, Q = conf.pca_components, conf.Q
    # presumably 9 attributes reduced to n_components each, plus 6 + 2 + 1
    # extra features -- TODO confirm the breakdown of these constants
    n_features = n_components * 9 + 6 + 2 + 1
    max_timestep = helper.max_sequence_len()

    helper._log.info(
        'Number of pca components per attribute (for visual embeddings): {}'.
        format(n_components))
    helper._log.info('Q: {}'.format(Q))
    helper._log.info('Length of the largest sequence: {}'.format(max_timestep))
    helper._log.info('Total number of features: {}'.format(n_features))

    # Output modes mentioned by the original author:
    #   'both_splitted' -> multi-loss domain-relation
    #   'domain'        -> domain only
    #   'relation'      -> relation only
    output_mode = conf.output_mode
    helper._log.info('Output mode: {}'.format(output_mode))

    model_strategy = conf.model_strategy
    helper._log.info('Model strategy: {}'.format(model_strategy))

    # Grid search CV: preprocessing step and data generator for the pipeline
    reduce_dim = Preprocessing(features_range=helper.features_range,
                               create_transformation_cbk=TransformationFactory(
                                   n_components=n_components,
                                   Q=Q,
                                   seed=SHARED_SEED))
    generator_builder = TimeSeriesDataGeneratorBuilder(
        maxlen=max_timestep,
        output_cbk=LabelExpander(mode=output_mode),
        seed=SHARED_SEED,
    )

    # every mode other than 'domain' uses 'relation' as the single output
    if output_mode == 'domain':
        single_output = 'domain'
    else:
        single_output = 'relation'

    metric_suffix = 'fmeasure'

    # used only if GridSearchCV scoring attribute is set to None
    # (single-output modes use the bare metric name, other modes prefix it
    # with the selected output name)
    if output_mode in ('domain', 'relation'):
        metric_score = metric_suffix
    else:
        metric_score = single_output + '_' + metric_suffix

    clf = KerasGeneratorClassifier(
        build_fn=build_model,
        build_generator=generator_builder,
        output_mode=output_mode,
        metric_score=metric_score,
        single_output=single_output,
        balanced=True,  # compute class_weights internally
        max_seq_len=max_timestep,
        feature_vector_size=n_features,
        recurrent_type='LSTM',
        hidden_fc=1,
        mode=output_mode,
        model_strategy=model_strategy,
        metrics=[fmeasure],
        verbose=1,
        workers=2,
    )

    pipeline = Pipeline([('reduce_dim', reduce_dim), ('clf', clf)])

    scoring = [
        'accuracy', 'recall_weighted', 'precision_weighted', 'f1_weighted',
        'recall_macro', 'precision_macro', 'f1_macro'
    ]

    # multi-output modes
    if output_mode == 'both_splitted':
        # NOTE(review): these names are defined outside this function; if
        # they are callables rather than scorer-name strings, confirm that
        # GridSearchCV accepts them mixed into a list of strings.
        dom_scoring = [
            dom_accuracy, dom_recall_weighted, dom_precision_weighted,
            dom_f1_weighted, dom_recall_macro, dom_precision_macro,
            dom_f1_macro
        ]
        scoring.extend(dom_scoring)

    # NOTE(review): `iid` was deprecated in scikit-learn 0.22 and removed in
    # 0.24 -- confirm the pinned scikit-learn version still accepts it.
    common_search_params = dict(
        estimator=pipeline,
        cv=train_val_splits,
        scoring=scoring,
        refit=False,
        return_train_score=True,
        iid=False,
        verbose=4,
        n_jobs=1,
    )

    # the 'clf__' prefix routes each entry to the 'clf' pipeline step
    param_grid = dict(
        clf__drop_rate=conf.drop_rate,
        clf__learning_rate=conf.lr,
        clf__epochs=conf.epochs,
        clf__units=conf.units,
        clf__l2_reg=conf.l2_reg,
        clf__batch_size=conf.batch_size,
        clf__decay=conf.lr_decay,
    )

    # only used as a tag in the results file name
    do_search = 'grid'
    search_cv = GridSearchCV(
        param_grid=param_grid,
        **common_search_params,
    )

    callbacks = []
    if conf.schedule_lr:
        # the schedule starts from the first learning rate in conf.lr, even
        # though the grid above may contain several values
        learning_rate = conf.lr[0]
        lr_scheduler = LearningRateScheduler(
            StepDecay(initial_lr=learning_rate, drop_rate=0.5, epochs_drop=10))
        callbacks.append(lr_scheduler)

    X = helper.features
    if output_mode == 'domain':
        # domain specific-labels
        y = relation_to_domain_vec(helper._labels)
    else:
        y = helper._labels

    # forwarded to the fit method of the 'clf' pipeline step
    fit_params = dict(
        clf__verbose=1,
        clf__callbacks=callbacks,
    )

    search_result = search_cv.fit(X, y, **fit_params)

    training_dir = os.path.join(egosocial.config.TMP_DIR, 'training')
    create_directory(training_dir, 'Training')

    # NOTE(review): the timestamp puts a space and colons into the file
    # name; colons are not valid in Windows file names. Also,
    # datetime.utcnow() is deprecated since Python 3.12.
    date_str = datetime.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')

    file_name = '{}_{}_results_{}_{}.pkl'.format(date_str, do_search,
                                                 output_mode, model_strategy)
    results_path = os.path.join(training_dir, file_name)

    # persist only the cv_results_ dictionary, not the fitted search object
    with open(results_path, 'wb') as file:
        pickle.dump(search_result.cv_results_,
                    file,
                    protocol=pickle.HIGHEST_PROTOCOL)