def __init__(self, config_file, n_folds=4):
    """Keep the constructor arguments, load the config and create a work dir.

    Args:
        config_file: path of the config file, handed to
            scene_functions.get_config together with the CFG_SPEC lines
        n_folds: stored unchanged on the instance (defaults to 4)
    """
    self.config_file, self.n_folds = config_file, n_folds
    spec_lines = CFG_SPEC.split('\n')
    self.cfg = scene_functions.get_config(config_file, spec_lines)
    # the directory returned by mkdtemp() becomes this instance's base dir
    work_dir = mkdtemp()
    self.base_dir = work_dir
    logger.info("Base dir : %s", work_dir)
    def _load_config(self, config_file):
        """Load the config file and expose every option as an attribute.

        Each option ``name`` found in section ``key`` of the parsed config
        is stored on the instance as ``<key>_<name>``.  ``target_labels`` is
        then built from two of those generated attributes.

        Args:
            config_file: path of the config file, handed to get_config
                together with the STATIC_VIDEO_CFG_SPEC lines
        """
        config = get_config(config_file, STATIC_VIDEO_CFG_SPEC.splitlines())
        # items() is available on Python 2 and Python 3 mappings alike;
        # iteritems() raises AttributeError on Python 3.
        for key, values in config.items():
            for name, value in values.items():
                self.__dict__["{}_{}".format(key, name)] = value

        # classifier_photo_tl_id / classifier_slideshow_tl_id are expected to
        # have been created by the loop above (presumably from a "classifier"
        # section -- confirm against the spec).
        self.target_labels = {
            self.PHOTO: self.classifier_photo_tl_id,
            self.SLIDESHOW: self.classifier_slideshow_tl_id
        }
 def load_config_file(self, configfile_name):
     """Fill the instance params from the given config file.

     Args:
         configfile_name: path of the config file, handed to get_config
             together with the VTEXT_CFG_SPEC lines

     Returns:
         True when every param was loaded, False when get_config returned
         a falsy object.

     Raises:
         AssertionError: if CaffeModel.by_name returns a falsy value for
             the configured model name
     """
     cfg_obj = get_config(configfile_name, spec=VTEXT_CFG_SPEC.split('\n'))
     if not cfg_obj:
         return False

     data_params = cfg_obj['data_params']
     self.max_imgs = data_params['max_imgs']
     self.model_name = data_params['model_name']
     # Raised explicitly instead of through `assert` so the check is still
     # performed when Python runs with -O; the exception type is unchanged.
     if not CaffeModel.by_name(self.model_name):
         raise AssertionError(
             "%s model, from Config file, does not exist in the DB" %
             (self.model_name))
     logger.info('Using %s', self.model_name)
     self.pred_th = data_params['pred_th']
     self.score_th = data_params['score_th']
     return True
def load_training_config(configfile_name):
    """
    Load the part of the config file relative to training of this classifier

    Args:
        configfile_name: path of the config file, handed to get_config
            together with the TRAINCNNCLF_CFG_SPEC lines
    Returns:
        the 'training_folders' entry of the 'train_params' section, or an
        empty list when get_config returns a falsy object
    Raises:
        AssertionError: if Label.get returns a falsy value for one of the
            checked target label ids
    """
    cfg_obj = get_config(
        configfile_name, spec=TRAINCNNCLF_CFG_SPEC.split('\n'))
    if not cfg_obj:
        return []

    train_params = cfg_obj['train_params']
    training_folders = train_params['training_folders']
    # Only every second element (starting at index 1) of the flat
    # target_label_ids list is checked -- presumably the DB label id of each
    # (classifier output, label id) pair; confirm against the spec.
    for l_id in train_params['target_label_ids'][1::2]:
        # Explicit raise instead of `assert`: the validation must not vanish
        # under -O, and callers still see an AssertionError.
        if not Label.get(l_id):
            raise AssertionError(
                " Target label id %d does not exist in the DB" % (l_id))

    return training_folders
# Example #5
# 0
    def _load_config_file(self, configfile_name):
        """
        Fill the classifier params from the given config file

        Nothing is set when get_config returns a falsy object.

        Args:
            configfile_name: string with full-path name of the config file
        Raises/Assertions:
            AssertionError: get_config raises AssertionError if configfile \
                has bad formatting: 'Config file validation failed'
            AssertionError: if target_label_ids has an odd number of
                elements, or if CaffeModel.by_name returns a falsy value
                for the configured CNN model name
        """
        cfg_obj = get_config(configfile_name, spec=CNNCLF_CFG_SPEC.split('\n'))
        if not cfg_obj:
            return

        target_label_info = cfg_obj['train_params']['target_label_ids']
        # Explicit raises instead of `assert` keep the validation active
        # under -O while preserving the AssertionError type.
        if len(target_label_info) % 2 != 0:
            raise AssertionError(
                "target_label_ids should have an even number of elements, "
                "representing pairs of classifier output and DB label id, "
                " e.g., 0, 1004, 1, 1006")
        # flat [out0, id0, out1, id1, ...] list -> {out0: id0, out1: id1, ...}
        self.target_labels = dict(
            zip(target_label_info[0::2], target_label_info[1::2]))

        # list(...) is required on Python 3, where dict.values() returns a
        # view that can be neither sliced nor printed as a plain list.
        label_ids = list(self.target_labels.values())
        if len(label_ids) > 3:
            label_list = str(label_ids[0:3]) + '...'
        else:
            label_list = str(label_ids)
        logger.info("Loaded classifier has %d target labels (%s)",
                    len(self.target_labels), label_list)

        cnn_params = cfg_obj['cnn_model_params']
        self.cnn_model_name = cnn_params['model_name']
        self.cnn_layer_name = cnn_params['layer_name']
        self.oversampling = cnn_params['oversampling']
        if not CaffeModel.by_name(self.cnn_model_name):
            raise AssertionError(
                "%s model, from Config file, does not exist in the DB" %
                (self.cnn_model_name))
        logger.info('Using %s', self.cnn_model_name)
        self._load_classifiers_params(cfg_obj)

        crossval_params = cfg_obj['crossval_params']
        self.crossval_tsize = crossval_params['test_size']
        self.crossval_accept = crossval_params['valid_th']

        acceptance_params = cfg_obj['acceptance_params']
        self.max_frames_per_video = acceptance_params['max_num_frames']
        self.accept_th = acceptance_params['accept_th']
        self.min_accept = acceptance_params['min_accept']
def scene_detector(config_filename, inter_dir):
    """Run the scene-classifier training pipeline end to end.

    Steps, in order: url injection, waiting for the video downloads, Mturk
    submission/ingestion, dataset creation, training, testing, evaluation
    and -- only when scene.tpr >= TPR and scene.fpr <= FPR -- injection of
    the classifier.

    Args:
        config_filename: path of the config file, handed to get_config
            together with the SCENE_CFG_SPEC lines
        inter_dir: directory handed to the url-injection, Mturk and
            SceneClassifier steps; 'all_urls.txt' is looked up inside it
    """
    logger.info("checking config file")
    config_scene = get_config(config_filename, SCENE_CFG_SPEC.split('\n'))
    logger.info("starting training pipeline")
    logger.info("url injection")
    myurls = run_url_injection.query_video_urls(config_scene, inter_dir)
    logger.info("finished url injection")

    logger.info("injected %d urls in the database", myurls)
    urls = os.path.join(inter_dir, 'all_urls.txt')
    d_start = datetime.now()
    logger.info("checking if videos have been donwloaded")
    n_videos = run_url_injection.check_videos_downloaded(
        urls, config_scene, d_start)
    logger.info("Downloaded %d videos out of %s initial urls",
                n_videos, config_scene["url_injection"]["max_vids"])

    logger.info("Submiting hits to mechanical turk")
    run_mturk_submission.mturk_submission_ingestion(config_scene, inter_dir)
    # No args are passed here, so logging does not %-format the message and
    # the literal '%' is safe.
    logger.info("Mturk process finished. At least 95% of answers ingested")
    scene = SceneClassifier(config_filename, inter_dir)
    scene.set_params()

    logger.info("Obtaining positive examples from Mturk")
    posdata = scene.get_positive_examples_mturk()
    logger.info("Training Scene classifier")
    negdata = scene.write_negative_examples()
    logger.info("creating datasets")
    scene.create_dataset_partitions(posdata, negdata)
    logger.info("training")
    scene.train_classifier()
    logger.info("testing")
    results = scene.test_classifier()
    logger.info("Evaluating")
    scene.evaluate_classifier(results)
    if scene.tpr >= TPR and scene.fpr <= FPR:
        scene.inject_classifier()
    else:
        # %s instead of %d: %d truncates fractional rates (0.93 -> 0), which
        # makes the message useless for deciding how far off the model is.
        logger.info('Retrain classifier. TPR = %s , FPR = %s',
                    scene.tpr, scene.fpr)
# Example #7
# 0
    def load_config_file(self, configfile_name):
        """
        Fill the instance params from the given config file

        Args:
            configfile_name: path of the config file, handed to get_config
                together with the VTEXT_CFG_SPEC lines
        Returns:
            True when all params were loaded, False when get_config returned
            a falsy object
        Raises:
            AssertionError: if CaffeModel.by_name returns a falsy value for
                the word-detection model, or for the bounding-box regression
                model when one is configured
        """
        cfg_obj = get_config(configfile_name, spec=VTEXT_CFG_SPEC.split('\n'))
        if not cfg_obj:
            return False

        data_params = cfg_obj['data_params']
        box_extractor = data_params['box_extractor']
        self.max_img_size = data_params['max_img_size']
        self.max_imgs = data_params['max_imgs']
        self.rfc_score_th = data_params['rfc_score_th']
        self.bb_score_th = data_params['bb_score_th']

        wd_dict = cfg_obj['word_det_params']
        self.word_img_size = tuple(wd_dict['img_size'])
        self.pred_thresh = wd_dict['pred_thresh']
        self.word_det_model_name = wd_dict['model_name']
        # Explicit raise instead of `assert`: the check must survive -O, and
        # callers still receive an AssertionError.
        if not CaffeModel.by_name(self.word_det_model_name):
            raise AssertionError(
                "%s model, from Config file, does not exist in the DB" %
                (self.word_det_model_name))

        # box_extractor is the name of the config section that holds the
        # keyword arguments of the box extractor.
        self.box_ext_kwargs = cfg_obj[box_extractor]
        if box_extractor == 'edge_box_params':
            self.box_ext_cls = EdgeBoxExtractor
            # 'patch_widths' is replaced by 'patch_shapes': every width is
            # paired with the first element of word_img_size.
            patch_widths = self.box_ext_kwargs.pop('patch_widths')
            self.box_ext_kwargs['patch_shapes'] = [
                (self.word_img_size[0], w) for w in patch_widths]
        elif box_extractor == 'grid_box_params':
            self.box_ext_cls = GridBoxExtractor
        # NOTE(review): any other box_extractor value leaves self.box_ext_cls
        # unset -- confirm the spec restricts it to the two names above.
        logger.info('Using %s', self.word_det_model_name)

        self.bb_reg_model_name = cfg_obj['bb_regress_params']['model_name']
        # the regression model is optional; it is validated only when named
        if self.bb_reg_model_name:
            if not CaffeModel.by_name(self.bb_reg_model_name):
                raise AssertionError(
                    "%s model, from Config file, does not exist in the DB" %
                    (self.bb_reg_model_name))

        return True
# Example #8
# 0
    def load_config_file(self, configfile_name):
        """
        Fill the classifier params from the given config file

        Args:
            configfile_name: string with full-path name of the config file
        Returns:
            True on successful config load

        Raises/Assertions:
            AssertionError: get_config raises AssertionError if configfile \
                has bad formatting: 'Config file validation failed'
        """
        cfg_obj = get_config(configfile_name, spec=VIDEOG_CFG_SPEC.split('\n'))

        sampling = cfg_obj['sampling_params']
        self.sampling_duration = sampling['duration']
        self.sampling_fps = sampling['fps']
        self.sampling_scale = sampling['frame_scale']

        opt_flow = cfg_obj['opt_flow_params']
        self.get_masked_flow = opt_flow['get_masked_flow']
        self.flow_numBins = opt_flow['flow_numBins']

        color_hist = cfg_obj['color_hist_params']
        self.color_numBins = color_hist['color_numBins']
        self.colorspaces = color_hist['colorspaces']
        self.num_frames_per_video = color_hist['num_frames_per_video']

        svm = cfg_obj['svm_params']
        self.svm_type = svm['svm_type']
        self.kernel_type = svm['svm_kernel']
        self.gamma = svm['svm_gamma']

        crossval = cfg_obj['crossval_params']
        self.crossval_test_size = crossval['test_size']
        self.crossval_th = crossval['valid_th']

        classif = cfg_obj['classif_params']
        self.confidence_th = classif['conf_th']
        self.ratio_motioncolor_votes = classif['ratio_v']
        self.accept_th = classif['accept_th']

        # The documented contract is "True on successful config load"; the
        # function previously fell off the end and returned None.
        return True
# Example #9
# 0
    def __init__(self, model_dir):
        """The LogoMatcher is used to encapsulate all the information of
        a trained logo-model, the model_dir needs the following files,
        bow, knn, logos, model.cfg

        Args:
            model_dir: directory of the trained model; every file above is
                resolved through self.model_path
        """
        self.model_dir = model_dir
        self.bow = BagOfWords.load_from_dir(self.model_path('bow'))
        self.knn = KNNScikit.load_from_file(self.model_path('knn'))
        # Pickle data is binary: it has to be read through a file opened with
        # 'rb' (text mode fails on Python 3 and can corrupt data on Windows).
        # NOTE(review): pickle.load can execute arbitrary code from the file;
        # only load model dirs that come from a trusted source.
        with open(self.model_path('logos'), 'rb') as f:
            self.training_logos = pickle.load(f)
        self.cfg = get_config(self.model_path('model.cfg'),
                              CFG_SPEC.split('\n'))

        # some params for performing matching
        self.k_neighbors = self.cfg['KNN']['k_neighbors']
        rbm = self.cfg['RbM']
        self.min_points = rbm['min_points']
        self.min_matches = rbm['min_matches']
        self.ransac_th = rbm['ransac_th']
        self.accept_th = rbm['accept_th']
        self.ransac_algorithm = rbm['ransac_algorithm']
        self.ransac_max_iter = rbm['ransac_max_iter']
        self.ransac_prob = rbm['ransac_prob']
        self.inlier_r = rbm['inlier_r']
        # size parameters
        self.resize = self.cfg['size']['resize']
        self.standard_width = self.cfg['size']['standard_width']
        # box finder params; patch_shapes, scales and corner_area_sz are
        # stored as strings in the config and parsed as Python literals
        bof = self.cfg['BOF']
        self.patch_shapes = ast.literal_eval(bof['patch_shapes'])
        self.scales = ast.literal_eval(bof['scales'])
        self.step_size = bof['step_size']
        self.center_area_offset = bof['center_area_offset']
        self.corner_area_sz = ast.literal_eval(bof['corner_area_sz'])
        self.raise_on_size = bof['raise_on_size']
        self.contrast_thresh = bof['contrast_thresh']
        self.variance_thresh = bof['variance_thresh']
# Example #10
# 0
def get_training_data_from_youtube(config_file):
    """
    Query Youtube, inject the found urls into the download queue, wait for
    the downloads and submit the result as Mturk hits.

    Args:
        config_file: path to config file including target label info, query
            details (keywords, playlist, hit type, ...).
            For more details, see the GetTrainingConfigfile example.
    Returns:
        tuple (job, injected_urls, all_urls): the job returned by the Mturk
        submission, the result of the url injection, and the stripped lines
        of the 'all_urls.txt' file written into the temporary work dir.
    """
    config = get_config(config_file, CFG_SPEC.split('\n'))
    # every configured value other than 'VideoHit' selects ImageHit
    configured_hit = config['mturk_submission_params']['hit_type']
    hit_type = VideoHit if configured_hit == 'VideoHit' else ImageHit

    all_files_dir = tempfile.mkdtemp()
    logger.info(
        " Query Youtube for Urls and inject them to the download queue.")
    injected_urls = run_url_injection.query_video_urls(config, all_files_dir)
    with open(os.path.join(all_files_dir, 'all_urls.txt'), 'r') as url_fh:
        all_urls = [line.strip() for line in url_fh]

    logger.info(
        "Waiting until a reasonable amount of urls are already ingested.")
    wait_start = datetime.now()
    num_vids = run_url_injection.check_videos_downloaded(
        all_urls, config, wait_start)
    logger.info("Most part (%d) of the videos are downloaded." % (num_vids))

    # the middle element of the submission result is not needed here
    job, _, num_hits_submitted = run_mturk_submission.mturk_submission_only(
        config, all_urls, hit_type)
    logger.info("Submitted %d %s" % (num_hits_submitted, str(hit_type)))

    return job, injected_urls, all_urls
# Example #11
# 0
 def __init__(self, model_dir):
     """Load the face index and the model config found in model_dir.

     Args:
         model_dir: directory containing FACE_MODEL_INDEX_FILE and
             FACE_MODEL_CONFIG_FILE
     """
     index_file = os.path.join(model_dir, FACE_MODEL_INDEX_FILE)
     config_file = os.path.join(model_dir, FACE_MODEL_CONFIG_FILE)
     self.face_index = FaceIndex.load_from_file(index_file)
     spec_lines = FACE_MODEL_CONFIG_SPEC.split('\n')
     self.config = get_config(config_file, spec_lines)
# Example #12
# 0
def get_model_name(clf_model_dir):
    """Return the 'model_name' entry of the config stored in clf_model_dir.

    The config file is CFG_FILE_NAME inside clf_model_dir and is read
    through get_config together with the CFG_SPEC lines.
    """
    config_location = os.path.join(clf_model_dir, CFG_FILE_NAME)
    spec_lines = CFG_SPEC.split('\n')
    return get_config(config_location, spec_lines)['model_name']
def train(config_file):
    """The method that wraps around all steps required to train
    a new Logo recognition model

    Args:
        config_file : The path to the config file

    Raises:
        AssertionError: if the config file, the logo dir or (when a
            pre-trained BoW is requested) the BoW dir does not exist

    It saves all the files in the op_model_dir mentioned in the config_file
    """
    # Path checks are raised explicitly instead of through `assert` so they
    # are still performed under -O; the AssertionError type is preserved.
    if not os.path.exists(config_file):
        raise AssertionError("Invalid Config file")

    logger.info("Validating Config File")
    cfg = scene_functions.get_config(config_file, CFG_SPEC.split('\n'))

    logo_dir = cfg['logo_dir']
    if not os.path.isdir(logo_dir):
        raise AssertionError("Logo dir does not exist !")

    op_model_dir = cfg['op_model_dir']
    if not os.path.isdir(op_model_dir):
        os.makedirs(op_model_dir)

    # train BoW
    bow_cfg = cfg['BoW']
    if bow_cfg['train_bow']:
        logger.info("Training BoW")
        feat_ext = SurfExtractor(
            hessian_thresh=cfg['SURF']['hessianThreshold'],
            keypoint_limit=cfg['SURF']['keypoint_limit'])
        # every directory entry of logo_dir is used as a training image path
        logo_paths = [
            os.path.join(logo_dir, img) for img in os.listdir(logo_dir)
        ]
        bow, _ = train_bag_of_words(bow_cfg['vocabsize'],
                                    feat_ext,
                                    logo_paths,
                                    num_images=bow_cfg['num_train_images'])
    else:
        logger.info("Using pre-trained bow model")
        bow_dir = bow_cfg['bow_dir']
        if not os.path.isdir(bow_dir):
            raise AssertionError("BoW dir does not exist")
        bow = BagOfWords.load_from_dir(bow_dir)

    # train KNN
    logger.info("Training KNN")
    knn = KnnTrainer(bow,
                     neighbors=cfg['KNN']['k_neighbors'],
                     metric=cfg['KNN']['metric'])
    # Pass the configured limit through: the value used to be read from the
    # config and then ignored because None was hard-coded in the call.
    max_logo_size = cfg['size']['max_logo_size']
    logos = Logo.load_from_dir(logo_dir, max_logo_size=max_logo_size)
    knn.train(logos)

    # saving all the trained files
    logger.info("Saving all to %s", op_model_dir)
    knn.save_knn(op_model_dir)
    knn.save_logos(op_model_dir)
    bow.save_to_dir(os.path.join(op_model_dir, 'bow'))

    # saving the config file next to the model so the model dir is
    # self-describing
    cfg.filename = os.path.join(op_model_dir, "model.cfg")
    cfg.write()
 def get_model_name(model_dir):
     """Return the 'model_name' entry of the config found in model_dir.

     The config file is FACE_REC_CONFIG_FILE inside model_dir and is read
     through get_config together with the FACE_REC_CONFIG_SPEC lines.
     """
     spec_lines = FACE_REC_CONFIG_SPEC.split('\n')
     config_location = os.path.join(model_dir, FACE_REC_CONFIG_FILE)
     return get_config(config_location, spec_lines)['model_name']