def inject_detector(self, detector_name, label_id, pred_thresh=None):
        """Register a new detector in the db and upload its model files.

        Creates the detector in the table TextDetectClassifier (through
        ``self.classifier_cls``), attaches the target label to it and uploads
        the tar file with all the model files to s3.

        Args:
            detector_name: string with the detector name; it has to be unique
                because an existing detector can't be replaced
            label_id: int, id of the detector's target label
            pred_thresh: float threshold for word detection (optional)

        Returns:
            The newly created classifier object.

        Raise/Assertions:
            Asserts when label_id does not correspond to any existing label
            and when a classifier named detector_name already exists in the db
        """
        target_label = Label.get(label_id)
        assert target_label, \
            'Label id %d does not correspond to any Label!' % (label_id)

        classifier_cls = self.classifier_cls
        existing = classifier_cls.by_name(detector_name)
        assert not existing, '%s with name %s already exists!'\
            % (classifier_cls.__name__, detector_name)

        new_clf = classifier_cls(name=detector_name, pred_thresh=pred_thresh)
        # Flush before attaching targets and uploading the new object.
        session.flush()
        new_clf.add_targets([target_label])
        self.tar_and_upload(new_clf)

        message = '%s detector injected %s' % (classifier_cls.__name__,
                                               new_clf)
        logger.info(message)
        return new_clf
Example #2
0
    def inject(self, det_name, target_label, video_threshold,
               training_images, image_threshold=None):
        """
        Create the detector, store its training images and upload it.

        When image_threshold is left as None, the value is taken from the
        SpatialSceneClassifier loaded from self.model_dir.

        Params:
            det_name: Name of the detector to create.
            target_label: Target label of the detector.
            video_threshold: Video threshold of the detector.
            training_images: ([(video_id, timestamp), ...], [label, ...])
            image_threshold: (Optional) Image threshold of the detector.

        Returns:
            The created detector.
        """
        if image_threshold is None:
            image_threshold = SpatialSceneClassifier.load_from_dir(
                self.model_dir).image_threshold

        detector_kwargs = dict(name=det_name,
                               video_threshold=video_threshold,
                               image_threshold=image_threshold,
                               target_label=target_label)
        detector = self.detector_cls(**detector_kwargs)
        # Flush before detector.id is read for the training images.
        session.flush()
        save_training_images(detector.id, *training_images)
        self.tar_and_upload(detector)
        return detector
Example #3
0
    def inject(self, clf_name, target_label_id_photo, target_label_id_sshow):
        """
        Create a StaticVideoClassifier with two target labels and upload it.

        Params:
            clf_name: Name of the classifier to create (must not exist yet).
            target_label_id_photo: Id of the target label for photo videos.
            target_label_id_sshow: Id of the target label for slideshow
                videos.

        Returns:
            The created classifier.

        Asserts that both label ids resolve to a Label and that no
        StaticVideoClassifier named clf_name already exists.
        """
        resolved_labels = []
        for label_id in (target_label_id_photo, target_label_id_sshow):
            label = Label.get(label_id)
            assert label, 'Label id %s does not correspond to any Label!' % \
                (label_id)
            resolved_labels.append(label)

        already_there = StaticVideoClassifier.by_name(clf_name)
        assert not already_there, \
            'StaticVideoClassifier with name %s already exists!' % clf_name

        new_classifier = StaticVideoClassifier(name=clf_name)
        session.flush()

        new_classifier.add_targets(resolved_labels)
        self.tar_and_upload(new_classifier)
        return new_classifier
Example #4
0
def save_training_pages(detector_id, updated_at, true_pid_file):
    """Store the page ids listed in a file as TrainingPage entries.

    Args:
        detector_id: id stored as detector_id of every TrainingPage
        updated_at: value stored as detector_version of every TrainingPage
        true_pid_file: path to a text file with one integer page id per
            line; blank lines are ignored and duplicated ids are stored once

    Raises:
        ValueError: if a non-blank line cannot be parsed as an int
    """
    # Read through a context manager so the file handle is always closed.
    with open(true_pid_file, 'r') as pid_file:
        lines = pid_file.read().splitlines()

    # Skip blank lines instead of crashing on int(''); a set drops duplicates.
    true_pid_set = set(int(line) for line in lines if line.strip())
    for pid in true_pid_set:
        TrainingPage(detector_id=detector_id,
                     detector_version=updated_at,
                     page_id=pid)
    session.flush()
Example #5
0
 def inject_model(self, name):
     """Create the model object called ``name`` and upload its files.

     A WordRecModel is created when self.has_mat_file is truthy, otherwise
     a CaffeModel is created.

     Returns:
         The created model object.
     """
     model_cls = WordRecModel if self.has_mat_file else CaffeModel
     model = model_cls(name=name)
     session.flush()
     self.tar_and_upload(model)
     return model
 def inject(self, clf_name, target_label_ids, training_box_ids):
     """Create a FaceRecognizeClassifier with its targets and training boxes.

     'target_label_ids' and 'training_box_ids' should be sets.

     Every label id and every box id is looked up first; an assertion fails
     if any lookup returns a falsy result. One TrainingBox is then created
     per training box id before the classifier is uploaded.

     Returns:
         The created classifier.
     """
     found_labels = list(map(Label.get, target_label_ids))
     assert all(found_labels), "Bad target label id"

     # The boxes themselves are not used; the lookup only validates the ids.
     found_boxes = list(map(Box.get, training_box_ids))
     assert all(found_boxes), "Bad training box id"

     new_clf = FaceRecognizeClassifier.create(name=clf_name)
     new_clf.add_targets(found_labels)
     for training_box_id in training_box_ids:
         TrainingBox(detector_id=new_clf.id, box_id=training_box_id)
     session.flush()
     self.tar_and_upload(new_clf)
     return new_clf
def save_training_videos(detector_id, true_vid_list):
    """
    Store every (detector_id, video_id) pair in the TrainingVideo table

    Args:
        detector_id: id of the detector the videos belong to
        true_vid_list: list of int values that correspond to positive
            training video ids; duplicated ids are stored only once

    NOTE: This function assumes the detector_id and the video_ids exist, so
    that should be checked before calling inject_detector
    """
    for video_id in set(true_vid_list):
        TrainingVideo(detector_id=detector_id, video_id=video_id)
    session.flush()
    def inject_detector(self,
                        detector_name,
                        label_id,
                        true_vid_list,
                        confidence_th=None,
                        acceptance_th=None):
        """
        Create the detector in the table VideoMotionColorDetector, upload the
        tar file with all the model files to s3, and save the video_ids used
        as positive training data

        Args:
            detector_name: string with detector name
                (it has to be unique because this detector can't be replaced)
            label_id: int, target label id of the detector
            true_vid_list: list of ints that correspond to the ids of the
                videos used as positive training data
            confidence_th: optional value for the detector's confidence_th;
                only applied when acceptance_th is given as well
            acceptance_th: optional value for the detector's acceptance_th;
                only applied when confidence_th is given as well
        Returns:
            det: the created VideoMotionColorDetector object

        Raise/Assertions:
            This function asserts if the label_id does not correspond to any
            existing label and if the detector_name already exists in the db
        """
        label = Label.get(label_id)
        assert label is not None, \
            'Label id %d does not correspond to any Label!' % (label_id)

        det = VideoMotionColorDetector.by_name(detector_name)
        assert not det, 'VideoMotionColorDetector with name %s already exists!'\
            % detector_name

        det = VideoMotionColorDetector(name=detector_name)
        # None is the "not provided" sentinel: compare against it explicitly
        # so that a legitimate threshold of 0 / 0.0 is not silently dropped.
        if confidence_th is not None and acceptance_th is not None:
            det.confidence_th = confidence_th
            det.acceptance_th = acceptance_th
        session.flush()
        det.add_targets([label])
        self.tar_and_upload(det)

        logger.info('VideoMotionColorDetector detector injected %s' % det)
        save_training_videos(det.id, true_vid_list)

        return det
Example #9
0
    def inject_detector(self,
                        detector_name,
                        list_label_ids,
                        true_vid_list=None):
        """
        Create the detector in the table Cnn classifier, upload the tar
        file with all the model files to s3, and save the video_ids used as
        positive training data

        Args:
            detector_name: string with detector name
                (it has to be unique because this detector can't be replaced)
            list_label_ids: iterable of target label ids; ids that do not
                match an existing Label are ignored and duplicates are
                collapsed
            true_vid_list: optional list of ints with ids of the videos used
                as positive training data; nothing is saved when it is empty
                or None
        Returns:
            det: the created CnnClassifier object

        Raise/Assertions:
            This function asserts if none of the ids in list_label_ids
            corresponds to an existing label and if the detector_name already
            exists in the db
        """
        # Look every id up exactly once, then keep the distinct hits.
        fetched_labels = (Label.get(label_id) for label_id in list_label_ids)
        target_label_list = list({label for label in fetched_labels if label})
        assert target_label_list,\
            "Target label list needs at least one Label that exists in the DB"

        det = self.detector_cls.by_name(detector_name)
        assert not det, 'Cnn Classifier with name %s already exists!'\
            % detector_name

        det = self.detector_cls(name=detector_name)
        session.flush()
        det.add_targets(target_label_list)
        self.tar_and_upload(det)
        logger.info('CnnClassifier detector injected %s' % det)

        if true_vid_list:
            save_training_videos(det.id, true_vid_list)

        return det
def save_training_images(detector_id, images, labels):
    """Store the images used for training as TrainingImage entries.

    Args:
        detector_id: Id of the detector the images belong to.
        images: List of (video id, timestamp) pairs.
        labels: List of labels, one per image; each must be POS_LABEL or
            NEG_LABEL.

    Raises:
        AssertionError: if images and labels differ in length or a label is
            neither POS_LABEL nor NEG_LABEL.
    """
    assert len(images) == len(labels)
    allowed_labels = [POS_LABEL, NEG_LABEL]
    assert all([lbl in allowed_labels for lbl in labels])

    for image, lbl in zip(images, labels):
        video_id, timestamp = image
        TrainingImage(detector_id=detector_id,
                      video_id=video_id,
                      timestamp=timestamp,
                      label=lbl)
    session.flush()
Example #11
0
    def inject_detector(self, detector_name, label_id, replace_old,
                        true_pid_file):
        """Create or refresh a NerDetector and record its training pages.

        Args:
            detector_name: name of the NerDetector
            label_id: id of the target label; it must exist
            replace_old: when truthy the detector must already exist and is
                reused; otherwise it must not exist, is created and gets the
                label as its target
            true_pid_file: file passed on to save_training_pages

        Asserts on a missing label, on a missing detector when replace_old
        is truthy, and on an existing detector when it is not.
        """
        label = Label.get(label_id)
        assert label is not None, "Label with id %s does not exist" % label_id

        detector = NerDetector.by_name(detector_name)
        if not replace_old:
            assert not detector, 'NerDetector with name %s already exists!'\
                % detector_name
            # create the new detector
            detector = NerDetector(name=detector_name)
            session.flush()
            detector.add_targets([label])
        else:
            assert detector, 'NerDetector with name %s does not exist!'\
                % detector_name

        self.tar_and_upload(detector)
        # The same timestamp is reused as the version of the training pages.
        detector.updated_at = datetime.utcnow()
        session.flush()
        logger.info('NER detector injected %s' % detector)
        save_training_pages(detector.id, detector.updated_at, true_pid_file)
Example #12
0
 def inject_classifier(self, replace_old):
     """Create or refresh the UrlClassifier described by the model config.

     The classifier name and the target label name are read from the
     config file validated against CFG_SPEC.

     Args:
         replace_old: when truthy the classifier must already exist and is
             reused; otherwise it must not exist and is created

     Asserts when the target label is not found, and when the classifier's
     existence does not match replace_old.
     """
     # TODO: This seems like it could be generalized for all classifiers
     config = validate_config(self.model_path(CFG_FILE), CFG_SPEC)
     name = config['classifier_name']
     target_label = Label.by_name(config['target_label_name'])
     assert target_label

     classifier = UrlClassifier.by_name(name)
     if not replace_old:
         assert not classifier, 'UrlClassifier with name %s already exists!'\
             % name
         # create the new classifier
         classifier = UrlClassifier.create(name=name)
     else:
         assert classifier, 'UrlClassifier with name %s does not exist!'\
             % name

     # note that failures above while running the script does not roll back
     # previously inserted models
     self.tar_and_upload(classifier)
     classifier.updated_at = datetime.utcnow()
     session.flush()
     classifier.add_targets([target_label])
     logger.info('URL classifier injected %s' % classifier)
Example #13
0
 def upload_transcript(self):
     """Upload the transcript at self.transcript_path to s3.

     After the upload the video is marked as having its transcript uploaded
     and the session is flushed.
     """
     video = self.video
     video.upload_transcript(self.transcript_path)
     video.mark_transcript_uploaded()
     session.flush()