def inject_detector(self, detector_name, label_id, pred_thresh=None):
    """Create a new text-detect classifier and upload its model files.

    Looks up the target label, checks that no ``self.classifier_cls`` row
    already uses ``detector_name``, creates the classifier, attaches the
    label as its target and passes the classifier to
    ``self.tar_and_upload`` (per the original notes, this uploads a tar of
    the model files to s3).

    Args:
        detector_name: string with the detector name; it has to be unique
            because this detector can't be replaced.
        label_id: id of the detector's target label.
        pred_thresh: optional float threshold for word detection, passed
            unchanged to the classifier constructor.

    Returns:
        The newly created classifier object.

    Raises:
        AssertionError: if ``label_id`` does not correspond to any Label,
            or a classifier named ``detector_name`` already exists.
    """
    label = Label.get(label_id)
    # %s (not %d) so that a None / non-integer id still produces the intended
    # AssertionError instead of a TypeError while building the message.
    assert label, \
        'Label id %s does not correspond to any Label!' % (label_id,)

    clf = self.classifier_cls.by_name(detector_name)
    assert not clf, '%s with name %s already exists!'\
        % (self.classifier_cls.__name__, detector_name)

    clf = self.classifier_cls(name=detector_name, pred_thresh=pred_thresh)
    # Flush before attaching targets / uploading, as the original did
    # (presumably so the new row has its id assigned -- confirm).
    session.flush()
    clf.add_targets([label])
    self.tar_and_upload(clf)
    logger.info('%s detector injected %s'
                % (self.classifier_cls.__name__, clf))
    return clf
def inject(self, det_name, target_label, video_threshold, training_images,
           image_threshold=None):
    '''
    Create the detector, record its training images and upload the model.

    When no image threshold is given, the one stored on the
    SpatialSceneClassifier loaded from self.model_dir is used instead.

    Params:
        det_name: Name of the detector to create.
        target_label: Target label of the detector.
        video_threshold: Video threshold of the detector.
        training_images: ([(video_id, timestamp), ...], [label, ...]);
            unpacked positionally into save_training_images.
        image_threshold: (Optional) Image threshold of the detector.

    Returns:
        The newly created detector.
    '''
    threshold = image_threshold
    if threshold is None:
        # Fall back to the threshold stored with the trained classifier.
        trained = SpatialSceneClassifier.load_from_dir(self.model_dir)
        threshold = trained.image_threshold

    detector_fields = dict(
        name=det_name,
        video_threshold=video_threshold,
        image_threshold=threshold,
        target_label=target_label,
    )
    new_detector = self.detector_cls(**detector_fields)
    session.flush()

    save_training_images(new_detector.id, *training_images)
    self.tar_and_upload(new_detector)
    return new_detector
def inject(self, clf_name, target_label_id_photo, target_label_id_sshow):
    '''
    Create a StaticVideoClassifier with two target labels and upload it.

    Params:
        clf_name: Name of the classifier to create; must not exist yet.
        target_label_id_photo: Id of the target label for photo videos.
        target_label_id_sshow: Id of the target label for slideshow videos.

    Returns:
        The newly created StaticVideoClassifier.

    Raises:
        AssertionError if either label id matches no Label, or a
        StaticVideoClassifier called clf_name already exists.
    '''
    # Resolve both ids first (photo, then slideshow); fail on the first miss.
    resolved_labels = []
    for label_id in (target_label_id_photo, target_label_id_sshow):
        label = Label.get(label_id)
        assert label, 'Label id %s does not correspond to any Label!' % \
            (label_id)
        resolved_labels.append(label)

    already_there = StaticVideoClassifier.by_name(clf_name)
    assert not already_there, \
        'StaticVideoClassifier with name %s already exists!' % clf_name

    new_clf = StaticVideoClassifier(name=clf_name)
    session.flush()
    new_clf.add_targets(resolved_labels)
    self.tar_and_upload(new_clf)
    return new_clf
def save_training_pages(detector_id, updated_at, true_pid_file):
    """Create one TrainingPage per distinct page id listed in a file.

    Args:
        detector_id: id stored as ``detector_id`` on every TrainingPage.
        updated_at: value stored as ``detector_version`` on every
            TrainingPage.
        true_pid_file: path to a text file with one integer page id per
            line. Blank lines are ignored and duplicate ids are stored once.

    Raises:
        IOError/OSError: if the file cannot be opened.
        ValueError: if a non-blank line is not an integer.
    """
    # Context manager so the file handle is closed even if parsing fails.
    with open(true_pid_file, 'r') as pid_file:
        lines = pid_file.read().splitlines()

    # Skip blank lines (int('') would raise) and de-duplicate the ids.
    page_ids = set(int(line) for line in lines if line.strip())

    for page_id in page_ids:
        TrainingPage(detector_id=detector_id, detector_version=updated_at,
                     page_id=page_id)
    session.flush()
def inject_model(self, name):
    """Create the model row for ``name`` and upload the model files.

    A WordRecModel is created when ``self.has_mat_file`` is truthy,
    otherwise a CaffeModel. The new object is flushed, passed to
    ``self.tar_and_upload`` and returned.
    """
    model_cls = WordRecModel if self.has_mat_file else CaffeModel
    new_model = model_cls(name=name)
    session.flush()
    self.tar_and_upload(new_model)
    return new_model
def inject(self, clf_name, target_label_ids, training_box_ids):
    """Create a FaceRecognizeClassifier with its targets and training boxes.

    'target_label_ids' and 'training_box_ids' should be sets.

    Every label id and every box id is looked up first; the classifier is
    only created when all of them resolve. One TrainingBox is then recorded
    per training box id, and the classifier is uploaded and returned.

    Raises:
        AssertionError: if any label id or box id does not resolve.
    """
    labels = list(map(Label.get, target_label_ids))
    assert all(labels), "Bad target label id"

    # The boxes are fetched purely to validate the ids.
    boxes = list(map(Box.get, training_box_ids))
    assert all(boxes), "Bad training box id"

    face_clf = FaceRecognizeClassifier.create(name=clf_name)
    face_clf.add_targets(labels)
    for training_box_id in training_box_ids:
        TrainingBox(detector_id=face_clf.id, box_id=training_box_id)
    session.flush()

    self.tar_and_upload(face_clf)
    return face_clf
def save_training_videos(detector_id, true_vid_list):
    """
    Record a TrainingVideo row for every distinct positive training video.

    Args:
        detector_id: id of the detector the videos were used to train
        true_vid_list: list of int video ids used as positive training
            data; duplicates are stored only once

    NOTE: Nothing is validated here. The detector id and the video ids are
    assumed to exist, so callers should check them beforehand.
    """
    for video_id in set(true_vid_list):
        TrainingVideo(detector_id=detector_id, video_id=video_id)
    session.flush()
def inject_detector(self, detector_name, label_id, true_vid_list,
                    confidence_th=None, acceptance_th=None):
    """Create a VideoMotionColorDetector, upload it and save training videos.

    Looks up the target label, checks the name is unused, creates the
    detector, attaches the label, passes the detector to
    ``self.tar_and_upload`` (per the original notes, this uploads a tar of
    the model files to s3) and finally records the positive training videos.

    Args:
        detector_name: string with the detector name; it has to be unique
            because this detector can't be replaced.
        label_id: id of the detector's target label.
        true_vid_list: list of ints, ids of the videos used as positive
            training data.
        confidence_th: optional value stored on ``det.confidence_th``.
        acceptance_th: optional value stored on ``det.acceptance_th``.
            The two thresholds are only applied when BOTH are given;
            otherwise the detector keeps whatever values its constructor
            set.

    Returns:
        det: the created VideoMotionColorDetector object.

    Raises:
        AssertionError: if ``label_id`` does not correspond to any Label,
            or a detector named ``detector_name`` already exists.
    """
    label = Label.get(label_id)
    # %s (not %d) so that a None / non-integer id still produces the intended
    # AssertionError instead of a TypeError while building the message.
    assert label is not None, \
        'Label id %s does not correspond to any Label!' % (label_id,)

    det = VideoMotionColorDetector.by_name(detector_name)
    assert not det, 'VideoMotionColorDetector with name %s already exists!'\
        % detector_name

    det = VideoMotionColorDetector(name=detector_name)
    # Compare against None rather than truthiness so that a legitimate
    # threshold of 0 / 0.0 is not silently discarded.
    if confidence_th is not None and acceptance_th is not None:
        det.confidence_th = confidence_th
        det.acceptance_th = acceptance_th
    session.flush()

    det.add_targets([label])
    self.tar_and_upload(det)
    logger.info('VideoMotionColorDetector detector injected %s' % det)
    save_training_videos(det.id, true_vid_list)
    return det
def inject_detector(self, detector_name, list_label_ids, true_vid_list=None):
    """Create a ``self.detector_cls`` classifier, upload it and save videos.

    Resolves the target labels, checks the name is unused, creates the
    classifier, attaches the labels, passes it to ``self.tar_and_upload``
    (per the original notes, this uploads a tar of the model files to s3)
    and, when given, records the positive training videos.

    Args:
        detector_name: string with the detector name; it has to be unique
            because this detector can't be replaced.
        list_label_ids: iterable of target label ids. Ids that match no
            Label are skipped, and duplicates are attached only once.
        true_vid_list: optional list of ints, ids of the videos used as
            positive training data. Nothing is saved when empty or None.

    Returns:
        det: the created classifier object.

    Raises:
        AssertionError: if none of ``list_label_ids`` matches an existing
            Label, or a classifier named ``detector_name`` already exists.
    """
    # One lookup per id (the previous set comprehension queried each id
    # twice). A set keeps the original de-duplication of labels.
    found_labels = set()
    for label_id in list_label_ids:
        label = Label.get(label_id)
        if label:
            found_labels.add(label)
    target_label_list = list(found_labels)
    assert len(target_label_list),\
        "Target label list needs at least one Label that exists in the DB"

    det = self.detector_cls.by_name(detector_name)
    assert not det, 'Cnn Classifier with name %s already exists!'\
        % detector_name

    det = self.detector_cls(name=detector_name)
    session.flush()
    det.add_targets(target_label_list)
    self.tar_and_upload(det)
    logger.info('CnnClassifier detector injected %s' % det)

    if true_vid_list:
        save_training_videos(det.id, true_vid_list)
    return det
def save_training_images(detector_id, images, labels):
    """Record the labelled images that were used to train a detector.

    Args:
        detector_id: Id of the detector.
        images: List of (video id, timestamp) pairs.
        labels: List of labels, one per image; each must be POS_LABEL or
            NEG_LABEL.

    Raises:
        AssertionError: if the two lists differ in length or a label is
            neither POS_LABEL nor NEG_LABEL.
    """
    assert len(images) == len(labels)
    allowed_labels = [POS_LABEL, NEG_LABEL]
    assert all(one_label in allowed_labels for one_label in labels)

    for image, image_label in zip(images, labels):
        video_id, timestamp = image
        TrainingImage(
            detector_id=detector_id,
            video_id=video_id,
            timestamp=timestamp,
            label=image_label,
        )
    session.flush()
def inject_detector(self, detector_name, label_id, replace_old, true_pid_file):
    """Create or replace a NerDetector, upload it and save training pages.

    Args:
        detector_name: name of the NerDetector.
        label_id: id of the detector's target label.
        replace_old: when truthy, a detector called ``detector_name`` must
            already exist and is re-uploaded; when falsy, the name must be
            unused and a new detector is created.
        true_pid_file: path to the file of training page ids, forwarded to
            ``save_training_pages``.

    Returns:
        The injected NerDetector (returned for consistency with the other
        ``inject_detector`` implementations; previously nothing was
        returned).

    Raises:
        AssertionError: if the label does not exist, if ``replace_old`` is
            set but the detector does not exist, or if ``replace_old`` is
            not set and the detector already exists.
    """
    label = Label.get(label_id)
    assert label is not None, "Label with id %s does not exist" % label_id

    det = NerDetector.by_name(detector_name)
    if replace_old:
        assert det, 'NerDetector with name %s does not exist!'\
            % detector_name
    else:
        assert not det, 'NerDetector with name %s already exists!'\
            % detector_name
        # create the new detector
        # NOTE(review): the original nesting was lost; creation (and the
        # target attachment that follows it) is assumed to apply only when
        # no detector exists yet -- confirm against version control.
        det = NerDetector(name=detector_name)
        session.flush()
        det.add_targets([label])

    self.tar_and_upload(det)
    # The refreshed timestamp is also passed to save_training_pages, where
    # it is stored as the detector version of the training pages.
    det.updated_at = datetime.utcnow()
    session.flush()
    logger.info('NER detector injected %s' % det)
    save_training_pages(det.id, det.updated_at, true_pid_file)
    return det
def inject_classifier(self, replace_old):
    """Create or replace the UrlClassifier described by the model config.

    The classifier name and target label name are read from the config
    file at ``self.model_path(CFG_FILE)``, validated against ``CFG_SPEC``.

    Args:
        replace_old: when truthy, a classifier with the configured name
            must already exist and is re-uploaded; when falsy, the name
            must be unused and a new classifier is created.

    Returns:
        The injected UrlClassifier (previously nothing was returned).

    Raises:
        AssertionError: if the configured target label does not exist, if
            ``replace_old`` is set but the classifier does not exist, or if
            ``replace_old`` is not set and the classifier already exists.
    """
    # TODO: This seems like it could be generalized for all classifiers
    cfg_obj = validate_config(self.model_path(CFG_FILE), CFG_SPEC)
    clf_name = cfg_obj['classifier_name']
    label_name = cfg_obj['target_label_name']
    label = Label.by_name(label_name)
    assert label, 'Label with name %s does not exist!' % (label_name,)

    clf = UrlClassifier.by_name(clf_name)
    if replace_old:
        assert clf, 'UrlClassifier with name %s does not exist!'\
            % clf_name
    else:
        assert not clf, 'UrlClassifier with name %s already exists!'\
            % clf_name
        # create the new classifier
        # NOTE(review): the original nesting was lost; creation is assumed
        # to happen only when no classifier exists yet -- confirm.
        clf = UrlClassifier.create(name=clf_name)

    # note that a failure from here on while running the script does not
    # roll back previously inserted models
    self.tar_and_upload(clf)
    clf.updated_at = datetime.utcnow()
    session.flush()
    clf.add_targets([label])
    logger.info('URL classifier injected %s' % clf)
    return clf
def upload_transcript(self):
    """Upload the transcript at ``self.transcript_path`` for ``self.video``.

    Calls ``upload_transcript`` on the video (to s3, per the original
    docstring), marks the transcript as uploaded on the video, then flushes
    the session.
    """
    video = self.video
    video.upload_transcript(self.transcript_path)
    video.mark_transcript_uploaded()
    session.flush()