def inject_model(self):
    """Create the URL model described by the config file and upload it.

    Validates the config found at ``self.model_path(CFG_FILE)`` against
    ``CFG_SPEC``, verifies that the configured target label exists, creates
    a ``UrlModel`` named after the ``model_name`` entry and hands it to
    ``self.tar_and_upload``.

    Raises:
        AssertionError: If ``Label.by_name`` returns nothing truthy for the
            configured ``target_label_name``.
    """
    cfg_obj = validate_config(self.model_path(CFG_FILE), CFG_SPEC)
    model_name = cfg_obj['model_name']
    label_name = cfg_obj['target_label_name']

    # Raised explicitly instead of using ``assert`` so the check still runs
    # under ``python -O``. The label is not used further in this method, so
    # without this check a missing label would go unnoticed. AssertionError
    # is kept so that existing callers see the same exception type.
    if not Label.by_name(label_name):
        raise AssertionError('Label %s does not exist' % label_name)

    url_model = UrlModel.create(name=model_name)
    self.tar_and_upload(url_model)
    logger.info('URL model injected %s' % url_model)
def process_page(page):
    """Run langid's language detection on a page's text and store the result.

    The detected language name is resolved to a ``Label``; any previous
    result from the language detector for this page is removed and the new
    label is saved in its place.

    Args:
        page: Object exposing ``id`` and ``title_and_text``.

    Returns:
        The ``Label`` matching the detected language name.

    Raises:
        AssertionError: If no label exists for the detected language name.
    """
    logger.info("Detecting language for page: %d" % page.id)
    lang_name = LanguageDetector.detect_language(page.title_and_text)
    lang_label = Label.by_name(lang_name)

    # Raised explicitly instead of using ``assert`` so the check still runs
    # under ``python -O``. Without it, the old results below would be deleted
    # before ``lang_label.id`` fails on a missing label. The exception type
    # and message are unchanged.
    if lang_label is None:
        raise AssertionError("Label %s does not exist" % lang_name)

    # NOTE(review): presumably exactly one LanguageDetector row is expected
    # to exist -- confirm against the model/fixtures.
    det = LanguageDetector.query.one()

    # Drop this detector's earlier result for the page before saving the new
    # one.
    LanguageDetector.delete_detector_results(page, [det.id])
    det.save_result(page.id, lang_label.id)
    return lang_label
def inject_classifier(self, replace_old):
    """Create or replace the URL classifier described by the config file.

    Validates the config found at ``self.model_path(CFG_FILE)`` against
    ``CFG_SPEC``, checks that the target label exists, then either reuses an
    existing ``UrlClassifier`` (``replace_old`` truthy) or creates a new one
    (``replace_old`` falsy). The classifier is passed to
    ``self.tar_and_upload``, its ``updated_at`` is refreshed and the target
    label is attached.

    Args:
        replace_old: When truthy, a classifier with the configured name must
            already exist and is updated. When falsy, no classifier with
            that name may exist and a new one is created.

    Raises:
        AssertionError: If the target label is missing, if ``replace_old``
            is truthy but the classifier does not exist, or if
            ``replace_old`` is falsy but the classifier already exists.
    """
    # TODO: This seems like it could be generalized for all classifiers
    cfg_obj = validate_config(self.model_path(CFG_FILE), CFG_SPEC)
    clf_name = cfg_obj['classifier_name']
    label_name = cfg_obj['target_label_name']

    # The preconditions below are raised explicitly instead of using
    # ``assert`` so they still run under ``python -O``. Without them a
    # duplicate classifier could be created, or ``None`` passed on to
    # ``tar_and_upload``. AssertionError and the original messages are kept
    # for existing callers.
    label = Label.by_name(label_name)
    if not label:
        raise AssertionError('Label %s does not exist' % label_name)

    clf = UrlClassifier.by_name(clf_name)
    if replace_old:
        if not clf:
            raise AssertionError(
                'UrlClassifier with name %s does not exist!' % clf_name)
    elif clf:
        raise AssertionError(
            'UrlClassifier with name %s already exists!' % clf_name)
    else:
        # create the new classifier
        clf = UrlClassifier.create(name=clf_name)

    # Note that a failure above while running the script does not roll back
    # previously inserted models.
    self.tar_and_upload(clf)
    clf.updated_at = datetime.utcnow()
    # Flush before attaching targets; statement order is kept from the
    # original implementation.
    session.flush()
    clf.add_targets([label])
    logger.info('URL classifier injected %s' % clf)