def test_missing_instance_dir(self, fs: FakeFilesystem) -> None:
    """Expect construction to fail when only `test_instance_unreleased` is added.

    The only instance directory mapped into the fake filesystem is
    `test_instance_unreleased`; the raised message must name the model path
    and list that instance.
    """
    fs.add_real_directory('./testdata/test_model/test_instance_unreleased')
    model_dir = os.path.join(self.BASE_CLASSIFIER_PATH, 'test_model')
    # `match` is a regex, so the brackets and quotes of the printed list
    # are escaped.
    instance_list_regex = r'\[\'test_instance_unreleased\'\]'
    message_template = (
        'No valid instance of model found in %s, instances were %s')
    expected_error = message_template % (model_dir, instance_list_regex)
    with pytest.raises(Exception, match=expected_error):
        Classifier(self.BASE_CLASSIFIER_PATH, 'test_model')
def test_missing_labels_file(self, fs: FakeFilesystem) -> None:
    """Expect a labels-file load failure once `label.vocab` has been removed."""
    labels_file = './testdata/test_model/test_instance/label.vocab'
    fs.add_real_directory('./testdata/test_model/test_instance')
    # Drop the labels file from the fake filesystem so loading it must fail.
    fs.remove_object(labels_file)
    expected_error = (
        r'Failure to load labels file from {0} with exception'.format(
            labels_file))
    with pytest.raises(Exception, match=expected_error):
        Classifier(self.BASE_CLASSIFIER_PATH, 'test_model')
def test_missing_model(self, fs: FakeFilesystem) -> None:
    """Expect a 'SavedModel file does not exist' error for the
    `test_instance_missing_model` fixture directory."""
    instance_dir = os.path.join(
        self.BASE_CLASSIFIER_PATH, 'test_model', 'test_instance_missing_model')
    fs.add_real_directory(instance_dir)
    expected_error = 'SavedModel file does not exist at: ' + instance_dir
    with pytest.raises(Exception, match=expected_error):
        Classifier(self.BASE_CLASSIFIER_PATH, 'test_model')
def index_page(text="", prediction_message=""):
    """Render the prediction page; on POST, classify the submitted text first."""
    if request.method != "POST":
        # Nothing submitted: render with the values passed in.
        return render_template('prediction_page.html', text=text,
                               prediction_message=prediction_message)

    model = Classifier(DEFAULT_MODEL_PATH, DEFAULT_VECTORIZER_PATH,
                       DEFAULT_MLB_PATH)
    predicted_labels = model.predict(request.form["text"])
    # Labels are shown in sorted order, separated by single spaces.
    prediction_message = " ".join(sorted(predicted_labels))
    # NOTE(review): `text` is passed through unchanged, so the submitted form
    # text is not echoed back to the template - confirm this is intended.
    return render_template('prediction_page.html', text=text,
                           prediction_message=prediction_message)
def test_missing_variables(self, fs: FakeFilesystem) -> None:
    """Expect a missing `variables` path error for the
    `test_instance_missing_variables` fixture directory."""
    instance_dir = os.path.join(
        self.BASE_CLASSIFIER_PATH, 'test_model',
        'test_instance_missing_variables')
    fs.add_real_directory(instance_dir)
    expected_error = instance_dir + '/variables; No such file or directory'
    with pytest.raises(Exception, match=expected_error):
        Classifier(self.BASE_CLASSIFIER_PATH, 'test_model')
def classifier():
    """Show the classifier form; on a valid submit, render matching articles."""
    form = ClassifierForm()
    if not form.validate_on_submit():
        # No valid submission: show the input form.
        return render_template('classifier.html', title='Search Article',
                               form=form)

    text = form.index.data
    category = Classifier(form.index.data)
    # At most ten articles whose topic equals the classification result.
    articles = Articles.query.filter_by(topic=category).limit(10)
    #article = SimilarArticles.query.filter_by(id=form.index.data).first()
    return render_template('results.html', title='Search Results',
                           category=category, text=text, articles=articles)
def __init__(self, base_classifier_dir: str, model_name: str = '') -> None:
    """Record the classifier location and try to load the named model.

    Args:
        base_classifier_dir: Base directory handed to ``Classifier``.
        model_name: Name of the model to load. When empty, no load is
            attempted and ``classifier`` stays ``None``.

    Any exception raised while loading the model or building the topic
    statuses is caught and logged at INFO level rather than propagated.
    """
    self.logger = logging.getLogger()
    self.base_classifier_dir = base_classifier_dir
    self.model_name = model_name
    self.classifier: Optional[Classifier] = None
    # Defined unconditionally so the attribute exists even when no model
    # name is given or the load below fails.
    self.topic_infos: Dict[str, ModelStatus.TopicStatus] = {}
    if not self.model_name:
        return
    try:
        self.classifier = Classifier(self.base_classifier_dir, model_name)
        for t, ti in self.classifier.topic_infos.items():
            self.topic_infos[t] = ModelStatus.TopicStatus(t, ti)
    except Exception:
        self.logger.info(
            'No model %s found in classifier directory=%s',
            model_name, self.base_classifier_dir)
def test_classify(self, fs: FakeFilesystem) -> None:
    """Classify one sentence and check the loaded instance and predictions."""
    fixture_dirs = (
        './testdata/test_model/test_instance',
        './testdata/test_model/test_instance_unreleased',
    )
    for fixture_dir in fixture_dirs:
        fs.add_real_directory(fixture_dir)

    clf = Classifier('./testdata', 'test_model')
    predictions = clf.classify(['Increase access to health care'])

    assert clf.labels is not None
    assert clf.embedder is not None
    assert clf.predictor is not None
    # Both instance directories are present; `test_instance` must be chosen.
    assert clf.instance == 'test_instance'
    assert predictions
    # predictions ~ [{topic: probability, topic2: probability, ...}, ...]
    for predicted_topic, _probability in predictions[0].items():
        assert predicted_topic in clf.labels
    assert len(predictions) == 1
    assert predictions[0]['Right to health'] >= 0.8
def test_classify(self, fs: FakeFilesystem) -> None:
    """Classify one sentence and check the loaded instance and predictions."""
    fixture_dirs = (
        './testdata/test_model/test_instance',
        './testdata/test_model/test_instance_unreleased',
    )
    for fixture_dir in fixture_dirs:
        fs.add_real_directory(fixture_dir)

    clf = Classifier('./testdata', 'test_model')
    predictions = clf.classify(['Where is my medical book?'])

    assert clf.vocab is not None
    assert clf.embedder is not None
    assert clf.predictor is not None
    # Both instance directories are present; `test_instance` must be chosen.
    assert clf.instance == 'test_instance'
    print(predictions)
    assert predictions
    # predictions ~ [{topic: probability, topic2: probability, ...}, ...]
    for predicted_topic, _probability in predictions[0].items():
        assert predicted_topic in clf.vocab
    assert predictions[0]['Right to education'] >= 0.7
def test_invalid_bert(self, fs: FakeFilesystem) -> None:
    """Expect a SavedModel error when config.json points at a bad bert path."""
    config_file = './testdata/test_model/test_instance/config.json'
    bad_bert_path = './bad/path/to/bert'
    broken_config = """ { "bert": "%s", "labels": "label.vocab", "is_released": true, "description": "This is the latest model from Sascha.", "metadata": { "thesaurus": "issues" } } """ % (bad_bert_path)

    fs.add_real_directory('./testdata/test_model/test_instance')
    # Swap the real config for one whose "bert" entry is the bad path.
    fs.remove_object(config_file)
    fs.create_file(config_file, contents=broken_config)

    with pytest.raises(Exception,
                       match='SavedModel file does not exist at'):
        clf = Classifier(self.BASE_CLASSIFIER_PATH, 'test_model')
        # Bad bert is only used on uncached embed.
        clf.classify(['some string'])
import uvicorn
from fastapi import FastAPI, File, UploadFile
from fastapi.responses import HTMLResponse

from app.classifier import Classifier

# The classifier is built once at import time from this path and reused by
# every request handled below.
model = Classifier('./data/classfier.h5')
app = FastAPI()


# POST /predict: takes one uploaded file (form field `image`) and returns
# whatever `model.predict_from_file` produces for its underlying file object.
@app.post('/predict')
def predict(image: UploadFile = File(...)):
    # predict label
    return model.predict_from_file(image.file)
def test_missing_base_classify_dir(self) -> None:
    """Expect an 'Invalid base_classifier_dir' error for `./fake_testdata`."""
    bogus_base_dir = './fake_testdata'
    expected_error = 'Invalid base_classifier_dir: ./fake_testdata'
    with pytest.raises(Exception, match=expected_error):
        Classifier(bogus_base_dir, 'test_model')
from app.classifier import Classifier

# Construct a Classifier with no arguments. The instance is not bound to a
# name, so only whatever the constructor itself does has any effect here.
Classifier()
# Gunicornを起動する際のエントリーポイント. # 保存してある学習済みのパラメータを読んでClassiferとFlaskアプリをつくる。 # (パラメータファイルのパスは引数じゃなくて環境変数でわたす) # wsgi.py import torch from smart_getenv import getenv from app import create_app from app.classifier import Classifier # パラメータファイルのパスを環境変数から取得 prm_file = getenv("PRM_FILE", default="taco_burrito.prm") # パラメータファイルを読み込む params = torch.load(prm_file, map_location=lambda storage, loc: storage) # ClassifierとFlaskアプリケーションを作成 classifier = Classifier(params) app = create_app(classifier)
def call_cls(urls, callback, kws, labels):
    """
    Provides the communication of the status and results between the
    classifier process, the main process and the client (when using via web
    interface).

    A ZeroMQ REP socket is bound on ZMQ_LISTENING_PORT. When `callback` is
    None, the function blocks until a message arrives on that socket,
    classifies only `urls[0]`, and replies on the socket with each status
    and the final result as JSON. Otherwise every URL in `urls` is
    classified and the statuses and collected results are POSTed to
    `callback`. In both cases the function finishes by calling `sys.exit()`.

    # Input:
        - urls (list): a list of urls to be classified.
        - callback(str): the callback url, or None to answer over the
          ZeroMQ socket instead.
        - kws (list): list of pre-defined keywords in the database.
        - labels (list): list of pre-defined labels in the database.
    """
    context = zmq.Context()
    socket = context.socket(zmq.REP)
    socket.bind('tcp://*:{PORT}'.format(PORT=ZMQ_LISTENING_PORT))
    poller = zmq.Poller()
    poller.register(socket, zmq.POLLIN)
    print("calling classifier")
    msg = None
    # Only wait for an incoming socket message when there is no callback URL.
    while msg is None and callback is None:
        socks = dict(poller.poll())
        if socket in socks:
            msg = socket.recv()
    print("callback: ", callback)
    # `model` is not a parameter or local; it is expected to exist at module
    # level.
    cls = Classifier(model=model)
    results = dict()
    if callback is None:
        # Socket mode: only the first URL is classified.
        url = urls[0]
        for status in cls.classify(url, kws, labels):
            print("status:", status)
            # String items are progress/status messages; anything else is
            # treated as the result payload.
            # NOTE(review): `type(status) == str` rejects str subclasses;
            # `isinstance` is the usual check - confirm before changing.
            if type(status) == str:
                socket.send_string(json.dumps({'status':status, 'url':url}))
                if status == 'error':
                    break
                gevent.sleep(0.1)
                # Wait for the peer's next message before sending anything
                # else on the socket.
                msg = socket.recv()
            else:
                socket.send_string(json.dumps(status))
    else:
        print("calculating for direct post")
        results = []
        for url in urls:
            for status in cls.classify(url, kws, labels):
                if type(status) == str:
                    data = json.dumps({'status':status + " in url " + url})
                    print("sending status to callback")
                    # NOTE(review): `data` is already a JSON string, so
                    # passing it via `json=` encodes it a second time -
                    # confirm the callback endpoint expects that.
                    requests.post(callback, json=data)
                    if status == 'error':
                        # Record a placeholder entry for the failed URL and
                        # move on to the next one.
                        results += [{'url':url, 'restrict':False, 'reasons':['error']}]
                        break
                else:
                    results += [status]
        #TODO call update db
        data = json.dumps({'sites':results})
        print("sending results to callback")
        requests.post(callback, json=data)
    # Terminates the calling process once the work is done.
    sys.exit()
def test_missing_model_dir(self) -> None:
    """Expect an 'Invalid model path' error for the `missing_model` name."""
    expected_error = 'Invalid model path: ./testdata/missing_model'
    with pytest.raises(Exception, match=expected_error):
        Classifier(self.BASE_CLASSIFIER_PATH, 'missing_model')
print "%d republican speeches" % len(rep_speeches) print "%d democratic speeches" % len(dem_speeches) # ipdb.set_trace() # bayseian_prior_a_rep = len(rep_speeches) / len(speeches) # bayseian_prior_b_dem = len(dem_speeches) / len(speeches) # this frame vocabulary proba has tuples for the proba of class a and b # frame_vocabulary_proba = { word: vocabulary_proba[word] if vocabulary_proba.get(word) != None else [0, 0] for word in frame.word_string.split() } # sum_log_probability_a_rep = sum(map(lambda (word,log_probabilities): log_probabilities[0],frame_vocabulary_proba.items())) # sum_log_probability_b_dem = sum(map(lambda (word,log_probabilities): log_probabilities[1],frame_vocabulary_proba.items())) # final_prob_a = bayseian_prior_a_rep * sum_log_probability_a_rep # final_prob_b = bayseian_prior_b_dem * sum_log_probability_b_dem print "Recompute Naieve Bayes Output For Classifying Frame (%s) Within Window (%s) for phrase %s" % (frame.seed_word, speech_window_key, analysis.phrase) naive_bayes = Classifier(vocab=frame.word_string.split()) training_set = Classifier.bunch_with_targets(speeches, analysis.target_function2) naive_bayes.train_classifier(training_set.data, training_set.target) probabilities = naive_bayes.classify_document(frame.word_string) tfidf_frames_vector = naive_bayes.vectorizer.transform([frame.word_string]) print "Predicted Class: ", naive_bayes.classifier.predict(tfidf_frames_vector)[0] print "Predict Proba: ", naive_bayes.classifier.predict_proba(tfidf_frames_vector)[0] print "Probability A (Rep): ", probabilities[0] print "Probability B (Dem): ", probabilities[1] if probabilities[0] > probabilities[1]: print t.red("A (Rep) NB Proba > B (Dem) NB Proba: Classify Republican") else: