def index_page(text="", prediction_message=""):
    """Render the main page, classifying submitted text on POST.

    On a POST request a Classifier is built from the default model,
    vectorizer and MLB paths, the ``text`` form field is classified, and
    the predicted labels (sorted, space-separated) replace
    ``prediction_message``. For any other method the arguments are
    rendered as given.
    """
    is_submission = request.method == "POST"
    if is_submission:
        model = Classifier(
            DEFAULT_MODEL_PATH, DEFAULT_VECTORIZER_PATH, DEFAULT_MLB_PATH
        )
        labels = model.predict(request.form["text"])
        prediction_message = " ".join(sorted(labels))
    return render_template(
        'prediction_page.html',
        text=text,
        prediction_message=prediction_message,
    )
Beispiel #2
0
    def __init__(self, parent, controller):
        """Build the frame's widgets.

        Creates a classifier from './results/pascalvoc_A.pt', a Back button
        that asks ``controller`` to show ``MainView``, a left panel for the
        chosen image, and a right panel holding the prediction label and
        the button that triggers ``self.uploadFile``.
        """
        tk.Frame.__init__(self, parent)

        self.file = None
        self.clf = Clf('./results/pascalvoc_A.pt')

        def go_back():
            controller.show_frame(MainView)

        back_button = tk.Button(self, text='Back', command=go_back)
        back_button.pack(side='top', fill='x')

        # Two side-by-side containers: image on the left, results on the right.
        self.leftFrame = tk.Frame(self)
        self.leftFrame.pack(side="left", fill="both", expand=True)
        self.rightFrame = tk.Frame(self)
        self.rightFrame.pack(side="right", fill="both", expand=True)

        placeholder = str(self.file or 'No file uploaded')
        self.imgPanel = tk.Label(self.leftFrame, text=placeholder)
        self.imgPanel.pack(side="left", fill="both", expand=True)

        initial_prediction = 'Prediction:\n {}'.format(None)
        self.results = tk.Label(self.rightFrame,
                                text=initial_prediction,
                                anchor='e')
        self.results.config(font=('Arial', 14))
        self.results.pack(side='top', fill='y', expand=True)

        open_button = tk.Button(self.rightFrame,
                                text="Open a Image",
                                command=self.uploadFile)
        open_button.config(bg='#8e8d8d', font=('Arial', 14))
        open_button.pack(side='bottom', fill='both', expand=True)
Beispiel #3
0
def outputCsv(c: classifier.Classifier) -> None:
    """Write training samples with their predicted topics to a CSV in /tmp.

    Loads up to ``FLAGS.limit`` samples of ``FLAGS.model`` that have
    ``use_for_training=True`` (sorted on ``seqHash`` with direction -1),
    classifies their sequences with ``c`` and writes one row per sample.
    Predictions are ordered by decreasing score and split at
    ``FLAGS.csv_sure`` into a "sure" and an "unsure" column. When
    ``FLAGS.csv_diff_only`` is set, only rows whose training labels differ
    from the sure predictions are written.

    Args:
        c: classifier whose ``classify`` returns one topic->score mapping
            per input sequence.
    """
    filename = '/tmp/%s_%d%s.csv' % (FLAGS.model, FLAGS.limit,
                                     '_diff' if FLAGS.csv_diff_only else '')
    # newline='' hands line-ending control to the csv module; without it,
    # rows get an extra '\r' on Windows and embedded newlines in quoted
    # fields are not written correctly.
    with open(filename, 'w', newline='') as csvFile:
        writer = csv.writer(csvFile)
        writer.writerow([
            'sharedId', 'sequence', 'training_labels', 'predicted_sure',
            'predicted_unsure', 'revised_training_labels'
        ])
        with sessionLock:
            samples: List[ClassificationSample] = list(
                ClassificationSample.query.find(
                    dict(model=FLAGS.model, use_for_training=True)).sort([
                        ('seqHash', -1)
                    ]).limit(FLAGS.limit))
        predicted = c.classify([s.seq for s in samples])
        for sample, pred in zip(samples, predicted):
            train_str = ';'.join(
                [label.topic for label in sample.training_labels])
            # Highest score first.
            sorted_pred: List[Tuple[str, float]] = sorted(pred.items(),
                                                          key=lambda e: -e[1])
            pred_sure_str = ';'.join(
                [t for t, q in sorted_pred if q >= FLAGS.csv_sure])
            pred_unsure_str = ';'.join(
                [t for t, q in sorted_pred if q < FLAGS.csv_sure])
            if not FLAGS.csv_diff_only or train_str != pred_sure_str:
                # The last column is left empty for manual revision.
                writer.writerow([
                    sample.sharedId, sample.seq, train_str, pred_sure_str,
                    pred_unsure_str, ''
                ])
    print('Wrote %s.' % filename)
Beispiel #4
0
 def test_missing_instance_dir(self, fs: FakeFilesystem) -> None:
     """Classifier construction fails when only `test_instance_unreleased`
     is present in the fake filesystem for the model."""
     fs.add_real_directory('./testdata/test_model/test_instance_unreleased')
     model_path = os.path.join(self.BASE_CLASSIFIER_PATH, 'test_model')
     # Regex-escaped rendering of the instance list in the error message.
     instances_pattern = r'\[\'test_instance_unreleased\'\]'
     expected_error = ('No valid instance of model found in %s, ' +
                       'instances were %s') % (model_path, instances_pattern)
     with pytest.raises(Exception, match=expected_error):
         Classifier(self.BASE_CLASSIFIER_PATH, 'test_model')
Beispiel #5
0
 def test_missing_labels_file(self, fs: FakeFilesystem) -> None:
     """Classifier construction fails once `label.vocab` has been removed
     from the instance directory in the fake filesystem."""
     fs.add_real_directory('./testdata/test_model/test_instance')
     fs.remove_object('./testdata/test_model/test_instance/label.vocab')
     expected_error = (
         r'Failure to load labels file from {0} with exception').format(
             './testdata/test_model/test_instance/label.vocab')
     with pytest.raises(Exception, match=expected_error):
         Classifier(self.BASE_CLASSIFIER_PATH, 'test_model')
    def test_classify(self, fs: FakeFilesystem) -> None:
        """Classify one sentence and check the loaded parts and the scores.

        Both `test_instance` and `test_instance_unreleased` are exposed to
        the fake filesystem; the classifier must pick `test_instance`.
        """
        for instance_dir in ('./testdata/test_model/test_instance',
                             './testdata/test_model/test_instance_unreleased'):
            fs.add_real_directory(instance_dir)
        model = Classifier('./testdata', 'test_model')

        result = model.classify(['Where is my medical book?'])

        assert model.vocab is not None
        assert model.embedder is not None
        assert model.predictor is not None

        assert model.instance == 'test_instance'
        print(result)
        assert result
        # result ~ [{topic: probability, topic2: probability, ...}, ...]
        first_prediction = result[0]
        for topic in first_prediction.keys():
            assert topic in model.vocab
        assert first_prediction['Right to education'] >= 0.7
Beispiel #7
0
    def test_classify(self, fs: FakeFilesystem) -> None:
        """Classify one sentence and check the loaded parts and the scores.

        Both `test_instance` and `test_instance_unreleased` are exposed to
        the fake filesystem; the classifier must pick `test_instance` and
        return exactly one prediction for the single input.
        """
        for instance_dir in ('./testdata/test_model/test_instance',
                             './testdata/test_model/test_instance_unreleased'):
            fs.add_real_directory(instance_dir)
        model = Classifier('./testdata', 'test_model')

        result = model.classify(['Increase access to health care'])

        assert model.labels is not None
        assert model.embedder is not None
        assert model.predictor is not None

        assert model.instance == 'test_instance'
        assert result
        # result ~ [{topic: probability, topic2: probability, ...}, ...]
        for topic in result[0].keys():
            assert topic in model.labels
        assert len(result) == 1
        assert result[0]['Right to health'] >= 0.8
Beispiel #8
0
    def test_missing_model(self, fs: FakeFilesystem) -> None:
        """Classifier construction fails with a 'SavedModel file does not
        exist' error for the `test_instance_missing_model` instance."""
        instance_path = os.path.join(self.BASE_CLASSIFIER_PATH, 'test_model',
                                     'test_instance_missing_model')
        fs.add_real_directory(instance_path)

        expected_error = ('SavedModel file does not exist at: {0}'
                          ).format(instance_path)
        with pytest.raises(Exception, match=expected_error):
            Classifier(self.BASE_CLASSIFIER_PATH, 'test_model')
Beispiel #9
0
    def test_missing_variables(self, fs: FakeFilesystem) -> None:
        """Classifier construction fails with a missing-`variables` error
        for the `test_instance_missing_variables` instance."""
        instance_path = os.path.join(self.BASE_CLASSIFIER_PATH, 'test_model',
                                     'test_instance_missing_variables')
        fs.add_real_directory(instance_path)

        expected_error = '{0}/variables; No such file or directory'.format(
            instance_path)
        with pytest.raises(Exception, match=expected_error):
            Classifier(self.BASE_CLASSIFIER_PATH, 'test_model')
Beispiel #10
0
 def test_invalid_bert(self, fs: FakeFilesystem) -> None:
     """Replace the instance's config.json with one whose "bert" entry is a
     bad path and expect a 'SavedModel file does not exist at' error from
     constructing or using the classifier."""
     bad_bert_path = './bad/path/to/bert'
     config = """
     {
         "bert": "%s",
         "labels": "label.vocab",
         "is_released": true,
         "description": "This is the latest model from Sascha.",
         "metadata": {
             "thesaurus": "issues"
         }
     }
     """ % (bad_bert_path)
     config_file = './testdata/test_model/test_instance/config.json'
     fs.add_real_directory('./testdata/test_model/test_instance')
     # Swap the real config for the one pointing at the bad bert path.
     fs.remove_object(config_file)
     fs.create_file(config_file, contents=config)
     with pytest.raises(Exception,
                        match='SavedModel file does not exist at'):
         model = Classifier(self.BASE_CLASSIFIER_PATH, 'test_model')
         # Bad bert is only used on uncached embed.
         model.classify(['some string'])
Beispiel #11
0
def classifier():
    """Show the classifier form, or the results page for a valid submission.

    On a valid submission the form's ``index`` value is passed to
    ``Classifier`` to obtain a category, up to 10 ``Articles`` with that
    topic are queried, and 'results.html' is rendered. Otherwise the form
    page 'classifier.html' is rendered.
    """
    form = ClassifierForm()
    if not form.validate_on_submit():
        return render_template('classifier.html',
                               title='Search Article',
                               form=form)

    text = form.index.data
    category = Classifier(form.index.data)
    articles = Articles.query.filter_by(topic=category).limit(10)
    page_context = dict(title='Search Results',
                        category=category,
                        text=text,
                        articles=articles)
    return render_template('results.html', **page_context)
 def __init__(self, base_classifier_dir: str, model_name: str = '') -> None:
     """Load the classifier for ``model_name`` and wrap its topic infos.

     Loading is best-effort: if the classifier cannot be constructed (or
     its topic infos cannot be wrapped), the failure is logged at INFO
     level and ``self.classifier`` stays as it was at that point.

     Args:
         base_classifier_dir: directory passed to ``Classifier``.
         model_name: model to load; when empty, nothing is loaded.
     """
     self.logger = logging.getLogger()
     self.base_classifier_dir = base_classifier_dir
     self.model_name = model_name
     self.classifier: Optional[Classifier] = None
     # Defined up front so the attribute exists even when no model name is
     # given or loading fails; previously it was only set inside the try.
     self.topic_infos: Dict[str, ModelStatus.TopicStatus] = {}
     if self.model_name:
         try:
             self.classifier = Classifier(self.base_classifier_dir,
                                          model_name)
             for t, ti in self.classifier.topic_infos.items():
                 self.topic_infos[t] = ModelStatus.TopicStatus(t, ti)
         except Exception:
             # Keep the best-effort behaviour, but record the actual cause
             # instead of discarding it.
             self.logger.info(
                 'No model %s found in classifier directory=%s',
                 model_name, self.base_classifier_dir, exc_info=True)
Beispiel #13
0
def outputCsv(c: classifier.Classifier) -> None:
    """Write non-training samples with their predictions to a CSV file.

    Loads up to ``FLAGS.limit`` samples of ``FLAGS.model`` that have
    ``use_for_training=False`` (sorted on ``seqHash`` with direction -1).
    When ``FLAGS.csv`` names an input CSV, only samples whose sequence
    contains one of the texts from its ``FLAGS.text_col`` column are kept.
    The remaining sequences are classified with ``c`` and written to
    './<model>_<limit>[_diff].csv' with topics sorted alphabetically and
    their probabilities in the same order. When ``FLAGS.csv_diff_only`` is
    set, only rows whose training labels differ from the predictions are
    written.

    Args:
        c: classifier whose ``classify`` returns one topic->score mapping
            per input sequence.
    """
    filename = './%s_%d%s.csv' % (FLAGS.model, FLAGS.limit,
                                  '_diff' if FLAGS.csv_diff_only else '')
    subset_seqs: List[str] = []
    if FLAGS.csv:
        # newline='' is required by the csv module so that newlines inside
        # quoted fields are parsed correctly.
        with open(FLAGS.csv, 'r', newline='') as csvFile, sessionLock:
            for row in csv.DictReader(csvFile):
                subset_seqs.append(row[FLAGS.text_col])
            print(subset_seqs[:10])

    # newline='' hands line-ending control to the csv writer (no doubled
    # '\r' on Windows, embedded newlines preserved).
    with open(filename, 'w', newline='') as csvFile:
        writer = csv.writer(csvFile)
        writer.writerow([
            'sharedId', 'sequence', 'training_labels', 'predictions',
            'probabilities'
        ])
        with sessionLock:
            samples: List[ClassificationSample] = list(
                ClassificationSample.query.find(
                    dict(model=FLAGS.model, use_for_training=False)).sort([
                        ('seqHash', -1)
                    ]).limit(FLAGS.limit))
            if FLAGS.csv:
                # Substring match: keep a sample if any subset text occurs
                # anywhere in its sequence.
                samples = [
                    s for s in samples if any(x in s.seq for x in subset_seqs)
                ]
        predicted = c.classify([s.seq for s in samples])
        for sample, pred in zip(samples, predicted):
            training_labels = [label.topic for label in sample.training_labels]
            train_str = ';'.join(sorted(training_labels))

            # Sorted by topic name so predictions and probabilities line up.
            sorted_pred: List[Tuple[str, float]] = sorted(pred.items())
            predictions = ';'.join([t for t, q in sorted_pred])
            probabilities = ';'.join([str(q) for t, q in sorted_pred])

            if not FLAGS.csv_diff_only or train_str != predictions:
                writer.writerow([
                    sample.sharedId, sample.seq, train_str, predictions,
                    probabilities
                ])
    print('Wrote %s.' % filename)
Beispiel #14
0
def call_cls(urls, callback, kws, labels):
    """
    Provides the communication of the status and results between the classifier process, the main process and the client (when using via web interface).

    Two modes, selected by ``callback``:

    - ``callback is None``: wait for a first message on the ZMQ REP socket,
      then classify only ``urls[0]``. Every string status is sent back as
      ``{'status': ..., 'url': ...}`` and the next message is awaited;
      any non-string item is sent back as JSON as-is.
    - otherwise: classify every url, POST each string status to
      ``callback``, collect non-string items (or an error record) and POST
      them as ``{'sites': [...]}`` at the end.

    The function always terminates the process via ``sys.exit()``.

    # Input:
        - urls (list): a list of urls to be classified.
        - callback(str): the callback url.
        - kws (list): list of pre-defined keywords in the database.
        - labels (list): list of pre-defined labels in the database.
    """
    context = zmq.Context()
    socket = context.socket(zmq.REP)
    socket.bind('tcp://*:{PORT}'.format(PORT=ZMQ_LISTENING_PORT))
    poller = zmq.Poller()
    poller.register(socket, zmq.POLLIN)
    print("calling classifier")

    # Only the socket-driven mode waits for an initial request.
    msg = None
    while msg is None and callback is None:
        socks = dict(poller.poll())
        if socket in socks:
            msg = socket.recv()

    print("callback: ", callback)
    # NOTE(review): `model` is not defined in this function; presumably a
    # module-level name - confirm.
    cls = Classifier(model=model)
    if callback is None:
        url = urls[0]
        for status in cls.classify(url, kws, labels):
            print("status:", status)
            if isinstance(status, str):
                socket.send_string(json.dumps({'status': status, 'url': url}))
                if status == 'error':
                    break
                gevent.sleep(0.1)
                # Receive the next request before the next reply is sent.
                msg = socket.recv()
            else:
                socket.send_string(json.dumps(status))
    else:
        print("calculating for direct post")
        results = []
        for url in urls:
            for status in cls.classify(url, kws, labels):
                if isinstance(status, str):
                    # NOTE(review): `data` is already a JSON string, so
                    # `json=data` encodes it a second time; left unchanged
                    # because the receiver may rely on it - confirm.
                    data = json.dumps({'status': status + " in url " + url})
                    print("sending status to callback")
                    requests.post(callback, json=data)
                    if status == 'error':
                        results.append({'url': url,
                                        'restrict': False,
                                        'reasons': ['error']})
                        break
                else:
                    results.append(status)

        #TODO call update db
        data = json.dumps({'sites': results})
        print("sending results to callback")
        requests.post(callback, json=data)
    sys.exit()
Beispiel #15
0
import uvicorn
from fastapi import FastAPI, File, UploadFile
from fastapi.responses import HTMLResponse
from app.classifier import Classifier

# Classifier built once at import time from a fixed file path; the
# /predict handler in this module uses this instance.
model = Classifier('./data/classfier.h5')

# FastAPI application object on which this module's routes are registered.
app = FastAPI()

@app.post('/predict')
def predict(image: UploadFile = File(...)):
    """Return the module-level model's prediction for the uploaded file.

    The upload's underlying file object is handed to
    ``model.predict_from_file`` and its result is returned unchanged.
    """
    uploaded_stream = image.file
    prediction = model.predict_from_file(uploaded_stream)
    return prediction
Beispiel #16
0
import logging
from datetime import datetime
from flask import Blueprint, jsonify, request
from utils.config_parser import Config

from app.classifier import Classifier

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Blueprint on which this module's routes are registered.
api_controller = Blueprint('api_controller', __name__)
# Classifier obtained at import time through its getInstance() accessor
# (presumably a shared singleton - confirm in app.classifier).
cl = Classifier.getInstance()


@api_controller.route('/ping', methods=['GET'])
def ping():
    """Health check: respond with "pong" and the current local time in
    ISO format."""
    timestamp = datetime.now().isoformat()
    return jsonify("pong", "%s" % timestamp)


@api_controller.route('/hs', methods=['POST'])
def post_hs():
    """Handle POST /hs: validate the JSON payload and check its 'body' text.

    Returns ("Bad request", 400) when the request carries no JSON payload
    and ("Body not found in json", 400) when the payload has no 'body'
    key. Otherwise the body is converted to a string, logged at DEBUG
    level and passed to ``cl.check``.

    NOTE(review): the visible code ends right after the ``cl.check`` call;
    how ``results`` is turned into a response is not shown here.
    """
    json = request.json
    if json is None:
        return "Bad request", 400

    if 'body' not in json:
        return "Body not found in json", 400

    # Coerce to str so non-string bodies are still accepted.
    text = str(json['body'])
    logger.debug("TEXT {}".format(text))
    results = cl.check(text)
Beispiel #17
0
class Main_1(tk.Frame):
    """Frame that lets the user pick an image and shows its prediction.

    The left side displays the chosen image (or a status text); the right
    side shows the prediction text and the button that opens the file
    dialog. A Back button at the top asks the controller to show
    ``MainView``.
    """

    def __init__(self, parent, controller):
        """Create the classifier and lay out all widgets.

        Args:
            parent: tk container this frame is placed in.
            controller: object whose ``show_frame`` is called by the Back
                button.
        """
        tk.Frame.__init__(self, parent)

        self.file = None
        self.clf = Clf('./results/pascalvoc_A.pt')

        backButton = tk.Button(self,
                               text='Back',
                               command=lambda: controller.show_frame(MainView))
        backButton.pack(side='top', fill='x')

        self.leftFrame = tk.Frame(self)
        self.leftFrame.pack(side="left", fill="both", expand=True)
        self.rightFrame = tk.Frame(self)
        self.rightFrame.pack(side="right", fill="both", expand=True)

        self.imgPanel = tk.Label(self.leftFrame,
                                 text=str(self.file or 'No file uploaded'))
        self.imgPanel.pack(side="left", fill="both", expand=True)

        self.results = tk.Label(self.rightFrame,
                                text='Prediction:\n {}'.format(None),
                                anchor='e')
        self.results.config(font=('Arial', 14))
        self.results.pack(side='top', fill='y', expand=True)

        openFile = tk.Button(self.rightFrame,
                             text="Open a Image",
                             command=self.uploadFile)
        openFile.config(bg='#8e8d8d', font=('Arial', 14))
        openFile.pack(side='bottom', fill='both', expand=True)

    def predict(self, img_path):
        """Run the classifier on ``img_path`` and show the result.

        A 'Calculating!!!' placeholder is displayed while the classifier
        runs; the predicted items are then shown one per line, or ``None``
        when the classifier returns nothing.
        """
        self.results.config(text='Prediction:\n {}'.format('Calculating!!!'))
        # Flush the placeholder to the screen before the blocking predict.
        self.results.update_idletasks()
        results = self.clf.predict(img_path)
        results_string = '\n'.join(results)
        if not results_string:
            results_string = None

        self.results.config(text='Prediction:\n {}'.format(results_string))
        self.results.update_idletasks()

    def uploadFile(self):
        """Ask the user for an image file, display it and predict on it.

        Cancelling the dialog leaves the current state untouched. A file
        whose extension is not .png/.jpg/.jpeg (in any letter case) clears
        the image panel and shows an 'Invalid File Type' message instead.
        """
        f = filedialog.askopenfilename()
        if not f:
            # Dialog cancelled: the result is empty, so there is nothing
            # to validate or display.
            return

        self.file = f
        _, ext = os.path.splitext(f)
        # Compare case-insensitively so e.g. '.JPEG' and '.Jpg' are accepted.
        valid_ext = ('.png', '.jpg', '.jpeg')
        if ext.lower() not in valid_ext:
            self.imgPanel.configure(text='Invalid File Type: {}'.format(ext),
                                    image='')
            self.imgPanel.image = None

            self.imgPanel.update_idletasks()
            return

        to_predict = ImageTk.PhotoImage(pad_and_resize(f, 400))

        self.imgPanel.config(image=to_predict)
        # Keep a reference on the widget so the image is not garbage
        # collected while it is displayed.
        self.imgPanel.image = to_predict
        self.imgPanel.update_idletasks()

        self.predict(f)
Beispiel #18
0
 def test_missing_model_dir(self) -> None:
     """Classifier construction fails with 'Invalid model path' when the
     requested model is `missing_model`."""
     expected_error = 'Invalid model path: ./testdata/missing_model'
     with pytest.raises(Exception, match=expected_error):
         Classifier(self.BASE_CLASSIFIER_PATH, 'missing_model')
	print "%d republican speeches" % len(rep_speeches)
	print "%d democratic speeches" % len(dem_speeches)

	# ipdb.set_trace()

	# bayseian_prior_a_rep = len(rep_speeches) / len(speeches)
	# bayseian_prior_b_dem = len(dem_speeches) / len(speeches)
	# this frame vocabulary proba has tuples for the proba of class a and b
	# frame_vocabulary_proba =  { word: vocabulary_proba[word] if vocabulary_proba.get(word) != None else [0, 0] for word in frame.word_string.split() }
	# sum_log_probability_a_rep = sum(map(lambda (word,log_probabilities): log_probabilities[0],frame_vocabulary_proba.items()))
	# sum_log_probability_b_dem = sum(map(lambda (word,log_probabilities): log_probabilities[1],frame_vocabulary_proba.items()))
	# final_prob_a = bayseian_prior_a_rep * sum_log_probability_a_rep
	# final_prob_b = bayseian_prior_b_dem * sum_log_probability_b_dem

	print "Recompute Naieve Bayes Output For Classifying Frame (%s) Within Window (%s) for phrase %s" % (frame.seed_word, speech_window_key, analysis.phrase)
	naive_bayes = Classifier(vocab=frame.word_string.split())
	training_set = Classifier.bunch_with_targets(speeches, analysis.target_function2)
	naive_bayes.train_classifier(training_set.data, training_set.target)
	probabilities = naive_bayes.classify_document(frame.word_string)

	tfidf_frames_vector = naive_bayes.vectorizer.transform([frame.word_string])

	print "Predicted Class: ", naive_bayes.classifier.predict(tfidf_frames_vector)[0]
	print "Predict Proba: ", naive_bayes.classifier.predict_proba(tfidf_frames_vector)[0]

	print "Probability A (Rep): ", probabilities[0]
	print "Probability B (Dem): ", probabilities[1]

	if probabilities[0] > probabilities[1]:
		print t.red("A (Rep) NB Proba > B (Dem) NB Proba: Classify Republican")
	else:
Beispiel #20
0
# Import the project's Classifier and instantiate it once with no arguments;
# the resulting instance is not bound to any name.
from app.classifier import Classifier
Classifier()
Beispiel #21
0
# Entry point used when launching Gunicorn.
# Loads the saved, trained parameters and builds the Classifier and the
# Flask app.
# (The parameter-file path is passed via an environment variable rather
# than a command-line argument.)

# wsgi.py
import torch
from smart_getenv import getenv
from app import create_app
from app.classifier import Classifier
# Get the parameter-file path from the environment variable.
prm_file = getenv("PRM_FILE", default="taco_burrito.prm")
# Load the parameter file; map_location returns each storage unchanged.
# NOTE(review): torch.load unpickles its input - PRM_FILE should only ever
# point at a trusted file.
params = torch.load(prm_file, map_location=lambda storage, loc: storage)
# Create the Classifier and the Flask application.
classifier = Classifier(params)
app = create_app(classifier)
# Build Targets: announce that the target and data vectors are being built.
print("Build target vector and data vector from documents")


def party_fn(speech):
    """Map a speech to its numeric party target.

    Returns 1 when ``speech.speaker_party`` is 'D' and 0 when it is 'R'.

    Raises:
        Exception: for any other party value; the message includes the
            speech's ``speech_id``.
    """
    if speech.speaker_party == 'D':
        return 1
    if speech.speaker_party == 'R':
        return 0
    message = "Speech must have party 'D' or 'R': " + str(speech.speech_id)
    raise Exception(message)


# Bundle the speeches with a target per speech computed by party_fn, then
# fit the vectorizer on the bundled documents and transform them.
bunch = Classifier.bunch_with_targets(speeches=speeches,
                                      target_function=party_fn)
data = vectorizer.fit_transform(bunch.data)  #.tocsr()#.toarray()

# Print Stuff
learned_vocabulary = vectorizer.get_feature_names()
print("Learned %d words in vocabulary" % len(learned_vocabulary))
print(learned_vocabulary)
print("")
print("Sparse Matrix of TfIdf Values pf each term for each document")
# Call form of print, consistent with the surrounding calls; the bare
# `print data` statement is a syntax error on Python 3.
print(data)

target = array(bunch.target)

print("")

# Run Cross Validation Checks
Beispiel #23
0
 def test_missing_base_classify_dir(self) -> None:
     """Classifier construction fails with 'Invalid base_classifier_dir'
     when given the base directory './fake_testdata'."""
     fake_classifier_path = './fake_testdata'
     expected_error = 'Invalid base_classifier_dir: ./fake_testdata'
     with pytest.raises(Exception, match=expected_error):
         Classifier(fake_classifier_path, 'test_model')