Example #1
import os

import flask
from PIL import Image

def recog():
    # Expects a multipart upload with the image under the "img" form field.
    if flask.request.files.get("img"):
        upload = flask.request.files["img"]
        path = os.path.join("static", upload.filename)
        upload.save(path)
        img = Image.open(path)  # PIL opens the path directly; no extra open() needed
        y_hat = classify(img)
        return flask.jsonify({"class": y_hat[0], "prob": y_hat[1]}), 201
    return flask.jsonify({"status": "not an image file"})
Example #2
import io

import quart
from PIL import Image

async def recog():
    # Quart's request.files is awaitable, unlike Flask's synchronous version.
    files = await quart.request.files
    if files.get("img"):
        img_bytes = files["img"].read()
        img = Image.open(io.BytesIO(img_bytes))
        y_hat = classify(img)
        return quart.jsonify({"class": y_hat[0], "prob": y_hat[1]}), 201
    return quart.jsonify({"status": "not an image file"})
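A minimal client sketch for exercising the two handlers above, assuming the route is mounted at /recog on a local development server (the URL, port, and filename here are hypothetical):

import requests

# Post an image as multipart form data under the "img" field the handlers expect.
with open("cat.jpg", "rb") as fh:
    resp = requests.post("http://localhost:5000/recog", files={"img": fh})
print(resp.status_code, resp.json())  # 201 and {"class": ..., "prob": ...} on success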
Example #3
def _example_run(args):
    """Command to run example for ECT-584. First runs the extract on the raw emails to produce a raw dataset.
    This dataset is then cleaned and the training data is staged to produce a pre_processed dataset test dataset
    with predicted event values appended. This is then processed to create a test and training arff dataset.
    The training dataset is used to create a classification model using weka's J48 implementation of the C4.5 
    classification algorithm. Once the model has been created the test dataset has events predicated and these 
    predicated values are appended to the test dataset and saved to an excel file. Then this file is loaded and
    compared against the validation dataset to access the accuracy of the model.
    """
    #calls main.py run_extract with relative file paths
    #run_extract(os.path.normpath(os.path.join(__file__, '..', '..', 'data', 'raw')),
    #            os.path.normpath(os.path.join(__file__, '..', '..', 'data', 'stage')), 
    #            'extract', 'text', False)
    #calls main.py run_clean with relative file paths
    run_clean(os.path.normpath(os.path.join(__file__, '..', '..', 'data', 'stage')),
              os.path.normpath(os.path.join(__file__, '..', '..', 'data', 'stage')),
              'extract', 'text')
    #calls main.py stage_train with relative file paths
    stage_train(os.path.normpath(os.path.join(__file__, '..', '..', 'data', 'stage')),
                'extract_cleaned',
                'text',
                os.path.normpath(os.path.join(__file__, '..', '..', 'data', 'train')),
                't_data',
                'excel',
                os.path.normpath(os.path.join(__file__, '..', '..', 'data', 'stage')))
    #calls main.py run_process with relative file paths
    run_process(os.path.normpath(os.path.join(__file__, '..', '..', 'data', 'stage')),
                os.path.normpath(os.path.join(__file__, '..', '..', 'data', 'process')), 
                'pre_processed', 
                'text')
    #calls main.py build_model with relative file paths
    build_model(os.path.normpath(os.path.join(__file__, '..', '..', 'data', 'process', 'train.arff')), 
                os.path.normpath(os.path.join(__file__, '..', '..', 'data', 'process', 'model.model')))        
    #calls main.py classify with relative file paths
    classify(os.path.normpath(os.path.join(__file__, '..', '..', 'data', 'process', 'test')), 
             os.path.normpath(os.path.join(__file__, '..', '..', 'data', 'process', 'model.model')), 
             os.path.normpath(os.path.join(__file__, '..', '..', 'data', 'results', 'output.xls')), 
             'event')
    #calls main.py validate with relative file paths
    validate(os.path.normpath(os.path.join(__file__, '..', '..', 'data', 'results')), 
             'output')
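A side note on the repeated path expression: os.path.normpath(os.path.join(__file__, '..', '..', 'data', ...)) resolves 'data' as a sibling of the directory containing the script. A sketch of an equivalent, arguably more common spelling (the DATA_DIR name is hypothetical):

import os

# Same result: start from the script's directory and go one level up to 'data'.
DATA_DIR = os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'data'))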
Example #4
def knn_tuning(data, label, k_range=range(1, 11)):
    from models import get_knn
    import matplotlib.pyplot as plt

    # Cross-validate a KNN model for each candidate k and record its f1 score.
    k_f1 = []
    for k in k_range:
        knn = get_knn(k=k)
        scores = classify(knn, data, label)
        k_f1.append(scores['f1'])
        print(f'k = {k} done! f1 = {k_f1[-1]}')

    print(k_f1)

    # Plot f1 against k to make the best value easy to spot.
    plt.figure()
    plt.plot(k_range, k_f1)
    plt.xlabel('Value of k for KNN')
    plt.ylabel('Cross-validation f1')
    plt.show()
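A hypothetical driver for the function above, assuming classify runs cross-validation and returns a dict of scores (as the loop expects); the iris dataset is only a stand-in:

from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)  # stand-in data for illustration
knn_tuning(X, y, k_range=range(1, 21))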
Example #5
from tkinter import filedialog
from PIL import Image, ImageTk

def upload():
    # askopenfile returns None if the dialog is cancelled; guard before using .name.
    # Note: tkinter filetypes entries are (label, pattern) pairs.
    chosen = filedialog.askopenfile(initialdir='.',
                                    filetypes=[('JPEG images', '*.jpg *.jpeg')])
    if chosen is None:
        label['text'] = ''
        return
    file_path = chosen.name

    # get fun fact
    fun_fact = classify(file_path)
    label['text'] = fun_fact

    # display image
    im = Image.open(file_path)
    # im = im.resize((50,50))
    photo = ImageTk.PhotoImage(im)
    canvas.delete('all')
    canvas.create_image((0, 0), image=photo, anchor="s")
    canvas.image = photo  # keep a reference so Tk doesn't garbage-collect the image
    canvas.pack()
Example #6
import copy

def forward_feature_selection(model, features_names, score_name='roc_auc'):
    # Greedy forward selection: at each stage, add the single feature that
    # most improves the cross-validation score.
    current = []
    best_select = None
    for i in range(len(features_names)):
        stage_best = None
        for f in features_names:
            if f not in current:
                current.append(f)
                data, label = get_data(current)
                scores = classify(model, data, label)
                print(f"try {current}, ", scores)
                if stage_best is None or stage_best[1] < scores[score_name]:
                    stage_best = (f, scores[score_name])
                current.pop()
        current.append(stage_best[0])
        if best_select is None or stage_best[1] > best_select[1]:
            best_select = (copy.deepcopy(current), stage_best[1])
        print(f"k = {i}, best_{score_name} = {best_select[1]}, ", best_select[0])

    return best_select
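A hypothetical invocation, assuming get_data(features) returns a (data, label) pair for the given feature subset, as the loop requires; the model helper and feature names are placeholders:

from models import get_knn  # reusing the helper from Example #4; assumed available

best_features, best_score = forward_feature_selection(get_knn(k=5),
                                                      ['age', 'income', 'score'])
print(best_features, best_score)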
Example #7
def login(image_URIs, confidence):
    # todo, only checking 3
    for i in range(len(image_URIs)):
        if i == 0 and not imgURItoFile("unknown", image_URIs[i]):
            print('i=0 failed')
            return None
        if i == 1 and not imgURItoFile("unknown_left", image_URIs[i]):
            print('i=1 failed')
            return None
        if i == 2 and not imgURItoFile("unknown_right", image_URIs[i]):
            print('i=2 failed')
            return None

    # todo, make sure unknown is saved and recent. This may cause problems if
    # multiple people try to log in at once. Make a separate, dedicated thread
    # on the server per login request?
    result = main.classify(KNOWN_IMAGE_DIR, confidence)
    os.remove("unknown")  # cleanup
    os.remove("unknown_left")
    os.remove("unknown_right")

    return result
Example #8
import datetime

def main():
    # train_data = {
    #     'test/spam1' : 'spam',
    #     'test/spam2' : 'spam',
    #     'test/spam3' : 'spam',
    #     'test/spam4' : 'spam',
    #     'test/ham1' : 'ham',
    #     'test/ham2' : 'ham',
    #     'test/ham3' : 'ham',
    #     'test/ham4' : 'ham',
    # }
    #
    # Setup

    train_data, test_data = parse_labels()
    stats(test_data)

    test_data = {
        'test/test0' : 'spam',
    }

    print(datetime.datetime.now().strftime("%I:%M:%S") + " : " + " Generating Counts")
    word_dict, spam_count, ham_count = create_word_counts(train_data)
    print(datetime.datetime.now().strftime("%I:%M:%S") + " : " + " Done.")

    # Iterate through possible values of lambda for lambda smoothing.
    # la_set = [0.005, 0.1, 0.5, 1.0, 2.0]
    la_set = [0]

    for la in la_set:
        print(datetime.datetime.now().strftime("%I:%M:%S") + " : " + " Computing Probs for lambda={0}".format(la))
        word_probs = compute_probs(word_dict, spam_count, ham_count, la)
        spam_prior_prob = float(spam_count) / (spam_count + ham_count)
        ham_prior_prob = float(ham_count) / (spam_count + ham_count)
        print(datetime.datetime.now().strftime("%I:%M:%S") + " : " + " Done")

        TP, FP, TN, FN, problems = classify(test_data, word_probs, spam_prior_prob, ham_prior_prob)
        summarize_findings(la, TP, FP, TN, FN, problems)
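For context, a minimal sketch of the kind of lambda (Lidstone) smoothing the loop above iterates over, assuming word_dict maps each word to its (spam, ham) occurrence counts; the project's actual compute_probs may well differ:

def compute_probs_sketch(word_dict, spam_total, ham_total, la):
    # P(word | class) = (count + la) / (class_total + la * vocabulary_size)
    vocab = len(word_dict)
    probs = {}
    for word, (in_spam, in_ham) in word_dict.items():
        probs[word] = ((in_spam + la) / (spam_total + la * vocab),
                       (in_ham + la) / (ham_total + la * vocab))
    return probs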
Example #9
import numpy as np

def c(params):
    # Keep only the images labelled with one of the two digits of interest.
    f = (labels == params['first_digit']) | (labels == params['second_digit'])
    i = images[f.ravel()]
    # Binary target: True where the label is the first digit.
    l = params['first_digit'] == labels[f.ravel()]
    # Append a bias column of ones, then classify with the given theta.
    return classify(params['theta'], np.concatenate((i, np.ones((i.shape[0], 1))), axis=1)), l, params['lambda']
Example #10
            continue
        if command == "\n" or command == "\r":
            # if maxdatasize == len(datacollectionarray):
            #     print("Flushing collected data", datacollectionarray)
            #     # preprocessed = icatest.preprocess_data_array(datacollectionarray)
            #     extracted = dataprocessing.find_signals(datacollectionarray)
            #     result = icatest.classify(extracted)
            #     print(result)
            #     datacollectionarray = []
            stripped = readbuffer.strip()
            if stripped != '':
                mappedvalue = int(stripped) // 4  # floor division, matching the original Python 2 "/"
                try:
                    mappedvalue = float(m(mappedvalue))
                except Exception:
                    continue
                datacollectionarray.append(mappedvalue)
                print(mappedvalue)

            readbuffer = ""
            continue
        readbuffer += command

        if time.time() - start > sampletake_secoonds:
            print("Flushing collected data", len(datacollectionarray))
            extracted = dataprocessing.find_signals(datacollectionarray)
            result = main.classify(extracted)
            print(result)
            datacollectionarray = []
            break
Example #11
    print(datetime.datetime.now().strftime("%I:%M:%S") + " : " + " Generating Counts")
    word_dict, total_doc_count, spam_doc_count, ham_doc_count = create_word_counts(train_data)
    print(datetime.datetime.now().strftime("%I:%M:%S") + " : " + " Done.")

    la = 2.000

    print(datetime.datetime.now().strftime("%I:%M:%S") + " : " + " Computing Probs for lambda={0}".format(la))
    word_probs = compute_probs(word_dict, spam_doc_count, ham_doc_count, la)
    spam_prior_prob = float(spam_doc_count) / (spam_doc_count + ham_doc_count)
    ham_prior_prob = float(ham_doc_count) / (spam_doc_count + ham_doc_count)
    print(datetime.datetime.now().strftime("%I:%M:%S") + " : " + " Spam Prior Prob : {0}".format(spam_prior_prob))
    print(datetime.datetime.now().strftime("%I:%M:%S") + " : " + " Ham Prior Prob : {0}".format(ham_prior_prob))

    # Rank words by mutual information and keep the 200 most informative.
    best_mi = compute_mi(word_probs, spam_prior_prob, ham_prior_prob)
    top_200 = best_mi[0:200]
    print([word[0] for word in top_200])
    mi_word_probs = {}

    for (word, mi) in top_200:
        mi_word_probs[word] = word_probs[word]
    mi_word_probs["*"] = word_probs["*"]

    TP, FP, TN, FN = classify(test_data, mi_word_probs, spam_prior_prob, ham_prior_prob)
    summarize_findings(TP, FP, TN, FN)

    print("Done.")


if __name__ == "__main__":
    main()
Example #12
from flask import request

def processPic():
    # The client sends a data URL ("data:image/png;base64,...."); strip everything
    # up to and including the first comma to keep only the base64 payload.
    picbase64 = request.get_json()["picbase64"]
    commaIndex = picbase64.find(",")
    pic = picbase64[(commaIndex + 1):]
    result = classify(pic)
    return result
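A sketch of the matching client call, assuming the handler is routed at /processPic and accepts JSON with a picbase64 field (the URL, port, and filename are hypothetical):

import base64
import requests

with open("photo.jpg", "rb") as fh:
    payload = "data:image/jpeg;base64," + base64.b64encode(fh.read()).decode()
resp = requests.post("http://localhost:5000/processPic", json={"picbase64": payload})
print(resp.text)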