def run(self, label, features):
		self.reset_spatial_encoder()
		self.reset_temporal_encoder()

		for i in range(len(features)):
			self.run_spatial_encoder(features.iloc[i,:])
			if (self.is_early_fusion):
				self.output_R_fused = utils.bundle([self.spatial_encoder_GSR.output_R, 
									 				self.spatial_encoder_ECG.output_R, 
									 				self.spatial_encoder_EEG.output_R])
			self.run_temporal_encoder()
			if (not self.is_early_fusion):
				self.output_T_fused = utils.bundle([self.temporal_encoder_GSR.output_T, 
									 				self.temporal_encoder_ECG.output_T, 
									 				self.temporal_encoder_EEG.output_T])
			if (i > 1):
				if (label == 'test'):
					actual_label_v = utils.classify(self.feature_memory.ds_label_v.iloc[i-self.ngram_size+1:i+1,0])
					actual_label_a = utils.classify(self.feature_memory.ds_label_a.iloc[i-self.ngram_size+1:i+1,0])
					self.predict_am_internal(actual_label_v, actual_label_a)
				else:
					self.accumulate_am(label)

		if (label == 'test'):
			self.compute_summary()
		else:
			self.bundle_am(label)
Example No. 2
def question_f():
    logging.info("<Question F> SVM Classification with Cross Validation")
    clfy = svm.SVC(kernel='linear')
    utils.classify(clfy,
                   "Cross validated SVM",
                   proc_train_set,
                   proc_test_set,
                   cv=True)
Example No. 3
def question_i():
    categories = [
        "comp.graphics", "comp.os.ms-windows.misc", "comp.sys.ibm.pc.hardware",
        "comp.sys.mac.hardware", "rec.autos", "rec.motorcycles",
        "rec.sport.baseball", "rec.sport.hockey"
    ]

    train, test = utils.fetch_data(categories)
    train.target = list(map(lambda x: int(0 <= x and x < 4), train.target))
    test.target = list(map(lambda x: int(0 <= x and x < 4), test.target))

    params = list(range(-3, 4))
    l1_accuracies = []
    l2_accuracies = []

    for param in params:
        l1_classifier = LogisticRegression(penalty='l1',
                                           C=10**param,
                                           solver='liblinear')
        logging.info("Regularization parameter C set to {0}".format(10**param))
        l1_accuracies.append(
            utils.classify(l1_classifier,
                           "Logistic Regression l1",
                           train,
                           test,
                           cv=False,
                           mean=True))
        l2_classifier = LogisticRegression(penalty='l2',
                                           C=10**param,
                                           solver='liblinear')
        l2_accuracies.append(
            utils.classify(l2_classifier,
                           "Logistic Regression l2",
                           train,
                           test,
                           cv=False,
                           mean=True))

    plt.figure(1)
    plt.subplot(211)
    plt.plot(l1_accuracies)
    plt.xticks(range(len(params)), [10**param for param in params])
    plt.title("Accuracy of L1 Logistic Regression vs regularization parameter")

    plt.subplot(212)
    plt.plot(l2_accuracies)
    plt.xticks(range(len(params)), [10**param for param in params])
    plt.title("Accuracy of L2 Logistic Regression vs regularization parameter")
    plt.show()
Example No. 4
def single_param_cross_validator_func(subject, config, dataset, label, values, apply_func):
    windows = config['windows']

    all_scores = []

    full_data = dataset.get_data([subject])[subject]

    for this_value in values:
        print('using {} = {}'.format(label, this_value))

        apply_func(config, this_value)

        print('extracting epoch for subject ', subject)
        this_subject_data = extract_epochs(full_data, config)
        print('extraction complete for ', subject)

        scores = []
        for window_start in windows:
            print('start at ', window_start, end=', ')
            data = get_window(this_subject_data, config=config, start=window_start)
            score = classify(data, config=config)
            scores.append(score)
            print(score)
        all_scores.append(scores)

    return all_scores
Example No. 5
 def run(self):
     with ExitStack() as outer_stack:
         infile = outer_stack.enter_context(self.input().open('r'))
         outfile = outer_stack.enter_context(self.output().open('w'))
         archive = outer_stack.enter_context(ZipFile(infile, 'r'))
         raster_data = outer_stack.enter_context(
             rasterio.open(self.path_to_raster_data))
         
         files = archive.infolist()
         band = raster_data.read(1)
         
         for i, file in enumerate(files):
             message = 'Progress: {0:.0%}'.format(i / len(files))
             self.set_status_message(message)
             print(message)
             with ExitStack() as inner_stack:
                 binary = inner_stack.enter_context(archive.open(file))
                 text = inner_stack.enter_context(io.TextIOWrapper(binary,
                     encoding='utf-8'))
                 reader = csv.reader(text, delimiter='\t',
                     quoting=csv.QUOTE_NONE)
                 next(reader) # Skips header.
                 for row in reader:
                     coord_uncertainty = row[18]
                     x = row[17] #longitude
                     y = row[16] #latitude
                     species_key = row[29]
                     args = [coord_uncertainty, x, y, 
                             raster_data, band,
                             self.coord_uncertainty_limit]
                     belt = utils.classify(*args)
                     if belt:
                         data = {'skey':species_key, 'belt':belt} 
                         outfile.write('{skey},{belt}\n'.format(**data))
Example No. 6
def classify(question):
    for key in KEYS:
        if utils.classify(question, KEYWORDS[key]):
            return key

    return CLASS[0]
Example No. 7
        def post(self, model_id):
            '''
            Apply a published model to the provided input data and return the results.
            This request is only applicable to analytics that are of type 'Model' and have been published.
            '''
            # get the analytic
            _, col = analytics_collection()
            try:
                analytic = col.find({'analytic_id': model_id})[0]
            except IndexError:
                return 'No resource at that URL.', 404

            # make sure it is of type 'Model'
            if analytic['type'] != 'Model':
                return "This analytic is not of type 'Model'", 406

            if 'published' not in analytic or not analytic['published']:
                return "No resource at that URL", 404

            # get the input data
            data = request.get_json()
            parameters = data['parameters']
            inputs = data['inputs']
            result = utils.classify(model_id, parameters, inputs)
            return result, 200
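The endpoint above expects a JSON body with 'parameters' and 'inputs' keys. A minimal sketch of how a client might call it, assuming the service runs locally and the route follows the pattern implied by model_id; the URL, port, and payload values are illustrative assumptions, not part of this project:

import requests

payload = {
    'parameters': {'threshold': 0.5},   # hypothetical model parameters
    'inputs': [[1.2, 3.4, 5.6]],        # hypothetical input rows
}
# Hypothetical base URL and route; the real service may expose a different path.
response = requests.post('http://localhost:5000/analytics/my-model-id/classify', json=payload)
print(response.status_code, response.json())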
Example No. 8
 def test_prediction(self):
     self.X = np.concatenate([np.ones((self.m, 1)), self.X], axis=1)
     theta = np.zeros((self.n + 1, 1))
     theta_optimized, _ = gradient_descent(self.X, self.y, theta)
     test_data = np.array([1, 45, 85]).reshape((1, 3))
     prediction = hypothesis(test_data, theta_optimized)
     self.assertAlmostEqual(prediction, 0.776, places=3)
     self.assertEqual(classify(test_data, self.X, theta_optimized), 1)
Example No. 9
def hello_world():
    if request.method == 'GET':
        return 'Hello, World!'

    if request.method == 'POST':
        content = request.get_json()
        results = classify(content['text'], synapse_0, synapse_1, words, classes)
        return jsonify({'sentence':content['text'], 'results': results })
Example No. 10
async def route_label_item(request, dataset_name):
    result = resp('success')
    dataset = datasets.get(name=dataset_name)
    labels = classify(dataset, datasets_bundle, request)
    if len(labels) > 0:
        result['data'] = labels
    else:
        result = resp('error')
        result['reason'] = "Maybe you have to train this dataset first."

    return json(result, status=201)
Example No. 11
    def find(sentence):
        classes = dict()
        for word, label in sentence:
            if str(label) not in classes.keys():
                classes[str(label)] = dict()

            if word["tag"] not in classes[str(label)].keys():
                classes[str(label)][word["tag"]] = 0

            classes[str(label)][word["tag"]] += 1

        classes = utils.classify(classes)
        return classes
Example No. 12
def main():
    print("Reading Arguments: ")
    args = get_arguments()
    print("Output root directory: ", args.output)

    create_dirs(args.output)

    images = []
    for r, d, f in os.walk(args.img_dir):
        for file in f:
            if '.jpg' in file:
                images.append(osp.join(r, file))

    print("Image list: ")
    print(images)

    for img in images:
        img = img.split('/')
        img_name = osp.splitext(img[-1])[0]  # drop the '.jpg' extension (str.strip would remove characters, not the suffix)
        gridtype = int(img[-2])
        image = cv2.cvtColor(
            cv2.imread('/'.join(img)),
            cv2.COLOR_BGR2RGB)  # read and convert the image to RGB.

        print("gridtype: ", gridtype)
        if gridtype == 9:
            crop_dims, gridw, gridh = cfg.CROP_DIMS, cfg.GRIDW, cfg.GRIDH
        else:
            raise ValueError(INVALID_GRID_TYPE)

        # Process the image
        rgb = RGBPreprocess(crop_dims)
        data = rgb.process_img(image, gridh, gridw)
        for i, im in enumerate(data):
            ret_val = classify(im)
            print(ret_val)

            if ret_val == 2:
                img_path = osp.join(args.output, 'contaminated',
                                    img_name + '_' + str(i) + '.jpg')
            elif ret_val == 1:
                img_path = osp.join(args.output, 'notcontaminated',
                                    img_name + '_' + str(i) + '.jpg')
            else:
                continue  # do not save this image
            cv2.imwrite(img_path, cv2.cvtColor(im, cv2.COLOR_RGB2BGR))
Example No. 13
def by_window_func(subject, config, dataset):
    print("loading data for subject", subject)

    this_subject_data = dataset.get_data([subject])[subject]
    this_subject_data = extract_epochs(this_subject_data, config)

    scores = []

    windows = config['windows']

    for window_start in windows:
        data = get_window(this_subject_data, config=config, start=window_start)
        score = classify(data, config=config)
        print(score)
        scores.append(score)

    return scores
Example No. 14
def sent_search(sentences, keyword, N):
    score = [0]*len(sentences)
        
    for i in range(0, len(sentences)):
        s = sentences[i]
        for w in keyword:
            if utils.classify(s, [w]):
                score[i] = score[i] + 1
    
    index = sorted(range(len(score)), key=lambda i: score[i])[-N:]
    return index
#testing 
#import parse
# questions = parse.parse_test("exams/102.txt") 
# for x in range(41,57):
#     question = utils.get_question(questions, x)
#     print '#', x, ' ', solver(question)
Example No. 15
def main():
    model = utils.load_model("data_model")
    cap = cv2.VideoCapture(0)

    utils.create_window("Display", (900, 600))

    while True:
        ok, frame = cap.read()
        if not ok:
            break

        resized = utils.resize_raw(frame)

        (name, probability) = utils.classify(model, resized)
        img = utils.label_image(name, probability, resized)

        cv2.imshow("Display", img)

        if utils.is_escape(cv2.waitKey(5)):
            break

    cap.release()
    cv2.destroyAllWindows()
Example No. 16
#Why duplicating one tweet from test corpus?
classifications = {}

def iqr(data):
	try:
		return 0.5*(np.percentile(data,75) - np.percentile(data,25))
	except:
		print data
def get(lst,field):
	return [item[field] for item in lst]

for i, tweet in enumerate(text):
	if langid.classify(' '.join(tweet))[0] == 'en':
		tweet, usernames, hashtags = tech.extract_tokens(tweet)
		classifications[i] = tech.classify(tweet)

print len(classifications)
print len(text)

positive,negative, unsure = [],[], []

json.dump(classifications,open('case-control-classifications.json','wb'))

for idx,classification in classifications.iteritems():
	if classification == 1:
		positive.append(data[idx])
	elif classification ==0:
		negative.append(data[idx])
	else:
		unsure.append(data[idx])
Example No. 17
    )

    plt.figure()
    plt.subplot(121)
    plt.plot(train_losses)
    plt.plot(val_losses)
    plt.subplot(122)
    plt.plot(train_acc)
    plt.plot(val_acc)
    plt.show()
    """

    model = resnet18(pretrained=False)
    model.fc = torch.nn.Linear(512, 2)
    model.load_state_dict(torch.load("weights"))
    model.eval()

    """
    test_loader = DataLoader(dataset=test_data, batch_size=1, shuffle=True, drop_last=True)
    test_acc = evaluate(model, test_loader)
    """

    classify(
        model=model,
        cascade_classifier_path_xml='C:/Users/msure/Anaconda3/pkgs/libopencv-4.4.0-py37_2/Library/etc/haarcascades/haarcascade_frontalface_default.xml',
        transformations=transformations
    )



Example No. 18
def question_h():
    logging.info("<Question H> Logistic Regression")
    clfy = LogisticRegression(C=10)
    utils.classify(clfy, "Logistic Regression", proc_train_set, proc_test_set)
Example No. 19
print "*** Loading Instagram metadata from {} ...".format(df_file)

# Loop over images in the dataframe
for index, row in df.iterrows():
    # Define path
    ipath = "test_output/" + row['Filename']

    # Load image
    image = cv2.imread(ipath)

    # Extract features
    features = describe(image)

    # Classify image
    prediction = classify(features, model)

    print "*** Classifying {} ... prediction: {}".format(ipath, prediction)

    # Take action based on prediction
    if prediction == 'photo':
        cv2.imwrite("test_output/photos/%s" % row['Filename'], image)
    if prediction == 'other':
        df = df[df.index != index]
        cv2.imwrite("test_output/others/%s" % row['Filename'], image)

# Reset dataframe index
df = df.reset_index(drop=True)
df.index += 1
print "*** Updating dataframe index ..."
Example No. 20
# -*- coding: utf-8 -*-
"""
Created on Sun Sep 24 13:40:16 2017

@author: Hugh Krogh-Freeman
"""
import sys
import utils 

train_filename = sys.argv[1]
test_filename = sys.argv[2]

utils.classify(train_filename, test_filename)
Example No. 21
from io import BytesIO
from typing import Dict

import pandas as pd
import streamlit as st
import torch
from PIL import Image
from google_drive_downloader import GoogleDriveDownloader as gdd
# classify(model, file) is assumed to be provided elsewhere in this project.

# download the model
gdd.download_file_from_google_drive(file_id='1KPuETrEQSAdIVpvFYz1-cf3Xji5zj8b6', dest_path='./ifo_model.pt', unzip=False)

# setup the model
model = torch.load('ifo_model.pt')

st.set_option('deprecation.showfileUploaderEncoding', False)

st.title("Identified Flying Object Classifier")

st.markdown('Implementation of [TSAI-EVA4-P2-MobileNet](https://github.com/satyajitghana/TSAI-DeepVision-EVA4.0-Phase-2/tree/master/02-MobileNet)')

file: BytesIO = st.file_uploader("Upload an image file", type=["jpg", "png"])

if file:
    predicted: str
    probabilities: Dict[str, float]
    predicted, probabilities = classify(model, file)

    st.image(Image.open(file), use_column_width=True)
    st.markdown(f"## I've identified it as a {predicted}")

    st.markdown('## Class Confidences')
    st.write(pd.Series(probabilities))
else:
    st.markdown("**Please upload a file first**")

Example No. 22

if __name__ == '__main__':
    """
        If we use the entire set to train we will get maximum accuracy
        Splitting the dataset will will decrease the accuracy
    """
    accuracy = float(input("Enter the accuracy of prediction you desire: "))
    data_set = readData('dataset.data')
    train, test = split_train_test(data_set, accuracy)
    tree = build_tree(data_set)
    good_B = 0
    good_R = 0
    good_L = 0
    for row in train:
        prediction = classify(row, tree)
        letter = max(prediction.items(), key=operator.itemgetter(1))[0]

        if row[0] == letter and row[0] == 'L':
            good_L += 1

        if row[0] == letter and row[0] == 'B':
            good_B += 1

        if row[0] == letter and row[0] == 'R':
            good_R += 1
#=============================================================================
    print(good_L, " out of 288 left")
    print(good_R, " out of 288 right")
    print(good_B, " out of 49 balanced")
    print("The ones left are not correctly predicted")
Example No. 24
File: score.py Project: tjcsl/wedge
def get_score(added, deled):
    notspam, spam = utils.classify(added, deled)
    return ((notspam * 20) - (spam * 20))
Example No. 25
def question_e():
    logging.info("<Question E> SVM Classification")
    clfy = svm.SVC(kernel='linear')
    utils.classify(clfy, "SVM", proc_train_set, proc_test_set, cv=False)
Example No. 26
original_file = "reuters-train.en"
training_file = data_transform(original_file)

# In[3]:

print("Preprocessing data...")
processed_file = data_process(training_file)

# In[ ]:

print("Vectorising features...")
X, y = data_vectorise(processed_file)

# In[ ]:

print("Splitting data...")
X_train, X_test, y_train, y_test = train_test_split(X[:30000],
                                                    y[:30000],
                                                    test_size=0.3)

# In[ ]:

print("Training classifier...")
y_pred = classify(X_train, y_train, X_test)

# In[ ]:

print("Evaluating results...")
label_list = np.unique(y)
evaluate(y_test, y_pred, label_list)
Example No. 27
    for k in xrange(1, K+1):
        precision_dict[k] = []
        recall_dict[k] = []

    depth = int(raw_input("Enter the maximum depth of the tree (0 for no limit): "))
    
    start = timer()
    for index in xrange(len(users_db)):
        X_train, Y_train, X_test, Y_test = utils.extract_data(users_db[index], items_db, 70)
        dataset = {'X': X_train, 'Y': Y_train}
        
        classes = utils.get_classes(dataset)
        features = range(len(X_train[0]))

        root = DT(dataset, classes, features, 0, depth)
        Y_pred = utils.classify(root, X_test)

        for k in xrange(1, K+1):
            if k <= len(Y_test):
                top_K_indices = utils.get_recommendations(Y_pred, k)
                precision, recall = utils.compute_metrics(Y_pred, Y_test, top_K_indices)
                precision_dict[k].append(precision)
                recall_dict[k].append(recall)

        MAE = utils.calc_MAE(Y_pred, Y_test)
        RMSE = utils.calc_RMSE(Y_pred, Y_test)
        accu = utils.accuracy(Y_pred, Y_test)
        
        MAE_arr.append(MAE)
        RMSE_arr.append(RMSE)
        accuracy_arr.append(accu)
Example No. 28
def main():
    # Remember the original stdout so it can be restored after redirecting output to files.
    original_stdout = sys.stdout

    # Import training and testing data
    train_1 = IrisDataset('iris_train.txt')
    train_2 = IrisDataset('iris_train.txt')
    train = IrisDataset('iris_train.txt')
    test = IrisDataset('iris_test.txt')

    # This network consists of two output neurons classifying 3 labels (the 3rd label is inferred by not belonging to either the 1st or 2nd label). Each neuron is trained separately,
    # so split up the labels for training each one.
    train_1.labels = train_1.labels[:,0]
    train_2.labels = train_2.labels[:,1]

    # For the 2nd classifier (Iris-versicolor vs Iris-virginica), omit Iris-setosa data. For some reason, the 2nd classifier does not train well at all with the inclusion of Iris-setosa data.
    train_2.data = train_2.data[40:]
    train_2.labels = train_2.labels[40:]
    
    # Now, train both classifiers.
    classifier_1 = Perceptron()
    classifier_1.train(train_1.data, train_1.labels)
    classifier_2 = Perceptron()
    classifier_2.train(train_2.data, train_2.labels)  
   
    # The classify function in utils.py defines the overall architecture of the classification system, and returns an array of 2 element tuples containing the predicted and actual labels of
    # every point in the test set.
    results, errors_loc1, num_errors1 = classify(test, classifier_1, classifier_2)
    # Print out the results to a file. As you can see in the output, there are 3 total misclassifications using the test data.
    with open('results1_test.txt', 'w') as f:
        sys.stdout = f # Change the standard output to the file we created.
        print('Prediction', '\t', 'Actual')
        for _, value in enumerate(results):
            a, b = value
            print(a, '\t', b)
        sys.stdout = original_stdout # Reset the standard output to its original value
    
    # Run the classifier on the training data just to identify the linearly inseparable points
    results, errors_loc1, num_errors1 = classify(train, classifier_1, classifier_2)
    with open('results1_train.txt', 'w') as f:
        sys.stdout = f # Change the standard output to the file we created.
        print('Prediction', '\t', 'Actual')
        for _, value in enumerate(results):
            a, b = value
            print(a, '\t', b)
        sys.stdout = original_stdout # Reset the standard output to its original value


    # Next, let's train both classifiers again but with the pocket algorithm.
    pclassifier_1 = PocketPerceptron()
    pclassifier_1.train(train_1)
    pclassifier_2 = PocketPerceptron()
    pclassifier_2.train(train_2) 
    
    # Let's feed the test data through the pocket algorithm classifier
    results, errors_loc2, num_errors2 = classify(test, pclassifier_1, pclassifier_2)
    # Print out the results to a file.
    with open('results2_test.txt', 'w') as f:
        sys.stdout = f # Change the standard output to the file we created.
        print('Prediction', '\t', 'Actual')
        for _, value in enumerate(results):
            a, b = value
            print(a, '\t', b)
        sys.stdout = original_stdout # Reset the standard output to its original value
    
    # Run the classifier on the training data just to identify the linearly inseparable points
    results, errors_loc1, num_errors1 = classify(train, pclassifier_1, pclassifier_2)
    with open('results2_train.txt', 'w') as f:
        sys.stdout = f # Change the standard output to the file we created.
        print('Prediction', '\t', 'Actual')
        for _, value in enumerate(results):
            a, b = value
            print(a, '\t', b)
        sys.stdout = original_stdout # Reset the standard output to its original value
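The comments above describe utils.classify as the wrapper that combines the two perceptrons and returns the (prediction, actual) pairs together with error locations and an error count. A minimal sketch of how such a wrapper could be structured, assuming each Perceptron exposes a classify(point) method returning 0 or 1 and that dataset.labels holds one integer class per row; both are assumptions, not the project's actual code:

def classify(dataset, classifier_1, classifier_2):
    results = []
    error_locations = []
    for i, (point, actual) in enumerate(zip(dataset.data, dataset.labels)):
        # The first neuron separates class 0 from the rest; the second separates class 1 from class 2.
        if classifier_1.classify(point) == 1:
            predicted = 0
        elif classifier_2.classify(point) == 1:
            predicted = 1
        else:
            predicted = 2
        results.append((predicted, actual))
        if predicted != actual:
            error_locations.append(i)
    return results, error_locations, len(error_locations)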
Example No. 29
def question_g():
    logging.info("<Question G> Bayes Classification")
    clfy = GaussianNB()
    utils.classify(clfy, "Bayes", proc_train_set, proc_test_set, cv=False)