def run(self, label, features):
    self.reset_spatial_encoder()
    self.reset_temporal_encoder()
    for i in range(len(features)):
        self.run_spatial_encoder(features.iloc[i, :])
        if self.is_early_fusion:
            self.output_R_fused = utils.bundle([self.spatial_encoder_GSR.output_R,
                                                self.spatial_encoder_ECG.output_R,
                                                self.spatial_encoder_EEG.output_R])
        self.run_temporal_encoder()
        if not self.is_early_fusion:
            self.output_T_fused = utils.bundle([self.temporal_encoder_GSR.output_T,
                                                self.temporal_encoder_ECG.output_T,
                                                self.temporal_encoder_EEG.output_T])
        if i > 1:
            if label == 'test':
                actual_label_v = utils.classify(self.feature_memory.ds_label_v.iloc[i - self.ngram_size + 1:i + 1, 0])
                actual_label_a = utils.classify(self.feature_memory.ds_label_a.iloc[i - self.ngram_size + 1:i + 1, 0])
                self.predict_am_internal(actual_label_v, actual_label_a)
            else:
                self.accumulate_am(label)
    if label == 'test':
        self.compute_summary()
    else:
        self.bundle_am(label)
def question_f():
    logging.info("<Question F> SVM Classification with Cross Validation")
    clfy = svm.SVC(kernel='linear')
    utils.classify(clfy, "Cross validated SVM", proc_train_set, proc_test_set, cv=True)
def question_i():
    categories = ["comp.graphics", "comp.os.ms-windows.misc", "comp.sys.ibm.pc.hardware",
                  "comp.sys.mac.hardware", "rec.autos", "rec.motorcycles",
                  "rec.sport.baseball", "rec.sport.hockey"]
    train, test = utils.fetch_data(categories)
    # Binarise the labels: 1 for the four comp.* groups (indices 0-3), 0 for the rec.* groups.
    train.target = list(map(lambda x: int(0 <= x < 4), train.target))
    test.target = list(map(lambda x: int(0 <= x < 4), test.target))
    params = list(range(-3, 4))
    l1_accuracies = []
    l2_accuracies = []
    for param in params:
        l1_classifier = LogisticRegression(penalty='l1', C=10**param, solver='liblinear')
        logging.info("Regularization Parameter set to {0}".format(param))
        l1_accuracies.append(
            utils.classify(l1_classifier, "Logistic Regression l1", train, test, cv=False, mean=True))
        l2_classifier = LogisticRegression(penalty='l2', C=10**param, solver='liblinear')
        l2_accuracies.append(
            utils.classify(l2_classifier, "Logistic Regression l2", train, test, cv=False, mean=True))
    plt.figure(1)
    plt.subplot(211)
    plt.plot(l1_accuracies)
    plt.xticks(range(len(params)), [10**param for param in params])
    plt.title("Accuracy of L1 Logistic Regression vs regularization parameter")
    plt.subplot(212)
    plt.plot(l2_accuracies)
    plt.xticks(range(len(params)), [10**param for param in params])
    plt.title("Accuracy of L2 Logistic Regression vs regularization parameter")
    plt.show()
def single_param_cross_validator_func(subject, config, dataset, label, values, apply_func):
    windows = config['windows']
    all_scores = []
    full_data = dataset.get_data([subject])[subject]
    for this_value in values:
        print('using {} = {}'.format(label, this_value))
        apply_func(config, this_value)
        print('extracting epoch for subject ', subject)
        this_subject_data = extract_epochs(full_data, config)
        print('extraction complete for ', subject)
        scores = []
        for window_start in windows:
            print('start at ', window_start, end=', ')
            data = get_window(this_subject_data, config=config, start=window_start)
            score = classify(data, config=config)
            scores.append(score)
            print(score)
        all_scores.append(scores)
    return all_scores
def run(self):
    with ExitStack() as outer_stack:
        infile = outer_stack.enter_context(self.input().open('r'))
        outfile = outer_stack.enter_context(self.output().open('w'))
        archive = outer_stack.enter_context(ZipFile(infile, 'r'))
        raster_data = outer_stack.enter_context(
            rasterio.open(self.path_to_raster_data))
        files = archive.infolist()
        band = raster_data.read(1)
        for i, file in enumerate(files):
            message = 'Progress: {0:.0%}'.format(i / len(files))
            self.set_status_message(message)
            print(message)
            with ExitStack() as inner_stack:
                binary = inner_stack.enter_context(archive.open(file))
                text = inner_stack.enter_context(io.TextIOWrapper(binary, encoding='utf-8'))
                reader = csv.reader(text, delimiter='\t', quoting=csv.QUOTE_NONE)
                next(reader)  # Skips header.
                for row in reader:
                    coord_uncertainty = row[18]
                    x = row[17]  # longitude
                    y = row[16]  # latitude
                    species_key = row[29]
                    args = [coord_uncertainty, x, y, raster_data, band,
                            self.coord_uncertainty_limit]
                    belt = utils.classify(*args)
                    if belt:
                        data = {'skey': species_key, 'belt': belt}
                        outfile.write('{skey},{belt}\n'.format(**data))
def classify(question):
    for key in KEYS:
        if utils.classify(question, KEYWORDS[key]):
            return key
    return CLASS[0]
def post(self, model_id):
    '''
    Apply a published model to the provided input data and return the results.
    This request is only applicable to analytics that are of type 'Model' and
    have been published.
    '''
    # Get the analytic.
    _, col = analytics_collection()
    try:
        analytic = col.find({'analytic_id': model_id})[0]
    except IndexError:
        return 'No resource at that URL.', 404

    # Make sure it is of type 'Model' and has been published.
    if analytic['type'] != 'Model':
        return "This analytic is not of type 'Model'", 406
    if 'published' not in analytic or not analytic['published']:
        return 'No resource at that URL.', 404

    # Get the input data and run the model.
    data = request.get_json()
    parameters = data['parameters']
    inputs = data['inputs']
    result = utils.classify(model_id, parameters, inputs)
    return result, 200
def test_prediction(self):
    self.X = np.concatenate([np.ones((self.m, 1)), self.X], axis=1)
    theta = np.zeros((self.n + 1, 1))
    theta_optimized, _ = gradient_descent(self.X, self.y, theta)
    test_data = np.array([1, 45, 85]).reshape((1, 3))
    prediction = hypothesis(test_data, theta_optimized)
    self.assertAlmostEqual(prediction, 0.776, places=3)
    self.assertEqual(classify(test_data, self.X, theta_optimized), 1)
def hello_world():
    if request.method == 'GET':
        return 'Hello, World!'
    if request.method == 'POST':
        content = request.get_json()
        results = classify(content['text'], synapse_0, synapse_1, words, classes)
        return jsonify({'sentence': content['text'], 'results': results})
async def route_label_item(request, dataset_name):
    result = resp('success')
    dataset = datasets.get(name=dataset_name)
    labels = classify(dataset, datasets_bundle, request)
    if len(labels) > 0:
        result['data'] = labels
    else:
        result = resp('error')
        result['reason'] = "Maybe you have to train this dataset first."
    return json(result, status=201)
def find(sentence):
    classes = dict()
    for word, label in sentence:
        if str(label) not in classes:
            classes[str(label)] = dict()
        if word["tag"] not in classes[str(label)]:
            classes[str(label)][word["tag"]] = 0
        classes[str(label)][word["tag"]] += 1
    classes = utils.classify(classes)
    return classes
def main():
    print("Reading Arguments: ")
    args = get_arguments()
    print("Output root directory: ", args.output)
    create_dirs(args.output)

    # Collect all .jpg images under the input directory.
    images = []
    for r, d, f in os.walk(args.img_dir):
        for file in f:
            if '.jpg' in file:
                images.append(osp.join(r, file))
    print("Image list: ")
    print(images)

    for img in images:
        img = img.split('/')
        img_name = osp.splitext(img[-1])[0]  # drop the '.jpg' extension
        gridtype = int(img[-2])
        # Read the image and convert it to RGB.
        image = cv2.cvtColor(cv2.imread('/'.join(img)), cv2.COLOR_BGR2RGB)
        print("gridtype: ", gridtype)
        if gridtype == 9:
            crop_dims, gridw, gridh = cfg.CROP_DIMS, cfg.GRIDW, cfg.GRIDH
        else:
            raise ValueError(INVALID_GRID_TYPE)

        # Process the image.
        rgb = RGBPreprocess(crop_dims)
        data = rgb.process_img(image, gridh, gridw)
        for i, im in enumerate(data):
            ret_val = classify(im)
            print(ret_val)
            if ret_val == 2:
                img_path = osp.join(args.output, 'contaminated', img_name + '_' + str(i) + '.jpg')
            elif ret_val == 1:
                img_path = osp.join(args.output, 'notcontaminated', img_name + '_' + str(i) + '.jpg')
            else:
                continue  # Do not save this crop.
            cv2.imwrite(img_path, cv2.cvtColor(im, cv2.COLOR_RGB2BGR))
def by_window_func(subject, config, dataset):
    print("loading data for subject", subject)
    this_subject_data = dataset.get_data([subject])[subject]
    this_subject_data = extract_epochs(this_subject_data, config)
    scores = []
    windows = config['windows']
    for window_start in windows:
        data = get_window(this_subject_data, config=config, start=window_start)
        score = classify(data, config=config)
        print(score)
        scores.append(score)
    return scores
def sent_search(sentences, keyword, N):
    score = [0] * len(sentences)
    for i in range(len(sentences)):
        s = sentences[i]
        for w in keyword:
            if utils.classify(s, [w]):
                score[i] += 1
    index = sorted(range(len(score)), key=lambda i: score[i])[-N:]
    return index

# Testing
# import parse
# questions = parse.parse_test("exams/102.txt")
# for x in range(41, 57):
#     question = utils.get_question(questions, x)
#     print('#', x, ' ', solver(question))
def main():
    model = utils.load_model("data_model")
    cap = cv2.VideoCapture(0)
    utils.create_window("Display", (900, 600))
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        resized = utils.resize_raw(frame)
        (name, probability) = utils.classify(model, resized)
        img = utils.label_image(name, probability, resized)
        cv2.imshow("Display", img)
        if utils.is_escape(cv2.waitKey(5)):
            break
    cap.release()
    cv2.destroyAllWindows()
# Why is one tweet from the test corpus duplicated?
classifications = {}

def iqr(data):
    # Note: returns half the interquartile range (the quartile deviation).
    try:
        return 0.5 * (np.percentile(data, 75) - np.percentile(data, 25))
    except Exception:
        print(data)

def get(lst, field):
    return [item[field] for item in lst]

for i, tweet in enumerate(text):
    if langid.classify(' '.join(tweet))[0] == 'en':
        tweet, usernames, hashtags = tech.extract_tokens(tweet)
        classifications[i] = tech.classify(tweet)

print(len(classifications))
print(len(text))

positive, negative, unsure = [], [], []
with open('case-control-classifications.json', 'w') as f:
    json.dump(classifications, f)

for idx, classification in classifications.items():
    if classification == 1:
        positive.append(data[idx])
    elif classification == 0:
        negative.append(data[idx])
    else:
        unsure.append(data[idx])
)

plt.figure()
plt.subplot(121)
plt.plot(train_losses)
plt.plot(val_losses)
plt.subplot(122)
plt.plot(train_acc)
plt.plot(val_acc)
plt.show()
"""
model = resnet18(pretrained=False)
model.fc = torch.nn.Linear(512, 2)
model.load_state_dict(torch.load("weights"))
model.eval()
"""
test_loader = DataLoader(dataset=test_data, batch_size=1, shuffle=True, drop_last=True)
test_acc = evaluate(model, test_loader)
"""
classify(
    model=model,
    cascade_classifier_path_xml='C:/Users/msure/Anaconda3/pkgs/libopencv-4.4.0-py37_2/Library/etc/haarcascades/haarcascade_frontalface_default.xml',
    transformations=transformations
)
def question_h():
    logging.info("<Question H> Logistic Regression")
    clfy = LogisticRegression(C=10)
    utils.classify(clfy, "Logistic Regression", proc_train_set, proc_test_set)
print "*** Loading Instagram metadata from {} ...".format(df_file) # Loop over images in the dataframe for index, row in df.iterrows(): # Define path ipath = "test_output/" + row['Filename'] # Load image image = cv2.imread(ipath) # Extract features features = describe(image) # Classify image prediction = classify(features, model) print "*** Classifying {} ... prediction: {}".format(ipath, prediction) # Take action based on prediction if prediction == 'photo': cv2.imwrite("test_output/photos/%s" % row['Filename'], image) if prediction == 'other': df = df[df.index != index] cv2.imwrite("test_output/others/%s" % row['Filename'], image) # Reset dataframe index df = df.reset_index(drop=True) df.index += 1 print "*** Updating dataframe index ..."
# -*- coding: utf-8 -*-
"""
Created on Sun Sep 24 13:40:16 2017

@author: Hugh Krogh-Freeman
"""
import sys
import utils

train_filename = sys.argv[1]
test_filename = sys.argv[2]
utils.classify(train_filename, test_filename)
import pandas as pd
from google_drive_downloader import GoogleDriveDownloader as gdd

# download the model
gdd.download_file_from_google_drive(file_id='1KPuETrEQSAdIVpvFYz1-cf3Xji5zj8b6',
                                    dest_path='./ifo_model.pt',
                                    unzip=False)

# setup the model
model = torch.load('ifo_model.pt')

st.set_option('deprecation.showfileUploaderEncoding', False)
st.title("Identified Flying Object Classifier")
st.markdown('Implementation of [TSAI-EVA4-P2-MobileNet](https://github.com/satyajitghana/TSAI-DeepVision-EVA4.0-Phase-2/tree/master/02-MobileNet)')

file: BytesIO = st.file_uploader("Upload an image file", type=["jpg", "png"])

if file:
    predicted: str
    probabilities: Dict[str, float]
    predicted, probabilities = classify(model, file)
    st.image(Image.open(file), use_column_width=True)
    st.markdown(f"## I've identified it as a {predicted}")
    st.markdown('## Class Confidences')
    st.write(pd.Series(probabilities))
else:
    st.markdown("**Please upload a file first**")
if __name__ == '__main__':
    """
    If we use the entire set to train we will get maximum accuracy.
    Splitting the dataset will decrease the accuracy.
    """
    accuracy = float(input("Enter the accuracy of prediction you desire: "))
    data_set = readData('dataset.data')
    train, test = split_train_test(data_set, accuracy)
    tree = build_tree(data_set)
    good_B = 0
    good_R = 0
    good_L = 0
    for row in train:
        prediction = classify(row, tree)
        letter = max(prediction.items(), key=operator.itemgetter(1))[0]
        if row[0] == letter and row[0] == 'L':
            good_L += 1
        if row[0] == letter and row[0] == 'B':
            good_B += 1
        if row[0] == letter and row[0] == 'R':
            good_R += 1
    # =========================================================================
    print(good_L, " out of 288 left")
    print(good_R, " out of 288 right")
    print(good_B, " out of 49 balanced")
    print("The ones left are not correctly predicted")
print "*** Loading Instagram metadata from {} ...".format(df_file) # Loop over images in the dataframe for index, row in df.iterrows(): # Define path ipath = "test_output/" + row['Filename'] # Load image image = cv2.imread(ipath) # Extract features features = describe(image) # Classify image prediction = classify(features, model) print "*** Classifying {} ... prediction: {}".format(ipath, prediction) # Take action based on prediction if prediction == 'photo': cv2.imwrite("test_output/photos/%s" % row['Filename'], image) if prediction == 'other': df = df[df.index != index] cv2.imwrite("test_output/others/%s" % row['Filename'], image) # Reset dataframe index df = df.reset_index(drop=True) df.index += 1 print "*** Updating dataframe index ..."
def get_score(added, deled):
    notspam, spam = utils.classify(added, deled)
    return (notspam * 20) - (spam * 20)
def question_e():
    logging.info("<Question E> SVM Classification")
    clfy = svm.SVC(kernel='linear')
    utils.classify(clfy, "SVM", proc_train_set, proc_test_set, cv=False)
original_file = "reuters-train.en" training_file = data_transform(original_file) # In[3]: print("Preprocessing data...") processed_file = data_process(training_file) # In[ ]: print("Vectorising features...") X, y = data_vectorise(processed_file) # In[ ]: print("Splitting data...") X_train, X_test, y_train, y_test = train_test_split(X[:30000], y[:30000], test_size=0.3) # In[ ]: print("Training classifier...") y_pred = classify(X_train, y_train, X_test) # In[ ]: print("Evaluating results...") label_list = np.unique(y) evaluate(y_test, y_pred, label_list)
for k in range(1, K + 1):
    precision_dict[k] = []
    recall_dict[k] = []

depth = int(input("Enter the maximum depth of the tree (0 for no limit): "))
start = timer()

for index in range(len(users_db)):
    X_train, Y_train, X_test, Y_test = utils.extract_data(users_db[index], items_db, 70)
    dataset = {'X': X_train, 'Y': Y_train}
    classes = utils.get_classes(dataset)
    features = range(len(X_train[0]))
    root = DT(dataset, classes, features, 0, depth)
    Y_pred = utils.classify(root, X_test)
    for k in range(1, K + 1):
        if k <= len(Y_test):
            top_K_indices = utils.get_recommendations(Y_pred, k)
            precision, recall = utils.compute_metrics(Y_pred, Y_test, top_K_indices)
            precision_dict[k].append(precision)
            recall_dict[k].append(recall)
    MAE = utils.calc_MAE(Y_pred, Y_test)
    RMSE = utils.calc_RMSE(Y_pred, Y_test)
    accu = utils.accuracy(Y_pred, Y_test)
    MAE_arr.append(MAE)
    RMSE_arr.append(RMSE)
    accuracy_arr.append(accu)
def main():
    # Import training and testing data.
    train_1 = IrisDataset('iris_train.txt')
    train_2 = IrisDataset('iris_train.txt')
    train = IrisDataset('iris_train.txt')
    test = IrisDataset('iris_test.txt')

    # This network consists of two output neurons classifying 3 labels (the 3rd label is
    # inferred by not belonging to either the 1st or 2nd label). Each neuron is trained
    # separately, so split up the labels for training each one.
    train_1.labels = train_1.labels[:, 0]
    train_2.labels = train_2.labels[:, 1]

    # For the 2nd classifier (Iris-versicolor vs Iris-virginica), omit Iris-setosa data.
    # For some reason, the 2nd classifier does not train well at all when the Iris-setosa
    # data is included.
    train_2.data = train_2.data[40:]
    train_2.labels = train_2.labels[40:]

    # Now, train both classifiers.
    classifier_1 = Perceptron()
    classifier_1.train(train_1.data, train_1.labels)
    classifier_2 = Perceptron()
    classifier_2.train(train_2.data, train_2.labels)

    # The classify function in utils.py defines the overall architecture of the
    # classification system, and returns an array of 2-element tuples containing the
    # predicted and actual labels of every point in the test set.
    results, errors_loc1, num_errors1 = classify(test, classifier_1, classifier_2)

    # Print the results to a file. As the output shows, there are 3 total
    # misclassifications on the test data.
    with open('results1_test.txt', 'w') as f:
        sys.stdout = f  # Redirect standard output to the file.
        print('Prediction', '\t', 'Actual')
        for _, value in enumerate(results):
            a, b = value
            print(a, '\t', b)
        sys.stdout = original_stdout  # Restore standard output.

    # Run the classifier on the training data to identify the linearly inseparable points.
    results, errors_loc1, num_errors1 = classify(train, classifier_1, classifier_2)
    with open('results1_train.txt', 'w') as f:
        sys.stdout = f
        print('Prediction', '\t', 'Actual')
        for _, value in enumerate(results):
            a, b = value
            print(a, '\t', b)
        sys.stdout = original_stdout

    # Next, train both classifiers again, this time with the pocket algorithm.
    pclassifier_1 = PocketPerceptron()
    pclassifier_1.train(train_1)
    pclassifier_2 = PocketPerceptron()
    pclassifier_2.train(train_2)

    # Feed the test data through the pocket-algorithm classifier.
    results, errors_loc2, num_errors2 = classify(test, pclassifier_1, pclassifier_2)

    # Print the results to a file.
    with open('results2_test.txt', 'w') as f:
        sys.stdout = f
        print('Prediction', '\t', 'Actual')
        for _, value in enumerate(results):
            a, b = value
            print(a, '\t', b)
        sys.stdout = original_stdout

    # Run the classifier on the training data to identify the linearly inseparable points.
    results, errors_loc1, num_errors1 = classify(train, pclassifier_1, pclassifier_2)
    with open('results2_train.txt', 'w') as f:
        sys.stdout = f
        print('Prediction', '\t', 'Actual')
        for _, value in enumerate(results):
            a, b = value
            print(a, '\t', b)
        sys.stdout = original_stdout
def question_g():
    logging.info("<Question G> Bayes Classification")
    clfy = GaussianNB()
    utils.classify(clfy, "Bayes", proc_train_set, proc_test_set, cv=False)