def setup(save_path, layers=[1, 2, 3], curve_num=5000, chunk_size=1000, noisy=False, xray=False,
          show_plots=True, generate_data=True, train_classifier=True, train_regressor=True,
          classifer_epochs=2, regressor_epochs=2):
    """Sets up the pipeline for predictions on .dat files by generating data and training.

    Args:
        save_path (string): a path to the directory where data and models will be saved.
        layers (list): a list of layers to generate and train for.
        curve_num (int): the number of curves to generate per layer.
        chunk_size (int): the size of chunks to use in the h5 storage of images for curves.
        noisy (Boolean): whether to add noise to generated data.
        xray (Boolean): whether to use an x-ray probe or not.
        show_plots (Boolean): whether to display classification confusion matrix and regression plots or not.
        generate_data (Boolean): whether to generate data or use existing data.
        train_classifier (Boolean): whether to train the classifier or not.
        train_regressor (Boolean): whether to train the regressors or not.
        classifer_epochs (int): the number of epochs to train the classifier for.
        regressor_epochs (int): the number of epochs to train the regressor for.

    """
    if generate_data:
        print("-------------- Data Generation ------------")
        for layer in layers:  # Generate curves for each layer specified.
            print(">>> Generating {}-layer curves".format(layer))
            if xray:  # Generate data using x-ray probe.
                structures = XRayGenerator.generate(curve_num, layer)
                XRayGenerator.save(save_path + "/data", LAYERS_STR[layer], structures, noisy=noisy)
            else:  # Generate data using neutron probe.
                structures = NeutronGenerator.generate(curve_num, layer)
                NeutronGenerator.save(save_path + "/data", LAYERS_STR[layer], structures, noisy=noisy)

            print(">>> Creating images for {}-layer curves".format(layer))
            save_path_layer = data_path_layer = save_path + "/data/{}".format(LAYERS_STR[layer])
            # Create images for the generated curves, ready for input to the classifier and regressors.
            generate_images(data_path_layer, save_path_layer, [layer], xray=xray,
                            chunk_size=chunk_size, display_status=False)

        layers_paths = [save_path + "/data/{}".format(LAYERS_STR[layer]) for layer in layers]
        merge(save_path + "/data", layers_paths, display_status=False)  # Merge the curves for each layer for classification.

    print("\n-------------- Classification -------------")
    if train_classifier:
        print(">>> Training classifier")
        classify(save_path + "/data/merged", save_path, train=True, epochs=classifer_epochs,
                 show_plots=show_plots)  # Train the classifier.
    else:
        print(">>> Loading classifier")
        load_path = save_path + "/classifier/full_model.h5"  # Load a classifier.
        classify(save_path + "/data/merged", load_path=load_path, train=False, show_plots=show_plots)

    print("\n---------------- Regression ---------------")
    for layer in layers:  # Train or load regressors for each layer that we are setting up for.
        data_path_layer = save_path + "/data/{}".format(LAYERS_STR[layer])

        if train_regressor:
            print(">>> Training {}-layer regressor".format(LAYERS_STR[layer]))
            regress(data_path_layer, layer, save_path, epochs=regressor_epochs,
                    show_plots=show_plots, xray=xray)  # Train the regressor.
        else:
            print(">>> Loading {}-layer regressor".format(LAYERS_STR[layer]))
            load_path_layer = save_path + "/{}-layer-regressor/full_model.h5".format(LAYERS_STR[layer])  # Load an existing regressor.
            regress(data_path_layer, layer, load_path=load_path_layer, train=False,
                    show_plots=show_plots, xray=xray)
        print()

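# A minimal sketch of how setup() might be driven, assuming this module exposes
# setup() alongside the generators used above; the save path, layer choice and
# epoch counts below are placeholder values, not the project's defaults.
if __name__ == "__main__":
    setup("./pipeline-run",          # everything is written under this directory
          layers=[1, 2],             # generate and train for 1- and 2-layer structures
          curve_num=5000,
          noisy=True,                # add simulated noise to the generated curves
          xray=False,                # neutron probe
          show_plots=False,
          classifer_epochs=10,
          regressor_epochs=10)
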
def main(model_name, model_type, model_opts, data_dir, iteration, sep_direction=True, test_aug=False, description=None):
    results = {}
    for key in [pm.D_ASCENDING, pm.D_DESCENDING]:
        results[key] = np.zeros((pm.NUM_CLASS, pm.NUM_CLASS), dtype=int)

    if description is not None:
        print('Description: {}'.format(description))

    audio_dir = os.path.join(data_dir, 'audio')
    mc_dir = os.path.join(data_dir, 'melody')

    model_class = getattr(models, model_type)
    param_set = getattr(pm, model_opts)

    output_dir = clf.output_dir
    clf.model_dir = os.path.join(clf.model_dir, model_name)
    clf.output_dir = os.path.join(clf.output_dir, model_name)
    if not os.path.isdir(clf.model_dir):
        os.mkdir(clf.model_dir)
    if not os.path.isdir(clf.output_dir):
        os.mkdir(clf.output_dir)

    ### load and pre-process input features
    feature_bank = clf.load_n_preprocess_input_feature(audio_dir, mc_dir, model_class, sep_direction)
    # np.save('feature_bank_spec+mc.npy', feature_bank)
    # feature_bank = np.load('feature_bank_mfcc.npy').item()

    print('Run {} iterations.'.format(iteration))
    for i in range(iteration):
        print('iteration: {}'.format(i))
        cm = clf.classify(feature_bank, model_name + '_' + str(i), model_class, param_set,
                          sep_direction=True, test_aug=False)
        for key in cm:
            if key in results:
                results[key] += cm[key]

    for key in results:
        print('Final result of {}'.format(key))
        csv_fn = 'evaluation.' + key + '.csv'
        save_fp = os.path.join(output_dir, model_name, csv_fn)
        clf.eval_scores(results[key], key, print_scores=True, save_fp=save_fp)

def csvToJson(path):
    CSV_FILE_PATH = path
    filename = Path(path).stem
    JSON_FILE_PATH = filename + '.json'

    data = []
    with open(CSV_FILE_PATH) as csvFile:
        csvReader = csv.DictReader(csvFile)
        for rows in csvReader:
            if (not rows['Longitude'] or not rows['Latitude']):
                continue
            date = rows['Month']
            year = int(date[:4])
            month = int(date[5:7])
            category = classify(rows['Crime type'])
            entry = {
                'timestamp': datetime.datetime(year, month, 1).timestamp(),
                'longitude': rows['Longitude'],
                'latitude': rows['Latitude'],
                'crimeType': rows['Crime type'],
                'category': category
            }
            data.append(entry)

    with open(JSON_FILE_PATH, 'w') as jsonFile:
        jsonFile.write(json.dumps(data, indent=2))

def main(cfg):
    try:
        # nltk.download("vader_lexicon")
        # nltk.download('wordnet')
        glbs = GlobalParameters()
        configs = get_cfg_files(cfg)
        total_files = len(configs)
        results = {}
        for i, config in enumerate(configs):
            print_message("Running config {}/{}".format(i + 1, total_files))
            set_global_parameters(config)
            print_run_details()
            dataset_dir = normalize()
            X, y = extract_features(dataset_dir)
            config_result = classify(X, y, glbs.K_FOLDS, glbs.ITERATIONS)
            glbs.RESULTS[glbs.FILE_NAME] = config_result
            glbs.RESULTS = add_results(glbs.RESULTS, glbs)
            if glbs.EXPORT_AS_BASELINE:
                export_as_baseline(config_result, config[1])
        if glbs.WORDCLOUD:
            print_message("Generating word clouds (long processes)")
            generate_word_clouds()
        add_results_glbs(results, glbs)
        write_results(divide_results(glbs.RESULTS))
        send_work_done(glbs.DATASET_DIR)
        print_message("Done!")
    except Exception as e:
        traceback.print_exc()
        send_work_done(glbs.DATASET_DIR, "", error=str(e), traceback=str(traceback.format_exc()))

def masking(image, emoji_points, detector, predictor, model, transforms):
    tmp = process_image(image, detector, predictor)
    if tmp is None:
        return None
    shape0, rect = tmp

    # im = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    im = image[max(rect.top(), 0):rect.bottom() + 25, max(rect.left(), 0):rect.right()]
    im = PIL.Image.fromarray(im)
    im = transforms(im)
    im = torch.unsqueeze(im, 0)

    label = classification.classify(model, im)
    emoji = cv2.imread("./emojis/" + label + ".png")
    # emoji = cv2.imread("./emojis/" + "smile" + ".png")

    image_pts = np.array([(shape0[51, :]),
                          (shape0[8, :]),
                          (shape0[36, :]),
                          (shape0[45, :]),
                          (shape0[48, :]),
                          (shape0[54, :])], dtype="double")

    emoji, emoji_pts = process_emoji(emoji, emoji_points)

    homography, stats = cv2.findHomography(emoji_pts, image_pts)
    warped = cv2.warpPerspective(emoji, homography, (image.shape[1], image.shape[0]))
    warped_circle = cv2.warpPerspective(circle_mask(emoji.shape[0], 10), homography,
                                        (image.shape[1], image.shape[0]))
    warped_circle[warped_circle > 100] = 255.0
    warped_circle[warped_circle <= 100] = 0
    warped_circle = -1 * (warped_circle - 255)
    mask_warped = np.stack([warped_circle] * 3, axis=2)

    image = np.where(mask_warped, image, warped)
    # cv2.rectangle(image, (rect.left(), rect.top()), (rect.right(), rect.bottom()), (255, 0, 0), 5)
    return image

def run_single_test(data_dir, output_dir):
    from classification import train_classifier, classify
    from keras import backend as K
    from keras.models import load_model
    from os import environ
    from os.path import abspath, dirname, join

    train_dir = join(data_dir, 'train')
    test_dir = join(data_dir, 'test')

    train_gt = read_csv(join(train_dir, 'gt.csv'))
    train_img_dir = join(train_dir, 'images')

    train_classifier(train_gt, train_img_dir, fast_train=True)

    code_dir = dirname(abspath(__file__))
    print('loading model...')
    model = load_model(join(code_dir, 'birds_model.hdf5'))
    print('loaded')
    test_img_dir = join(test_dir, 'images')
    img_classes = classify(model, test_img_dir)
    save_csv(img_classes, join(output_dir, 'output.csv'))

    if environ.get('KERAS_BACKEND') == 'tensorflow':
        K.clear_session()

def insertCrashReport(self, crash_report, commit=False, todict=True):
    """Classify crash_report, insert it into the database, and return the result."""
    crash_report = crash_report['crash_report']

    # Find crash group
    crash_group_id = classify(crash_report)
    try:
        crash_group = self.session.query(CrashGroups).\
            filter(CrashGroups.id == crash_group_id).one()
    except:
        print "Clusterization error"
        exit(1)

    # Find application data
    app_dict = crash_report['application']
    try:
        application = self.session.query(Applications).\
            filter(Applications.name == app_dict['name']).\
            filter(Applications.version == app_dict['version']).\
            one()
    except NoResultFound:
        application = self.instertApplication(app_dict)

    # Find system_info data
    sys_info_dict = crash_report['system_info']
    try:
        system_info = self.session.query(SystemInfo).\
            filter(SystemInfo.version == sys_info_dict['version']).\
            one()
    except NoResultFound:
        system_info = self.insertSystemInfo(sys_info_dict)

    result = CrashReports(
        exit_code=crash_report['exit_code'],
        stderr_output=crash_report['stderr_output'],
        crash_group=crash_group,
        application=application,
        system_info=system_info
    )

    self.session.add(result)
    if commit:
        self.session.commit()

    if todict:
        return {
            'crash_report_ack': {
                'crash_report_id': result.id,
                'crash_report_url': 'vd1/crash_reports/' + str(result.id),
                'crash_group_id': result.crash_group.id,
                'crash_group_url': ('vd1/crash_groups/' +
                                    str(result.crash_group.id)),
                'solution': {
                    "solution_id": result.crash_group.solution.id,
                    "solution_url": ('vd1/solutions/' +
                                     str(result.crash_group.solution.id)),
                    "shell_script": result.crash_group.solution.details
                }
            }
        }
    else:
        return result

def ranking_with_classifier(train_corpus, test_corpus, train_rels, topics, p, ix, alpha1=0.5, alpha2=0.5):
    lst = []
    for topic_id in topics:
        results = ranking(topic_id, p, ix, "TF-IDF")
        results = [(el[0], el[1] / results[0][1]) for el in results]
        new_corpus = []
        for id1 in (el[0] for el in results):
            for el in test_corpus:
                if el[0] == id1:
                    new_corpus.append((id1, el[1]))
        topic = process_topic(topic_id, topic_directory)
        model = training(topic, train_corpus, train_rels,
                         model=KNeighborsClassifier(n_neighbors=25, metric="euclidean"))
        classes = [
            classify(new_corpus[i][1], topic, model)
            for i in range(len(new_corpus))
        ]
        results = [(el[0], el[1] * alpha1 + classes[i] * alpha2)
                   for i, el in enumerate(results)]
        results.sort(reverse=True, key=lambda x: x[1])
        lst.append(results)
    return lst

def bulk_insert(self):
    # insert all documents from the corpus
    model = classification.train()
    actions = []
    for i in range(1, 13):
        with open("corpusi/reddit/month" + str(i) + ".csv", 'rb') as datafile:
            csv_file_obj = csv.reader(datafile)
            csv.field_size_limit(500 * 1024 * 1024)
            for row in csv_file_obj:
                try:
                    v = {'body': '\n'.join(row),
                         'language': classification.classify(model, row)}
                except UnicodeDecodeError:
                    print "unicode error"
                actions.append(self.format_comment(v))  # add new document into array

    # now parse and insert the documentation, python then java
    python_dict = parse_python_docs()
    for key in python_dict:
        name = key
        document = python_dict[key]
        for method in document:
            description = document[method]
            actions.append(self.format_documentation(method, description, name, "Python"))

    java_dict = parse_java_docs()
    for key in java_dict:
        name = key
        document = java_dict[key]
        for method in document:
            description = document[method]
            actions.append(self.format_documentation(method, description, name, "Java"))

    # perform bulk insert; input: array of nested dictionaries
    return helpers.bulk(self.es, actions, stats_only=True)

def rank(embeddings_dict, list_of_texts, annotated_tokens, classifier):
    if(annotated_tokens is None or len(annotated_tokens) == 0):  # Cannot rank None
        return None
    words_not_found_in_text = []
    for index, text in enumerate(list_of_texts):  # There are multiple texts in the order of increasing precision
        # text = text_preprocessors.text_to_lower(text)
        annotated_tokens = text_preprocessors.list_to_lower(annotated_tokens)
        tokenized_text = text_preprocessors.preprocess_text(text)
        # print tokenized_text
        # print annotated_tokens
        # tokenized_text = text_preprocessors.tokenize(text)
        # tokenized_text = text_preprocessors.tokens_remove_non_alpha(tokenized_text)
        feature_vectors_dict = classification.get_feature_vectors(tokenized_text, annotated_tokens, embeddings_dict)
        if(len(feature_vectors_dict['words_not_found_in_text']) > 0):
            if((index + 1) != len(list_of_texts)):  # A word was not found and there are remaining texts to be seen
                continue
            else:  # This is the last text. Have to do with it
                words_not_found_in_text = feature_vectors_dict['words_not_found_in_text']
        predictions = classification.classify(feature_vectors_dict['words'],
                                              feature_vectors_dict['feature_vectors'],
                                              classifier)
        # print predictions
        if(predictions is None):
            print 'no predictions. Returning original list...'
            return annotated_tokens
        ranked_list = _rank_using_predictions(feature_vectors_dict['words'], predictions)
        # print ranked_list
        # Putting the words not found at the end
        ranked_list = ranked_list + words_not_found_in_text
        return ranked_list

def loop_classify(args, train_frac, test_frac=None, learning_rates=LEARNING_RATES, tests=TESTS):
    name = f"{get_name(args)}_train_{train_frac}_test_{test_frac}"
    with open("./res/{}{}.txt".format(name, get_time()), "w") as file:
        file.write(
            f"training set fraction is {train_frac}, test set fraction is {test_frac}"
        )
        ress = {}
        for model in models_full:
            args.model = model
            print('model - {}'.format(model))
            file.write('\n\nModel: {}\n'.format(model))
            res = np.zeros((len(learning_rates), tests))
            for t in range(tests):
                embeddings, labels = embed_and_load(args)
                for i in range(len(learning_rates)):
                    lr = learning_rates[i]
                    train, train_labels, test, test_labels = select(
                        embeddings, labels, train_frac, test_frac)
                    res[i, t] = classify(train, train_labels, test, test_labels,
                                         args, iterations, lr)
            ress[model] = res
            exps = np.arange(tests)
            save_model(file, learning_rates, res)
        plot_results_vertical(learning_rates, ress, tests, name)

def get_true_sim(self, i, j, true_result):
    assert (true_result.dist_or_sim() == 'dist')
    _, d = true_result.dist_sim(i, j, FLAGS.dist_norm)
    c = classify(d, FLAGS.thresh_val_test_pos, FLAGS.thresh_val_test_neg)
    if c != 0:
        return c
    else:
        return None

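# Hypothetical sketch of a two-threshold classify() helper consistent with the call
# above (an assumption, not this project's implementation): distances at or below the
# positive threshold map to +1, distances at or above the negative threshold map to -1,
# and anything in between maps to 0 ("undecided"), which get_true_sim converts to None.
def classify(dist, thresh_pos, thresh_neg):
    if dist <= thresh_pos:
        return 1
    if dist >= thresh_neg:
        return -1
    return 0
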
def semi_supervised_learning(train_file, unlabeled_file, eval_file, num_indicators, tfidf):
    seed_tweets = read_in_data(train_file)
    unlabeled_tweets = [t[0] for t in read_in_data(unlabeled_file)]

    # defaults for optional parameters
    nl = DistantLabels(seed_tweets, num_indicators=num_indicators, tfidf=tfidf)

    distant_tweets = []
    for t in unlabeled_tweets:
        prediction = nl.predict_distant_label(t)
        if prediction:
            distant_tweets.append(prediction)
    write_out_data(distant_tweets, eval_file)

    # testing
    aggress_count = len([a for a in distant_tweets if a[1] == 'aggress'])
    loss_count = len([a for a in distant_tweets if a[1] == 'loss'])
    other_count = len([a for a in distant_tweets if a[1] == 'other'])
    print 'num distant aggress = ' + str(aggress_count)
    print 'num distant loss = ' + str(loss_count)
    print 'num distant other = ' + str(other_count)
    print 'num distant total = ' + str(aggress_count + loss_count + other_count)
    print 'num before distant labeling = ' + str(len(unlabeled_tweets))
    # \end testing

    num_updated = 1
    while num_updated > 0:
        num_updated = 0
        results = classify(train_file, eval_file, model, label, feats=feats,
                           pos_tagger=pos_tagger, C=C, svm_loss=loss)
        eval_tweets = read_in_data(eval_file)
        verified_tweets = read_in_data(train_file)
        predictions = results[-1]
        for i in range(0, len(predictions)):
            if predictions[i] == 1 and eval_tweets[i][1] == label:
                verified_tweets.append(eval_tweets[i])
                eval_tweets[i] = ''
                num_updated += 1
            elif predictions[i] == 0 and eval_tweets[i][1] != label:
                verified_tweets.append(eval_tweets[i])
                eval_tweets[i] = ''
                num_updated += 1
        eval_tweets = [e for e in eval_tweets if e != '']
        write_out_data(eval_tweets, eval_file)
        write_out_data(verified_tweets, verified_file)
        train_file = verified_file
    return train_file

def drive():
    # train_file = 'data/classification/_train/train_full.csv'
    # dev_file = 'data/classification/_dev/dev_full.csv'
    # train_file = "nov-new-dataset/train.csv"
    # train_file = 'train.csv'
    train_file = 'distant_train.csv'
    # dev_file = "nov-new-dataset/dev.csv"
    # dev_file = 'dev.csv'
    # dev_file = 'add.csv'
    # dev_file = 'data/preprocessed/arrogant_bubba.csv'
    dev_file = "nov-new-dataset/test.csv"
    model = 'svm'
    label = 'aggress'
    feats = [1, 1, 'n', 0, 'min_max/all', 1300]
    C = 0.3  # original: C=0.3, modified for experimentation purposes on
             # distantly labeled dataset
    loss = 'squared_hinge'

    print 'Training on ' + train_file + ', testing on ' + dev_file

    pos_tagger = None
    if feats[2] == 'u' or feats[2] == 'b':
        pos_tagger = train_tagger()

    results = classify(train_file, dev_file, model, label, feats=feats,
                       pos_tagger=pos_tagger, C=C, svm_loss=loss)

    # output results
    print
    print 'Results'
    print
    print 'sought precision: ' + str(results[0])
    print 'sought recall: ' + str(results[1])
    print 'sought f-score: ' + str(results[2])
    print
    print 'nsought precision: ' + str(results[3])
    print 'nsought recall: ' + str(results[4])
    print 'nsought f-score: ' + str(results[5])
    print
    print 'sought precision: ' + str(results[6])
    print 'sought recall: ' + str(results[7])
    print 'sought f-score: ' + str(results[8])

    pickle.dump(results[-1], open('predictions.txt', 'w'))

    return results

def classify_photo():
    print(request.args.get('key', ''))
    print(request.form['name'])
    image_name = request.form['name']
    image_path = 'images/' + image_name
    image_url = request.form['url']

    # download photo from storage
    storage.child(image_name).download(image_path)

    labels = classify(image_path)

    data = {'name': image_name, 'url': image_url, 'descriptions': labels}
    return json.dumps(data)

def without_pca():
    df = pd.read_csv('../Data/Final.csv')
    del df['serial_number']
    del df['Unnamed: 0']
    del df['dt']
    del df['manufacturer']
    X = df.iloc[:, 0:-1]
    y = df.iloc[:, -1]
    clf = GaussianNB()
    clf.fit(X, y)
    print('Accuracy without PCA: ')
    print('Naive Bayes: ', clf.score(X, y) * 100, ' %')
    print('Logistic Regression: ', clf_2.classify(X, y) * 100, ' %')

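# For contrast, a hedged sketch of what a PCA counterpart to without_pca() might look
# like, using scikit-learn's PCA; the component count and reuse of the same columns
# are assumptions, not the project's actual with-PCA implementation.
from sklearn.decomposition import PCA

def with_pca(n_components=10):
    df = pd.read_csv('../Data/Final.csv')
    for col in ['serial_number', 'Unnamed: 0', 'dt', 'manufacturer']:
        del df[col]
    X = df.iloc[:, 0:-1]
    y = df.iloc[:, -1]

    # Project the features onto the leading principal components before fitting.
    X_reduced = PCA(n_components=n_components).fit_transform(X)

    clf = GaussianNB()
    clf.fit(X_reduced, y)
    print('Accuracy with PCA: ')
    print('Naive Bayes: ', clf.score(X_reduced, y) * 100, ' %')
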
def getBBoxes(cv2_image):
    ans = []
    bboxes = faster_rcnn.detect(cv2_image)
    for bbox in bboxes:
        cropped = cv2_image[bbox[1]:bbox[3], bbox[0]:bbox[2]]
        if classification.classify(
                Image.fromarray(cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB))):
            rbbox = []
            rbbox.append([bbox[0], bbox[1]])
            rbbox.append([bbox[2], bbox[1]])
            rbbox.append([bbox[2], bbox[3]])
            rbbox.append([bbox[0], bbox[3]])
            rbbox.append(number.give_result(model, cropped))
            ans.append(rbbox)
    return ans

def upload_image():
    name = request.args.get('name')
    session_id = request.args.get('name')
    img = request.args.get('img')
    pos = request.args.get('pos')

    img = base64.b64decode(img)
    with open(name, 'wb') as f:
        f.write(img)

    # TODO: process image data (this should be done first)
    result = classifier.classify(name)
    print('Classification result: ' + str(result[0]) + ' - ' + str(result[1]))
    return str(result[0])

def draw_boxes(detections, image, colors, model):
    import cv2
    from PIL import Image

    for label, confidence, bbox in detections:
        left, top, right, bottom = bbox2points(bbox)
        cropped = image[top:bottom, left:right]
        if classification.classify(
                Image.fromarray(cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB))):
            cv2.rectangle(image, (left, top), (right, bottom), colors[label], 1)
            height = number.give_result(model, cropped)
            cv2.putText(image, "{} [{:.2f}]".format("height limit", float(height)),
                        (left, top - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                        colors[label], 2)
    return image

def process(df_train, X_train, y_train, df_test, X_test, y_test, clf_name):
    df_complete = pd.concat([df_train, df_test])
    X_complete = pd.concat([X_train, X_test])
    y_complete = pd.concat([y_train, y_test])

    augmenter = RelevantFeatureAugmenter(column_id='id')
    augmenter.timeseries_container = df_complete
    augmenter.fit(X_complete, y_complete)

    augmenter.timeseries_container = df_train
    X_train = augmenter.fit_transform(X_train, y_train)

    augmenter.timeseries_container = df_test
    transformed_X_test = augmenter.transform(X_test)

    y_pred = classify(X_train, y_train, transformed_X_test, clf_name)

    return log_of_classification_results(y_test, y_pred)

def loop_classify_reweightings(args, train_frac, test_frac, reweight_value,
                               learning_rates=LEARNING_RATES, seperate=False, tests=TESTS):
    args.raw_ricci = False
    name = get_name(args)
    with open(
            "./res/{}_rew{}_sep{}_{}.txt".format(name, reweight_value, seperate, get_time()),
            "w") as file:
        file.write(
            f"training set fraction is {train_frac}, test set fraction is {test_frac}, reweight value is {reweight_value}"
        )
        ress = {}
        for model in models_full:
            args.model = model
            print('model - {}'.format(model))
            file.write('\n\nModel: {}\n'.format(model))
            res = np.zeros((len(learning_rates), tests))
            for t in range(tests):
                for i in range(len(learning_rates)):
                    train, train_labels, test, test_labels = embed_and_select(
                        args, train_frac, test_frac, reweight=True,
                        seperate=True, reweight_value=reweight_value)
                    lr = learning_rates[i]
                    res[i, t] = classify(train, train_labels, test, test_labels,
                                         args, iterations, lr)
            ress[model] = res
            save_model(file, learning_rates, res)
        plot_results_vertical(learning_rates, ress, tests,
                              f'{name}_rew{reweight_value}_sep{seperate}')

def compute_score(train, test):
    """
    For each sample taken from test, obtains a classification using train as the examples for classify(),
    checks whether classify finds the correct class, and returns the proportion of successful classifications.

    Parameters
    ----------
    train: {list} of {tuple}
    test: {list} of {tuple}

    Returns
    -------
    {float} : the proportion of successful classifications in test using train
    """
    return sum([classify(sample, train) == sample[4] for sample in test]) / len(test)

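# A toy invocation of compute_score(), assuming each sample is a tuple whose index 4
# holds its class label (as the sample[4] comparison above expects); the feature
# values and labels here are made up for illustration.
train = [
    (5.1, 3.5, 1.4, 0.2, 'a'),
    (4.9, 3.0, 1.3, 0.2, 'a'),
    (6.3, 2.9, 5.6, 1.8, 'b'),
    (6.7, 3.1, 5.6, 2.4, 'b'),
]
test = [
    (5.0, 3.4, 1.5, 0.2, 'a'),
    (6.5, 3.0, 5.5, 1.8, 'b'),
]
print(compute_score(train, test))  # 1.0 if both test samples are classified correctly
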
def run_analysis():
    with open(constants.OUTPUT_FOLDER + 'results.csv', 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow([
            "obj_sizes", "grid_size", "padding", "iou", "batch_size",
            "resize", "interpolation", "loss_0", "accuracy_0", "loss_1", "accuracy_1"
        ])
        file.flush()

        for crop_params in crop_grid:
            print("Clearing current directory...")
            clear_out_folder()
            print("Completed\n")

            print("Deconstructing images...")
            images = discretize()[crop_params['size']]
            print(images)
            exit(0)
            for image in images:
                print("Processing image " + image)
                crop_obj = ObjectCrop(out_folder=constants.OUTPUT_FOLDER,
                                      img_label=image)
                crop_obj.deconstruct(gh=crop_params['grid_size'][1],
                                     gw=crop_params['grid_size'][0],
                                     padding=crop_params['padding'],
                                     iou_thresh=crop_params['iou'],
                                     test_run=False)
            print("Completed\n")

            print("Running classifications...")
            for classify_params in classify_grid:
                classify_results = classify(classify_params['batch_size'],
                                            classify_params['size'],
                                            classify_params['interpolation'])
                writer.writerow([
                    crop_params['size'], crop_params['grid_size'],
                    crop_params['padding'], crop_params['iou'],
                    classify_params['batch_size'], classify_params['size'],
                    classify_params['interpolation'], classify_results[0],
                    classify_results[1], classify_results[2], classify_results[3]
                ])
                file.flush()
            print("Completed")

def main(cfg):
    try:
        glbs = GlobalParameters()
        configs = get_cfg_files(cfg)
        results = {}
        n_test_dir = ""
        total_files = len(configs)
        for i, config in enumerate(configs):
            print_message("Running config {}/{}".format(i + 1, total_files))
            set_global_parameters(config)
            print_run_details()
            n_train_dir = normalize()
            if glbs.TEST_DIR != "":
                n_test_dir = normalize(test=True)
            train, tr_labels, test, ts_labels, all_features = extract_features(
                n_train_dir, n_test_dir)
            for selection in glbs.SELECTION:
                try:
                    train, test = get_selected_features(
                        selection, train, tr_labels, test, ts_labels, all_features)
                except:
                    pass
            results[glbs.FILE_NAME] = classify(train, tr_labels, test, ts_labels,
                                               all_features, model_number=i)
            results = add_results(results)
        if glbs.WORDCLOUD:
            print_message("Generating word clouds (long processes)")
            generate_word_clouds()
        write_results(divide_results(results))
        send_work_done(glbs.TRAIN_DIR)
        print_message("Done!")
        # clean_backup_files()
    except Exception as e:
        traceback.print_exc()
        send_work_done(glbs.TRAIN_DIR, "", error=str(e), traceback=str(traceback.format_exc()))

def upload_file():
    if request.method == 'POST':
        # check if the post request has the file part
        if 'file' not in request.files:
            # flash('No file part')
            return redirect(request.url)
        file = request.files['file']
        # if the user does not select a file, the browser also
        # submits an empty part without a filename
        if file.filename == '':
            # flash('No selected file')
            return redirect(request.url)
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
            classificationResult = classification.classify()
            return redirect(url_for('upload_file', filename=filename))
    return '''

def mine_to_database(self):
    current_url = "http://jobs.scotiabank.com/careers/it-jobs/job-list-1"
    db_cursor = self.__conn.cursor()
    db_cursor.execute("""CREATE TABLE IF NOT EXISTS job_table (
                       Title VARCHAR (200),
                       Company VARCHAR (45),
                       Date_Posted DATE,
                       Location VARCHAR (45),
                       URL VARCHAR (200),
                       Description TEXT,
                       Difficulty ENUM('Co-op/internship','Entry','Experienced','Manager'),
                       Active TINYINT(1) DEFAULT '1')""")
    db_cursor.execute("update job_table SET Active = '0' WHERE Company = 'Scotia Bank'")
    while 1:
        soup = BeautifulSoup(requests.get(current_url).content)
        rows = soup.find("table", {"class": "info-table"}).find_all("tr")
        for job in rows[4:-1]:
            title = job.find("td", {"class": "jobTitle"}).find('a').text
            location = job.find("td", {"class": "location"}).text
            date = datetime.datetime.strptime(job.find("td", {"class": "custom1"}).text,
                                              '%m/%d/%Y').strftime('%Y-%m-%d')
            job_url = urljoin(self.url, job.find("td", {"class": "jobTitle"}).find('a').get("href"))
            desc = self.get_desc(job_url)
            difficulty = str(classify(desc))
            db_cursor.execute("""INSERT INTO job_table
                               (Title, Company, Date_Posted, Location, URL, Description, Difficulty, Active)
                               VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
                               ON DUPLICATE KEY UPDATE
                               Date_Posted=%s, Location=%s, Description=%s, Company='Scotia Bank',
                               Active = %s, difficulty=%s""",
                              (title, "Scotia Bank", date, location, job_url, desc, difficulty, '1',
                               date, location, desc, '1', difficulty))
            # break
        next_page = soup.find("td", {"class": "pagination"}).find("a", {"class": "pagination-more"})
        if next_page != None:
            current_url = urljoin(self.url, next_page.get("href"))
        else:
            break
    self.__conn.commit()
    db_cursor.close()

def mine_to_database(self):
    db_cursor = self.__conn.cursor()
    current_num = 0
    id = 200000
    db_cursor.execute("""CREATE TABLE IF NOT EXISTS job_table (
                       Title VARCHAR (200),
                       Company VARCHAR (45),
                       Date_Posted DATE,
                       Location VARCHAR (45),
                       URL VARCHAR (200),
                       Description TEXT,
                       Difficulty ENUM('Co-op/internship','Entry','Experienced','Manager'),
                       Active TINYINT(1) DEFAULT '1')""")
    db_cursor.execute("update job_table SET Active = '0' WHERE Company = 'RBC'")
    while 1:
        current_url = str(self.url + `current_num` + "/?q=&sortColumn=referencedate&sortDirection=desc")
        soup = BeautifulSoup(requests.get(current_url).content)
        total_job_num = int(soup.find("span", {"class": "paginationLabel"}).find_all("b")[1].text)
        rows = soup.find("table", {"class": "searchResults full table table-striped table-hover"}).find("tbody").find_all("tr")
        if current_num > total_job_num:
            break
        for job in rows:
            id += 1
            title = job.find("td", {"class": "colTitle"}).find("span").find("a").text
            location = job.find("td", {"class": "colLocation hidden-phone"}).find("span").text
            raw_date = job.find("td", {"class": "colDate hidden-phone"}).find("span").text.replace("\n", "").replace("\t", "")
            date = datetime.datetime.strptime(raw_date, "%b %d, %Y").strftime("%Y-%m-%d")
            job_url = urljoin(self.url, job.find("td", {"class": "colTitle"}).find("a", {"class": "jobTitle-link"}).get("href"))
            desc = self.get_desc(job_url)
            difficulty = str(classify(desc))
            db_cursor.execute("""INSERT INTO job_table
                               (Title, Company, Date_Posted, Location, URL, Description, Difficulty, Active)
                               VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
                               ON DUPLICATE KEY UPDATE
                               Date_Posted=%s, Location=%s, Description=%s, Active = %s, Difficulty = %s""",
                              (title, "RBC", date, location, job_url, desc, difficulty, '1',
                               date, location, desc, '1', difficulty))
        current_num += 25
    self.__conn.commit()
    db_cursor.close()

def inference(event, context):
    try:
        # Fetch data
        data = fetch_post_data(event)
        infer_config = fetch_inference_json()
        print('post data and inference config fetched')

        # Check if token exists
        if not data['token'] in infer_config:
            print(f'Token {data["token"]} not found')
            return create_response({
                'result': 'error',
                'message': 'No such token found.'
            })

        # Make predictions
        task_config = infer_config[data['token']]
        if task_config['task_type'] == 'classification':
            model = fetch_classification_model(task_config['model_filename'])
            output = classify(model, data['input'], task_config['classes'])
        else:
            model_path, model_metadata_path = fetch_sa_data(
                task_config['model_filename'],
                task_config['metadata_filename'],
            )
            output = get_sentiment(data['input'], model_path, model_metadata_path)

        return create_response({
            'result': 'success',
            'prediction': output,
        })
    except Exception as e:
        print(repr(e))
        return create_response(
            {
                'result': 'internal_error',
                'message': repr(e),
            },
            status_code=500)

def selectionHalfMethod(X, y, all_features):
    glbs = GlobalParameters()
    filename = glbs.FILE_NAME
    results = {}
    # nxt = (glbs.SELECTION[0][0], int(glbs.SELECTION[0][1]))
    nxt = (glbs.SELECTION[0][0], int(glbs.SELECTION[0][1]))
    max_last_result = 0
    bottom = (0, 0)
    top = nxt
    while top != bottom:
        max_nxt_result = 0
        print_message(nxt[0])
        print_message(nxt[1])
        glbs.FILE_NAME = glbs.FILE_NAME + str(nxt[1])
        select = select_k_best(nxt[0], int(nxt[1]))
        glbs.FEATURE_MODEL[1] = select
        results[glbs.FILE_NAME] = classify(X, y, glbs.K_FOLDS, glbs.ITERATIONS)
        for method in results[glbs.FILE_NAME].items():
            if mean(method[1]["accuracy"]) > max_nxt_result:
                max_nxt_result = mean(method[1]["accuracy"])
        results = add_results(results, glbs, nxt)
        if max_nxt_result >= max_last_result:
            top = nxt
            if bottom[1] == 0:
                nxt = (nxt[0], int(int(nxt[1]) / 2))
            if bottom[1] != 0:
                nxt = (nxt[0], int((int(nxt[1]) + bottom[1]) / 2))
            max_last_result = max_nxt_result
        elif max_nxt_result < max_last_result:
            bottom = nxt
            nxt = (nxt[0], int((top[1] + bottom[1]) / 2))
        glbs.SELECTION[0] = nxt
        if bottom[1] - top[1] == -1 and bottom == nxt:
            break
        glbs.FILE_NAME = filename
    add_results_glbs(results, glbs)

im = cv2.imread('faces/face_85.png', 0)
plt.imshow(im, cmap=plt.cm.gray)
plt.show()

# set up window parameters
window_size = 100
shift_size = 25

# scale the strokes
scale_factor = 8

# how many faces do you want to run?
num_faces = 10

for face in load_faces(n=num_faces):
    im = cv2.imread(face, 0)
    # documentation is in classification.py
    # thresh is minimum confidence
    # eyes, noses, mouths take the confident bounding boxes
    # verbose shows each window classification and accuracy
    classify(im,
             window_size=window_size,
             shift_size=shift_size,
             scale_factor=scale_factor,
             thresh=0.5,
             eyes=2,
             noses=1,
             mouths=1,
             verbose=True)

from sklearn.metrics import classification_report
from sklearn.metrics import auc
import time
import pprint

names = ["svm", "adaboost", "random_forest", "decision_tree", "MultinomialNB"]

for algorithm in names:
    for use_nlp in [False]:
        for use_tfidf in [False]:
            if not use_nlp and use_tfidf:
                continue
            for n_gram in [1, 2, 3]:
                if not use_nlp and n_gram > 1:
                    continue
                pprint.pprint(algorithm)
                pprint.pprint("use_nlp = " + str(use_nlp))
                pprint.pprint("use_tfidf = " + str(use_tfidf))
                pprint.pprint("n_gram = " + str(n_gram))
                t = time.time()
                test_true, test_predict = classify(name=algorithm, pr=True, use_CV=True,
                                                   use_nlp=use_nlp, use_tfidf=use_tfidf,
                                                   n_grams=n_gram,
                                                   combine_numerical_nlp=False)
                run_time = time.time() - t
                print(classification_report(test_true, test_predict))
                print "Run time: " + str(run_time)

def main():
    dataSet, listClassses = NBL.loadDataSet()
    nb = NBL.NBayes()
    nb.tran_set(dataSet, listClassses)
    print(CL.classify(nb.tf[3], nb.tf, listClassses, k))

my_decision_tree_new = decision_tree_create(train_data, features, 'safe_loans', max_depth=6,
                                            min_node_size=100, min_error_reduction=0.0)

my_decision_tree_old = decision_tree_create(train_data, features, 'safe_loans', max_depth=6,
                                            min_node_size=0, min_error_reduction=-1)

validation_set[0]

print 'Predicted class: %s ' % classify(my_decision_tree_new, validation_set[0])

classify(my_decision_tree_new, validation_set[0], annotate=True)

classify(my_decision_tree_old, validation_set[0], annotate=True)

evaluate_classification_error(my_decision_tree_new, validation_set)

model_1 = decision_tree_create(train_data, features, 'safe_loans', max_depth=2,
                               min_node_size=0, min_error_reduction=-1)

model_2 = decision_tree_create(train_data, features,

tracklet_clustering.cluster(tracklets_path, videonames, INSTANCE_ST, INSTANCE_TOTAL, clusters_path,
                            visualize=False)

tracklet_representation.train_bovw_codebooks(tracklets_path, videonames, traintest_parts,
                                             INTERNAL_PARAMETERS['feature_types'],
                                             intermediates_path, pca_reduction=False)
tracklet_representation.train_fv_gmms(tracklets_path, videonames, traintest_parts,
                                      INTERNAL_PARAMETERS['feature_types'], intermediates_path)

tracklet_representation.compute_bovw_descriptors(tracklets_path, intermediates_path, videonames, traintest_parts,
                                                 INSTANCE_ST, INSTANCE_TOTAL,
                                                 INTERNAL_PARAMETERS['feature_types'], feats_path + 'bovwtree/',
                                                 pca_reduction=False, treelike=True, global_repr=True,
                                                 clusters_path=clusters_path)
tracklet_representation.compute_fv_descriptors(tracklets_path, intermediates_path, videonames, traintest_parts,
                                               INSTANCE_ST, INSTANCE_TOTAL,
                                               INTERNAL_PARAMETERS['feature_types'], feats_path + 'fvtree/',
                                               treelike=True, global_repr=True, clusters_path=clusters_path)

c = [0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100, 500, 1000, 5000, 10000]

st_time = time.time()
results = classification.classify(feats_path + 'bovwtree/', videonames, class_labels, traintest_parts,
                                  np.linspace(0, 1, 11), INTERNAL_PARAMETERS['feature_types'],
                                  c=c)
print('ATEP classification (bovwtree) took %.2f secs.' % (time.time() - st_time))
print_results(results)

st_time = time.time()
results = classification.classify(feats_path + 'fvtree/', videonames, class_labels, traintest_parts,
                                  np.linspace(0, 1, 11), INTERNAL_PARAMETERS['feature_types'],
                                  c=c)
print('ATEP classification (fvtree) took %.2f secs.' % (time.time() - st_time))
print_results(results)

quit()  # TODO: remove this for further processing

preprocessing_steps = None
x_tr, x_te, groups_tr, _ = preprocess_data(
    x_tr, x_te,
    preprocessing_steps=preprocessing_steps
)

# Classification
clf = classify(
    est=est_list[est_name],
    x_tr=x_tr.values,
    y_tr=y_tr.values.ravel(),
    groups_tr=groups_tr.values,
    x_te=x_te.values,
    test_index=x_te.index,
    perform_evaluation=perform_evaluation,
    perform_cross_validation=perform_cross_validation,
    cv_params=cv_params[est_name],
    compute_submission=compute_submission,
    submission_path=(submission_folder + 'y_te_pred.csv'),
    random_state=42
)

# Feature importance
if plot_feature_importance:
    try:
        plot_avg_feature_importance(clf.feature_importances_, x_tr.columns)
    except:
        print('Feature importance not available\n')

#!/usr/bin/env python

import os
import os.path
import sys

from haar_cascades import create_crops
from classification import classify

# The script takes four arguments, so argv holds five entries including the script name.
if len(sys.argv) == 5:
    source_dir = sys.argv[1]
    haar_model = sys.argv[2]
    caffe_model = sys.argv[3]
    deploy_file = sys.argv[4]
else:
    print(
        "usage: %s source_dir haar_model caffe_model caffe_deploy_file" %
        __file__)
    sys.exit(1)

source_paths = []
for image in os.listdir(source_dir):
    source_paths.append(os.path.join(source_dir, image))

create_crops(haar_model, source_paths, crop_path)
classifications = classify(caffe_model, deploy_file, crop_path)

crop_file = "/home/user/FinalAssignment/Downloads/DownloadFile_" + map_type + ".tif"

# Download the image
try:
    select_image(url, key, point, map_type, crop_file)
except:
    print "ERROR: No map available"

# Cut to size
cut_size(crop_file, 1500, 1000, 1500, 2000, in_file)

# Create the model to classify trees
create_model(in_file, statistics_file, training_poly, output_model, confusion_matrix)

# Apply the model
classify(output_model, in_file, statistics_file, output_map)

# Delete everything that is not a tree from the dataset
select_trees(output_map, selection_map)

# Calculate percentage green per quadrant (format: nw, sw, ne, se)
print greencalculator(output_map)

## Apply the model to the other map
# Files
output_map1 = "/home/user/FinalAssignment/output/ClassifiedImage1.tif"
selection_map1 = "/home/user/FinalAssignment/output/ClassifiedImageTrees1.tif"
in_file1 = "/home/user/FinalAssignment/output/InputMap1.tif"

# Coordinates of point

if 'atep-bovw' in args.methods:
    tracklet_representation.train_bovw_codebooks(tracklets_path, videonames, traintest_parts,
                                                 xml_config['features_list'], intermediates_path,
                                                 pca_reduction=True, nt=args.nt, verbose=args.verbose)
    tracklet_representation.compute_bovw_descriptors_multithread(tracklets_path, intermediates_path, videonames,
                                                                 traintest_parts, xml_config['features_list'],
                                                                 feats_path + '/bovwtree/',
                                                                 treelike=True, pca_reduction=True,
                                                                 clusters_path=clusters_path,
                                                                 nt=args.nt, verbose=args.verbose)

    atep_bovw = kernels.compute_ATEP_kernels(feats_path + '/bovwtree/', videonames, traintest_parts,
                                             xml_config['features_list'], kernels_path + '/atep-bovw/',
                                             kernel_type='intersection', norm='l1', power_norm=False,
                                             use_disk=False, nt=args.nt, verbose=args.verbose)

    params = [[[1]], [1], np.linspace(0, 1, 21), desc_weights_gbl]

    results = classification.classify(atep_bovw,
                                      class_labels, traintest_parts, params,
                                      xml_config['features_list'],
                                      C=C_gbl, strategy=strategy_gbl,
                                      opt_criterion=opt_criterion, verbose=args.verbose)
    classification.print_results(results)

if 'atep-fv' in args.methods:
    tracklet_representation.train_fv_gmms(tracklets_path, videonames, traintest_parts,
                                          xml_config['features_list'], intermediates_path,
                                          pca_reduction=True, nt=args.nt, verbose=args.verbose)
    tracklet_representation.compute_fv_descriptors_multithread(tracklets_path, intermediates_path, videonames,
                                                               traintest_parts, xml_config['features_list'],
                                                               feats_path + '/fvtree/',
                                                               treelike=True, pca_reduction=True,
                                                               clusters_path=clusters_path,
                                                               nt=args.nt, verbose=args.verbose)

    atep_fv = kernels.compute_ATEP_kernels(feats_path + '/fvtree/', videonames, traintest_parts,
                                           xml_config['features_list'], kernels_path + '/atep-fv/',
                                           use_disk=False, nt=args.nt, verbose=args.verbose)

    params = [[[1]], [1], np.linspace(0, 1, 21), desc_weights_gbl]

from classification import classify
import pylab as pl
from sklearn.metrics import classification_report
from sklearn.metrics import auc
import time
import pprint

names = ["svm", "adaboost", "random_forest", "decision_tree", "MultinomialNB"]

for algorithm in names:
    for use_nlp in [False]:
        for use_tfidf in [False]:
            if not use_nlp and use_tfidf:
                continue
            for n_gram in [1, 2, 3]:
                if not use_nlp and n_gram > 1:
                    continue
                pprint.pprint(algorithm)
                pprint.pprint("use_nlp = " + str(use_nlp))
                pprint.pprint("use_tfidf = " + str(use_tfidf))
                pprint.pprint("n_gram = " + str(n_gram))
                t = time.time()
                test_true, test_predict = classify(name=algorithm, pr=True, use_CV=True,
                                                   use_nlp=use_nlp, use_tfidf=use_tfidf,
                                                   n_grams=n_gram,
                                                   combine_numerical_nlp=False)
                run_time = time.time() - t
                print(classification_report(test_true, test_predict))
                print "Run time: " + str(run_time)