def grafic(params, etiqueta):
    params['split'] = etiqueta
    get_features(params)
    ap_list, dict_ = eval_rankings(params)
    mean = np.mean(dict_)
    return mean
def models_combined():
    df = load_data()
    df = process_df(df)
    X, y = get_features(df)

    print "running gradient boosted model with 4000 estimators..."
    gb4000_clf, gb4000_Xtest, gb4000_ytest = run_gradient_boosted(X, y, n_estimators=4000)

    print "running gradient boosted model with 1000 estimators..."
    gb1000_clf, gb1000_Xtest, gb1000_ytest = run_gradient_boosted(X, y, n_estimators=1000)

    print "running random forest model..."
    rf1_clf, rf1_Xtest, rf1_ytest = run_rf_churn(X, y)

    df = load_data()
    df = process_df(df)
    train_df, test_df = train_test_equal_weight(df)
    X_train, y_train = get_features(train_df)
    X_test, y_test = get_features(test_df)

    print "running gradient boosted model with even fraud and non-fraud..."
    gbEF_clf, gbEF_Xtest, gbEF_ytest = run_gradient_boosted_evenFraud(X_train, y_train, X_test, y_test)

    print "running random forest model with even fraud and non-fraud..."
    rf2_clf, rf2_Xtest, rf2_ytest = run_rf_churn2(X_train, y_train, X_test, y_test)

    return (gb4000_clf, gb4000_Xtest, gb4000_ytest,
            gb1000_clf, gb1000_Xtest, gb1000_ytest,
            gbEF_clf, gbEF_Xtest, gbEF_ytest,
            rf1_clf, rf1_Xtest, rf1_ytest,
            rf2_clf, rf2_Xtest, rf2_ytest)
def _write_feature_files(files, truth_db, truth_function):
    """Output a feature file for use in WEKA."""
    get_features.get_features(files[RELATION_FILE_PATH],
                              files[PMI_FILE_PATH],
                              files[COOCCURRENCE_FILE_PATH],
                              files[FEATURE_FILE_PATH],
                              truth_db,
                              truth_function)
def main():
    # ffs = [features_funcs.dists_2_refs_lips,
    #        features_funcs.dists_2_refs_lower_lip,
    #        features_funcs.dists_2_refs_contour,
    #        features_funcs.dists_2_refs_contour_top4,
    #        features_funcs.dists_2_refs_nose,
    #        features_funcs.dists_2_refs_nose_top4,
    #        features_funcs.dists_2_refs_eyebrows,
    #        features_funcs.dists_2_refs_eyebrows_corners,
    #        features_funcs.dists_2_refs_eyes]
    # ffs = [features_funcs.dists_lips,
    #        features_funcs.dists_lower_lip,
    #        features_funcs.dists_contour,
    #        features_funcs.dists_contour_top4,
    #        features_funcs.dists_nose,
    #        features_funcs.dists_nose_top4,
    #        features_funcs.dists_eyebrows,
    #        features_funcs.dists_eyebrows_corners,
    #        features_funcs.dists_eyes]
    # ffs = [features_funcs.butterfly_catastrophe]#, features_funcs.ellipse_picked,
    #        features_funcs.ellipse, features_funcs.polyfit6,
    #        features_funcs.face_contour]
    ffs = [features_funcs.fwhr]
    test_data = True

    for feature_func in ffs:
        print('get features (with gender)', flush=True)
        t0 = time()
        if test_data:
            features = get_features("../data/testdata.csv", feature_func,
                                    "../data/testdata_labels.txt", None,
                                    face_id_clean=True, limit_deg=180)
        else:
            features = get_features("../data/data.csv", feature_func,
                                    "../data/data_labels.txt",
                                    "../data/gender.csv",
                                    face_id_clean=False, limit_deg=1)
        print(time() - t0)

        feature_length = 0
        for v in features.values():
            feature_length = len(v)
            break

        print('save as np array')
        t0 = time()
        arr = np.zeros((len(features), feature_length))
        for row, v in enumerate(features.values()):
            arr[row] = v
        if test_data:
            np.save('../data/' + feature_func.__name__ + '_test', arr)
        else:
            np.save('../data/' + feature_func.__name__, arr)
        print(time() - t0)
def fetch(self, entries):
    recordID = int(entries[0][1].get())
    if recordID in self.patient_data:
        for widget in self.body.winfo_children():
            widget.destroy()
        Label(self.body, text="Patient with given record ID already exists").grid(row=0, sticky='news', pady=5)
        Button(self.body, text="Try Again", command=self.add_patient).grid(row=1, pady=5)
        return

    self.patient_data[recordID] = {}
    for i in range(1, len(entries)):
        field = entries[i][0]
        try:
            value = float(entries[i][1].get())
        except ValueError:
            value = -1
        self.patient_data[recordID][field] = value

    with open('patient_data.pkl', 'wb') as f:
        pickle.dump(self.patient_data, f)

    for widget in self.body.winfo_children():
        widget.destroy()
    Label(self.body, text="Patient details recorded successfully!", font=('Courier', 20)).grid(row=0, sticky='news', pady=5)
    Button(self.body, text="Add more", command=self.add_patient).grid(row=1, pady=5)
    Label(self.body, text="Most important parameters:", font=('Courier', 20)).grid(row=2, sticky='news', pady=5)

    features = get_features.get_features(self.patient_data[recordID]['ICUType'])
    # features = ['Albumin', 'Temp', 'Urine', 'HCT', 'NiMAP']
    for i in range(len(features)):
        Label(self.body, text=features[i], font=('Courier', 30)).grid(row=i + 3, sticky='news', pady=5)
def main():
    inventory = []
    huur, verhuurd, koop, verkocht = get_url_list.get_aparts(60, 230, 390, 760)
    url_list = []

    # Compile the four lists into one
    for item in huur:
        url_list.append([item, "huur"])
    for item in verhuurd:
        url_list.append([item, "verhuurd"])
    for item in koop:
        url_list.append([item, "koop"])
    for item in verkocht:
        url_list.append([item, "verkocht"])
    print "Done getting all the urls!"

    new_url_list = get_chunks(url_list, 1000)
    threads = []
    for i in range(len(new_url_list)):
        t = threading.Thread(target=get_inventory, args=(inventory, new_url_list[i],))
        threads.append(t)
        t.start()
    for t in threads:
        t.join()
    print "Done getting all the features!"

    threading_output = unicodecsv.writer(open("FundaInventoryLatest.csv", "wb"),
                                         encoding='utf-8', delimiter='|')
    sample = get_features.get_features(sample_url)
    sample["type"] = "sample"
    threading_output.writerow(sample.keys())
    for row in inventory:
        threading_output.writerow(row.values())
    print "Done writing output file!"
def main():
    print('getting faces')
    g1, g2 = split_genders(
        get_features("../data/testdata.csv", pure_landmarks,
                     "../data/testdata_labels.txt", None,
                     face_id_clean=True, limit_deg=180))
    # g1, g2 = split_genders(
    #     get_features("../data/data.csv", pure_landmarks,
    #                  "../data/data_labels.txt",
    #                  "../data/gender.csv", face_id_clean=False, limit_deg=1)
    # )
    # g1 = g1[:1000]
    # g2 = g2[:1000]

    print('averaging')
    g1 = [(x, y) for x, y in zip(np.mean(g1, 0)[::2], np.mean(g1, 0)[1::2])]
    g2 = [(x, y) for x, y in zip(np.mean(g2, 0)[::2], np.mean(g2, 0)[1::2])]
    g1 = np.array(g1)
    g2 = np.array(g2)
    # np.save('g0_2deg', g1)
    # np.save('g1_2deg', g2)

    plt.plot(g1[:, 0], g1[:, 1], '.')
    # plt.savefig('gender0.png')
    # plt.clf()
    plt.plot(g2[:, 0], g2[:, 1], '.')
    # plt.savefig('gender1.png')
    plt.show()
def predict_price(image_file, model, symbol_search='$'):
    df = get_text.get_text(image_file, symbol_search)
    df = get_features.get_features(df)
    features = [
        'block_confidence', 'paragraph_confidence', 'word_confidence',
        'block_weigh', 'paragraph_weigh', 'word_weigh',
        'rel_word_block_area', 'rel_word_parag_area', 'rel_parag_block_area',
        'prev_symbol', 'next_symbol',
        'text_type', 'prev_text_type', 'next_text_type',
        'text_len', 'is_a_symbol', 'number_type',
    ]
    X = df[features].copy()
    y_pred = model.predict(X.values)
    df['predict'] = y_pred
    return df
def get_training_data(dataframe):
    # print(ravdess.head())
    print(dataframe.head())
    X = []
    Y = []
    for index, row in dataframe.iterrows():
        # print(index, row)
        emotion = row["emotion"]
        path = row["path"]
        print("processing file", path)
        # print(emotion, path)
        # duration and offset take care of the silence at the start and
        # end of each audio file, as seen above.
        data, sample_rate = librosa.load(path, duration=2.5, offset=0.6)
        # augment_data returns the original data as the first element
        for augmented_data in augment_data(data, sample_rate):
            features = get_features(augmented_data, sample_rate)
            # Store the feature list; the target label is kept in Y
            X.append(features)
            Y.append(emotion)
    df = pd.DataFrame(X)
    df["label"] = Y
    return df
def auto_feature_select(ds, obj):
    obj.features = []  # Reset the features
    best_score = 0
    for i in range(0, np.size(ds.X, 1)):
        print('Current features: ')
        get_features(obj.features)
        print('Now trying feature: ')
        get_features([i])
        obj.features.append(i)
        cross_validation(ds)
        print_results([obj])
        if obj.best_score > best_score:
            improvement = obj.best_score - best_score
            print('Kept feature; score improved by %0.2f%%: ' % improvement)
            best_score = obj.best_score
        else:
            print('Did not keep feature')
            obj.features.pop()
def predict(df, model):
    '''
    INPUT: processed datapoint / dataframe
    OUTPUT: array of predictions, array of probabilities
    '''
    X = get_features(df, point=True)
    predictions = model.predict(X)
    probs = model.predict_proba(X)
    return predictions, probs
def start_prediction(model):
    df = pd.read_csv(TEST_PATH)
    print(df)
    features = get_features(df)
    print("Test features len")
    print(len(features))
    predictions = model.predict(features)
    with open("submission.csv", 'w') as f:
        f.write("id,target\n")
        for i in range(0, len(predictions)):
            f.write(str(df.at[i, 'id']) + "," + str(predictions[i]) + "\n")
def objectTracking(filename):
    cap = cv2.VideoCapture(filename)
    img1 = None
    img2 = None
    writer = skvideo.io.FFmpegWriter('Easy.avi')
    bboxs = np.load('easy.npy')
    frame_num = 0
    while cap.isOpened():
        frame_num += 1
        ret, frame = cap.read()
        if not ret:
            break
        if img1 is None and img2 is None:
            img2 = frame
            img2 = cv2.GaussianBlur(img2, (7, 7), 0)
            gray = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
            startYs, startXs = get_features(gray, bboxs)
            continue
        img1 = img2
        img2 = frame
        img2 = cv2.GaussianBlur(img2, (7, 7), 0)
        newXs, newYs = estimateAllTranslation(startXs, startYs, img1, img2)
        startXs, startYs, bboxs = applyGeometricTransformation(
            startXs, startYs, newXs, newYs, bboxs)
        bb_img = frame
        delete_mask = np.ones(bboxs.shape[0], dtype=bool)
        for idx, bbox in enumerate(bboxs):
            mask = np.logical_or(startXs[:, idx] >= frame.shape[1],
                                 startYs[:, idx] >= frame.shape[0])
            startXs[:, idx][mask] = -1
            startYs[:, idx][mask] = -1
            if (startXs[:, idx] < 0).all() and (startYs[:, idx] < 0).all():
                delete_mask[idx] = False
                continue
            bb_img = draw_bounding_box(bbox, bb_img)
        bboxs = bboxs[delete_mask, :, :]
        startXs = startXs[:, delete_mask]
        startYs = startYs[:, delete_mask]
        for idx, (x, y) in enumerate(zip(startXs, startYs)):
            for ind in range(bboxs.shape[0]):
                if x[ind] >= 0 and y[ind] >= 0:
                    cv2.circle(bb_img, (np.int32(x[ind]), np.int32(y[ind])), 3, (0, 0, 255), -1)
        writer.writeFrame(bb_img[:, :, [2, 1, 0]])
        cv2.imshow('frame', bb_img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    writer.close()
    cap.release()
    cv2.destroyAllWindows()
def start_training():
    df = pd.read_csv(TRAIN_PATH)
    print('Number of training sentences: ', len(df))
    # df = df.sample(200)
    labels = df['target']
    print(labels)
    features = get_features(df)
    print(len(features))
    train_features, test_features, train_labels, test_labels = train_test_split(
        features, labels)
    lr_clf = LogisticRegression()
    lr_clf.fit(train_features, train_labels)
    print(lr_clf.score(test_features, test_labels))
    return lr_clf
def __init__(self, articles):
    Corpus.__init__(self, articles)
    for article in articles:
        # Get the feature values for the current article.
        features = get_features(article)
        if article.train:
            # Put the feature dict in the training set and keep a list of
            # all articles in the training set.
            self.train_feats.append(features)
            self.train_articles.append(article)
        else:
            # Otherwise put it in the testing set and keep a list of all
            # articles in the testing set.
            self.test_feats.append(features)
            self.test_articles.append(article)
    self.feat_names = features.keys()
def get_inventory(inventory, url_list):
    # failed_urls = unicodecsv.writer(open("failed_urls_all_threading_20150516.csv", "wb"), encoding='utf-8', delimiter='|')
    count = 0
    # Loop through all huur and verhuurd urls
    for row in url_list:
        try:
            apart = get_features.get_features(row[0])
            apart["type"] = row[1]
            inventory.append(apart)
        except:
            print "error url: " + row[0]
            print sys.exc_info()
            # failed_urls.writerow([row[0], sys.exc_info()])
        count += 1
        if count % 100 == 0:
            print "We have gathered properties: " + str(count)
            time.sleep(2)
def get_inventory(backfill=False):
    inventory = []
    properties = get_property_list(backfill=backfill)
    total_count = len(properties)
    print "Total Properties: ", total_count
    count = 0
    for prop in properties:
        try:
            features = get_features(prop['Id'])
            inventory.append(features)
            count += 1
        except:
            print "Count: ", count
            print "error id: ", prop['Id']
            print sys.exc_info()
        if count % 100 == 0:
            print "Crawled Properties: ", count
    return inventory
def update_loss(target, vgg, content_features, style_weights, style_grams,
                content_weight, style_weight):
    # for displaying the target image intermittently
    show_every = 400

    # iteration hyperparameters
    optimizer = optim.Adam([target], lr=0.004)
    steps = 2000  # variable; can be updated as needed

    for ii in range(1, steps + 1):
        target_features = get_features(target, vgg)

        # calculate the content loss
        content_loss = torch.mean((target_features['conv4_2'] - content_features['conv4_2']) ** 2)

        # calculate the style loss, iterating through a number of layers
        style_loss = 0
        for layer in style_weights:
            # get the "target" style representation for the layer
            target_feature = target_features[layer]
            target_gram = gram_matrix(target_feature)
            batch_size, d, h, w = target_feature.shape
            # get the "style" style representation for the layer
            style_gram = style_grams[layer]
            # the weighted style loss for one layer
            layer_style_loss = style_weights[layer] * torch.mean((target_gram - style_gram) ** 2)
            # add to the style loss
            style_loss += layer_style_loss / (d * h * w)

        # calculate the total loss
        total_loss = content_weight * content_loss + style_weight * style_loss

        # update the target image
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        # display intermediate images and print the loss
        if ii % show_every == 0:
            print('Total loss: ', total_loss.item())
            plt.imshow(im_convert(target))
            plt.show()
def detect(img, x_start_stop, y_start_stop, window_sz, overlap):
    y_start = y_start_stop[0]
    y_stop = y_start_stop[1] - window_sz[1]
    x_start = x_start_stop[0]
    x_stop = x_start_stop[1] - window_sz[0]

    # Compute the number of pixels per step in x/y
    x_step = int(window_sz[0] * (1 - overlap[0]))
    y_step = int(window_sz[1] * (1 - overlap[1]))

    # Step through image at current scale and aspect ratio; append if positive prediction
    corners = []
    for y in np.arange(y_start, y_stop, y_step):
        for x in np.arange(x_start, x_stop, x_step):
            cur_img = img[y:y + window_sz[1], x:x + window_sz[0]]
            cur_img_resize = cv2.resize(cur_img, (64, 64))
            if clf.predict(get_features(cur_img_resize)):
                corners.append(np.array([x, y, x + window_sz[0], y + window_sz[1]]))
            # corners.append(np.array([x, y, x + window_sz[0], y + window_sz[1]]))
    return corners
devfile = open(devfilename)
devJson = json.load(devfile)
devfile.close()

testfile = open(testfilename)
testJson = json.load(testfile)
testfile.close()

negativefilename = 'negative_train.json'
negativefile = open(negativefilename)
nJson = json.load(negativefile)
negativefile.close()

# acquire features for training
train_tokens, train_tags, train_otherfeats = get_features(trainJson)
neg_tokens, neg_tags, neg_otherfeats = get_features(nJson)
labels = tokenizer.getlabels(trainJson) + tokenizer.getlabels(nJson)
train_tokens += neg_tokens
train_tags += neg_tags
train_otherfeats += neg_otherfeats

# vec = TfidfVectorizer(tokenizer=dummy, preprocessor=dummy)
vec = CountVectorizer(tokenizer=dummy, preprocessor=dummy)
train_vec = vec.fit_transform(train_tokens).toarray()
train_tags = csr_matrix(train_tags).toarray()
train_otherfeats = csr_matrix(train_otherfeats).toarray()
train_X = np.concatenate([train_otherfeats, train_tags, train_vec], axis=1)
# train_X = train_vec
print(train_X.shape)
gnbObj.isUsed = True
annObj.isUsed = False
ensObj.isUsed = True
apriObj.isUsed = True
astaltObj.isUsed = True
fib4Obj.isUsed = True

cross_validation(toronto)
# sens: 63.53
# acc: 68.96
# auc: 0.75
find_misclassifications(toronto, algorithmArray)
plot_heat_map(toronto, algorithmArray)
get_features(gbcObj.features)

####################### External Validation from Montreal

# Import the validation set
montreal = dataset_class()
montreal.description = 'McGill Liver Clinic Dataset. External test set for validation'
montreal.df = pd.read_excel(
    'C:/Users/Soren/Desktop/Thesis/Data Analysis/reformatted_mcgill_dataset.xlsx',
    parse_cols="A:BD")
montreal.df = montreal.df.loc[(montreal.df['Fibrosis'] >= 0)
                              & (montreal.df['Fibrosis'] != 2)
                              & (montreal.df['Fibrosis'] != 3)
                              & (montreal.df['NAFL'] == 1)]
montreal.df = montreal.df.sample(frac=1).reset_index(drop=True)
montreal.X = montreal.df.iloc[:, 0:49].values
montreal.Y = (montreal.df.iloc[:, 49].values > 1) * 4
dirmake(path + 'TAMSD/plot/')
n = 0
for i in range(Q_TAMSD_plot):
    plot_TAMSD(list(tamsd_data.loc[i]),
               path + 'TAMSD/plot/tamsd' + str(i) + '.pdf',
               {'figsize': (4, 3)},
               ['a', 'b', 'c', 'd', 'e', 'f'][i])
    n += 1
print(' --- ZAKOŃCZONO')
print(64 * '-')

if Q_ML_features == 'Y':
    # extract the data
    if not traject_loaded:
        trajectories = read_trajectories(part, Model)
        traject_loaded = True
    if not expo_loaded:
        exps = read_real_expo(part, Model)
    get_features(trajectories, exps, part, Model)
    print(64 * '-')

if Q_ML_linreg == 'Y':
    if not features_loaded:
        features = read_ML_features(part, Model)
    linear_regression(features, part, Model)
    print(64 * '-')

if Q_ML_dectree == 'Y':
    if not features_loaded:
        features = read_ML_features(part, Model)
    decision_tree(features, part, Model)
    print(64 * '-')

if Q_ML_randomforest == 'Y':
image1 = cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY)
image2 = cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY)

scale_factor = 0.5  # make images smaller to speed up the algorithm
image1 = cv2.resize(image1, None, fx=scale_factor, fy=scale_factor)
image2 = cv2.resize(image2, None, fx=scale_factor, fy=scale_factor)

feature_width = 16  # width and height of each local feature, in pixels

## Find distinctive points in each image. Szeliski 4.1.1
# !!! You will need to implement get_interest_points. !!!
x1, y1 = get_interest_points(image1, feature_width)
x2, y2 = get_interest_points(image2, feature_width)
show_image_point(image1, x1, y1)
show_image_point(image2, x2, y2)

image1_features = get_features(image1, x1, y1, feature_width)
image2_features = get_features(image2, x2, y2, feature_width)

matches, confidences = match_features(image1_features, image2_features)

num_pts_to_visualize = len(matches)
x1 = [x1[match[0]] for match in matches]
y1 = [y1[match[0]] for match in matches]
x2 = [x2[match[1]] for match in matches]
y2 = [y2[match[1]] for match in matches]
show_correspondence(image1, image2, x1, y1, x2, y2)

evaluate_correspondence(
    np.array(x1) / scale_factor,
content = load_image('images/content/pic1.jpg').to(device)
# resize style to match content
style = load_image('images/style/pic2.jpg', shape=content.shape[-2:]).to(device)

# display the content and style images side by side
from im_convert import im_convert
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))
ax1.imshow(im_convert(content))
ax2.imshow(im_convert(style))

# getting the content and style features before forming the target image
from get_features import get_features
content_features = get_features(content, vgg)
style_features = get_features(style, vgg)

# calculating the gram matrices for each layer of the style representation
from gram_matrix import gram_matrix
style_grams = {
    layer: gram_matrix(style_features[layer])
    for layer in style_features
}

# creating the target image and prepping it for change;
# to start off, we use a copy of our content image as the initial target
# and then iteratively change its style
target = content.clone().requires_grad_(True).to(device)

# setting the weights for each style layer and setting content and style weights
style_weights = {
# Blog: http://www.cnblogs.com/AdaminXie/
# Github: https://github.com/coneypo/Smile_Detector

# use the saved model
from sklearn.externals import joblib

from get_features import get_features
import ML_ways_sklearn

import cv2

# path of test img
path_test_img = "data_imgs/test_imgs/test1.jpg"

# extract the 40-dimensional features of a single image
pos_49to68_test = get_features(path_test_img)

# path of models
path_models = "data_models/"

print("The result of " + path_test_img + ":")
print('\n')

# ######### LR ###########
LR = joblib.load(path_models + "model_LR.m")
ss_LR = ML_ways_sklearn.model_LR()
X_test_LR = ss_LR.transform([pos_49to68_test])
y_predict_LR = str(LR.predict(X_test_LR)[0]).replace('0', "no smile").replace('1', "with smile")
print("LR:", y_predict_LR)

# ######### LSVC ###########
#
# Copyright © 2018 weihao <*****@*****.**>
#
# Distributed under terms of the MIT license.

from get_mat import get_mat
from get_pqrst import get_pqrst
from get_features import get_features
from get_ground_truth import get_result_for_classifier
from utils import *
from train import train

import pandas as pd

if __name__ == '__main__':
    data_used = 1000
    data = []
    # for i in range(1, 8529):
    for i in range(1, data_used):
        print(i)
        file_name = '../training2017/A%s.mat' % str(i).zfill(5)
        out = get_mat(file_name)
        ecg = out['filtered']
        P_index, Q_index, R_index, S_index, T_index = get_pqrst(out)
        features = get_features(ecg, P_index, Q_index, R_index, S_index, T_index)
        data.append(flatten(features))
    df = pd.DataFrame(data=data)
    target = get_result_for_classifier('../training2017/REFERENCE.csv', 1)
    train(df, target[:data_used - 1])
def scan_text(text_string):
    import pycrfsuite
    from get_features import get_features
    from process_feats import syllable2features, line2features
    import pickle
    from only_four_stresses import only_four_stresses
    from yield_meter_tk import mhgscansion

    tagger = pycrfsuite.Tagger()
    tagger.open('MHGMETRICS.crfsuite')

    text_with_features, sylls = get_features(text_string)
    lines_features = [line2features(line) for line in text_with_features]
    text_tags = only_four_stresses(lines_features, tagger, sylls)

    # add back tags to features
    features_and_tags = []
    for i, line in enumerate(text_with_features):
        line_features_and_tags = []
        for i2, syll in enumerate(line):
            line_features_and_tags.append(syll + (text_tags[i][i2], ))
        features_and_tags.append(line_features_and_tags)

    # return words, sylls and labels
    words_sylls_labels = []
    for line in features_and_tags:
        line_words = []
        rec_word = []
        for syll in line:
            if syll[4] == "WBYR":
                rec_word.append((syll[0], syll[-1]))
                line_words.append(rec_word)
                rec_word = []
            elif syll[4] == "MONO":
                line_words.append([(syll[0], syll[-1])])
                rec_word = []
            else:
                rec_word.append((syll[0], syll[-1]))
        words_sylls_labels.append(line_words)

    # change primary stresses to secondary where necessary
    tags_n_stress = []
    for line in words_sylls_labels:
        rev_line = []
        for word in line:
            rev_word = []
            stress_present = 0
            for syll in word:
                rev_syll = syll
                if (syll[-1] == "MORA_HAUPT" or syll[-1] == "DOPPEL"
                        or syll[-1] == "HALB_HAUPT"):
                    stress_present += 1
                if stress_present > 1:
                    if syll[-1] == "MORA_HAUPT":
                        rev_syll = (syll[0], "MORA_NEBEN")
                    elif syll[-1] == "HALB_HAUPT":
                        rev_syll = (syll[0], "HALB_NEBEN")
                rev_word.append(rev_syll)
            rev_line.append(rev_word)
        tags_n_stress.append(rev_line)

    return mhgscansion(tags_n_stress)
import pickle
import argparse
import numpy as np

from get_features import pos_keys, one_hot_encoded_pos_features_for_dict, get_features

parser = argparse.ArgumentParser(add_help=True)
parser.add_argument('file_for_analyzing', type=str, help='file_for_analyzing')
args = parser.parse_args()

loaded_model = pickle.load(open('finalized_model.sav', 'rb'))
test_file = args.file_for_analyzing

print("predict with trained model")
object_features = get_features(test_file, None, pos_keys,
                               one_hot_encoded_pos_features_for_dict,
                               predict=True)
object_features_array = np.asarray(object_features)
object_features_array = object_features_array.reshape(1, -1)
# print(object_features)
print(loaded_model.predict(object_features_array))
import os

from build_database import build_database
from get_features import get_features
from rank import rank
from classify import classify
from evaluate_ranking import evaluate_ranking
from evaluate_classification import evaluate_classification

ruta1 = os.path.dirname(os.path.abspath(__file__)) + '\\TerrassaBuildings900\\val\\images'
ruta2 = os.path.dirname(os.path.abspath(__file__)) + '\\TerrassaBuildings900\\train\\images'
savepath1 = os.path.dirname(os.path.abspath(__file__)) + '\\TerrassaBuildings900\\val'
savepath2 = os.path.dirname(os.path.abspath(__file__)) + '\\TerrassaBuildings900\\train'

build_database(ruta1, savepath1)
build_database(ruta2, savepath2)

get_features(ruta1, savepath1, savepath1)
get_features(ruta2, savepath2, savepath2)

savepath_principal = os.path.dirname(os.path.abspath(__file__))
features_val = os.path.dirname(os.path.abspath(__file__)) + '\\TerrassaBuildings900\\val'
features_train = os.path.dirname(os.path.abspath(__file__)) + '\\TerrassaBuildings900\\train'
rank(features_val, features_train, savepath_principal)

feat = os.path.dirname(os.path.abspath(__file__)) + '\\TerrassaBuildings900\\val\\Features.txt'
path_out = os.path.dirname(os.path.abspath(__file__))
labels = os.path.dirname(os.path.abspath(__file__)) + '\\labels.txt'
classify(feat, path_out, labels)

path = os.path.dirname(os.path.abspath(__file__))
Gt_val_test = os.path.dirname(os.path.abspath(__file__)) + '\\TerrassaBuildings900\\val\\annotation.txt'
evaluate_ranking(path, Gt_val_test)
# Updated on: 2018-10-09

# Display the lip feature points
# Draw the positions of someone's lip

import dlib  # Dlib face recognition library
import cv2   # OpenCV image processing library

from get_features import get_features  # return the positions of feature points

path_test_img = "data/data_imgs/test_imgs/i064rc-mn.jpg"

detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(
    'data/data_dlib_model/shape_predictor_68_face_landmarks.dat')

# Get the lip's positions of feature points
positions_lip = get_features(path_test_img)

img_rd = cv2.imread(path_test_img)

# Draw on the lip points
for i in range(0, len(positions_lip), 2):
    print(positions_lip[i], positions_lip[i + 1])
    cv2.circle(img_rd, tuple([positions_lip[i], positions_lip[i + 1]]),
               radius=1, color=(0, 255, 0))

cv2.namedWindow("img_read", 2)
cv2.imshow("img_read", img_rd)
cv2.waitKey(0)
from evaluate_ranking import evaluate_ranking
from evaluate_classification import evaluate_classification

ruta1 = os.path.dirname(
    os.path.abspath(__file__)) + '\\TerrassaBuildings900\\val\\images'
ruta2 = os.path.dirname(
    os.path.abspath(__file__)) + '\\TerrassaBuildings900\\train\\images'
savepath1 = os.path.dirname(
    os.path.abspath(__file__)) + '\\TerrassaBuildings900\\val'
savepath2 = os.path.dirname(
    os.path.abspath(__file__)) + '\\TerrassaBuildings900\\train'

build_database(ruta1, savepath1)
build_database(ruta2, savepath2)

get_features(ruta1, savepath1, savepath1)
get_features(ruta2, savepath2, savepath2)

savepath_principal = os.path.dirname(os.path.abspath(__file__))
features_val = os.path.dirname(
    os.path.abspath(__file__)) + '\\TerrassaBuildings900\\val'
features_train = os.path.dirname(
    os.path.abspath(__file__)) + '\\TerrassaBuildings900\\train'
rank(features_val, features_train, savepath_principal)

feat = os.path.dirname(
    os.path.abspath(__file__)) + '\\TerrassaBuildings900\\val\\Features.txt'
path_out = os.path.dirname(os.path.abspath(__file__))
labels = os.path.dirname(os.path.abspath(__file__)) + '\\labels.txt'
classify(feat, path_out, labels)
import get_features as GF

# Make sure that we are using training images only!
params['split'] = 'train'

t = time.time()
X, pca, scaler = GF.stack_features(params)
print "Done. Time elapsed:", time.time() - t
print np.shape(X)

t = time.time()
GF.train_codebook(params, X)
print "Done. Time elapsed:", time.time() - t

t = time.time()
GF.get_features(params)
print "Done. Time elapsed for training set:", time.time() - t

# Switch to validation set
params['split'] = 'val'

t = time.time()
# Run again
GF.get_features(params)
print "Done. Time elapsed for validation set:", time.time() - t

from rank import *

t = time.time()
rank(params)
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn import preprocessing
from sklearn import utils
import numbers

current_file_path = os.path.abspath(os.path.join("__file__", "../../.."))
nb_path = os.path.abspath(os.path.join(current_file_path, 'notebooks'))
os.chdir(nb_path)

import get_features

print(current_file_path)

features_path = os.path.abspath(os.path.join(nb_path, 'features.csv'))
_ = get_features.get_features()
df = pd.read_csv(features_path)
df = df.set_index(['movie_id', 'title'])

feature_list = df.drop('target', axis=1).columns
features = np.array(df.drop('target', axis=1))
labels = df['target']

x_train, x_test, y_train, y_test = train_test_split(features, labels,
                                                    test_size=0.3,
                                                    random_state=42)

# create param_grid based off of the best results from RandomizedSearchCV
res = pd.read_csv(os.path.abspath(os.path.join(nb_path, 'random_rf_cvresults.csv')))
params = [i for i in res.columns if 'param_' in i]
for i in range(len(enron_df)):
    feature_1.iloc[i] = enron_df['bonus'].iloc[i] / enron_df['salary'].iloc[i] if \
        enron_df['salary'][i] != 0.0 else 0.0
    feature_2.iloc[i] = enron_df['from_poi_to_this_person'].iloc[i] / enron_df['to_messages'].iloc[i] if \
        enron_df['to_messages'][i] != 0.0 else 0.0
    feature_3.iloc[i] = enron_df['from_this_person_to_poi'][i] / enron_df['from_messages'].iloc[i] if \
        enron_df['from_messages'][i] != 0.0 else 0.0

enron_df['bonus-to-salary_ratio'] = feature_1
enron_df['from_poi_ratio'] = feature_2
enron_df['to_poi_ratio'] = feature_3

# Define features list
# features_list = get_features(1)  # Include all original features
features_list = get_features(2)  # Include all original features plus 3 engineered features
# features_list = get_features(3)  # Only use top features selected by Decision Tree algorithm

# Task 2: Remove outliers
# As explained in the attached Jupyter Notebook, the following outliers will be removed from the data set
enron_df.drop('TOTAL', axis=0, inplace=True)
enron_df.drop('THE TRAVEL AGENCY IN THE PARK', axis=0, inplace=True)

# Convert data into numeric values; option coerce converts non-numeric data to NaN
enron_df = enron_df.apply(lambda x: pd.to_numeric(x, errors='coerce'))

# Convert dataframe back to dictionary
my_dataset = enron_df.T.to_dict()

# Extract features and labels from dataset for local testing
data = featureFormat(my_dataset, features_list, sort_keys=True)
labels = ['neutral', 'calm', 'happy', 'sad', 'angry', 'fear', 'disgust', 'surprise']

model = keras.models.load_model("./model.h5")
model.summary()

validation_audio = [
    "./happy.mp3",
    "./happy.m4a",
    "./happy2.m4a",
    "./lucas_nienpedo.mp3",
    "./sol_maichu.m4a",
    "./fran_scared.m4a",
    "./fran_scared2.m4a",
    "./lucas_cursing.mp3",
    "./nacho_happy.mp3",
]

for path in validation_audio:
    print("processing", path)
    data, sampling_rate = librosa.load(path, duration=2.5, offset=0.6)
    features = get_features(data, sampling_rate)
    features_transposed = np.expand_dims([features], axis=2)
    res = model.predict(features_transposed)
    max_id = np.argmax(res[0])
    print("prediction", labels[max_id])
    print("predictions")
    for score, label in zip(res[0], labels):
        print(label, score)
from train_classifier import train_classifier
from classify import classify
from eval_classification import eval_classification
from eval_classification import plot_confusion_matrix

import warnings
warnings.filterwarnings("ignore")

# Extract the parameters
params = get_params()

# Build the database
params['split'] = 'train'
build_database(params)
params['split'] = 'val'
build_database(params)

# Extract the features
get_features(params)

# Train a classification model
train_classifier(params)

# Classification
classify(params)

# Evaluate the classification
f1, precision, recall, accuracy, cm, labels = eval_classification(params)

print "Mesures:\n"
print f1
print "-F1:", np.mean(f1)
print "-Precision:", np.mean(precision)
print "-Recall:", np.mean(recall)
print "-Accuracy:", accuracy
print "-Confusion matrix:\n", cm

plot_confusion_matrix(cm, labels, normalize=True)
import csv
import get_features
import expand_features
import unicodecsv

sample_url = "http://www.funda.nl/koop/amsterdam/appartement-49453167-van-boetzelaerstraat-34-2/"

header_output = unicodecsv.writer(open("expanded_headers.csv", "wb"),
                                  encoding='utf-8', delimiter='|')

sample = get_features.get_features(sample_url)
sample["type"] = "sample"
expand_features.expand_features(sample)
header_output.writerow(sample.keys())