def Batch_Loop(self, feature_path, full_path, snippet_path, batch_size,
               gpu_flag, videos_in_folder, video_count):
    """Run feature extraction over every video in the folder.

    Skips files tagged '_noext', refreshes the progress popup after each
    video, announces completion, schedules the popup to dismiss one second
    later, and restarts the main-menu video player.
    """
    done = 0
    progress = 'Processing Video..(' + str(done) + '/' + str(video_count) + ')'
    self.popup.content = Label(text=progress,
                               color=rgba('#DAA520'), font_size=20)
    for name in videos_in_folder:
        # Files tagged '_noext' are skipped entirely.
        if '_noext' in name:
            continue
        done += 1
        progress = ('Processing Video..(' + str(done) + '/'
                    + str(video_count) + ')')
        self.popup.content = Label(text=progress,
                                   color=rgba('#DAA520'), font_size=20)
        feature_extractor.feature_extractor(feature_path,
                                            full_path + '/' + name,
                                            snippet_path, batch_size,
                                            gpu_flag)
    self.popup.content = Label(
        text='Anomalous Snippets of Videos saved in the output folder',
        color=rgba('#DAA520'), font_size=20)
    Clock.schedule_once(partial(self.dismisspopup), 1)
    mainmenu = App.get_running_app().root.get_screen('MainMenu')
    mainmenu.ids.videoplayer.state = 'play'
def __init__(self, use_echonest_dataset=False):
    """Build an untrained SVM genre classifier and load all dataset splits.

    Args:
        use_echonest_dataset: forwarded to the feature extractor to select
            the Echonest feature set instead of the default one.
    """
    start_time = time.time()
    self.MODEL = SVM_MODEL.SVC()   # underlying (untrained) SVM model
    self.TRAINED = False           # flips to True once training completes
    self.HIGHEST_VA_ACC = 0        # best validation accuracy seen so far
    self.TEST_ACC = 0              # accuracy on the held-out test set
    # Feature matrices for the train / validation / test splits.
    self.TR_X, self.VA_X, self.TE_X = [], [], []
    # Genre labels for the train / validation / test splits.
    self.TR_T, self.VA_T, self.TE_T = [], [], []
    # Validation accuracy of every model trained so far.
    self.ALL_VA_ACC = []
    # The extractor supplies both the features and the split membership.
    self.DATA = FE.feature_extractor(use_echonest_dataset)
    self.TR_IDS = self.DATA.get_training_dataset_song_ids()
    self.VA_IDS = self.DATA.get_validation_dataset_song_ids()
    self.TE_IDS = self.DATA.get_test_dataset_song_ids()
    print('Elapsed time to initialize: '
          + str(time.time() - start_time) + ' seconds\n')
def ext():
    """Read every pair dataset, run the shared feature extractor over each,
    stack the results row-wise and save them to 'total_new_att.npy'.

    NOTE(review): Python 2 print statements — this module targets Python 2.
    """
    # Read the pairs
    print "Read Pairs"
    print "Read Train"
    train = d.read_train_pairs()
    print "Read Valid"
    valid = d.read_valid_pairs()
    print "Read Sup1"
    sup1 = d.read_sup1_train_pairs()
    print "Read Sup2"
    sup2 = d.read_sup2_train_pairs()
    print "Read Sup3"
    sup3 = d.read_sup3_train_pairs()
    # Get the feature extractor (shared across all five datasets)
    combined = feat.feature_extractor()
    # Extract the features — fit_transform is re-fit per dataset
    print 'Extract the features'
    print "Extract Train"
    train_att = combined.fit_transform(train)
    print "Extract Valid"
    valid_att = combined.fit_transform(valid)
    print "Extract Sup1"
    sup1_att = combined.fit_transform(sup1)
    print "Extract Sup2"
    sup2_att = combined.fit_transform(sup2)
    print "Extract Sup3"
    sup3_att = combined.fit_transform(sup3)
    # Stack all feature blocks into one matrix, rows in dataset order
    print "Join"
    total_new_att = np.vstack((train_att, valid_att, sup1_att,
                               sup2_att, sup3_att))
    # Save extracted data
    np.save('total_new_att.npy', total_new_att)
def button_click(self):
    """Extract features for the URL typed into the input box and append
    the result to the output pane."""
    url = self.url_input.text()
    extractor = feature_extractor.feature_extractor(url)
    result = extractor.extract()
    self.output_text.append("{} \n\n\n\n".format(result))
def get_pipeline():
    """Build a GridSearchCV over a feature-extraction -> scaling ->
    random-forest regression pipeline and return it (unfitted)."""
    regressor = RandomForestRegressor(n_estimators=1024, verbose=2,
                                      n_jobs=1, min_samples_split=10,
                                      min_samples_leaf=10, random_state=1)
    pipeline = Pipeline([
        ("extract_features", fe.feature_extractor()),
        ("scale", StandardScaler()),
        ("classify", regressor),
    ])
    search_space = {
        "classify__n_estimators": [768, 1024, 1536],
        "classify__min_samples_split": [1, 5, 10],
        "classify__min_samples_leaf": [1, 5, 10],
    }
    return GridSearchCV(pipeline, search_space, n_jobs=8)
def generate_desc(self, picture):
    """Greedily decode a caption for *picture*.

    Feeds the image features plus the tokens generated so far back into
    the model until 'endseq', an unknown token id, or max_length steps.
    Returns the caption without the leading 'startseq ' marker.
    """
    photo = feature_extractor(picture)
    caption = 'startseq'
    for _ in range(self.max_length):
        encoded = self.tokenizer.texts_to_sequences([caption])[0]
        padded = pad_sequences([encoded], maxlen=self.max_length)
        probs = self.model.predict([photo, padded])
        word = self.int_to_word(np.argmax(probs))
        # Stop on the end marker or when the id maps to no known word.
        if word == 'endseq' or word is None:
            break
        caption = caption + ' ' + word
    # Strip the 9-character 'startseq ' prefix.
    return caption[9:]
def get_pipeline():
    """Return the (feature extractor, gradient-boosting classifier) pair.

    A Pipeline is still assembled for parity with earlier revisions, but
    the two stages are handed back separately so the caller can fit and
    apply them independently.
    """
    extractor = fe.feature_extractor()
    booster = GradientBoostingClassifier(n_estimators=1024,
                                         random_state=1,
                                         subsample=.8,
                                         min_samples_split=10,
                                         max_depth=6,
                                         verbose=3)
    Pipeline([("extract_features", extractor), ("classify", booster)])
    return (extractor, booster)
def get_pipeline():
    """Construct the feature extractor and gradient-boosting classifier
    and return them as a tuple; the combined Pipeline is built but the
    caller uses the stages separately."""
    stages = [
        ("extract_features", fe.feature_extractor()),
        ("classify", GradientBoostingClassifier(
            n_estimators=1024, random_state=1, subsample=.8,
            min_samples_split=10, max_depth=6, verbose=3)),
    ]
    Pipeline(stages)
    return (stages[0][1], stages[1][1])
def main2():
    """Run opensmile + praat feature extraction over the three corpora,
    one child process per dataset, and wait for all of them to finish."""
    opensmile = feature_extractor()
    praat = praat_extractor()
    # (dataset name, output path, class folder names, audio path)
    jobs = [
        Process(target=run_one_dataset,
                args=(opensmile, praat, "fralusopark",
                      paths.FRALUSOPARK_OUTPUT,
                      ["CONTROLOS", "DOENTES"],
                      paths.FRALUSOPARK_AUDIOS, )),
        Process(target=run_one_dataset,
                args=(opensmile, praat, "gita",
                      paths.GITA_OUTPUT,
                      ["hc", "pd"],
                      paths.GITA_AUDIOS, )),
        Process(target=run_one_dataset,
                args=(opensmile, praat, "mdvr_kcl",
                      paths.MDVR_KCL_OUTPUT,
                      ["HC", "PD"],
                      paths.MDVR_KCL_AUDIOS, )),
    ]
    for job in jobs:
        job.start()
    for job in jobs:
        job.join()
def main():
    """Score the validation pairs with the persisted classifier and write
    a submission file.

    The prediction for each pair is the signed causal-direction score
    P(class 2) - P(class 0) from the classifier's probability output.
    """
    print("Reading the valid pairs")
    valid = data_io.read_valid_pairs()
    features = fe.feature_extractor()

    print("Transforming features")
    trans_valid = features.fit_transform(valid)
    # Feature extraction can produce NaN/inf for degenerate pairs; replace
    # them with finite numbers before the classifier sees them.
    trans_valid = np.nan_to_num(trans_valid)

    print("Saving Valid Features")
    data_io.save_valid_features(trans_valid)

    print("Loading the classifier")
    classifier = data_io.load_model()

    print("Making predictions")
    orig_predictions = classifier.predict_proba(trans_valid)
    predictions = (orig_predictions[:, 2] - orig_predictions[:, 0]).flatten()

    print("Writing predictions to file")
    data_io.write_submission(predictions)
def main():
    """Transform the validation pairs, persist the features, then score
    them with the saved classifier and write the submission file."""
    print("Reading the valid pairs")
    pairs = data_io.read_valid_pairs()
    extractor = fe.feature_extractor()

    print("Transforming features")
    # nan_to_num keeps non-finite extractor output away from the model.
    transformed = np.nan_to_num(extractor.fit_transform(pairs))

    print("Saving Valid Features")
    data_io.save_features(transformed)

    print("Loading the classifier")
    model = data_io.load_model()

    print("Making predictions")
    proba = model.predict_proba(transformed)
    # Signed causal-direction score: P(class 2) - P(class 0).
    scores = (proba[:, 2] - proba[:, 0]).flatten()

    print("Writing predictions to file")
    data_io.write_submission(scores)
import numpy as np
import unittest

from audio_preprocessor import audio_preprocessor
from feature_extractor import feature_extractor
from feature_extractor import feature_type
from feature_extractor import echonest_feature_type
from feature_extractor import statistic_type

# Toggle between the default and the Echonest feature sets.
USE_ECHONEST_DATASET = False

# Module-level fixtures shared by every test case below.
extractor = feature_extractor(USE_ECHONEST_DATASET)
processor = audio_preprocessor()


class TestExtractor(unittest.TestCase):
    ''' Test feature extractor class '''
    ''' Make sure to download the metadata folder '''
    ''' using setup_env.sh before running unit tests '''

    def test_get_training_dataset_info(self):
        ''' Get training dataset '''
        training_set_song_ids = extractor.get_training_dataset_song_ids()
        print('---------------------------------------------------------')
        print('Training dataset (total size: '
              + str(len(training_set_song_ids)) + ')')
        print('---------------------------------------------------------\n')
        TestExtractor.print_dataset_info(training_set_song_ids, True)

    def test_get_validation_dataset_info(self):
        ''' Get validation dataset '''
        validation_set_song_ids = extractor.get_validation_dataset_song_ids()
        print('---------------------------------------------------------')
        # NOTE(review): source appears truncated here in this view — the
        # rest of this method (and any further tests) is not visible.
p = plt.errorbar(n_init_range, inertia.mean(axis=1), inertia.std(axis=1)) plots.append(p[0]) legends.append("%s with %s init" % (factory.__name__, init)) plt.xlabel('n_init') plt.ylabel('inertia') plt.legend(plots, legends) plt.title("Mean inertia for various k-means init across %d runs" % n_runs) plt.show() ''' Step 1: Read and Preprocess data ''' fe = feature_extractor.feature_extractor(use_echonest_dataset=False) #scaler was used to scale each feature to range [0,1] scaler = StandardScaler() #pca was used to reduced feature dimensions pca = PCA(n_components=20) #get training data and apply scaler and pca train_ids = np.asarray(fe.get_training_dataset_song_ids()) train_features, train_genres = read_data( fe, train_ids) #read_echonest_data(fe, train_ids) train_features = scaler.fit_transform(train_features) train_features = pca.fit_transform(train_features) #get validation data and apply scaler and pca validation_ids = np.asarray(fe.get_validation_dataset_song_ids()) validation_features, validation_genres = read_data(
def extract_feature(apk_files_path, dst):
    """Run the feature extractor over the APKs listed in samples_id.txt
    under *apk_files_path*, writing results to *dst*."""
    id_file = os.path.join(apk_files_path, 'samples_id.txt')
    extractor = feature_extractor(apk_files_path, dst, id_file)
    extractor.get_features()
def main():
    """Extract opensmile features for the 'gita' dataset (no praat pass)
    and merge the GeMAPS output into patient_complete.csv."""
    opensmile = feature_extractor()
    # No praat extractor, empty output prefix, labels 0/1, default audios.
    run_one_dataset(opensmile, None, 'gita', '', [0, 1], None)
    # NOTE(review): `gemaps` is not defined in this function — presumably a
    # module-level name; confirm it is in scope at call time.
    opensmile.merge([gemaps], ['name', 'frameTime'], "patient_complete.csv")
def main():
    """Train the causal-direction classifier on train + SUP1-3 + old-train
    data, then persist both the extracted features and the fitted model.

    NOTE(review): mixes Python 2 print statements with print() calls —
    this module targets Python 2.
    """
    t1 = time()
    print("Reading in the training data")
    train = data_io.read_train_pairs()
    train_info = data_io.read_train_info()
    train = combine_types(train, train_info)
    #make function later
    train = get_types(train)
    target = data_io.read_train_target()
    print "Reading SUP data..."
    # Append the three supplemental training sets to the main one.
    for i in range(1,4):
        print "SUP", str(i)
        sup = data_io.read_sup_pairs(i)
        sup_info = data_io.read_sup_info(i)
        sup = combine_types(sup, sup_info)
        sup = get_types(sup)
        sup_target = data_io.read_sup_target(i)
        train_info = train_info.append(sup_info)
        train = train.append(sup)
        target = target.append(sup_target)
    # Old train
    print "Reading old train data..."
    old_train = data_io.read_old_train_pairs()
    old_train_info = data_io.read_old_train_info()
    old_train = combine_types(old_train, old_train_info)
    old_train = get_types(old_train)
    old_target = data_io.read_old_train_target()
    train = train.append(old_train)
    target = target.append(old_target)
    # End old train
    print "Train size = ", str(train.shape)
    print("Extracting features and training model")
    feature_trans = fe.feature_extractor()
    orig_train = feature_trans.fit_transform(train)
    # Replace NaN/inf from feature extraction with finite numbers.
    orig_train = numpy.nan_to_num(orig_train)
    classifier = classify_catagory(orig_train, target.Target)
    #(both_classifier, A_classifier, B_classifier, none_classifier) = create_classifiers(orig_train, target.Target, train_info)
    print("Saving features")
    data_io.save_features(orig_train)
    print("Saving the classifier")
    #data_io.save_model( (both_classifier, A_classifier, B_classifier, none_classifier) )
    data_io.save_model(classifier)
    #features = [x[0] for x in classifier.steps[0][1].features ]
    #csv_fea = csv.writer(open('features.csv','wb'))
    #imp = sorted(zip(features, classifier.steps[1][1].feature_importances_), key=lambda tup: tup[1], reverse=True)
    #for fea in imp:
    #    print fea[0], fea[1]
    #    csv_fea.writerow([fea[0],fea[1]])
    t2 = time()
    t_diff = t2 - t1
    print "Time Taken (min):", round(t_diff/60,1)
def main3():
    """Construct the opensmile and praat feature extractors.

    NOTE(review): the body appears truncated in this view — as shown it
    only builds the two extractors and never uses them; confirm against
    the full file before editing.
    """
    extractor_opensmile = feature_extractor()
    extractor_praat = praat_extractor()
def main():
    """Build the combined training set (train + SUP1-3 + old train),
    extract features, fit the category classifier, and save features and
    model to disk.

    NOTE(review): Python 2 print statements mixed with print() calls —
    runs under Python 2 only.
    """
    t1 = time()
    print("Reading in the training data")
    train = data_io.read_train_pairs()
    train_info = data_io.read_train_info()
    train = combine_types(train, train_info)
    #make function later
    train = get_types(train)
    target = data_io.read_train_target()
    print "Reading SUP data..."
    # Fold in the three supplemental training sets.
    for i in range(1, 4):
        print "SUP", str(i)
        sup = data_io.read_sup_pairs(i)
        sup_info = data_io.read_sup_info(i)
        sup = combine_types(sup, sup_info)
        sup = get_types(sup)
        sup_target = data_io.read_sup_target(i)
        train_info = train_info.append(sup_info)
        train = train.append(sup)
        target = target.append(sup_target)
    # Old train
    print "Reading old train data..."
    old_train = data_io.read_old_train_pairs()
    old_train_info = data_io.read_old_train_info()
    old_train = combine_types(old_train, old_train_info)
    old_train = get_types(old_train)
    old_target = data_io.read_old_train_target()
    train = train.append(old_train)
    target = target.append(old_target)
    # End old train
    print "Train size = ", str(train.shape)
    print("Extracting features and training model")
    feature_trans = fe.feature_extractor()
    orig_train = feature_trans.fit_transform(train)
    # Sanitize non-finite feature values before fitting.
    orig_train = numpy.nan_to_num(orig_train)
    classifier = classify_catagory(orig_train, target.Target)
    #(both_classifier, A_classifier, B_classifier, none_classifier) = create_classifiers(orig_train, target.Target, train_info)
    print("Saving features")
    data_io.save_features(orig_train)
    print("Saving the classifier")
    #data_io.save_model( (both_classifier, A_classifier, B_classifier, none_classifier) )
    data_io.save_model(classifier)
    #features = [x[0] for x in classifier.steps[0][1].features ]
    #csv_fea = csv.writer(open('features.csv','wb'))
    #imp = sorted(zip(features, classifier.steps[1][1].feature_importances_), key=lambda tup: tup[1], reverse=True)
    #for fea in imp:
    #    print fea[0], fea[1]
    #    csv_fea.writerow([fea[0],fea[1]])
    t2 = time()
    t_diff = t2 - t1
    print "Time Taken (min):", round(t_diff / 60, 1)
import time
import urllib
import json
from datetime import datetime

import feature_extractor

# NOTE(review): dlib, face_utils, and WebcamVideoStream are used below but
# not imported here — confirm they are imported elsewhere or add imports.

if __name__ == "__main__":
    # Initiate face detection module
    detector = dlib.get_frontal_face_detector()
    # Initiate facial landmark detector
    predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
    # Initiate feature extractor
    fe = feature_extractor.feature_extractor()
    # Fetch left right eye id range
    left_eye_start_id, left_eye_end_id = face_utils.FACIAL_LANDMARKS_IDXS[
        "left_eye"]
    right_eye_start_id, right_eye_end_id = face_utils.FACIAL_LANDMARKS_IDXS[
        "right_eye"]
    # Open the camera
    try:
        stream = WebcamVideoStream(0).start()
    except:
        # Check if camera opened successfully
        # NOTE(review): bare except catches everything (incl. SystemExit /
        # KeyboardInterrupt) before re-raising; consider narrowing.
        print("Fail to access camera")
        raise
    # NOTE(review): the script appears truncated here in this view.
def __init__(self, *args, **kwargs):
    """Create the test case and attach the shared feature_extractor
    fixture, pointed at the balanced sample set."""
    super(test_feature_extractor, self).__init__(*args, **kwargs)
    self.fea_extractor = feature_extractor(
        '../samples_balanced/',
        'features',
        '../samples_balanced/samples_id.txt',
    )