Example #1
    def Batch_Loop(self, feature_path, full_path, snippet_path, batch_size,
                   gpu_flag, videos_in_folder, video_count):

        current_video = 0
        self.popup.content = Label(text='Processing Video..(' +
                                   str(current_video) + '/' +
                                   str(video_count) + ')',
                                   color=rgba('#DAA520'),
                                   font_size=20)
        for video in videos_in_folder:
            if '_noext' not in video:
                current_video += 1
                # update the progress popup with the current count
                self.popup.content = Label(text='Processing Video..(' +
                                           str(current_video) + '/' +
                                           str(video_count) + ')',
                                           color=rgba('#DAA520'),
                                           font_size=20)
                path = full_path + '/' + video
                # run feature extraction on this video
                feature_extractor.feature_extractor(feature_path, path,
                                                    snippet_path, batch_size,
                                                    gpu_flag)

        self.popup.content = Label(
            text='Anomalous Snippets of Videos saved in the output folder',
            color=rgba('#DAA520'),
            font_size=20)
        Clock.schedule_once(partial(self.dismisspopup), 1)
        mainmenu = App.get_running_app().root.get_screen('MainMenu')
        mainmenu.ids.videoplayer.state = 'play'
Example #2
    def __init__(self, use_echonest_dataset=False):

        start_time = time.time()
        # initialize svm model
        self.MODEL = SVM_MODEL.SVC()
        # tracks whether the model has been trained
        self.TRAINED = False
        # highest accuracy seen on the validation set
        self.HIGHEST_VA_ACC = 0
        # test accuracy
        self.TEST_ACC = 0
        # initialize each feature set
        self.TR_X = []
        self.VA_X = []
        self.TE_X = []
        # initialize each genre (label) set
        self.TR_T = []
        self.VA_T = []
        self.TE_T = []
        # stores the validation accuracy of each trained model
        self.ALL_VA_ACC = []
        # use feature_extractor to grab the data
        self.DATA = FE.feature_extractor(use_echonest_dataset)
        # initialize track ids for each set
        self.TR_IDS = self.DATA.get_training_dataset_song_ids()
        self.VA_IDS = self.DATA.get_validation_dataset_song_ids()
        self.TE_IDS = self.DATA.get_test_dataset_song_ids()

        print('Elapsed time to initialize: ' + str(time.time() - start_time) +
              ' seconds\n')
        return
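The constructor above relies on module aliases (SVM_MODEL, FE) that are not shown in the excerpt. A hedged guess at the corresponding imports, inferred purely from how the names are used; the real project may alias them differently:

import time
from sklearn import svm as SVM_MODEL    # assumed: provides SVM_MODEL.SVC()
import feature_extractor as FE           # assumed: provides FE.feature_extractor(...)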
Example #3
def ext():
    # Read the pairs
    print("Read Pairs")
    print("Read Train")
    train = d.read_train_pairs()
    print("Read Valid")
    valid = d.read_valid_pairs()
    print("Read Sup1")
    sup1 = d.read_sup1_train_pairs()
    print("Read Sup2")
    sup2 = d.read_sup2_train_pairs()
    print("Read Sup3")
    sup3 = d.read_sup3_train_pairs()

    # Get the feature extractor
    combined = feat.feature_extractor()

    # Extract the features
    print("Extract the features")
    print("Extract Train")
    train_att = combined.fit_transform(train)
    print("Extract Valid")
    valid_att = combined.fit_transform(valid)
    print("Extract Sup1")
    sup1_att = combined.fit_transform(sup1)
    print("Extract Sup2")
    sup2_att = combined.fit_transform(sup2)
    print("Extract Sup3")
    sup3_att = combined.fit_transform(sup3)

    print("Join")
    total_new_att = np.vstack((train_att, valid_att, sup1_att, sup2_att, sup3_att))

    # Save extracted data
    np.save('total_new_att.npy', total_new_att)
Example #4
    def button_click(self):
        text = self.url_input.text()
        #print(text)
        obj = feature_extractor.feature_extractor(text)
        str1 = obj.extract()

        self.output_text.append("{} \n\n\n\n".format(str1))
Example #5
def get_pipeline():
    features = fe.feature_extractor()
    steps = [("extract_features", features),
             ("scale", StandardScaler()),
             ("classify", RandomForestRegressor(n_estimators=1024, 
                                                verbose=2,
                                                n_jobs=1,
                                                min_samples_split=10,
                                                min_samples_leaf=10,
                                                random_state=1))]
    p = Pipeline(steps)
    params = dict(classify__n_estimators=[768, 1024, 1536],
                  classify__min_samples_split=[1, 5, 10],
                  classify__min_samples_leaf=[1, 5, 10])
    grid_search = GridSearchCV(p, params, n_jobs=8)
    return grid_search
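For context, here is a self-contained sketch of the same Pipeline-plus-GridSearchCV pattern, with synthetic regression data standing in for the project's pair features; the custom feature_extractor step is left out so the snippet runs on its own, and the parameter grid uses min_samples_split values of at least 2, which recent scikit-learn releases require.

from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# synthetic stand-in for the extracted pair features
X, y = make_regression(n_samples=200, n_features=10, noise=0.1, random_state=1)

pipeline = Pipeline([("scale", StandardScaler()),
                     ("classify", RandomForestRegressor(n_estimators=64,
                                                        min_samples_split=10,
                                                        min_samples_leaf=10,
                                                        random_state=1))])

params = {"classify__n_estimators": [32, 64],
          "classify__min_samples_split": [2, 5, 10],
          "classify__min_samples_leaf": [1, 5, 10]}

grid_search = GridSearchCV(pipeline, params, n_jobs=-1)
grid_search.fit(X, y)                     # grid-search over the forest settings
print(grid_search.best_params_)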
Example #6
    def generate_desc(self, picture):
        # Greedy decoding: repeatedly predict the next word until 'endseq'
        # (or an unknown id) is produced, or max_length is reached.
        photo = feature_extractor(picture)
        text = 'startseq'
        for i in range(self.max_length):
            seq = self.tokenizer.texts_to_sequences([text])[0]
            seq = pad_sequences([seq], maxlen=self.max_length)
            yhat = self.model.predict([photo, seq])
            yhat = np.argmax(yhat)
            word = self.int_to_word(yhat)
            if word == 'endseq':
                break
            if word is None:
                break
            text = text + ' ' + word
        return text[9:]  # drop the leading 'startseq ' prefix
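generate_desc calls an int_to_word helper that is not part of the excerpt. A plausible minimal sketch, assuming self.tokenizer is a Keras Tokenizer; the real project may implement the lookup differently:

    def int_to_word(self, index):
        # Tokenizer.index_word maps integer ids back to vocabulary words;
        # .get() returns None for unknown ids such as the padding id 0.
        return self.tokenizer.index_word.get(index)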
Example #7
def get_pipeline():
    features = fe.feature_extractor()
    classifier = GradientBoostingClassifier(n_estimators=1024,
                                            random_state=1,
                                            subsample=.8,
                                            min_samples_split=10,
                                            max_depth=6,
                                            verbose=3)
    steps = [("extract_features", features), ("classify", classifier)]
    myP = Pipeline(steps)
    #    params = {"classify__n_estimators": [768, 1024, 1536], "classify__min_samples_split": [1, 5, 10], "classify__min_samples_leaf": [1, 5, 10]}
    #    grid_search = GridSearchCV(myP, params, n_jobs=8)
    #    return grid_search
    #   return myP
    return (features, classifier)
Example #8
def get_pipeline():
    features = fe.feature_extractor()
    classifier = GradientBoostingClassifier(n_estimators=1024,
                                            random_state=1,
                                            subsample=.8,
                                            min_samples_split=10,
                                            max_depth=6,
                                            verbose=3)
    steps = [("extract_features", features),
             ("classify", classifier)]
    myP = Pipeline(steps)
#    params = {"classify__n_estimators": [768, 1024, 1536], "classify__min_samples_split": [1, 5, 10], "classify__min_samples_leaf": [1, 5, 10]}
#    grid_search = GridSearchCV(myP, params, n_jobs=8)
#    return grid_search
#   return myP
    return (features, classifier)
Example #9
def main2():
    extractor_opensmile = feature_extractor()
    extractor_praat = praat_extractor()

    p_fralusopark = Process(target=run_one_dataset,
                            args=(extractor_opensmile, extractor_praat,
                                  "fralusopark", paths.FRALUSOPARK_OUTPUT,
                                  ["CONTROLOS", "DOENTES"],
                                  paths.FRALUSOPARK_AUDIOS))
    p_fralusopark.start()

    p_gita = Process(target=run_one_dataset,
                     args=(extractor_opensmile, extractor_praat, "gita",
                           paths.GITA_OUTPUT, ["hc", "pd"], paths.GITA_AUDIOS))
    p_gita.start()

    p_mdvr_kcl = Process(target=run_one_dataset,
                         args=(extractor_opensmile, extractor_praat, "mdvr_kcl",
                               paths.MDVR_KCL_OUTPUT, ["HC", "PD"],
                               paths.MDVR_KCL_AUDIOS))
    p_mdvr_kcl.start()

    p_fralusopark.join()
    p_gita.join()
    p_mdvr_kcl.join()
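The same fan-out pattern in a self-contained form: one multiprocessing.Process per dataset, all started and then joined. The worker here is a stub standing in for run_one_dataset and the real extractors.

from multiprocessing import Process

def worker(dataset_name):
    # stand-in for run_one_dataset(extractor_opensmile, extractor_praat, ...)
    print("processing", dataset_name)

if __name__ == "__main__":
    procs = [Process(target=worker, args=(name,))
             for name in ("fralusopark", "gita", "mdvr_kcl")]
    for p in procs:
        p.start()          # run the three datasets in parallel
    for p in procs:
        p.join()           # wait for all of them to finish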
Example #10
def main():
    print("Reading the valid pairs") 
    valid = data_io.read_valid_pairs()
    features = fe.feature_extractor()
    print("Transforming features")
    trans_valid = features.fit_transform(valid)
    trans_valid = np.nan_to_num(trans_valid)

    print("Saving Valid Features")
    data_io.save_valid_features(trans_valid)

    print("Loading the classifier")
    #(both_classifier, A_classifier, B_classifier, none_classifier) = data_io.load_model()
    classifier = data_io.load_model()

    print("Making predictions")
    valid_info = data_io.read_valid_info() 
    predictions = list()
    curr_pred = None
    """
    for i in range(len(trans_valid)):
      
      if valid_info["A type"][i] == "Numerical" and valid_info["B type"][i] == "Numerical":
        curr_pred = both_classifier.predict_proba(trans_valid[i, :])
      
      elif valid_info["A type"][i] == "Numerical" and valid_info["B type"][i] != "Numerical":
        curr_pred = A_classifier.predict_proba(trans_valid[i, :])
      
      elif valid_info["A type"][i] != "Numerical" and valid_info["B type"][i] == "Numerical":
        curr_pred = B_classifier.predict_proba(trans_valid[i, :])
     
      else:
        curr_pred = none_classifier.predict_proba(trans_valid[i, :])

      predictions.append(curr_pred[0][2] - curr_pred[0][0])
    """

    orig_predictions = classifier.predict_proba(trans_valid)
    predictions = orig_predictions[:, 2] - orig_predictions[:, 0]
    predictions = predictions.flatten()

    print("Writing predictions to file")
    data_io.write_submission(predictions)
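The predict_proba step above collapses the three class probabilities into one signed score, orig_predictions[:, 2] - orig_predictions[:, 0]. A small self-contained sketch of that computation, assuming the targets are the ordered classes -1, 0, +1 (an assumption; the column order of predict_proba follows the classifier's classes_ attribute):

from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier

X, y = make_classification(n_samples=300, n_classes=3, n_informative=4,
                           random_state=1)
y = y - 1                                  # relabel classes as -1, 0, +1
clf = GradientBoostingClassifier(n_estimators=50, random_state=1).fit(X, y)
proba = clf.predict_proba(X)               # columns ordered as clf.classes_
score = proba[:, 2] - proba[:, 0]          # P(class = +1) - P(class = -1)
print(score[:5])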
Example #11
def main():
    print("Reading the valid pairs")
    valid = data_io.read_valid_pairs()
    features = fe.feature_extractor()
    print("Transforming features")
    trans_valid = features.fit_transform(valid)
    trans_valid = np.nan_to_num(trans_valid)

    print("Saving Valid Features")
    data_io.save_valid_features(trans_valid)

    print("Loading the classifier")
    #(both_classifier, A_classifier, B_classifier, none_classifier) = data_io.load_model()
    classifier = data_io.load_model()

    print("Making predictions")
    valid_info = data_io.read_valid_info()
    predictions = list()
    curr_pred = None
    """
    for i in range(len(trans_valid)):
      
      if valid_info["A type"][i] == "Numerical" and valid_info["B type"][i] == "Numerical":
        curr_pred = both_classifier.predict_proba(trans_valid[i, :])
      
      elif valid_info["A type"][i] == "Numerical" and valid_info["B type"][i] != "Numerical":
        curr_pred = A_classifier.predict_proba(trans_valid[i, :])
      
      elif valid_info["A type"][i] != "Numerical" and valid_info["B type"][i] == "Numerical":
        curr_pred = B_classifier.predict_proba(trans_valid[i, :])
     
      else:
        curr_pred = none_classifier.predict_proba(trans_valid[i, :])

      predictions.append(curr_pred[0][2] - curr_pred[0][0])
    """

    orig_predictions = classifier.predict_proba(trans_valid)
    predictions = orig_predictions[:, 2] - orig_predictions[:, 0]
    predictions = predictions.flatten()

    print("Writing predictions to file")
    data_io.write_submission(predictions)
Example #12
def main():
    print("Reading the valid pairs") 
    valid = data_io.read_valid_pairs()
    features = fe.feature_extractor()
    print("Transforming features")
    trans_valid = features.fit_transform(valid)
    trans_valid = np.nan_to_num(trans_valid)

    print("Saving Valid Features")
    data_io.save_features(trans_valid)

    print("Loading the classifier")
    classifier = data_io.load_model()

    print("Making predictions") 
    orig_predictions = classifier.predict_proba(trans_valid)
    predictions = orig_predictions[:, 2] - orig_predictions[:, 0]
    predictions = predictions.flatten()

    print("Writing predictions to file")
    data_io.write_submission(predictions)
Example #13
import numpy as np
import unittest

from audio_preprocessor import audio_preprocessor
from feature_extractor import feature_extractor
from feature_extractor import feature_type
from feature_extractor import echonest_feature_type
from feature_extractor import statistic_type

USE_ECHONEST_DATASET = False
extractor = feature_extractor(USE_ECHONEST_DATASET)
processor = audio_preprocessor()


class TestExtractor(unittest.TestCase):
    ''' Test feature extractor class.
        Make sure to download the metadata folder
        using setup_env.sh before running unit tests. '''
    def test_get_training_dataset_info(self):
        ''' Get training dataset '''
        training_set_song_ids = extractor.get_training_dataset_song_ids()
        print('---------------------------------------------------------')
        print('Training dataset (total size: ' +
              str(len(training_set_song_ids)) + ')')
        print('---------------------------------------------------------\n')
        TestExtractor.print_dataset_info(training_set_song_ids, True)

    def test_get_validation_dataset_info(self):
        ''' Get validation dataset '''
        validation_set_song_ids = extractor.get_validation_dataset_song_ids()
        print('---------------------------------------------------------')
Example #14
        p = plt.errorbar(n_init_range, inertia.mean(axis=1),
                         inertia.std(axis=1))
        plots.append(p[0])
        legends.append("%s with %s init" % (factory.__name__, init))

    plt.xlabel('n_init')
    plt.ylabel('inertia')
    plt.legend(plots, legends)
    plt.title("Mean inertia for various k-means init across %d runs" % n_runs)
    plt.show()


'''
Step 1: Read and Preprocess data 
'''
fe = feature_extractor.feature_extractor(use_echonest_dataset=False)
# the scaler standardizes each feature (zero mean, unit variance)
scaler = StandardScaler()
# PCA is used to reduce the feature dimensionality to 20 components
pca = PCA(n_components=20)

#get training data and apply scaler and pca
train_ids = np.asarray(fe.get_training_dataset_song_ids())
train_features, train_genres = read_data(
    fe, train_ids)  #read_echonest_data(fe, train_ids)
train_features = scaler.fit_transform(train_features)
train_features = pca.fit_transform(train_features)

#get validation data and apply scaler and pca
validation_ids = np.asarray(fe.get_validation_dataset_song_ids())
validation_features, validation_genres = read_data(fe, validation_ids)
Example #15
def extract_feature(apk_files_path, dst):
    samples_id_file = os.path.join(apk_files_path, 'samples_id.txt')
    fe = feature_extractor(apk_files_path, dst, samples_id_file)
    fe.get_features()
Example #16
def main():
    extractor_opensmile = feature_extractor()

    run_one_dataset(extractor_opensmile, None, 'gita', '', [0,1], None)
    extractor_opensmile.merge([gemaps], ['name', 'frameTime'], "patient_complete.csv")
Example #17
def main():
    t1 = time()
    print("Reading in the training data")
    train = data_io.read_train_pairs()
    train_info = data_io.read_train_info()
    train = combine_types(train, train_info)

    #make function later
    train = get_types(train)
    target = data_io.read_train_target()

    print("Reading SUP data...")
    for i in range(1, 4):
        print("SUP", str(i))
        sup = data_io.read_sup_pairs(i)
        sup_info = data_io.read_sup_info(i)
        sup = combine_types(sup, sup_info)
        sup = get_types(sup)
        sup_target = data_io.read_sup_target(i)
        train_info = train_info.append(sup_info)
        train = train.append(sup)
        target = target.append(sup_target)

    # Old train
    print("Reading old train data...")
    old_train = data_io.read_old_train_pairs()
    old_train_info = data_io.read_old_train_info()
    old_train = combine_types(old_train, old_train_info)
    old_train = get_types(old_train)
    old_target = data_io.read_old_train_target()

    train = train.append(old_train)
    target = target.append(old_target)
    # End old train

    print("Train size = ", str(train.shape))
    print("Extracting features and training model")
    feature_trans = fe.feature_extractor()
    orig_train = feature_trans.fit_transform(train)
    orig_train = numpy.nan_to_num(orig_train) 

    classifier = classify_catagory(orig_train, target.Target)
    #(both_classifier, A_classifier, B_classifier, none_classifier) = create_classifiers(orig_train, target.Target, train_info)

    print("Saving features")
    data_io.save_features(orig_train)

    print("Saving the classifier")
    #data_io.save_model( (both_classifier, A_classifier, B_classifier, none_classifier) )
    data_io.save_model(classifier) 
 
    #features = [x[0] for x in classifier.steps[0][1].features ]

    #csv_fea = csv.writer(open('features.csv','wb'))
    #imp = sorted(zip(features, classifier.steps[1][1].feature_importances_), key=lambda tup: tup[1], reverse=True)
    #for fea in imp:
    #    print fea[0], fea[1]
    #    csv_fea.writerow([fea[0],fea[1]])


    t2 = time()
    t_diff = t2 - t1
    print("Time Taken (min):", round(t_diff / 60, 1))
Example #18
def main3():
    extractor_opensmile = feature_extractor()
    extractor_praat     = praat_extractor()    
Example #19
def main():
    t1 = time()
    print("Reading in the training data")
    train = data_io.read_train_pairs()
    train_info = data_io.read_train_info()
    train = combine_types(train, train_info)

    #make function later
    train = get_types(train)
    target = data_io.read_train_target()

    print("Reading SUP data...")
    for i in range(1, 4):
        print("SUP", str(i))
        sup = data_io.read_sup_pairs(i)
        sup_info = data_io.read_sup_info(i)
        sup = combine_types(sup, sup_info)
        sup = get_types(sup)
        sup_target = data_io.read_sup_target(i)
        train_info = train_info.append(sup_info)
        train = train.append(sup)
        target = target.append(sup_target)

    # Old train
    print("Reading old train data...")
    old_train = data_io.read_old_train_pairs()
    old_train_info = data_io.read_old_train_info()
    old_train = combine_types(old_train, old_train_info)
    old_train = get_types(old_train)
    old_target = data_io.read_old_train_target()

    train = train.append(old_train)
    target = target.append(old_target)
    # End old train

    print("Train size = ", str(train.shape))
    print("Extracting features and training model")
    feature_trans = fe.feature_extractor()
    orig_train = feature_trans.fit_transform(train)
    orig_train = numpy.nan_to_num(orig_train)

    classifier = classify_catagory(orig_train, target.Target)
    #(both_classifier, A_classifier, B_classifier, none_classifier) = create_classifiers(orig_train, target.Target, train_info)

    print("Saving features")
    data_io.save_features(orig_train)

    print("Saving the classifier")
    #data_io.save_model( (both_classifier, A_classifier, B_classifier, none_classifier) )
    data_io.save_model(classifier)

    #features = [x[0] for x in classifier.steps[0][1].features ]

    #csv_fea = csv.writer(open('features.csv','wb'))
    #imp = sorted(zip(features, classifier.steps[1][1].feature_importances_), key=lambda tup: tup[1], reverse=True)
    #for fea in imp:
    #    print fea[0], fea[1]
    #    csv_fea.writerow([fea[0],fea[1]])

    t2 = time()
    t_diff = t2 - t1
    print("Time Taken (min):", round(t_diff / 60, 1))
Example #20
import time
import urllib
import json
from datetime import datetime

# modules used below but not imported in the original excerpt
import dlib
from imutils import face_utils
from imutils.video import WebcamVideoStream

import feature_extractor

if __name__ == "__main__":
    # Initiate face detection module
    detector = dlib.get_frontal_face_detector()

    # Initiate facial landmark detector
    predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

    # Initiate feature extractor
    fe = feature_extractor.feature_extractor()

    # Fetch left right eye id range
    left_eye_start_id, left_eye_end_id = face_utils.FACIAL_LANDMARKS_IDXS[
        "left_eye"]
    right_eye_start_id, right_eye_end_id = face_utils.FACIAL_LANDMARKS_IDXS[
        "right_eye"]

    # Open the camera
    try:
        stream = WebcamVideoStream(0).start()
    except:
        # Check if camera opened successfully
        print("Fail to access camera")
        raise
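The excerpt stops right after the camera is opened. As a hedged illustration only, a typical dlib/imutils capture loop might continue along these lines; how the project's fe object consumes the landmarks is not shown above, so it is left as a placeholder comment.

    import cv2

    while True:
        frame = stream.read()                           # latest webcam frame
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        for rect in detector(gray, 0):                  # detected face boxes
            shape = face_utils.shape_to_np(predictor(gray, rect))
            left_eye = shape[left_eye_start_id:left_eye_end_id]
            right_eye = shape[right_eye_start_id:right_eye_end_id]
            # ... hand the landmarks / eye points to the feature extractor here ...
        cv2.imshow("frame", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    stream.stop()
    cv2.destroyAllWindows()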
Example #21
    def __init__(self, *args, **kwargs):
        super(test_feature_extractor, self).__init__(*args, **kwargs)
        self.fea_extractor = feature_extractor(
            '../samples_balanced/', 'features',
            '../samples_balanced/samples_id.txt')