def predict_pipeline(audio_fpath, model):
    """Return the arg-max index of each row of the model's prediction.

    Args:
        audio_fpath: Path to the audio file, either a single string or an
            iterable of path components that is joined with
            ``os.path.join``.
        model: Object exposing ``predict(feats)``.

    Returns:
        ``np.argmax(model.predict(feats), axis=1)`` for the features
        extracted from the file.
    """
    # Kept function-local, as in the original.
    import feature
    import os

    # isinstance (rather than an exact type comparison) keeps str subclasses
    # from being splatted character-by-character into os.path.join.
    if not isinstance(audio_fpath, str):
        audio_fpath = os.path.join(*audio_fpath)

    feats = feature.extract(audio_fpath, verbose=False)
    return np.argmax(model.predict(feats), axis=1)
def predict_pipeline(audio_fpath, model, raw_prob=False):
    """Run the model on the features extracted from one audio file.

    Args:
        audio_fpath: Path to the audio file, either a single string or an
            iterable of path components that is joined with
            ``os.path.join``.
        model: Object exposing ``predict(feats)``.
        raw_prob: When true, return ``model.predict``'s output unchanged;
            otherwise reduce it with ``np.argmax`` along axis 1.

    Returns:
        The raw prediction, or its per-row arg-max indices when ``raw_prob``
        is false.
    """
    # Kept function-local, as in the original.
    import feature
    import os

    # isinstance (rather than an exact type comparison) keeps str subclasses
    # from being splatted character-by-character into os.path.join.
    if not isinstance(audio_fpath, str):
        audio_fpath = os.path.join(*audio_fpath)

    feats = feature.extract(audio_fpath)
    predictions = model.predict(feats)
    if raw_prob:
        return predictions
    return np.argmax(predictions, axis=1)
예제 #3
0
def preprocess(sample_size=10,
               feature_path='data/feature_vectors',
               label_path='data/labels'):
    """Regenerate feature/label files and load them as matrices.

    Steps, in order: delete stale ``.npy``/``.pkl`` files from the output
    directories, call ``feature.extract`` and ``label.binarize``, then load
    feature files that have a same-named label file.

    Args:
        sample_size: Passed to ``feature.extract`` and ``label.binarize``;
            ``int(sample_size)`` is also the maximum number of feature/label
            pairs loaded.
        feature_path: Directory whose files are loaded as feature vectors.
        label_path: Directory holding label files named like the feature
            files.

    Returns:
        Tuple ``(X, y)`` of ``np.matrix`` objects; row k of both comes from
        the same filename.
    """
    print('\n===== REMOVE OLD FILES =====\n')

    # NOTE(review): these directories are hard-coded and do not follow
    # feature_path / label_path -- confirm that is intended.
    output_dirs = ["./data/feature_vectors/", "./data/labels/", 'model/']
    for path in output_dirs:
        # A directory that does not exist yet has nothing to clean; skipping
        # it avoids crashing in os.listdir before any work is done.
        if not os.path.isdir(path):
            continue
        for name in os.listdir(path):
            if name.endswith(('.npy', '.pkl')):
                stale = os.path.join(path, name)
                os.remove(stale)
                print('Deleted: ' + stale)

    print('\n===== FEATURE EXTRACTION =====\n')

    feature.extract(sample_size)

    print('\n===== BINARIZE LABELS =====\n')

    label.binarize(sample_size)

    print('\n===== LOADING FEATURE VECTORS AND LABELS =====\n')

    X = []
    y = []
    limit = int(sample_size)
    loaded = 0
    for filename in os.listdir(feature_path):
        if loaded == limit:
            break
        label_file = os.path.join(label_path, filename)
        # Only feature files with a matching label file are used.
        if os.path.isfile(label_file):
            feature_vector = np.load(os.path.join(feature_path, filename))
            label_vector = np.load(label_file)
            X.append(feature_vector.tolist())
            y.append(label_vector.tolist())
            loaded += 1

    return np.matrix(X), np.matrix(y)
예제 #4
0
def getprob(filename):
    """
    Find the probability that a song belongs to each genre.

    Returns a tuple ``(dd, has_features_of)``: ``dd`` pairs each genre label
    with the corresponding entry of the classifier's first probability row,
    and ``has_features_of`` lists at most three genres, most probable first,
    whose probability exceeds 0.15 or is at least the maximum.
    """
    features = feature.extract(filename)
    prob = cmpr.predict_proba(features)[0]
    # NOTE(review): assumes the classifier's class order matches this list --
    # confirm against the training code.
    genres = ['Classical','Hipop','Jass','Metal','Pop','Rock']
    dd = dict(zip(genres, prob))
    print(prob)

    # Most probable genre.
    m = max(dd, key=dd.get)
    top_prob = dd[m]
    print(m, top_prob)

    ranked = sorted(dd, key=dd.get, reverse=True)
    qualifying = [
        genre for genre in ranked
        if dd[genre] > 0.15 or dd[genre] >= top_prob
    ]
    has_features_of = qualifying[:3]

    return dd, has_features_of
예제 #5
0
def getGenre(filename):
    """Predict with the module-level classifier ``cmpr`` for an uploaded file.

    ``filename`` is appended to ``<base>/django-jquery-file-upload/`` and the
    features extracted from that path are passed to ``cmpr.predict``, whose
    result is returned.
    """
    # NOTE(review): dirname(__name__) uses the module *name*, not __file__,
    # so the base presumably resolves to the current working directory --
    # confirm this is intended.
    base_dir = os.path.abspath(os.path.dirname(__name__))
    upload_path = base_dir + '/django-jquery-file-upload/' + filename
    music_feature = feature.extract(upload_path)
    return cmpr.predict(music_feature)
예제 #6
0
파일: test.py 프로젝트: freelandy/comp_code
# Collect the image files that exist on disk. Names follow
# <data_path>\<person, 4 digits>_<palm, 2 digits>.bmp, with both indices
# starting at 1. The backslash separator is hard-coded.
for i in range(1, persons + 1):
    for j in range(1, palms_per_person + 1):
        file_name = '{}\\{:0>4d}_{:0>2d}.bmp'.format(data_path, i, j)

        if os.path.exists(file_name):
            file_names.append(file_name)

# generate filters
filters = gabor.generate()

# distance matrix: square, one row/column per collected file, zero-initialised
dist_matrix = np.zeros([len(file_names), len(file_names)])

# Compare every unordered pair (i, j) with i < j once and mirror the result,
# so the diagonal stays zero.
for i in range(0, len(file_names) - 1):
    im1 = Image.open(file_names[i])
    f1 = feature.extract(im1, filters)
    m1 = matcher.get_mask(im1)

    # start = time.clock()
    for j in range(i + 1, len(file_names)):
        im2 = Image.open(file_names[j])

        # NOTE(review): time.clock() was removed in Python 3.8, and `start`
        # is not read anywhere in this excerpt -- confirm whether later code
        # uses it.
        start = time.clock()

        # Features and mask of the second image are recomputed for every
        # pair.
        f2 = feature.extract(im2, filters)
        m2 = matcher.get_mask(im2)

        # NOTE(review): meaning of the trailing 4 is not visible here --
        # check matcher.compute_distance.
        d = matcher.compute_distance(f1, f2, m1, m2, 4)

        dist_matrix[i][j] = d
        dist_matrix[j][i] = dist_matrix[i][j]
예제 #7
0
def _read_csv_rows(path):
    """Return ``(header, rows)`` from the CSV file at *path*, closing it."""
    # 'rb' is kept from the original; it is the mode the Python 2 csv module
    # expects.
    with open(path, 'rb') as handle:
        reader = csv.reader(handle)
        header = next(reader)  # The first line is a header
        rows = list(reader)
    return header, rows


def main():
    """Train on csv/train.csv and write predictions to csv/submission.csv.

    Reads the training and test CSV files, runs both through
    ``feature.extract``, reports 5-fold cross-validation scores for a random
    forest and for an SVM over a grid of C/gamma values, refits the SVM with
    the best-scoring pair, and writes each test row prefixed with its
    prediction.

    Every file is opened in a ``with`` block so handles are closed (and the
    submission flushed) even if a later step raises.

    NOTE(review): binary-mode csv files and the ``cross_validation`` module
    suggest Python 2-era tooling. Each print call takes a single
    parenthesised argument so it behaves identically as a Python 2 print
    statement.
    """
    _, train_rows = _read_csv_rows('csv/train.csv')
    train_data = feature.extract(np.array(train_rows), "train")

    # The test data goes through the same extraction so that its columns
    # match the training data.
    _, test_rows = _read_csv_rows('csv/test.csv')
    test_data = feature.extract(np.array(test_rows), "test")

    # The data is now ready to go. So lets train then test!

    print('Training')

    # Column 0 of each extracted training row is the target; the remaining
    # columns are the inputs.
    cv_data = np.array([row[1:] for row in train_data])
    cv_target = np.array([row[0] for row in train_data])

    forest = RandomForestClassifier(n_estimators=100)
    scores = cross_validation.cross_val_score(forest, cv_data, cv_target, cv=5)
    print("Cross Validation for random forest, scores: " + str(np.mean(scores)))

    # Grid search: keep the (C, gamma) pair with the highest mean CV score.
    best_score = 0
    best_c = 0
    best_gamma = 0
    C_list = [0.1, 0.3, 1, 3, 10, 30, 100, 300, 1000]
    gamma_list = [2**-15, 2**-13, 2**-11, 2**-9, 2**-7, 2**-5, 2**-3, 2**-1]
    for c in C_list:
        for gamma in gamma_list:
            svm_clf = svm.SVC(C=c, gamma=gamma)
            scores = cross_validation.cross_val_score(svm_clf,
                                                      cv_data,
                                                      cv_target,
                                                      cv=5)
            score = np.mean(scores)
            print("Cross Validation for SVM, scores with C = %f gamma = %f: %f" % (
                c, gamma, score))
            if score > best_score:
                best_score = score
                best_c = c
                best_gamma = gamma

    print("Best svm score is %f, with c=%f and gamma=%f" % (best_score, best_c,
                                                            best_gamma))

    print('Predicting')

    clf = svm.SVC(C=best_c, gamma=best_gamma)
    clf.fit(cv_data, cv_target)
    output = clf.predict(test_data)

    # Re-read the raw test rows so the submission carries the original
    # columns, with the prediction inserted in front.
    _, raw_test_rows = _read_csv_rows('csv/test.csv')
    with open("csv/submission.csv", "wb") as out_handle:
        writer = csv.writer(out_handle)
        writer.writerow([
            "survived", "pclass", "name", "sex", "age", "sibsp", "parch",
            "ticket", "fare", "cabin", "embarked"
        ])
        for i, row in enumerate(raw_test_rows):
            row.insert(0, output[i].astype(np.uint8))
            writer.writerow(row)

    print("Done")
예제 #8
0
# --encoding:utf-8--

# Script: extract features from one audio file and print the label predicted
# by a previously saved model. `svm` and `feature` are presumably
# project-local modules -- TODO confirm.

import svm

# I. Model training
# 1. Cross-validation
# svm.cross_validation(data_percentage=0.9)

# 2. After adjusting the code parameters based on cross-validation, train the model and dump it
# svm.fit_dump_model(train_percentage=0.9, fold=100)

# # 4. Load the model and predict
import feature
# path = "../data/test/孙燕姿 - 我也很想他 - 怀旧.mp3"
path = "../data/test/Maize - I Like You-浪漫.mp3"
# path = "../data/test/Lasse Lindh - Run To You.mp3" # fresh

music_feature = feature.extract(path)
clf = svm.load_model()
label = svm.fetch_predict_label(clf, music_feature)
# The printed message is Chinese for "Predicted label: %s".
print("预测标签为:%s" % label)
예제 #9
0
def _read_csv_rows(path):
    """Return ``(header, rows)`` from the CSV file at *path*, closing it."""
    # 'rb' is kept from the original; it is the mode the Python 2 csv module
    # expects.
    with open(path, 'rb') as handle:
        reader = csv.reader(handle)
        header = next(reader)  # The first line is a header
        rows = list(reader)
    return header, rows


def main():
    """Train on csv/train.csv and write predictions to csv/submission.csv.

    Reads the training and test CSV files, runs both through
    ``feature.extract``, reports 5-fold cross-validation scores for a random
    forest and for an SVM over a grid of C/gamma values, refits the SVM with
    the best-scoring pair, and writes each test row prefixed with its
    prediction.

    Every file is opened in a ``with`` block so handles are closed (and the
    submission flushed) even if a later step raises.

    NOTE(review): binary-mode csv files and the ``cross_validation`` module
    suggest Python 2-era tooling. Each print call takes a single
    parenthesised argument so it behaves identically as a Python 2 print
    statement.
    """
    _, train_rows = _read_csv_rows('csv/train.csv')
    train_data = feature.extract(np.array(train_rows), "train")

    # The test data goes through the same extraction so that its columns
    # match the training data.
    _, test_rows = _read_csv_rows('csv/test.csv')
    test_data = feature.extract(np.array(test_rows), "test")

    # The data is now ready to go. So lets train then test!

    print('Training')

    # Column 0 of each extracted training row is the target; the remaining
    # columns are the inputs.
    cv_data = np.array([row[1:] for row in train_data])
    cv_target = np.array([row[0] for row in train_data])

    forest = RandomForestClassifier(n_estimators=100)
    scores = cross_validation.cross_val_score(forest, cv_data, cv_target, cv=5)
    print("Cross Validation for random forest, scores: " + str(np.mean(scores)))

    # Grid search: keep the (C, gamma) pair with the highest mean CV score.
    best_score = 0
    best_c = 0
    best_gamma = 0
    C_list = [0.1, 0.3, 1, 3, 10, 30, 100, 300, 1000]
    gamma_list = [2 ** -15, 2 ** -13, 2 ** -11, 2 ** -9, 2 ** -7, 2 ** -5, 2 ** -3, 2 ** -1]
    for c in C_list:
        for gamma in gamma_list:
            svm_clf = svm.SVC(C=c, gamma=gamma)
            scores = cross_validation.cross_val_score(svm_clf, cv_data, cv_target, cv=5)
            score = np.mean(scores)
            print("Cross Validation for SVM, scores with C = %f gamma = %f: %f" % (c, gamma, score))
            if score > best_score:
                best_score = score
                best_c = c
                best_gamma = gamma

    print("Best svm score is %f, with c=%f and gamma=%f" % (best_score, best_c, best_gamma))

    print('Predicting')

    clf = svm.SVC(C=best_c, gamma=best_gamma)
    clf.fit(cv_data, cv_target)
    output = clf.predict(test_data)

    # Re-read the raw test rows so the submission carries the original
    # columns, with the prediction inserted in front.
    _, raw_test_rows = _read_csv_rows('csv/test.csv')
    with open("csv/submission.csv", "wb") as out_handle:
        writer = csv.writer(out_handle)
        writer.writerow(["survived", "pclass", "name", "sex", "age", "sibsp", "parch", "ticket", "fare", "cabin", "embarked"])
        for i, row in enumerate(raw_test_rows):
            row.insert(0, output[i].astype(np.uint8))
            writer.writerow(row)

    print("Done")