def compute_one_clique(maindir,
                       cliques,
                       mu,
                       sd,
                       clique_id=0,
                       output="clique_vs_nonclique.pk"):
    """Computes the features for one clique, and N other tracks as 
        non_cliques."""
    X = dict()
    X["cliques"] = []
    X["non_cliques"] = []
    for tid in cliques[clique_id]:
        path = utils.path_from_tid(maindir, tid)
        feats = utils.extract_feats(path)
        if feats is None:
            continue
        x = np.zeros((feats.shape[0], feats.shape[1] // 2))
        for i, feat in enumerate(feats):
            x[i] = feat[450:]
        X["cliques"].append(x)

    N = len(cliques[clique_id])
    n = 0
    checked_cliques = []
    checked_cliques.append(clique_id)
    while n < N:
        idx = np.random.randint(0, len(cliques))  # high bound is exclusive
        if idx in checked_cliques:
            continue
        checked_cliques.append(idx)
        path = utils.path_from_tid(maindir, cliques[idx][0])
        feats = utils.extract_feats(path)
        if feats is None:
            continue
        x = np.zeros((feats.shape[0], feats.shape[1] // 2))
        for i, feat in enumerate(feats):
            x[i] = feat[450:]
        n += 1
        X["non_cliques"].append(x)

    feats = np.empty((0, 450))
    bounds = []
    for key in X:
        print key
        for x in X[key]:
            x = standardize(x, mu, sd)
            feats = np.concatenate((feats, x), axis=0)
            try:
                bounds.append(x.shape[0] + bounds[-1])
            except IndexError:  # first segment: bounds is still empty
                bounds.append(x.shape[0])

    plt.imshow(feats, interpolation="nearest", aspect="auto")
    for bound in bounds:
        plt.axhline(bound, color="magenta", linewidth=2.0)
    plt.show()

    f = open(output, 'w')
    cPickle.dump(X, f)
    f.close()
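
# `standardize` is called above but not defined in these snippets. A minimal
# sketch, assuming it simply z-scores each feature dimension with the supplied
# per-dimension means (mu) and standard deviations (sd):
import numpy as np

def standardize(X, mu, sd):
    """Hypothetical helper: zero-mean, unit-variance scaling per dimension."""
    return (X - mu) / sd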
Example #3
def main():
    """
        主函数
    """
    # 特征提取
    utils.extract_feats()

    # 读取提取的HOG特征,用于训练svm模型
    utils.train_svm()

    # 对指定目录中的图片进行行人检测
    utils.detect_person_in_path('./data/test_image')
def compute_codes_orig_it(track_ids, maindir, clique_ids, start_idx, end_idx):
    """Computes the original features, based on Thierry and Ellis, 2012.
    Dimensionality reduction using PCA of 50, 100, and 200 components."""
    res = []
    trainedpca = utils.load_pickle(
        "models/pca_250Kexamples_900dim_nocovers.pkl")
    pca_components = [50, 100, 200]

    # Init codes
    codes = []
    for n_comp in pca_components:
        codes.append(np.ones((end_idx - start_idx, n_comp)) * np.nan)

    for i, tid in enumerate(track_ids[start_idx:end_idx]):
        path = utils.path_from_tid(maindir, tid)
        feats = utils.extract_feats(path)
        if feats is None:
            continue
        med = np.median(feats, axis=0)
        for pca_idx, n_comp in enumerate(pca_components):
            tmp = dan_tools.chromnorm(med.reshape(med.shape[0], 1)).squeeze()
            codes[pca_idx][i] = trainedpca.apply_newdata(tmp, ndims=n_comp)
        if i % 1000 == 0:
            logger.info("Computed %d of %d track(s)" %
                        (i, end_idx - start_idx))
    res = (codes, track_ids[start_idx:end_idx], clique_ids[start_idx:end_idx])
    return res
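
# Hypothetical usage of compute_codes_orig_it, assuming `track_ids` (list of
# MSD track ids) and `clique_ids` (parallel clique labels) were loaded
# elsewhere; the dataset path and slice bounds below are placeholders only.
maindir = "/path/to/MillionSongDataset"
codes, tids, cids = compute_codes_orig_it(track_ids, maindir, clique_ids,
                                          start_idx=0, end_idx=1000)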
def compute_original_feats(maindir, tracks, cliques, output="originalfeats.pk"):
    """Computes the original features."""
    X = []
    I = []
    cnt = 0
    k = 0
    for tid in tracks:
        path = utils.path_from_tid(maindir, tid)
        feats = utils.extract_feats(path)
        if feats is None:
            continue
        x = np.zeros(feats.shape)
        for i, feat in enumerate(feats):
            #feat = dan_tools.chromnorm(feat.reshape(feat.shape[0], 1)).squeeze()
            #feat = np.reshape(feat, (1,900))
            x[i] = feat
        X.append(x)

        idx = -1  # fall back to -1 if the track belongs to no clique
        for i, clique in enumerate(cliques):
            if tid in clique:
                idx = i
                break
        I.append(idx)

        if cnt % 50 == 0 and cnt != 0:
            print "---Computing features: %d of %d" % (cnt, len(tracks))
            f = open("/Volumes/Audio/SummaryCovers/originalfeats%d.pk" % k, 'w')
            cPickle.dump((X, I), f)
            f.close()
            k += 1
            X = []
        cnt += 1
Example #7
def exp1():
    print 'extract features for experiment 1'
    with open('CCMR/CCMR_Twitter_t.txt') as f1, open(
            'CCMR/CCMR_Google_t.txt') as f2:
        twitter = json.load(f1)
        google = json.load(f2)

    # get split of train test
    cv_task = task_split(twitter)
    cv_event = event_split(twitter)

    print 'extract cpcl features'
    X, Y = extract_feats(twitter, google, embed='complete', agree='complete')

    X_fill = imputer(X)
    X_fill = standardize(X_fill)
    scores = []
    for i in xrange(10):
        p = pearsonr(X_fill[:, i], Y)
        print p
        scores.append(abs(p[0]))
    print 'average: ', np.average(scores)

    with open('CLCP/Twitter_CLCP_via_Google.pkl', 'wb') as f:
        pkl.dump(((X, Y), (cv_task, cv_event)), f)
def extract_feats(filename, td=None, lda_file=None, lda_n=0, ver=True):
    """Computes the features using the dictionary transformation td. 
        If it doesn't exist, computes them using Thierry's method.

     The improved pipeline is composed of 11 steps:

        1.- Beat Synchronous Chroma
        2.- L2-Norm
        3.- Shingle (PATCH_LEN: 75 x 12)
        4.- 2D-FFT
        5.- L2-Norm
        6.- Log-Scale
        7.- Sparse Coding
        8.- Shrinkage
        9.- Median Aggregation
        10.- Dimensionality Reduction
        11.- L2-Norm

    Original method by Thierry doesn't include steps 5, 6, 7, 8, and 11.
    """
    # 1.- Beat Synchronous Chroma
    # 2.- L2-Norm
    # 3.- Shingle (PATCH_LEN: 75 x 12)
    # 4.- 2D-FFT
    feats = utils.extract_feats(filename)
    if feats is None:
        return None

    if td is not None:
        # 5.- L2-Norm
        # 6.- Log-Scale
        # 7.- Sparse Coding
        # 8.- Shrinkage
        H = td(feats)
    else:
        H = feats

    # 9.- Median Aggregation
    H = np.median(H, axis=0)

    # Apply LDA if needed
    if lda_file is not None:
        # 10.- Dimensionality Reduction
        H = lda_file[lda_n].transform(H)

    # 11.- L2-Norm
    feats = dan_tools.chromnorm(H.reshape(H.shape[0], 1)).squeeze()

    return feats
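
# dan_tools.chromnorm provides the final L2-Norm (step 11). A minimal sketch of
# that normalization, assuming it rescales a column vector to unit Euclidean
# length (the actual dan_tools implementation may differ in details):
import numpy as np

def l2_normalize(v):
    """Hypothetical stand-in for dan_tools.chromnorm on a single vector."""
    norm = np.linalg.norm(v)
    return v if norm == 0 else v / norm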
def compute_codes_it(track_ids,
                     maindir,
                     d,
                     clique_ids,
                     start_idx,
                     end_idx,
                     origcodes=None,
                     norm=False):
    """Computes the features based on Humphrey, Nieto and Bello, 2013.
    Dimensionality reduction using LDA of 50, 100, and 200 components."""
    fx = load_transform(d)
    res = []
    K = int(d.split("_")[1].split("E")[1])

    # Init codes
    codes = []
    if lda is not None:
        lda_components = [50, 100, 200]
        for n_comp in lda_components:
            codes.append(np.ones((end_idx - start_idx, n_comp)) * np.nan)
    else:
        codes.append(np.ones((end_idx - start_idx, K)) * np.nan)

    for i, tid in enumerate(track_ids[start_idx:end_idx]):
        if origcodes is None:
            path = utils.path_from_tid(maindir, tid)
            feats = utils.extract_feats(path)
            if feats is None:
                continue
            code = np.median(fx(feats), axis=0)
        else:
            code = origcodes[i]
        if norm:
            code = dan_tools.chromnorm(code.reshape(code.shape[0],
                                                    1)).squeeze()
        if pca is not None:
            code = pca.transform(code)
        if lda is not None:
            for lda_idx, n_comp in enumerate(lda_components):
                tmp = lda[lda_idx].transform(code)
                codes[lda_idx][i] = dan_tools.chromnorm(
                    tmp.reshape(tmp.shape[0], 1)).squeeze()
        else:
            codes[0][i] = code
        if i % 1000 == 0:
            logger.info("Computed %d of %d track(s)" %
                        (i, end_idx - start_idx))
    res = (codes, track_ids[start_idx:end_idx], clique_ids[start_idx:end_idx])
    return res
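
# Note: compute_codes_it reads the module-level globals `lda` and `pca`, which
# are set elsewhere in the original script. A hypothetical default so the
# snippet is self-contained (replace with fitted models where available):
lda = None  # e.g. a list of fitted LDA transformers with 50/100/200 components
pca = None  # e.g. a fitted PCA transformer, or None to skip that step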
def compute_N_cliques(maindir, cliques, N=10, output="cliques.pk"):
    """Computes the features for N cliques."""
    X = []
    clique_ids = []
    for i in xrange(N):
        clique_id = random.randint(0, len(cliques) - 1)
        while clique_id in clique_ids:
            clique_id = random.randint(0, len(cliques) - 1)
        clique_ids.append(clique_id)
        x = []
        for tid in cliques[clique_id]:
            path = utils.path_from_tid(maindir, tid)
            feats = utils.extract_feats(path)
            if feats is None:
                continue
            x.append(feats)
        X.append(x)

    f = open(output, 'w')
    cPickle.dump(X, f)
    f.close()
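
# Hypothetical usage of compute_N_cliques, assuming `cliques` (a list of lists
# of track ids, as in the snippets above) was loaded beforehand; the dataset
# path is a placeholder.
compute_N_cliques("/path/to/MillionSongDataset", cliques, N=5,
                  output="five_cliques.pk")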
Example #14
def exp3():
    print 'extract features for experiment 3'
    with open('CCMR/CCMR_Baidu_t.txt') as f1, open(
            'CCMR/CCMR_Google_t.txt') as f2:
        baidu = json.load(f1)
        google = json.load(f2)

    # filter others rumors in Baidu
    baidu_p = []
    for elem in baidu:
        if elem['label'] in [0, 1]:
            baidu_p.append(elem)

    # get split of train test
    cv_task = task_split(baidu_p)
    cv_event = event_split(baidu_p)

    print 'extract cpcl features'
    X, Y = extract_feats(baidu_p, google)

    with open('CLCP/Baidu_CLCP_via_Google.pkl', 'wb') as f:
        pkl.dump(((X, Y), (cv_task, cv_event)), f)
Example #16
def compute_feats(track_ids,
                  maindir,
                  d,
                  lda_file=None,
                  lda_n=0,
                  codes=None,
                  ver=True,
                  pca="",
                  pca_n=0):
    """Computes the features using the dictionary d. If it doesn't exist, 
     computes them using Thierry's method.

     The improved pipeline is composed of 11 steps:

        1.- Beat Synchronous Chroma
        2.- L2-Norm
        3.- Shingle (PATCH_LEN: 75 x 12)
        4.- 2D-FFT
        5.- L2-Norm
        6.- Log-Scale
        7.- Sparse Coding
        8.- Shrinkage
        9.- Median Aggregation
        10.- Dimensionality Reduction
        11.- L2-Norm

    Original method by Thierry doesn't include steps 5, 6, 7, 8, and 11.
    """
    if d != "":
        fx = load_transform(d)
        K = int(d.split("_")[1].split("E")[1])
    else:
        K = PATCH_LEN

    if codes is None:
        compute_codes = True
        codes = np.ones((len(track_ids), K)) * np.nan
    else:
        compute_codes = False
        K = codes[0].shape[0]
    if lda_file is not None:
        if lda_n == 0: n_comp = 50
        elif lda_n == 1: n_comp = 100
        elif lda_n == 2: n_comp = 200
    else:
        n_comp = K

    if pca != "":
        pca = utils.load_pickle(pca)
        pca = pca[pca_n]

    final_feats = np.ones((codes.shape[0], n_comp)) * np.nan
    orig_feats = []
    for cnt, tid in enumerate(track_ids):
        if compute_codes:
            path = utils.path_from_tid(maindir, tid)

            # 1.- Beat Synchronous Chroma
            # 2.- L2-Norm
            # 3.- Shingle (PATCH_LEN: 75 x 12)
            # 4.- 2D-FFT
            feats = utils.extract_feats(path)
            #orig_feats.append(feats)    # Store orig feats
            if feats is None:
                continue

            if d != "":
                # 5.- L2-Norm
                # 6.- Log-Scale
                # 7.- Sparse Coding
                # 8.- Shrinkage
                H = fx(feats)
            else:
                H = feats
            # 9.- Median Aggregation
            H = np.median(H, axis=0)
        else:
            H = codes[cnt]

        if compute_codes:
            codes[cnt] = H.copy()

        if pca != "":
            H = pca.transform(H)

        # Apply LDA if needed
        if lda_file is not None:
            #H = dan_tools.chromnorm(H.reshape(H.shape[0], 1)).squeeze()
            # 10.- Dimensionality Reduction
            H = lda_file[lda_n].transform(H)

        # 11.- L2-Norm
        final_feats[cnt] = dan_tools.chromnorm(H.reshape(H.shape[0],
                                                         1)).squeeze()

        if ver:
            if cnt % 50 == 1:
                logger.info("----Computing features %.1f%%" % \
                            (cnt/float(len(track_ids)) * 100))

    if d == "":
        d = "orig"  # For saving purposes

    # Save codes
    utils.create_dir("results")
    if compute_codes:
        utils.save_pickle(codes,
                          "results/codes-" + os.path.basename(d) + ".pk")

    # Save features
    #utils.save_pickle(orig_feats, "results/feats-" + os.path.basename(d) + ".pk")

    logger.info("Features Computed")
    return final_feats
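
# Hypothetical call of compute_feats with Thierry's original pipeline (empty
# dictionary string, no LDA or PCA), assuming `track_ids` was loaded beforehand
# and that the dataset path below is a placeholder.
final_feats = compute_feats(track_ids, "/path/to/MillionSongDataset", d="")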
Example #17
def main():
    """
        主函数
    """
    # Load the data
    print('Loading training data...')
    X_train, y_train = utils.load_fashion_mnist_dataset(config.train_data_file)

    print('Loading test data...')
    X_test, y_test = utils.load_fashion_mnist_dataset(config.test_data_file)

    # Plot 9 random sample images
    utils.plot_random_samples(X_train)

    # Feature extraction
    print('Extracting features from the training data...')
    feats_train = utils.extract_feats(X_train)
    print('Extracting features from the test data...')
    feats_test = utils.extract_feats(X_test)

    # Feature normalization
    proc_feats_train, proc_feats_test = utils.do_feature_engineering(
        feats_train, feats_test)

    # Model training and validation
    print('\n===================== Model Training & Validation =====================')

    if IS_SIMPLE_EXP:
        # Relatively quick to run
        print('Simple Logistic Regression classification:')
        lr = LogisticRegression()
        lr.fit(proc_feats_train, y_train)
        print('Test accuracy: {:.3f}'.format(lr.score(proc_feats_test,
                                                       y_test)))  # result: 0.8289

    else:
        # Relatively slow to run
        print('Cross-validated comparison of multiple classifiers:')
        model_name_param_dict = {
            'kNN': (KNeighborsClassifier(), {
                'n_neighbors': [5, 25, 55]
            }),
            'LR': (LogisticRegression(), {
                'C': [0.01, 1, 100]
            }),
            'SVM': (SVC(kernel='linear'), {
                'C': [0.01, 1, 100]
            }),
            'DT': (DecisionTreeClassifier(), {
                'max_depth': [50, 100, 150]
            }),
            'AdaBoost': (AdaBoostClassifier(), {
                'n_estimators': [100, 150, 200]
            }),
            'GBDT': (GradientBoostingClassifier(), {
                'learning_rate': [0.01, 1, 100]
            }),
            'RF': (RandomForestClassifier(), {
                'n_estimators': [100, 150, 200]
            })
        }

        # DataFrame for collecting comparison results
        results_df = pd.DataFrame(columns=['Accuracy (%)', 'Time (s)'],
                                  index=list(model_name_param_dict.keys()))
        results_df.index.name = 'Model'

        for model_name, (model, param_range) in model_name_param_dict.items():
            best_clf, best_acc, mean_duration = train_model(
                proc_feats_train, y_train, proc_feats_test, y_test, model_name,
                model, param_range)
            results_df.loc[model_name, 'Accuracy (%)'] = best_acc * 100
            results_df.loc[model_name, 'Time (s)'] = mean_duration

        results_df.to_csv(
            os.path.join(config.output_path, 'model_comparison.csv'))

        # Model and result comparison
        print('\n===================== Model & Result Comparison =====================')

        plt.figure(figsize=(10, 4))
        ax1 = plt.subplot(1, 2, 1)
        results_df.plot(y=['Accuracy (%)'],
                        kind='bar',
                        ylim=[50, 100],
                        ax=ax1,
                        title='Accuracy(%)',
                        legend=False)

        ax2 = plt.subplot(1, 2, 2)
        results_df.plot(y=['Time (s)'],
                        kind='bar',
                        ax=ax2,
                        title='Time (s)',
                        legend=False)
        plt.tight_layout()
        plt.savefig(os.path.join(config.output_path, 'pred_results.png'))
        plt.show()