dendrogram(links)
    knn(meta[liwc].T, labels=liwc)
    knn(meta[liwc], labels=meta['Name of Work'].values)
    '''K-means'''
    k = KMeans(n_clusters=5)  # 5 is at an elbow for sse in 2-d
    km = k.fit_transform(truncatedFeatures)

    '''PCA'''
    pca, X_pca, k, km = kcluster(justDFeatures, n_clusters=8)
    print features.columns[np.argsort(pca.components_[0])[:100]]

#    plt.savefig("scree.png", dpi= 100)

    pca = decomposition.PCA(n_components=2)
    X_pca = pca.fit_transform(X_centered)
    plot_embedding(X_pca, y)
    k.plot_k_sse(X_pca)  # for 2 components 5 clusters

    ''' Supervised Learning'''
    # Logistic Regression and Random Forest seem to perform the best
    # Nonfiction seems unpredictable, while fiction, letters and poetry
    # are somewhat predictable
    for genre in set(meta.Genre):
        df = meta[meta.Genre == genre].reset_index()
        if len(df) > 20:
            y = df.pop('deprivation')
            print genre, 'Logit'
            p.plot_roc(df[liwc].fillna(0), y, LogisticRegression)
            print genre, 'Random Forest'
            p.plot_roc(df[liwc].fillna(0), y, RandomForestClassifier)