Ejemplo n.º 1
0
def plotDataset(X, Y, name, img=False, normalizeCols=False,
	maxPerClass=np.inf, transforms=None):
	"""Plot a dataset, title it with ``name``, and save the current figure.

	The figure is drawn by ``imgExamples`` when ``img`` is true and by
	``plotExamples`` otherwise. The saved file's suffix and subdirectory are
	the same string, built from the options that were enabled.

	Args:
		X: data handed to the plotting helper (after optional column
			normalization via ``meanNormalizeCols``).
		Y: labels handed to the plotting helper unchanged.
		name: dataset name; used in the progress message, as the plot
			title, and as the base name passed to ``saveCurrentPlot``.
		img: if true, draw with ``imgExamples`` and tag the output "_img".
		normalizeCols: if true, replace X with ``meanNormalizeCols(X)`` and
			tag the output "_colnormalized".
		maxPerClass: forwarded to the plotting helper; when finite, its
			string form is appended to the output tag.
		transforms: optional iterable of callables forwarded to the
			plotting helper; each one's ``__name__`` is appended to the
			output tag.
	"""
	print("plotting dataset: " + name + "...")

	if normalizeCols:
		X = meanNormalizeCols(X)

	if img:
		imgExamples(X, Y, maxPerClass, transforms)
		plt.suptitle(name)
	else:
		# NOTE(review): `normalize` is not a parameter or local of this
		# function, so it must resolve at module level -- confirm that is
		# intended (it may be a leftover from before `normalizeCols`).
		plotExamples(X, Y, maxPerClass, normalize, transforms)
		plt.title(name)

	# The suffix and the subdirectory are always the same string, so collect
	# the option tags once and join them.
	tags = []
	if img:
		tags.append("_img")
	if normalizeCols:
		tags.append("_colnormalized")
	if maxPerClass < np.inf:
		tags.append("_" + str(maxPerClass))
	if transforms:
		# Do not rebind `name` here: it is still needed as the base file
		# name in the saveCurrentPlot call below.
		for transform in transforms:
			tags.append("_" + str(transform.__name__))
	optionTag = "".join(tags)
	saveCurrentPlot(name, suffix=optionTag, subdir=optionTag)
Ejemplo n.º 2
0
def plotDataset(X,
                Y,
                name,
                img=False,
                normalizeCols=False,
                maxPerClass=np.inf,
                transforms=None):
    """Plot a dataset, title it with ``name``, and save the current figure.

    The figure is drawn by ``imgExamples`` when ``img`` is true and by
    ``plotExamples`` otherwise. The saved file's suffix and subdirectory are
    the same string, built from the options that were enabled.

    Args:
        X: data handed to the plotting helper (after optional column
            normalization via ``meanNormalizeCols``).
        Y: labels handed to the plotting helper unchanged.
        name: dataset name; used in the progress message, as the plot
            title, and as the base name passed to ``saveCurrentPlot``.
        img: if true, draw with ``imgExamples`` and tag the output "_img".
        normalizeCols: if true, replace X with ``meanNormalizeCols(X)`` and
            tag the output "_colnormalized".
        maxPerClass: forwarded to the plotting helper; when finite, its
            string form is appended to the output tag.
        transforms: optional iterable of callables forwarded to the
            plotting helper; each one's ``__name__`` is appended to the
            output tag.
    """
    print("plotting dataset: " + name + "...")

    if normalizeCols:
        X = meanNormalizeCols(X)

    if img:
        imgExamples(X, Y, maxPerClass, transforms)
        plt.suptitle(name)
    else:
        # NOTE(review): `normalize` is not a parameter or local of this
        # function, so it must resolve at module level -- confirm that is
        # intended (it may be a leftover from before `normalizeCols`).
        plotExamples(X, Y, maxPerClass, normalize, transforms)
        plt.title(name)

    # The suffix and the subdirectory are always the same string, so collect
    # the option tags once and join them.
    tags = []
    if img:
        tags.append("_img")
    if normalizeCols:
        tags.append("_colnormalized")
    if maxPerClass < np.inf:
        tags.append("_" + str(maxPerClass))
    if transforms:
        # Do not rebind `name` here: it is still needed as the base file
        # name in the saveCurrentPlot call below.
        for transform in transforms:
            tags.append("_" + str(transform.__name__))
    optionTag = "".join(tags)
    saveCurrentPlot(name, suffix=optionTag, subdir=optionTag)
Ejemplo n.º 3
0
def showClusters(name, X, Y, ks, lengths, clusterFactory=makeKMeans, stride=1, normSubSeqs=True):
    """Cluster sliding windows of X and plot the cluster centers on a grid.

    One subplot is drawn per (k, window length) pair: rows follow ``ks`` and
    columns follow ``lengths``. The finished figure is saved twice through
    ``saveCurrentPlot`` -- once in the 'cluster' subdirectory and once in a
    per-algorithm subdirectory beneath it -- and then closed.

    Args:
        name: dataset name, used in progress messages, the figure title and
            the output file name.
        X: 2D array; ``X.shape[1]`` caps the length the rows are resampled to.
        Y: unused; kept so existing callers keep working.
        ks: cluster counts, one grid row each.
        lengths: window lengths, one grid column each. Must be non-empty.
        clusterFactory: called as ``clusterFactory(subseqs, k)``; the result
            must offer ``fit`` and then either ``cluster_centers_`` or
            ``labels_``.
        stride: forwarded to ``window.sliding_windows_of_rows``.
        normSubSeqs: if true, each window is passed through
            ``zNormalizeRows`` before clustering and '-Normalized' is
            appended to the algorithm name used in titles and file names.
    """
    print("showing clusters for dataset %s" % name)
    plt.figure(figsize=(17, 9.5))
    plt.rcParams["font.size"] = 18

    plotNum = 0
    plotRows = len(ks)
    plotCols = len(lengths)

    n = max(max(lengths), 64) # if we want k > 64, ts must be at least this long
    n = min(n, X.shape[1])  # can't be longer than ts

    print("Resampling and normalizing...")
    X2 = resampleToLengthN(X, n)
    X2 = zNormalizeRows(X2)

    print("Fitting and plotting...")
    # Stays None when no grid cell is ever fit (empty `ks`, or every window
    # length exceeds n); checked after the loops.
    clusterer = None
    for i, k in enumerate(ks):
        for j, l in enumerate(lengths):
            # Advance the subplot index even for skipped cells so the
            # remaining plots keep their grid positions.
            plotNum += 1
            if l > n:   # window longer than whole sequence
                continue

            subseqs = window.sliding_windows_of_rows(X2, l, stride)
            if normSubSeqs:
                subseqs = zNormalizeRows(subseqs)
            clusterer = clusterFactory(subseqs, k)
            # Single-argument print(...) calls emit the same text under
            # Python 2 and Python 3.
            print(subseqs.shape)
            print(subseqs)
            clusterer.fit(subseqs)

            # cluster centers are the actual means
            if hasattr(clusterer, 'cluster_centers_'):
                centers = clusterer.cluster_centers_
            else:
                # No explicit centers: average the windows assigned to each
                # label instead.
                lbls = clusterer.labels_
                grouped = groupXbyY(subseqs, lbls)
                centers = [rows.mean(axis=0) for rows in grouped]

            plt.subplot(plotRows, plotCols, plotNum)
            for center in centers:
                plt.plot(np.arange(len(center)), center)

            plt.xticks(())
            plt.yticks(())
            plt.tight_layout()

            if i == 0:
                plt.title("Window length = %d" % l)
            if j == 0:
                plt.ylabel("K = %d" % k)

    if clusterer is None:
        # Nothing was fit, so there is no algorithm name to label or save
        # the figure under; discard the empty figure instead of raising.
        print("no clusters were fit for dataset %s; nothing to save" % name)
        plt.close()
        return

    algoName = clusterer.__class__.__name__
    if normSubSeqs:
        algoName += '-Normalized'
    plt.suptitle("Clusters in %s Dataset using %s" % (name, algoName))
    plt.tight_layout()
    plt.subplots_adjust(left=.03, right=.97, bottom=.01, top=.9, wspace=.02,
                        hspace=.01)

    # all algos for each dataset in one dir, as well as subdir for each algo
    fileName = "%s_%s.png" % (name, algoName)
    saveCurrentPlot(fileName, subdir='cluster')
    saveCurrentPlot(fileName, subdir=os.path.join('cluster', algoName))
    plt.close()
Ejemplo n.º 4
0
def showClusters(name,
                 X,
                 Y,
                 ks,
                 lengths,
                 clusterFactory=makeKMeans,
                 stride=1,
                 normSubSeqs=True):
    """Cluster sliding windows of X and plot the cluster centers on a grid.

    One subplot is drawn per (k, window length) pair: rows follow ``ks`` and
    columns follow ``lengths``. The finished figure is saved twice through
    ``saveCurrentPlot`` -- once in the 'cluster' subdirectory and once in a
    per-algorithm subdirectory beneath it -- and then closed.

    Args:
        name: dataset name, used in progress messages, the figure title and
            the output file name.
        X: 2D array; ``X.shape[1]`` caps the length the rows are resampled to.
        Y: unused; kept so existing callers keep working.
        ks: cluster counts, one grid row each.
        lengths: window lengths, one grid column each. Must be non-empty.
        clusterFactory: called as ``clusterFactory(subseqs, k)``; the result
            must offer ``fit`` and then either ``cluster_centers_`` or
            ``labels_``.
        stride: forwarded to ``window.sliding_windows_of_rows``.
        normSubSeqs: if true, each window is passed through
            ``zNormalizeRows`` before clustering and '-Normalized' is
            appended to the algorithm name used in titles and file names.
    """
    print("showing clusters for dataset %s" % name)
    plt.figure(figsize=(17, 9.5))
    plt.rcParams["font.size"] = 18

    plotNum = 0
    plotRows = len(ks)
    plotCols = len(lengths)

    n = max(max(lengths),
            64)  # if we want k > 64, ts must be at least this long
    n = min(n, X.shape[1])  # can't be longer than ts

    print("Resampling and normalizing...")
    X2 = resampleToLengthN(X, n)
    X2 = zNormalizeRows(X2)

    print("Fitting and plotting...")
    # Stays None when no grid cell is ever fit (empty `ks`, or every window
    # length exceeds n); checked after the loops.
    clusterer = None
    for i, k in enumerate(ks):
        for j, l in enumerate(lengths):
            # Advance the subplot index even for skipped cells so the
            # remaining plots keep their grid positions.
            plotNum += 1
            if l > n:  # window longer than whole sequence
                continue

            subseqs = window.sliding_windows_of_rows(X2, l, stride)
            if normSubSeqs:
                subseqs = zNormalizeRows(subseqs)
            clusterer = clusterFactory(subseqs, k)
            # Single-argument print(...) calls emit the same text under
            # Python 2 and Python 3.
            print(subseqs.shape)
            print(subseqs)
            clusterer.fit(subseqs)

            # cluster centers are the actual means
            if hasattr(clusterer, 'cluster_centers_'):
                centers = clusterer.cluster_centers_
            else:
                # No explicit centers: average the windows assigned to each
                # label instead.
                lbls = clusterer.labels_
                grouped = groupXbyY(subseqs, lbls)
                centers = [rows.mean(axis=0) for rows in grouped]

            plt.subplot(plotRows, plotCols, plotNum)
            for center in centers:
                plt.plot(np.arange(len(center)), center)

            plt.xticks(())
            plt.yticks(())
            plt.tight_layout()

            if i == 0:
                plt.title("Window length = %d" % l)
            if j == 0:
                plt.ylabel("K = %d" % k)

    if clusterer is None:
        # Nothing was fit, so there is no algorithm name to label or save
        # the figure under; discard the empty figure instead of raising.
        print("no clusters were fit for dataset %s; nothing to save" % name)
        plt.close()
        return

    algoName = clusterer.__class__.__name__
    if normSubSeqs:
        algoName += '-Normalized'
    plt.suptitle("Clusters in %s Dataset using %s" % (name, algoName))
    plt.tight_layout()
    plt.subplots_adjust(left=.03,
                        right=.97,
                        bottom=.01,
                        top=.9,
                        wspace=.02,
                        hspace=.01)

    # all algos for each dataset in one dir, as well as subdir for each algo
    fileName = "%s_%s.png" % (name, algoName)
    saveCurrentPlot(fileName, subdir='cluster')
    saveCurrentPlot(fileName, subdir=os.path.join('cluster', algoName))
    plt.close()