# For every sensor: build a matrix of Hellinger distances between the label
# histograms of every pair of data files, symmetrise it, embed it in 2-D with
# MDS and scatter-plot the embedding.
#
# NOTE(review): the original indentation was lost; the MDS/plot steps are
# assumed to run once per sensor (inside the `for s` loop) because `mdist` is
# rebuilt for each sensor -- confirm against the original file.
colors = datainfo.colors

for s in datainfo.sensors:
    print(s)
    nfiles = len(datainfo.datafiles)
    mdist = np.zeros((nfiles, nfiles))
    print(mdist.shape)

    for i, rfile in enumerate(datainfo.datafiles):
        # Histogram of the reference file labelled against itself.
        rlabels = compute_data_labels(rfile, rfile, s)
        rcnt = Counter(rlabels)
        # The comprehension variable must NOT be called `i`: under Python 2
        # it leaks into the enclosing scope and would overwrite the row index
        # used in `mdist[i, j]` below.
        # NOTE(review): range(len(rcnt)) assumes the labels are exactly
        # 0..k-1 with no empty class; otherwise the two histograms can end up
        # with different lengths -- confirm.
        rhisto = [rcnt[k] for k in range(len(rcnt))]

        for j, dfile in enumerate(datainfo.datafiles):
            # Presumably labels `dfile` using the model of `rfile` -- verify
            # against compute_data_labels.
            dlabels = compute_data_labels(rfile, dfile, s)
            cnt = Counter(dlabels)
            histo = [cnt[k] for k in range(len(cnt))]
            mdist[i, j] = hellinger_distance(rhisto, histo)

    # The distance is computed in both directions; average the two so that
    # the matrix is symmetric, as a precomputed dissimilarity should be.
    mdist = (mdist + mdist.T) / 2.0

    # A commented-out alternative in the original used
    # TSNE(n_components=2, metric='precomputed') instead of MDS.
    transf = MDS(n_components=2, dissimilarity='precomputed', n_jobs=-1)
    mres = transf.fit_transform(mdist)
    print(transf.stress_)

    fig = plt.figure()
    plt.scatter(mres[:, 0], mres[:, 1], c=colors, s=100)
# Slide a window over `sequences`, compare the frequency model of each window
# with the current reference model and record where the two drift apart.
#
# NOTE(review): the original indentation was lost; the nesting below is the
# most plausible reading (pop/append both guarded by the `seqend` test, the
# subplot set-up after the loop) -- confirm against the original file.

# Pick the estimator once; both variants are called with the same arguments.
if args.pairs:
    estimate_counts = estimate_frequency_pairs_model
else:
    estimate_counts = estimate_frequency_model

counts_model = estimate_counts(sequences[0:estsize], nclusters, laplace=laplace)

# Height used both for the "model changed" marks and for the y-axis limit.
mark_height = tolerance*1.2

for i in range(0, len(sequences)-stepsize, stepsize):
    tk += 1
    counts_model_ahead = estimate_counts(sequences[i:i + estsize], nclusters, laplace=laplace)

    # Normalise both count models before measuring their distance.
    # A commented-out alternative in the original used
    # jensen_shannon_divergence on the same two normalised models.
    reference = counts_model/sum(counts_model)
    window = counts_model_ahead/sum(counts_model_ahead)
    diff = hellinger_distance(reference, window)
    lldiff.append(diff)

    if diff > tolerance:
        # The window differs enough: it becomes the new reference model.
        counts_model = counts_model_ahead.copy()
        llmark.append(mark_height)
    else:
        llmark.append(0)

    # Once the window start passes the next entry of `seqend`, consume that
    # entry and remember the current tick.
    if i > seqend[0]:
        seqend.pop(0)
        tkpos.append(tk)

tkpos.insert(0, 0)

sp1 = fig.add_subplot(nfil, ncol, nfig)
sp1.axis([0, len(lldiff), 0, mark_height])
plt.title(sensor)
# Build the relative-frequency histogram of `labels`, collect it in `lhisto`,
# tabulate all histograms in a DataFrame (optionally with Hellinger/RMS
# distance to the first histogram), write the table to a text file and start
# the figure used for plotting.
#
# NOTE(review): the original indentation was lost. The first four statements
# presumably sit inside a per-file loop whose header is outside this excerpt
# (they append to `lhisto` and `dfile` is used below) -- confirm.
histo = np.zeros(nclusters)
for i in labels:
    histo[i] += 1.0
histo /= len(labels)
lhisto.append(histo)

df = pd.DataFrame(np.array(lhisto), index=datainfo.expnames,
                  columns=['class-%d' % c for c in range(1, nclusters+1)])

if args.hellinger:
    # Distances of every histogram to the first one. The DataFrame above
    # already requires one histogram per entry of `datainfo.expnames`, so
    # iterating `lhisto` alone covers the same rows as the former zip().
    reference = lhisto[0]
    lrms = []
    lhell = []
    for h in lhisto:
        delta = reference - h
        lhell.append(hellinger_distance(h, reference))
        lrms.append(np.sqrt(np.dot(delta, delta) / h.shape[0]))
    df['Hellinger'] = lhell
    df['RMS'] = lrms

# `with` guarantees the report file is closed even if formatting or writing
# the table raises.
with open(datainfo.dpath + '/' + datainfo.name + '/Results/cluster-histo-' + dfile + '-' + sensor + '-'
          + str(nclusters) + '-' + ext + '.txt', 'w') as rfile:
    rfile.write(df.to_string(line_width=200))

matplotlib.rcParams.update({'font.size': 30})
fig = plt.figure()
ax = fig.add_subplot(2, 1, 1)
fig.set_figwidth(30)
fig.set_figheight(20)
# For every data file: histogram of the k-means cluster assignments of its
# rows (as relative frequencies), then print RMS and Hellinger distance of
# each histogram to the first one and save a grouped bar chart as PDF.
#
# NOTE(review): the original indentation was lost; the title/savefig calls
# are assumed to run once, after the bars of all files have been drawn.
print(data.shape)

lhisto = []
for dataf, ndata in zip(ldata, datainfo.datafiles):
    nsamples = dataf.shape[0]
    if nsamples:
        # One vectorised predict() over all rows instead of one call per row:
        # much faster, and recent scikit-learn versions reject 1-D samples.
        # assumes dataf is (n_samples, n_features) -- TODO confirm
        assigned = km.predict(dataf)
        histo = np.bincount(assigned, minlength=nclusters) / float(nsamples)
    else:
        # No rows: use an all-zero histogram rather than dividing 0 by 0.
        histo = np.zeros(nclusters)
    # Formatted so the output is the same under Python 2 and Python 3.
    print('%s %s' % (datainfo.name, ndata))
    print(histo)
    lhisto.append(histo)

# Distance of every other histogram to the first one.
for h in lhisto[1:]:
    delta = lhisto[0] - h
    rms = np.dot(delta, delta) / h.shape[0]
    print('%s %s' % (np.sqrt(rms), hellinger_distance(h, lhisto[0])))

fig, ax = plt.subplots()
fig.set_figwidth(30)
fig.set_figheight(40)
ind = np.arange(nclusters)  # the x locations for the groups
width = 0.10  # the width of the bars
ax.set_xticks(ind + width)
ax.set_xticklabels(ind)
# One bar series per file, each shifted by one bar width within its group.
for i, h in enumerate(lhisto):
    ax.bar(ind + (i * width), h, width, color=colors[i])
fig.suptitle(datainfo.name + '-' + s + ext, fontsize=48)
fig.savefig(datainfo.dpath+'/Results/' + datainfo.name + '-' + s + ext + '-histo.pdf', orientation='landscape', format='pdf')
# plt.show()
histo[km.predict(dataf[i])] += 1.0 histo /= dataf.shape[0] # print(datainfo.name, ndata) # print('HISTO ', histo) histosorted = np.zeros(nclusters) for i in range(histosorted.shape[0]): histosorted[i] = histo[lmax[i][0]] else: histosorted = np.zeros(nclusters) lhisto.append(histosorted) if args.hellinger: for h in lhisto[1:]: rms = np.dot(lhisto[0] - h, lhisto[0] - h) rms /= h.shape[0] print(np.sqrt(rms), hellinger_distance(h, lhisto[0])) matplotlib.rcParams.update({'font.size': 30}) fig = plt.figure() ax = fig.add_subplot(2, 1, 1) fig.set_figwidth(60) fig.set_figheight(40) ind = np.arange(nclusters) # the x locations for the groups width = 1.0/(len(lhisto)+1) # the width of the bars ax.set_xticks(ind+width) ax.set_xticklabels(ind) for i, h in enumerate(lhisto): rects = ax.bar(ind+(i*width), h, width, color=colors[i]) fig.suptitle(datainfo.name + '-' + sensor, fontsize=48) minaxis = np.min(centroids)