def biclustering(db):
    """Co-cluster the view-time matrix and report on the first clusters.

    Loads a hard-coded view-time CSV, treats its first column as the row
    identifiers (named "devices" here) and the remaining columns as the
    matrix to cluster (column headers are named "programs" here).  Every
    row is passed through the module-level ``fraction`` helper, the result
    is fitted with ``SpectralCoclustering``, and for the first few
    clusters the shape, column labels, sub-matrix and row count are
    printed before the row identifiers are handed to ``get_income``.

    Args:
        db: Opaque handle forwarded unchanged to ``get_income``.

    Returns:
        None.  All results are printed or passed to ``get_income``.
    """
    csv_path = '/home/fan/intern/process_db/analysis/viewtime_matrix_501_0.1.csv'
    dma = 501  # forwarded to get_income; matches the "501" in the file name
    n_clusters = 10
    # Only the first 5 of the 10 fitted clusters are reported.
    n_reported = 5

    df = pd.read_csv(csv_path)
    print(df.shape)

    # ``.iloc`` replaces the removed ``.ix`` indexer for positional access.
    dev_list = df.iloc[:, 0].values
    df = df.drop(df.columns[0], axis=1)
    # Capture the column labels only AFTER the identifier column is gone,
    # so that positions line up with the column indices the model returns.
    # Taking them before the drop shifted every printed label by one.
    prog_list = df.columns.values

    df = df.apply(fraction, axis=1)

    model = SpectralCoclustering(n_clusters=n_clusters)
    model.fit(df)
    print(model.get_params())

    # NOTE(review): the original formatting was lost; every statement
    # below is assumed to belong to the loop body because each depends on
    # the cluster index -- confirm against the original layout.
    for i in range(n_reported):
        print('Size of one cluster: %s' % (model.get_shape(i),))
        row_idx, col_idx = model.get_indices(i)
        print(prog_list[col_idx])
        print(model.get_submatrix(i, df.values))
        dev_in_cluster = dev_list[row_idx]
        print('number of devices within this cluster: %d'
              % len(dev_in_cluster))
        get_income(db, dma, dev_in_cluster.tolist())