def csoms(X, D, params=()):
    """Cluster the rows of X with a self-organising map and return a partition matrix.

    Parameters
    ----------
    X : array-like, shape (N, Ndim)
        Data matrix; one row per sample (gene).
    D : sequence of two ints
        SOM grid dimensions; the number of clusters is K = D[0] * D[1].
    params : sequence or np.ndarray, optional
        Optional overrides for ('neighbour', 'learning_rate',
        'input_length_ratio'); defaults are (0.1, 0.2, -1).

    Returns
    -------
    Whatever `clustVec2partMat` produces for the cluster-index vector
    (a K-cluster partition matrix).
    """
    pnames = ['neighbour', 'learning_rate', 'input_length_ratio']
    dflts = [0.1, 0.2, -1]
    # Accept parameters as either an ndarray or a plain sequence.
    if isinstance(params, np.ndarray):
        paramsloc = params.tolist()
    else:
        paramsloc = params
    (neighbour, learning_rate, input_length_ratio) = ds.resolveargumentpairs(pnames, dflts, paramsloc)

    Xloc = np.array(X)
    K = D[0] * D[1]      # Number of clusters (SOM nodes)
    N = Xloc.shape[0]    # Number of genes (samples)
    Ndim = Xloc.shape[1] # Number of dimensions in X

    som = sompy.SOM(D, Xloc)
    som.set_parameter(neighbor=neighbour, learning_rate=learning_rate,
                      input_length_ratio=input_length_ratio)
    centres = som.train(N).reshape(K, Ndim)

    # Assign each sample to its nearest SOM node. cdist computes the full
    # (N, K) Euclidean distance matrix in one vectorised C-level call,
    # replacing the original O(N*K) Python-level double loop over
    # spdist.euclidean; the resulting labels are identical.
    from scipy.spatial.distance import cdist
    C = list(np.argmin(cdist(Xloc, centres), axis=1))
    return clustVec2partMat(C, K)
def classifySamples(testDataIn, numbClasses, hitMapBool):
    """Train a square SOM and return the map row index of each test sample.

    Parameters
    ----------
    testDataIn : array-like
        Samples to project onto the trained map.
    numbClasses : int
        The SOM is built as a numbClasses x numbClasses grid.
    hitMapBool : bool
        If truthy, display a hit map of `testDataIn` on the trained SOM.

    Returns
    -------
    testData_loc : array of per-sample map coordinates (column 2 of
        `ind_to_xy`, as the code below shows).

    NOTE(review): `trainDataIn` below is NOT a parameter of this function —
    it must come from module/global scope or this raises NameError. Compare
    with `clusterSamples`, which takes `trainDataIn` explicitly; this looks
    like a copy-paste slip — confirm intended training data before reuse.
    """
    # Train on the (externally supplied) training data; PCA initialisation,
    # per-variable normalisation — same settings as `clusterSamples`.
    sm = SOM.SOM('sm', trainDataIn, mapsize = [numbClasses, numbClasses], norm_method = 'var', initmethod='pca')
    sm.train(n_job=1, shared_memory='no',verbose='off')
    if hitMapBool:
        sm.hit_map(testDataIn)
    # Project test samples to best-matching units, then convert the flat
    # node indices to (x, y, index) triples and keep column 2.
    testData_proj = sm.project_data(testDataIn)
    testData_loc = sm.ind_to_xy(testData_proj)[:,2]
    return testData_loc
def clusterSamples(model, trainDataIn, testDataIn, params):
    """Fit the named clustering model on training data and label train and test sets.

    Parameters
    ----------
    model : str
        One of 'SOM', 'AffinityPropagation', 'DBSCAN', 'KMeans',
        'AgglomerativeClustering'.
    trainDataIn, testDataIn : array-like
        Feature matrices used for fitting and for labelling, respectively.
    params : sequence
        Model-specific settings: for 'SOM', (map_rows, map_cols, show_hitmap);
        for 'KMeans' / 'AgglomerativeClustering', params[0] is n_clusters.
        Unused for the other models.

    Returns
    -------
    (train_labels, test_labels, fitted_model)

    Raises
    ------
    ValueError
        If `model` is not a recognised name (the original fell through and
        implicitly returned None, which hid typos from callers).
    """
    if model == 'SOM':
        msz0 = params[0]  # map rows
        msz1 = params[1]  # map columns
        sm = SOM.SOM('sm', trainDataIn, mapsize=[msz0, msz1],
                     norm_method='var', initmethod='pca')
        sm.train(n_job=1, shared_memory='no', verbose='off')
        # Truthiness test matches the `if hitMapBool:` style used by
        # classifySamples elsewhere in this file.
        if params[2]:
            # Hit map of test data (blue) on the map trained on train data (red).
            sm.hit_map(testDataIn)
        testData_proj = sm.project_data(testDataIn)
        trainData_proj = sm.project_data(trainDataIn)
        # ind_to_xy gives (x, y, index) triples; keep column 2 as the location.
        testData_loc = sm.ind_to_xy(testData_proj)[:, 2]
        trainData_loc = sm.ind_to_xy(trainData_proj)[:, 2]
        return trainData_loc, testData_loc, sm
    # `elif` keeps the later string comparisons from being evaluated against
    # the fitted estimator object after `model` has been rebound above.
    elif model == 'AffinityPropagation':
        model = AffinityPropagation()
        model.fit(trainDataIn)
        return model.predict(trainDataIn), model.predict(testDataIn), model
    elif model == 'DBSCAN':
        # DBSCAN has no predict(); fit_predict() refits on every call, so the
        # original's separate fit() was redundant and is dropped. Note the
        # test labels come from a model refitted on the test data alone.
        model = DBSCAN()
        return model.fit_predict(trainDataIn), model.fit_predict(testDataIn), model
    elif model == 'KMeans':
        model = KMeans(n_clusters=params[0])
        model.fit(trainDataIn)
        return model.predict(trainDataIn), model.predict(testDataIn), model
    elif model == 'AgglomerativeClustering':
        # Like DBSCAN: no predict(), and fit_predict() refits each time.
        model = AgglomerativeClustering(n_clusters=params[0])
        return model.fit_predict(trainDataIn), model.fit_predict(testDataIn), model
    raise ValueError('Unknown clustering model: %r' % (model,))
#Data = df_allyears_raw[['NDVI','CHIRP','LST']].as_matrix() import sys sys.path.insert(0, r'D:\Python27x64\Lib\site-packages\sompy') import sompy as SOM # In[20]: Data = Data[0:-4] # Precip has Nan for last period of 2015 # In[21]: msz11 = 6 msz10 = 6 som1 = SOM.SOM('som1', Data, mapsize = [msz10, msz11], norm_method = 'var', initmethod='pca') #What you get when you initialize the map with pca som1.init_map() som1.view_map(text_size=7, cmap='viridis', grid='Yes') # In[22]: #What you get when you train the map som1.train(n_job = 1, shared_memory = 'no',verbose='off') som1.view_map(text_size=7, cmap='viridis', grid='Yes', save='Yes', save_dir=r'D:\Downloads\Mattijn@Jia\png\trial_7\SOM.png') # In[35]: #hitmap
# NOTE(review): this chunk begins mid-statement — the path/closing paren on
# the first line belongs to a pd.read_csv(...) call (assigning `file_fri1`)
# that starts before the visible source.
'C:/Users/zchen4/Desktop/data/attraction frequency/freqs_Fri.csv')
print file_fri1.head()  # Python 2 print statement
# file_sat1 = pd.read_csv('C:/Users/zchen4/Desktop/data/attraction frequency/freqs_Sat.csv')
# file_sun1 = pd.read_csv('C:/Users/zchen4/Desktop/data/attraction frequency/freqs_Sun.csv')
# Attraction ids serve as labels; Saturday/Sunday variants left disabled.
label_fri1 = file_fri1['id'].values
# label_sat1 = file_sat1['id'].values
# label_sun1 = file_sun1['id'].values
# Columns 1..70 hold the frequency features.
# NOTE(review): DataFrame.ix is removed in modern pandas — .iloc equivalent here.
data_fri1 = file_fri1.ix[:, 1:71].values
# data_sat1 = file_sat1.ix[:,1:71].values
# data_sun1 = file_sun1.ix[:,1:71].values
# Train a 30x30 SOM on the Friday frequencies and show the component planes.
msz0 = 30
msz1 = 30
sm = SOM.SOM('sm', data_fri1, mapsize=[msz0, msz1], norm_method='var', initmethod='pca')
sm.train(n_job=1, shared_memory='no', verbose='final')
sm.view_map(text_size=7)
# df = pd.DataFrame(data = data_fri1, columns= attr)
# fig = scatter_matrix(df, alpha=0.2, figsize=(10, 10), diagonal='kde')
plt.show()
# Train a labelled 30x30 SOM on the Friday data and time the training run.
# `file_fri_1`, `data_fri_1`, and `labour_fri_1` are defined earlier in the
# file (not visible here) — presumably the loaded CSV, its feature matrix,
# and its label vector; TODO confirm (note `labour_` vs the `label_` naming
# used elsewhere — possible typo upstream).
attr = file_fri_1.columns.values.astype('str')
msz0 = 30
msz1 = 30
Data = data_fri_1
label = labour_fri_1
print 'Data size: ', Data.shape  # Python 2 print statement
#Put this if you are updating the sompy codes
reload(sys.modules['sompy'])
t0 = time.time()
# Labelled SOM: `label` is passed alongside the data.
sm = SOM.SOM('sm', Data, label, mapsize=[msz0, msz1], norm_method='var', initmethod='pca')
sm.init_map()
sm.train(n_job=1, shared_memory='no', verbose='off')
print 'Training is done in: ', time.time() - t0, 'seconds'
# NOTE(review): this view_map(...) call continues past the visible source —
# the argument list is intentionally left open here.
a = sm.view_map(what='codebook', which_dim='all', pack='Yes', text_size=2.8, save='No', save_dir='empty',
#other wise the training data will be parallelized #This is your selected map size msz0 = 30 msz1 = 30 #This is a random data set, but in general it is assumed that you have your own data set as a numpy ndarray Data = np.random.rand(10 * 1000, 20) print 'Data size: ', Data.shape #Put this if you are updating the sompy codes otherwise simply remove it reload(sys.modules['sompy']) sm = SOM.SOM('sm', Data, mapsize=[msz0, msz1], norm_method='var', initmethod='pca') sm.train(n_job=1, shared_memory='no', verbose='final') sm.view_map(text_size=7) dlen = 200 Data1 = pd.DataFrame(data=1 * np.random.rand(dlen, 2)) Data1.values[:, 1] = (Data1.values[:, 0][:, np.newaxis] + .42 * np.random.rand(dlen, 1))[:, 0] Data2 = pd.DataFrame(data=1 * np.random.rand(dlen, 2) + 1) Data2.values[:, 1] = (-1 * Data2.values[:, 0][:, np.newaxis] + .62 * np.random.rand(dlen, 1))[:, 0]