예제 #1
0
def csoms(X, D, params=()):
    """Cluster the rows of X with a (D[0] x D[1]) self-organising map.

    Optional settings may be supplied positionally or by name through
    ``params`` (tuple/list or ndarray); they are resolved against
    ('neighbour', 'learning_rate', 'input_length_ratio') with defaults
    (0.1, 0.2, -1) by ds.resolveargumentpairs.

    Returns the partition matrix produced by clustVec2partMat, with one
    cluster per SOM node.
    """
    param_names = ['neighbour', 'learning_rate', 'input_length_ratio']
    defaults = [0.1, 0.2, -1]
    local_params = params.tolist() if isinstance(params, np.ndarray) else params
    neighbour, learning_rate, input_length_ratio = ds.resolveargumentpairs(
        param_names, defaults, local_params)

    data = np.array(X)

    n_clusters = D[0] * D[1]   # one cluster per SOM grid node
    n_samples = data.shape[0]  # number of genes (rows)
    n_dims = data.shape[1]     # dimensionality of each sample

    som = sompy.SOM(D, data)
    som.set_parameter(neighbor=neighbour,
                      learning_rate=learning_rate,
                      input_length_ratio=input_length_ratio)

    # Train, then assign every sample to its nearest codebook vector.
    centres = som.train(n_samples).reshape(n_clusters, n_dims)
    labels = [np.argmin([spdist.euclidean(c, row) for c in centres])
              for row in data]
    return clustVec2partMat(labels, n_clusters)
예제 #2
0
def classifySamples(testDataIn, numbClasses, hitMapBool, trainDataIn=None):
    """Locate each sample of testDataIn on a trained SOM grid.

    Trains a (numbClasses x numbClasses) SOM and projects the test samples
    onto it, returning the flat grid position of each sample's best
    matching unit.

    BUG FIX: the original body referenced ``trainDataIn``, which was not a
    parameter, so it silently depended on a module-level global of that
    name (NameError otherwise). It is now an explicit optional argument;
    when omitted, the SOM is trained on ``testDataIn`` itself.

    testDataIn   -- samples to classify (array-like, rows = samples)
    numbClasses  -- SOM grid side length (grid is numbClasses x numbClasses)
    hitMapBool   -- when truthy, also draw the hit map for testDataIn
    trainDataIn  -- optional training data; defaults to testDataIn

    Returns an array with one grid location per test sample.
    """
    if trainDataIn is None:
        trainDataIn = testDataIn

    sm = SOM.SOM('sm', trainDataIn, mapsize=[numbClasses, numbClasses],
                 norm_method='var', initmethod='pca')
    sm.train(n_job=1, shared_memory='no', verbose='off')

    if hitMapBool:
        sm.hit_map(testDataIn)

    testData_proj = sm.project_data(testDataIn)
    # Column 2 of ind_to_xy appears to be the flat node index of each
    # sample's BMU -- TODO confirm against the sompy version in use.
    testData_loc = sm.ind_to_xy(testData_proj)[:, 2]

    return testData_loc
예제 #3
0
def clusterSamples(model, trainDataIn, testDataIn, params):
    """Fit a clustering model on trainDataIn and label train and test data.

    model        -- one of 'SOM', 'AffinityPropagation', 'DBSCAN',
                    'KMeans', 'AgglomerativeClustering'
    trainDataIn  -- training samples (rows = samples)
    testDataIn   -- samples to label with the fitted model
    params       -- model-specific settings:
                    'SOM'      -> (map_rows, map_cols, show_hitmap)
                    'KMeans' / 'AgglomerativeClustering' -> (n_clusters, ...)
                    ignored for the other models

    Returns (train_labels, test_labels, fitted_model).

    Raises ValueError for an unknown model name (the original silently
    returned None, which made typos hard to spot).
    """
    if model == 'SOM':
        # Map size
        msz0 = params[0]
        msz1 = params[1]

        sm = SOM.SOM('sm', trainDataIn, mapsize=[msz0, msz1],
                     norm_method='var', initmethod='pca')
        sm.train(n_job=1, shared_memory='no', verbose='off')

        if params[2] == True:
            # Hitmap for CORPUS (train, red) and TARGET (test, blue) data.
            sm.hit_map(testDataIn)

        testData_proj = sm.project_data(testDataIn)
        trainData_proj = sm.project_data(trainDataIn)
        # Column 2 of ind_to_xy appears to be the flat node index of each
        # sample's BMU -- TODO confirm against the sompy version in use.
        testData_loc = sm.ind_to_xy(testData_proj)[:, 2]
        trainData_loc = sm.ind_to_xy(trainData_proj)[:, 2]

        return trainData_loc, testData_loc, sm

    # Use a separate name for the estimator instead of rebinding the
    # `model` string parameter (the original shadowed it).
    if model == 'AffinityPropagation':
        clusterer = AffinityPropagation()
        clusterer.fit(trainDataIn)
        return (clusterer.predict(trainDataIn),
                clusterer.predict(testDataIn),
                clusterer)

    if model == 'DBSCAN':
        clusterer = DBSCAN()
        clusterer.fit(trainDataIn)
        # NOTE(review): DBSCAN has no predict(); fit_predict() RE-FITS on
        # each call, so the test labels come from a fresh fit on testDataIn
        # alone and are not comparable to the train labels. Kept as in the
        # original.
        return (clusterer.fit_predict(trainDataIn),
                clusterer.fit_predict(testDataIn),
                clusterer)

    if model == 'KMeans':
        clusterer = KMeans(n_clusters=params[0])
        clusterer.fit(trainDataIn)
        return (clusterer.predict(trainDataIn),
                clusterer.predict(testDataIn),
                clusterer)

    if model == 'AgglomerativeClustering':
        clusterer = AgglomerativeClustering(n_clusters=params[0])
        clusterer.fit(trainDataIn)
        # NOTE(review): AgglomerativeClustering also lacks predict();
        # fit_predict() refits on the given data (kept as in the original).
        return (clusterer.fit_predict(trainDataIn),
                clusterer.fit_predict(testDataIn),
                clusterer)

    raise ValueError("Unknown model: %r" % (model,))
#Data = df_allyears_raw[['NDVI','CHIRP','LST']].as_matrix()
# Notebook-style script: train a 6x6 SOM on `Data` and visualise the maps.
# Prepend a local sompy checkout so it is imported ahead of anything else
# on the path (Windows / Python 2.7 layout).
import sys
sys.path.insert(0, r'D:\Python27x64\Lib\site-packages\sompy')
import sompy as SOM


# In[20]:

# `Data` is defined earlier (outside this chunk); drop the last 4 rows
# because precipitation has NaN for the last period of 2015.
Data = Data[0:-4] # Precip has Nan for last period of 2015


# In[21]:

# SOM grid size: 6 x 6 nodes.
msz11 = 6
msz10 = 6
som1 = SOM.SOM('som1', Data, mapsize = [msz10, msz11], norm_method = 'var', initmethod='pca')
# What you get when you initialize the map with PCA (before any training).
som1.init_map()
som1.view_map(text_size=7, cmap='viridis', grid='Yes')


# In[22]:

# What you get when you train the map; the trained view is also saved to PNG.
som1.train(n_job = 1, shared_memory = 'no',verbose='off')
som1.view_map(text_size=7, cmap='viridis', grid='Yes', save='Yes', save_dir=r'D:\Downloads\Mattijn@Jia\png\trial_7\SOM.png')


# In[35]:

#hitmap
예제 #5
0
    'C:/Users/zchen4/Desktop/data/attraction frequency/freqs_Fri.csv')
# NOTE(review): the pd.read_csv(...) call that produces `file_fri1` is cut
# off above this chunk; only its trailing path argument is visible.
print file_fri1.head()

# file_sat1 = pd.read_csv('C:/Users/zchen4/Desktop/data/attraction frequency/freqs_Sat.csv')
# file_sun1 = pd.read_csv('C:/Users/zchen4/Desktop/data/attraction frequency/freqs_Sun.csv')

# Attraction ids, used as sample labels.
label_fri1 = file_fri1['id'].values
# label_sat1 = file_sat1['id'].values
# label_sun1 = file_sun1['id'].values

# Columns 1..70 hold the per-column frequency values (presumably one
# column per time slot -- verify against the CSV). DataFrame.ix is
# deprecated in later pandas; .iloc would be the modern equivalent.
data_fri1 = file_fri1.ix[:, 1:71].values
# data_sat1 = file_sat1.ix[:,1:71].values
# data_sun1 = file_sun1.ix[:,1:71].values

# 30 x 30 SOM grid.
msz0 = 30
msz1 = 30

sm = SOM.SOM('sm',
             data_fri1,
             mapsize=[msz0, msz1],
             norm_method='var',
             initmethod='pca')
sm.train(n_job=1, shared_memory='no', verbose='final')

sm.view_map(text_size=7)

# df = pd.DataFrame(data = data_fri1, columns= attr)
# fig = scatter_matrix(df, alpha=0.2, figsize=(10, 10), diagonal='kde')

plt.show()
예제 #6
0
# Column names of the source frame as strings (not used in the visible
# part of this snippet -- presumably for plot labelling later).
attr = file_fri_1.columns.values.astype('str')

# 30 x 30 SOM grid.
msz0 = 30
msz1 = 30

Data = data_fri_1
label = labour_fri_1
print 'Data size: ', Data.shape
# Put this if you are updating the sompy codes (forces a module reload).
reload(sys.modules['sompy'])

# Time the training run.
t0 = time.time()
# Unlike the other snippets, this SOM is built with per-sample labels.
sm = SOM.SOM('sm',
             Data,
             label,
             mapsize=[msz0, msz1],
             norm_method='var',
             initmethod='pca')

sm.init_map()

sm.train(n_job=1, shared_memory='no', verbose='off')

print 'Training is done in: ', time.time() - t0, 'seconds'
a = sm.view_map(what='codebook',
                which_dim='all',
                pack='Yes',
                text_size=2.8,
                save='No',
                save_dir='empty',
예제 #7
0
#other wise the training data will be parallelized
# Demo script: train a 30x30 SOM on random data, then build two small
# synthetic 2-D data sets (Data1, Data2) for later experiments.

#This is your selected map size
msz0 = 30
msz1 = 30

#This is a random data set, but in general it is assumed that you have your own data set as a numpy ndarray
Data = np.random.rand(10 * 1000, 20)
print 'Data size: ', Data.shape

#Put this if you are updating the sompy codes otherwise simply remove it
reload(sys.modules['sompy'])

sm = SOM.SOM('sm',
             Data,
             mapsize=[msz0, msz1],
             norm_method='var',
             initmethod='pca')
sm.train(n_job=1, shared_memory='no', verbose='final')

sm.view_map(text_size=7)

# 200 points where column 1 is set to column 0 plus small uniform noise
# (positively correlated cloud).
dlen = 200
Data1 = pd.DataFrame(data=1 * np.random.rand(dlen, 2))
Data1.values[:, 1] = (Data1.values[:, 0][:, np.newaxis] +
                      .42 * np.random.rand(dlen, 1))[:, 0]

# 200 points shifted by +1 where column 1 is minus column 0 plus noise
# (negatively correlated cloud).
Data2 = pd.DataFrame(data=1 * np.random.rand(dlen, 2) + 1)
Data2.values[:, 1] = (-1 * Data2.values[:, 0][:, np.newaxis] +
                      .62 * np.random.rand(dlen, 1))[:, 0]