def main(in_file, out_file, x, y, epochs, ref=None, test=False, verbose=0):
    """Map every row of a numeric table to its winning neuron on a SOM.

    Rows containing any missing value and columns with a non-numeric dtype
    are discarded (with a warning) before the data reach the SOM.

    Args:
        in_file: Path of a tab-separated table whose first column is used as
            the row index. When ``test`` is true this is instead an in-memory
            2-D array-like exposing ``.shape``.
        out_file: Destination of the tab-separated result table. Unused when
            ``test`` is true.
        x: First argument forwarded to ``SOM(x, y)`` (presumably a map
            dimension -- confirm against the SOM class).
        y: Second argument forwarded to ``SOM(x, y)``.
        epochs: Forwarded to ``SOM.fit``; unused when ``ref == 'IRCI'``.
        ref: ``'IRCI'`` loads a previously stored SOM instead of training;
            ``'Create'`` trains a SOM and stores it as ``SOM.pkl`` next to
            this script; any other value trains without storing.
        test: When true, read ``in_file`` as in-memory data and return the
            result instead of writing it to ``out_file``.
        verbose: Forwarded to ``SOM.fit``.

    Returns:
        A DataFrame with columns ``ID``, ``X`` and ``Y`` when ``test`` is
        true, otherwise ``None`` (the table is written to ``out_file``).
    """
    if test:
        df = pd.DataFrame(in_file, columns=range(in_file.shape[1]))
    else:
        df = pd.read_table(in_file, sep='\t', low_memory=True, index_col=0)

    # Drop incomplete rows first; this check still sees non-numeric columns.
    rows_before = df.shape[0]
    df = df.dropna(axis=0, how='any')
    rows_dropped = rows_before - df.shape[0]
    if rows_dropped:
        logger.warning('%d rows dropped due to missing values', rows_dropped)

    cols_before = df.shape[1]
    df = df.select_dtypes(include=[np.number])
    cols_dropped = cols_before - df.shape[1]
    if cols_dropped:
        logger.warning('%d columns dropped due to non-numeric data type',
                       cols_dropped)

    basedir = os.path.dirname(os.path.abspath(__file__))
    model_path = os.path.join(basedir, 'SOM.pkl')

    som = SOM(x, y)
    if ref == 'IRCI':
        # The reference map used to be read from '/SOM.pkl' (filesystem
        # root), which is not where ref='Create' stores it. Keep honouring
        # the legacy location when it exists, otherwise read the file that
        # 'Create' writes next to this script.
        legacy_path = '/SOM.pkl'
        load_path = legacy_path if os.path.exists(legacy_path) else model_path
        # NOTE(review): relies on SOM.load returning the loaded map -- confirm.
        som = som.load(load_path)
    else:
        som.fit(df.values, epochs, verbose=verbose)

    embedding = som.winner_neurons(df.values)

    # Persist only after the winners were computed, as before.
    if ref == 'Create':
        som.save(model_path)

    # Column 1 of the winner coordinates is reported as X, column 0 as Y.
    emb_df = pd.DataFrame({'ID': df.index})
    emb_df['X'] = embedding[:, 1]
    emb_df['Y'] = embedding[:, 0]

    if test:
        return emb_df
    emb_df.to_csv(out_file, index=False, sep='\t')
#som.plot('result.png', 0) #plt.figure(figsize=(8,8)) #i = 1 #for index in np.ndindex(som.shape): # print(index, i) # node_full = np.empty(tuple(variables[0].shape[1:])).flatten() # mask = np.ma.getmaskarray(variables[0][0,:].flatten()) # print(node_full.shape, mask.shape) #node_full[~mask] = som.nodes[index][:mask.shape[0]]*som.std[:mask.shape[0]] + som.mean[:mask.shape[0]] # node_full[~mask] = som.nodes[index][:mask.shape[0]] # node_full = node_full.reshape(tuple(variables[0].shape[1:])) # node_full = np.ma.masked_greater(node_full, 1e9) # print node_full.shape # plt.subplot(som.shape[1], som.shape[0], i) # plt.pcolormesh(np.squeeze(node_full)) # i += 1 #plt.savefig('result.png') som.save('som.nc')