def SOM_test(train, test, testt, niteration, figure_number):
    """Compare an untrained 50 x 50 SOM with trained 10, 20 and 50 square SOMs.

    For every network the best-matching node of each row of ``test`` is
    computed, the mapping is drawn with ``plot_som_graph`` and the first
    element of the value returned by ``count_overlaps`` is printed.

    Args:
        train: Data used to construct (and, for the trained networks, to
            train) each SOM.
        test: Samples that are mapped onto every network.
        testt: Targets handed to ``plot_som_graph`` and ``count_overlaps``
            together with the mapping of ``test``.
        niteration: Iteration count forwarded to ``run_som``.
        figure_number: Figure number of the first plot; the trained networks
            use ``figure_number + 1`` to ``figure_number + 3``.
    """
    # Baseline: a freshly constructed network that is never trained.
    net = som.som(50, 50, train)

    # Map the *test* rows so that ``best`` lines up with ``testt``, which is
    # what the trained runs below get from run_som(..., train, test, ...).
    # The previous code mapped the training rows here and then scored them
    # against the test targets.
    n_test = np.shape(test)[0]
    best = np.zeros(n_test, dtype=int)
    for i in range(n_test):
        best[i], _activation = net.somfwd(test[i, :])
    plot_som_graph(best, net, testt, figure_number)
    print(
        'Number of overlaps neuron for SOM of 50 x 50 network without Training ',
        count_overlaps(testt, best)[0])

    # Trained networks of increasing size, one figure each.
    for offset, size in enumerate((10, 20, 50), start=1):
        best, net = run_som(size, size, train, test, niteration)
        plot_som_graph(best, net, testt, figure_number + offset)
        print('Number of overlaps neuron for SOM of %d x %d network ' % (size, size),
              count_overlaps(testt, best)[0])
def __init__(self, parent=None):
    """Set up the widget, restore the stored network and connect the UI."""
    super(MyWidget, self).__init__(parent)
    self.setupUi(self)

    self.beach_dir = ""
    self.forest_dir = ""

    # Restore what the "weights" store holds; load() yields six values.
    self.d = weights.data("weights")
    loaded = self.d.load()
    self.ow, self.oh, self.inpf, self.inpb, self.w, self.m = loaded
    if self.ow == -1:
        # -1 in the first slot is treated as a load failure.
        QtGui.QMessageBox.about(self, "ERROR", "error in weights file")
        exit(0)

    # Rebuild the network with the stored sizes and put the weights back.
    flat_size = self.ow * self.oh * 3
    inp_total = self.inpf + self.inpb
    self.s = som.som(flat_size, inp_total, 0.01)
    self.s.init()
    print("{}   {}".format(flat_size * inp_total, len(self.m)))
    self.s.put_weights(self.w, self.m)

    self.img1 = image.image(self.ow, self.oh)
    self.img2 = image.image(self.ow, self.oh)
    self.text = ""
    self.f_dir = ""
    self.b_dir = ""

    # Call re_write once per second.
    self.timer = QtCore.QTimer()
    self.timer.setInterval(1000)
    self.timer.timeout.connect(self.re_write)
    self.timer.start()

    # Button wiring.
    self.start.clicked.connect(self.start_func)
    self.open_b.clicked.connect(self.open_b_dir)
    self.open_f.clicked.connect(self.open_f_dir)
    self.run = False
def run_som(x, y, train, test, niteration):
    """Train an ``x`` by ``y`` SOM on ``train`` and map every row of ``test``.

    Args:
        x, y: Sizes passed to ``som.som``.
        train: Data used both to construct and to train the network.
        test: Rows that are pushed through the trained network.
        niteration: Iteration count passed to ``somtrain``.

    Returns:
        ``(best, net)`` where ``best`` is an integer array holding, for each
        row of ``test``, the first value returned by ``net.somfwd``, and
        ``net`` is the trained network.
    """
    net = som.som(x, y, train)
    net.somtrain(train, niteration)

    # Winning node for each test row (the activation is discarded).
    n_rows = np.shape(test)[0]
    best = np.zeros(n_rows, dtype=int)
    for row in range(n_rows):
        winner, _activation = net.somfwd(test[row, :])
        best[row] = winner
    return best, net
def makesom(x, y):
    """Train an ``x`` by ``y`` SOM on the module-level ``train`` data.

    The network is trained for 400 iterations and is not returned.

    Returns:
        Integer array with, for each row of ``train``, the first value
        returned by ``net.somfwd``.
    """
    net = som.som(x, y, train)
    net.somtrain(train, 400)

    # Winning node for each training row (the activation is discarded).
    n_rows = np.shape(train)[0]
    best = np.zeros(n_rows, dtype=int)
    for row in range(n_rows):
        winner, _activation = net.somfwd(train[row, :])
        best[row] = winner
    return best
def __init__(self, parent=None):
    """Set up the widget, restore the stored network and connect the buttons."""
    super(MyWidget, self).__init__(parent)
    self.setupUi(self)

    # Restore what the "weights" store holds; load() yields six values.
    self.d = weights.data("weights")
    loaded = self.d.load()
    self.ow, self.oh, self.inpm, self.inpn, self.w, self.m = loaded
    if self.ow == -1:
        # -1 in the first slot is treated as a load failure.
        QtGui.QMessageBox.about(self, "ERROR", "error in weights file")
        exit(0)

    # Rebuild the network with the stored sizes and put the weights back.
    flat_size = self.ow * self.oh * 3
    inp_total = self.inpm + self.inpn
    self.s = som.som(flat_size, inp_total, 0.01)
    self.s.init()
    self.s.put_weights(self.w, self.m)

    self.file = ""

    # Button wiring and label behaviour.
    self.pushButton_2.clicked.connect(self.open)
    self.pushButton.clicked.connect(self.open_file)
    self.label.setScaledContents(True)
def run_som_pcn():
    '''
    Train a SOM on one part of the pollen data, then run its perceptron on another.

    The pollen data is split by ``make_groups`` (train_size=300,
    test_size=350, validation_size=0). The first group trains a 5 x 5 SOM
    for 300 iterations; the second group and its targets are handed to
    ``net.run_perceptron`` with train_size=200 and test_size=150.
    '''
    prep = preprocess.preprocess('Pollens')
    raw = np.array(prep.create_one_file(SIMPLE_GRASS))
    pollen = prep.normalise_max(raw)

    groups = prep.make_groups(
        pollen,
        LABEL_SIZE,
        algorithm='mlp',
        train_size=300,
        test_size=350,
        validation_size=0)
    # Only the SOM inputs and the perceptron inputs/targets are used below.
    (som_train_set, _som_train_target,
     pcn_set, pcn_set_target,
     _empty_set, _empty_set_target) = groups

    net = som.som(5, 5, som_train_set)
    net.somtrain(som_train_set, 300)
    net.run_perceptron(pcn_set,
                       pcn_set_target,
                       train_size=200,
                       test_size=150)
# thread_func: image-training routine that reports its progress through self.text.
#
# Visible steps:
#   * sets self.run to True;
#   * calls self.img.get_all_images for self.f_dir and then self.b_dir;
#     num_cls_b is the second return value minus num_cls_f;
#   * when the two counts sum to <= 1, writes an error text and returns 0;
#   * otherwise builds and initialises a new som.som, and for every class index
#     calls self.s.learn once followed by self.s.train 400 times, rewriting
#     self.text with a percentage and a "=" progress bar;
#   * reads the weights back with self.s.get_weights and stores them with
#     self.d.save, appends the completion text and sleeps one second.
#
# NOTE(review): indentation was lost in this copy. Whether `pb = pb + "="`
# belongs to the `if j % 10 == 0` body, and whether get_weights()/save() run
# once after all classes or once per class, cannot be told from here - confirm
# against the original file before restructuring.
# NOTE(review): self.run is set to True and is not reset anywhere in this
# method (including the early `return 0`) - confirm the caller clears it.
def thread_func(self): self.run = True self.text = "" self.text = "Loading Images ..." self.num_cls_f = self.img.get_all_images(self.f_dir) self.num_cls_b = self.img.get_all_images(self.b_dir) - self.num_cls_f self.text = "Loading Images ...Done !!!\n\n" if self.num_cls_f + self.num_cls_b <= 1: self.text = "Error - no enough images\n" return 0 self.s = som.som(self.ow * self.oh * 3, self.num_cls_f + self.num_cls_b, 0.01) self.s.init() self.text = self.text + "total number of class : " self.text = self.text + str(self.num_cls_f + self.num_cls_b) + "\n" for i in range(0, self.num_cls_f + self.num_cls_b): dataset = self.img.get_data_set(i) indx = self.s.learn(dataset, str(i)) self.text = self.text + "\nclass : " + str(i) + "\n\n" error = 0 txt = self.text pb = "" for j in range(0, 400): error = self.s.train(indx) if j % 10 == 0: self.text = txt + str(float(j) / 4) + "% : " + pb + "=>" pb = pb + "=" self.text = txt + "100% : " + pb + "=>" self.text = self.text + "\n\nerror : " + str(error) + "\n" w, m = self.s.get_weights() self.d.save(self.ow, self.oh, self.num_cls_f, self.num_cls_b, w, m) self.text = self.text + "\n\n---TRAINING COMPLEATED---\n" time.sleep(1)
def execute():
    """Cluster the module-level ``data`` with the SOM and return the result as JSON.

    The JSON request body supplies ``clusters`` (used as the number of SOM
    columns; the number of rows is fixed to 1) and ``dimensions`` (the keys
    of ``data`` to evaluate). NaN cells are replaced by 0 before clustering.
    """
    body = request.json
    # Number of clusters to run.
    clusters = body['clusters']
    # Dimensions (columns) to evaluate.
    dimensions = body['dimensions']

    # Pick the requested columns.
    filtered_data = [data[col] for col in dimensions]

    # Build one item per record, defaulting cells without a value to 0.
    size = len(data['name'])
    items = []
    for i in range(size):
        items.append([
            0 if math.isnan(column[i]) else column[i]
            for column in filtered_data
        ])

    # Run the algorithm on the columns chosen by the user.
    result = som.som(data['name'], items, 1, clusters)
    return jsonify(result)
test = iris[3::4, 0:4]
testt = target[3::4]

# k-means with 3 clusters: print the cluster of every test row next to
# column 4 of the same iris rows.
import kmeansnet

net = kmeansnet.kmeans(3, train)
net.kmeanstrain(train)
cluster = net.kmeansfwd(test)
print(1. * cluster)
print(iris[3::4, 4])

# 6 x 6 SOM trained for 400 iterations on the same training data.
import som

net = som.som(6, 6, train)
net.somtrain(train, 400)

# Winning node of every training row.
best = np.zeros(np.shape(train)[0], dtype=int)
for i in range(np.shape(train)[0]):
    best[i], activation = net.somfwd(train[i, :])

# All nodes as black dots, then the winners of each class in its own marker.
pl.plot(net.map[0, :], net.map[1, :], 'k.', ms=15)
for _label, _marker in ((0, 'rs'), (1, 'gv'), (2, 'b^')):
    where = pl.find(traint == _label)
    pl.plot(net.map[0, best[where]], net.map[1, best[where]], _marker, ms=30)
pl.axis([-0.1, 1.1, -0.1, 1.1])
pl.axis('off')
penaltySum = 0
for i in range(10):
    # Reshuffle the data set and rebuild the train/validation/test splits.
    order = range(shape(iris)[0])
    random.shuffle(order)
    iris, target = iris[order, :], target[order, :]

    train, traint = iris[::2, 0:4], target[::2]
    valid, validt = iris[1::4, 0:4], target[1::4]
    test, testt = iris[3::4, 0:4], target[3::4]

    # Square SOM of side ``netsize`` trained for 400 iterations.
    net = som.som(netsize, netsize, train)
    net.somtrain(train, 400)

    # Winning node of every training row.
    # NOTE: this inner loop reuses the name ``i`` of the outer loop.
    best = zeros(shape(train)[0], dtype=int)
    for i in range(shape(train)[0]):
        best[i], activation = net.somfwd(train[i, :])

    # Map coordinates of the winners of the class-0 training rows.
    where = find(traint == 0)
    rs11 = net.map[0, best[where]]
    rs12 = net.map[1, best[where]]
# run(date): fit a SOM on history before `date` and write a forecast CSV for `date`.
#
# Visible steps:
#   * keeps the rows of the module-level dataDF with 'date' <= date and, when
#     MAX_LOOKBACK_MONTHS > 0, not older than that many month-ends;
#   * splits off the rows dated exactly `date` ("today"); the rest is training data;
#   * trains on the columns whose name contains COUNTRY_STUDY or equals 'CurrRET';
#     the only predicted column is 'NextRET';
#   * the sqrt-based grid sizing sits inside a string literal and is therefore
#     inactive; the grid side is always GRIDMAX;
#   * trains som.som for TRAIN_STEPS, stores the first value of somfwd per row
#     in a 'hood' column for both training rows and today's rows; with
#     CLUSTERINFO set, somfwd is called with clusterSizeFlag=True and four extra
#     'hood_*' columns are filled;
#   * for each predicted column, the per-'hood' median of the training rows is
#     mapped onto today's rows as 'pred_<column>', and today's frame is written
#     to MODEL_NAME + '/forecast_<YYYYMMDD>.csv'.
#
# NOTE(review): indentation was lost in this copy; in particular it cannot be
# told whether `print train_data.head(2).T` is guarded by `if DEBUG` - confirm
# against the original file before restructuring.
# NOTE(review): `data[-ix_today]` and `.ix` look like legacy pandas usage -
# confirm the pandas version this runs under.
def run(date): ''' ''' print date data = dataDF[dataDF['date'] <= date] if MAX_LOOKBACK_MONTHS>0: min_ret_date = date - pd.tseries.offsets.MonthEnd() * MAX_LOOKBACK_MONTHS data = data[data['date'] >= min_ret_date] #if only_country: # data = data[data['COUNTRY']==only_country] # can't use today's knowledge ix_today = data['date'] == date today = data[ix_today] data = data[-ix_today] # use factor past week return and month stdev, and perhaps epfr or other macro info predict_cols = ['NextRET',] use_cols = [x for x in data.columns if COUNTRY_STUDY in x or x == 'CurrRET'] train_data = data[use_cols] N = len(train_data) clumps = float(N)/30 ''' grid = int(np.sqrt(clumps)) if grid < GRIDMIN: grid = GRIDMIN elif grid > GRIDMAX: grid = GRIDMAX ''' grid = GRIDMAX print 'N: {}, clumps: {}, grid size chosen: {}'.format(N, clumps, grid) SOM_X, SOM_Y = grid, grid if DEBUG: print 'train' print train_data.head(2).T som1 = som.som(SOM_X, SOM_Y, train_data.values, usePCA=False) ## input data, number of iterations som1.somtrain(train_data.values, TRAIN_STEPS) if DEBUG: print 'hood' ### model is now trained, walk through each row to get the neighborhood for a given row data['hood'] = None for cnt, row in data[use_cols].iterrows(): hood, act = som1.somfwd(row.values) data['hood'].ix[cnt] = hood ### now take todays data, walk through, and place each row in a neighborhood today['hood'] = None if not CLUSTERINFO: for cnt, row in today[use_cols].iterrows(): hood, act = som1.somfwd(row.values) today['hood'].ix[cnt] = hood else: today['hood_ct'] = None today['hood_z'] = None today['hood_avg'] = None today['hood_std'] = None for cnt, row in today[use_cols].iterrows(): hood, act, csize, czscore, cavg, cstd = som1.somfwd(row.values, clusterSizeFlag=True) today['hood'].ix[cnt] = hood today['hood_ct'].ix[cnt] = csize today['hood_z'].ix[cnt] = czscore today['hood_avg'].ix[cnt] = cavg today['hood_std'].ix[cnt] = cstd if DEBUG: print 'group' for ret_col in predict_cols: hood_ret = 
data.groupby('hood')[ret_col].median() hood_ret = hood_ret.to_dict() ### map the return back to securities today['pred_'+ret_col] = today['hood'].apply(hood_ret.get) today.to_csv(MODEL_NAME+'/forecast_{:%Y%m%d}.csv'.format(date)) if DEBUG: print 'done'
# run(date, only_country=None): fit a SOM on history before `date` and write a
# forecast CSV for `date`.
#
# Visible steps:
#   * keeps the rows of the module-level dataDF with 'date' <= date and, when
#     MAX_LOOKBACK_MONTHS > 0, not older than that many month-ends;
#   * when only_country is truthy, keeps only rows whose 'COUNTRY' equals it;
#   * when BENCHMARK is set, deletes the EPFR column;
#   * splits off the rows dated exactly `date` ("today"); the rest is training data;
#   * trains on the columns ending in '.sum' or '.std'; predicts every column
#     ending in 'fret';
#   * grid side = int(sqrt(N / 30) / 5) * 5, replaced by 3 when below 5 and by
#     10 when above 10;
#   * trains som.som for TRAIN_STEPS and stores the first value of somfwd per
#     row in a 'hood' column for both training rows and today's rows;
#   * for each predicted column, the per-'hood' median of the training rows is
#     mapped onto today's rows as 'pred_<column>', and today's frame is written
#     to MODEL_NAME + '/forecast_<YYYYMMDD>.csv'.
#
# NOTE(review): indentation was lost in this copy; in particular it cannot be
# told whether `print train_data.head(2).T` is guarded by `if DEBUG` - confirm
# against the original file before restructuring.
# NOTE(review): `pandas.datetools`, `data[-ix_today]` and `.ix` look like legacy
# pandas usage - confirm the pandas version this runs under.
def run(date, only_country=None): ''' ''' print date data = dataDF[dataDF['date'] <= date] if MAX_LOOKBACK_MONTHS > 0: min_ret_date = date - pandas.datetools.MonthEnd() * MAX_LOOKBACK_MONTHS data = data[data['date'] >= min_ret_date] if only_country: data = data[data['COUNTRY'] == only_country] # whether we want to contrast by creating a benchmark that doesnt use priviledge info if BENCHMARK: del data[EPFR] # can't use today's knowledge ix_today = data['date'] == date today = data[ix_today] data = data[-ix_today] # use factor past week return and month stdev, and perhaps epfr or other macro info use_cols = [ x for x in data.columns if x.endswith('.sum') or x.endswith('.std') ] predict_cols = [x for x in data.columns if x.endswith('fret')] train_data = data[use_cols] N = len(train_data) clumps = float(N) / 30 grid = int(np.sqrt(clumps) / 5.0) * 5 if grid < 5: grid = 3 elif grid > 10: grid = 10 print 'N: {}, clumps: {}, grid size chosen: {}'.format(N, clumps, grid) SOM_X, SOM_Y = grid, grid if DEBUG: print 'train' print train_data.head(2).T som1 = som.som(SOM_X, SOM_Y, train_data.values, usePCA=False) ## input data, number of iterations som1.somtrain(train_data.values, TRAIN_STEPS) if DEBUG: print 'hood' ### model is now trained, walk through each row to get the neighborhood for a given row data['hood'] = None for cnt, row in data[use_cols].iterrows(): hood, act = som1.somfwd(row.values) data['hood'].ix[cnt] = hood ### now take todays data, walk through, and place each row in a neighborhood today['hood'] = None for cnt, row in today[use_cols].iterrows(): hood, act = som1.somfwd(row.values) today['hood'].ix[cnt] = hood if DEBUG: print 'group' for ret_col in predict_cols: hood_ret = data.groupby('hood')[ret_col].median() hood_ret = hood_ret.to_dict() ### map the return back to securities today['pred_' + ret_col] = today['hood'].apply(hood_ret.get) today.to_csv(MODEL_NAME + '/forecast_{:%Y%m%d}.csv'.format(date)) if DEBUG: print 'done'
# rng(): stores randint(1, 10) in the module-level global `sorte`.
# English rendering of the inline Portuguese comment: "Sets a random number
# between 1 and 10. `sorte` is a global variable for use in conditions."
#
# The `__main__` section that follows prints a banner, shows the character and
# the clock, and asks for an option with input(). For option '1' it plays the
# 'carruagem_cidade' sound, calls rng() and branches on `sorte < 5`: both
# branches reduce stamina by 25 and advance the clock (by 120 or 60); the
# second also calls principe.amar(3).
#
# NOTE(review): this copy has lost its line breaks and stops at a dangling
# `if opcao == '2':` - the remainder of the script is not visible here.
def rng(): #Define um número aleatório entre 1 e 10. Sorte é variável global para uso em condições. global sorte sorte = randint(1,10) if __name__ == '__main__': print() print('-=' * 70) soletrar(72, 74) print('-=' * 70) personagem.mostrar_personagem() print(relogio) soletrar(0, 5) opcao = input('Escolha uma opção: ') if opcao == '1': som('carruagem_cidade') rng() if sorte < 5: soletrar(5, 6) personagem.mudar_stamina(-25) relogio.adicionar_tempo(120) personagem.mostrar_personagem() print(relogio) else: soletrar(6, 7) personagem.mudar_stamina(-25) principe.amar(3) relogio.adicionar_tempo(60) personagem.mostrar_personagem() print(relogio) if opcao == '2':
# non-commercial purposes, but please maintain the name of the original author.
# This code comes with no warranty of any kind.

# Stephen Marsland, 2008

# A simple example of using the SOM on a 2D dataset showing the neighbourhood connections

from pylab import *
from numpy import *
import som

nNodesEdge = 8
# 2000 random 2D points, rescaled from [0, 1) to [-1, 1).
data = (random.rand(2000, 2) - 0.5) * 2

# Set up the network and decide on parameters
net = som.som(nNodesEdge, nNodesEdge, data, usePCA=0)
step = 0.2

figure(1)
plot(data[:, 0], data[:, 1], '.')

# Train the network for 0 iterations (to get the position of the nodes)
net.somtrain(data, 0)

# For every node, draw a green segment to each node whose map distance is at
# most ``step``: even rows of ``t`` hold the node's own weights, odd rows the
# weights of the neighbours.
for i in range(net.x * net.y):
    neighbours = where(net.mapDist[i, :] <= step)
    n_links = shape(neighbours)[1]

    t = zeros((n_links * 2, shape(net.weights)[0]))
    t[::2, :] = tile(net.weights[:, i], (n_links, 1))
    t[1::2, :] = transpose(net.weights[:, neighbours[0][:]])
    plot(t[:, 0], t[:, 1], 'g-')

axis('off')
#ecoli = loadtxt('shortecoli.dat')
#classes = ecoli[:,7:]
#data = ecoli[:,:7]
#data -= mean(data,axis=0)
#data /= data.max(axis=0)

# Shuffle the row indices. A real list is needed: np.random.shuffle cannot
# permute a Python 3 ``range`` object in place.
order = list(range(np.shape(data)[0]))
np.random.shuffle(order)

# First half of the shuffled rows for training, the rest for testing.
split = int(np.round(np.shape(data)[0]/2))
train = data[order[:split],:]
target = classes[order[:split],:]

test = data[order[split:],:]
# The test labels must come from the same rows as ``test``; the previous code
# reused order[:split], i.e. the training labels.
ttarget = classes[order[split:],:]

net = som.som(15,15,train,eta_b=0.3,eta_n=0.1,nSize=0.5,alpha=1,usePCA=1,useBCs=1,eta_bfinal=0.03,eta_nfinal=0.01,nSizefinal=0.05)
net.somtrain(train,12000)

# One winner per *training* row: the rows fed to somfwd are training rows and
# the plots below index ``best`` with positions found in ``target`` (the
# training labels). The previous code sized this loop by the test set, which
# breaks whenever the two halves differ in length.
n_train = np.shape(train)[0]
best = np.zeros(n_train,dtype=int)
for i in range(n_train):
    best[i],activation = net.somfwd(train[i,:])

#print best
#print ttarget

# All nodes as black dots, then the winners of class 0 and class 1.
pl.plot(net.map[0,:],net.map[1,:],'k.',ms=15)
where = pl.find(target == 0)
pl.plot(net.map[0,best[where]],net.map[1,best[where]],'rs',ms=30)
where = pl.find(target == 1)
pl.plot(net.map[0,best[where]],net.map[1,best[where]],'gv',ms=30)
# ==================================================================================
# Load the glass data set and name its columns.
df_glass = pd.read_csv("glass.data", header=None)
df_glass.columns = [
    'Id', 'RI', 'Na', 'Mg', 'Al', 'Si', 'K', 'Ca', 'Ba', 'Fe', 'Type'
]

# The nine measurement columns are the SOM inputs; 'Type' is the class.
input_data = df_glass[['RI', 'Na', 'Mg', 'Al', 'Si', 'K', 'Ca', 'Ba',
                       'Fe']].values
n_feature = input_data.shape[1]
n_class = max(df_glass['Type'].values)

# Standardise every feature.
sc = preprocessing.StandardScaler()
input_data = sc.fit_transform(input_data)

# 4 x 5 map, randomly initialised from the data and batch-trained.
# NOTE(review): the assignment below rebinds the name ``som`` from the
# constructor to the instance.
somsize_x = 4
somsize_y = 5
som = som(somsize_x, somsize_y, n_feature, learning_rate=1.0)
som.random_weights_init(input_data)
som.train_batch(input_data, 1000)

# English rendering of the note below: "Task: draw a graph of the reference
# vector of every neuron node learned by the SOM, and the class distribution
# of the data assigned to each neuron. Hint: som.winner(x) gives the index of
# the winning neuron."
'''
課題:SOMで学習した各ニューロンノードの参照ベクトルのグラフ,および各ニューロンに分類されたデータのクラス分布を描画する
ヒント:som.winner(x) で勝者ニューロンのインデックスを取得できる.
'''
y = np.array([])
fig, axes = plt.subplots(nrows=5, ncols=4, figsize=(12, 8), sharex=True)
label = np.array(['RI', 'Na', 'Mg', 'Al', 'Si', 'K', 'Ca', 'Ba', 'Fe'])
x = np.arange(len(label))

# One bar chart per neuron: column i / row j of the figure shows the weight
# vector stored at som.weights[i][j].
for i in range(somsize_x):
    for j in range(somsize_y):
        y = som.weights[i][j]
        axes[j, i].bar(x, y, tick_label=label, align="center")
# run(date): fit a SOM on same-weekday history before `date` and write a
# forecast CSV for `date`.
#
# Visible steps:
#   * keeps the rows of the module-level dataDF with 'date' <= date and, when
#     MAX_LOOKBACK_MONTHS > 0, not older than that many month-ends;
#   * adds a 'dow' column (weekday of each row's date) and keeps only the rows
#     whose weekday equals that of `date`;
#   * splits off the rows dated exactly `date` ("today"); the rest is training data;
#   * trains on the columns ending in 'TWN'; the only predicted column is 'RET';
#   * grid side = int(sqrt(N / 30)), clamped to the range GRIDMIN..GRIDMAX;
#   * trains som.som for TRAIN_STEPS and stores the first value of somfwd per
#     row in a 'hood' column for both training rows and today's rows;
#   * for each predicted column, the per-'hood' median of the training rows is
#     mapped onto today's rows as 'pred_<column>', and today's frame is written
#     to MODEL_NAME + '/forecast_<YYYYMMDD>.csv'.
#
# NOTE(review): indentation was lost in this copy; in particular it cannot be
# told whether `print train_data.head(2).T` is guarded by `if DEBUG` - confirm
# against the original file before restructuring.
# NOTE(review): `pandas.datetools`, `data[-ix_today]` and `.ix` look like legacy
# pandas usage - confirm the pandas version this runs under.
def run(date): ''' ''' print date data = dataDF[dataDF['date'] <= date] if MAX_LOOKBACK_MONTHS > 0: min_ret_date = date - pandas.datetools.MonthEnd() * MAX_LOOKBACK_MONTHS data = data[data['date'] >= min_ret_date] # filter on Day of the week data only: dow = date.weekday() data['dow'] = data['date'].apply(lambda x: x.weekday()) data = data[data['dow'] == dow] #if only_country: # data = data[data['COUNTRY']==only_country] # can't use today's knowledge ix_today = data['date'] == date today = data[ix_today] data = data[-ix_today] # use factor past week return and month stdev, and perhaps epfr or other macro info use_cols = [x for x in data.columns if x.endswith('TWN')] predict_cols = [ 'RET', ] train_data = data[use_cols] N = len(train_data) clumps = float(N) / 30 grid = int(np.sqrt(clumps)) if grid < GRIDMIN: grid = GRIDMIN elif grid > GRIDMAX: grid = GRIDMAX print 'N: {}, clumps: {}, grid size chosen: {}'.format(N, clumps, grid) SOM_X, SOM_Y = grid, grid if DEBUG: print 'train' print train_data.head(2).T som1 = som.som(SOM_X, SOM_Y, train_data.values, usePCA=False) ## input data, number of iterations som1.somtrain(train_data.values, TRAIN_STEPS) if DEBUG: print 'hood' ### model is now trained, walk through each row to get the neighborhood for a given row data['hood'] = None for cnt, row in data[use_cols].iterrows(): hood, act = som1.somfwd(row.values) data['hood'].ix[cnt] = hood ### now take todays data, walk through, and place each row in a neighborhood today['hood'] = None for cnt, row in today[use_cols].iterrows(): hood, act = som1.somfwd(row.values) today['hood'].ix[cnt] = hood if DEBUG: print 'group' for ret_col in predict_cols: hood_ret = data.groupby('hood')[ret_col].median() hood_ret = hood_ret.to_dict() ### map the return back to securities today['pred_' + ret_col] = today['hood'].apply(hood_ret.get) today.to_csv(MODEL_NAME + '/forecast_{:%Y%m%d}.csv'.format(date)) if DEBUG: print 'done'
# print test_in


def shuffle_in_unison(a, b):
    """Shuffle ``a`` and ``b`` in place using the same random state.

    The generator state is saved before the first shuffle and restored
    before the second, so both shuffles draw the same random sequence
    (which yields the same permutation when the lengths match).
    """
    rng_state = numpy.random.get_state()
    numpy.random.shuffle(a)
    numpy.random.set_state(rng_state)
    numpy.random.shuffle(b)


shuffle_in_unison(train_in, train_tgt)

# Buffers for the SOM activations: one 169-value row per sample.
actsTrain = np.zeros((390, 169))
actsTest = np.zeros((130, 169))

import som

markers = [
    'rv', 'gv', 'bv',
    'ro', 'go', 'bo',
    'rp', 'gp', 'bp',
    'r*', 'g*', 'b*',
    'r8', 'g8',
]

# 13 x 13 SOM trained for 400 iterations on the shuffled training inputs.
net = som.som(13, 13, train_in)
net.somtrain(train_in, 400)

pl.figure(1)

# Record the winning node and the full activation of every training row.
count = 0
best = np.zeros(np.shape(train_in)[0], dtype=int)
for i in range(np.shape(train_in)[0]):
    best[i], activation = net.somfwd(train_in[i, :])
    actsTrain[count] = activation
    count += 1

pl.plot(net.map[0, :], net.map[1, :], 'k.', ms=10)
# Positions of the rows whose target column i is set.
for i in range(13):
    where = pl.find(train_tgt[:, i] == 1)
# Train the network
import kmeansnet

net = kmeansnet.kmeans(6, train)
net.kmeanstrain(train)
cluster = net.kmeansfwd(test)
kprediction = 1. * cluster
actual = data[3::4, p]

# Count the rows whose k-means cluster equals the stored value and report
# the matching fraction.
correct = 0.
for i in range(len(actual)):
    if kprediction[i] == actual[i]:
        correct += 1.
print('K-means percentage correct = {}'.format(correct / len(actual)))

# 7 x 7 SOM trained for 400 iterations on the same training data.
import som

net = som.som(7, 7, train)
net.somtrain(train, 400)

# Winning node of every training row.
best = zeros(shape(train)[0], dtype=int)
for i in range(shape(train)[0]):
    best[i], activation = net.somfwd(train[i, :])

# All nodes as black dots, then the winners of classes 1, 2, 3 and 5.
plot(net.map[0, :], net.map[1, :], 'k.', ms=15)
for _label, _marker in ((1, 'rs'), (2, 'gv'), (3, 'b^'), (5, 'r*')):
    where = find(traint == _label)
    plot(net.map[0, best[where]], net.map[1, best[where]], _marker, ms=30)
# 1. generate samples
my_dg = data_generator(WIDTH, HEIGHT)
NR_CLUSTERS = 15
NR_SAMPLES_PER_CLUSTER = 30
data_samples = my_dg.generate_samples_near_to_clusters(NR_CLUSTERS,
                                                       NR_SAMPLES_PER_CLUSTER)
nr_samples = len(data_samples)
print("Type of data_samples is ", type(data_samples))
print("There are ", nr_samples, "samples in the list.")

# 2. generate a SOM
my_som = som(INPUT_DIM, NR_NEURONS)
my_som.initialize_neuron_weights_to_grid([10, 10, 150, 150])

# 3. SOM training (endless loop: no exit condition is visible here)
while True:

    # 3.1 retrieve randomly a sample vector
    rnd_vec_id = np.random.randint(nr_samples)
    vec = data_samples[rnd_vec_id]

    # 3.2 train the SOM with this vector
    my_som.train(vec, LEARN_RATE, adapt_neighbors)
# Train the network
import kmeansnet

net = kmeansnet.kmeans(6, train)
net.kmeanstrain(train)
cluster = net.kmeansfwd(test)
kprediction = 1. * cluster
actual = data[3::4, p]

# Count the rows whose k-means cluster equals the stored value and report
# the matching fraction.
correct = 0.
for i in range(len(actual)):
    if kprediction[i] == actual[i]:
        correct += 1.
print('K-means percentage correct = {}'.format(correct / len(actual)))

# 7 x 7 SOM trained for 400 iterations on the same training data.
import som

net = som.som(7, 7, train)
net.somtrain(train, 400)

# Winning node of every training row.
best = zeros(shape(train)[0], dtype=int)
for i in range(shape(train)[0]):
    best[i], activation = net.somfwd(train[i, :])

# All nodes as black dots, then the winners of classes 1, 2, 3 and 5.
plot(net.map[0, :], net.map[1, :], 'k.', ms=15)
for _label, _marker in ((1, 'rs'), (2, 'gv'), (3, 'b^'), (5, 'r*')):
    where = find(traint == _label)
    plot(net.map[0, best[where]], net.map[1, best[where]], _marker, ms=30)
def run(date, only_country=None):
    '''
    Train one SOM per signal on history before ``date`` and write a forecast CSV.

    Rows of the module-level ``dataDF`` up to ``date`` (optionally limited to
    MAX_LOOKBACK_MONTHS month-ends and to ``only_country``) are used. For
    every signal the per-neighbourhood median of ``<signal>.fret`` is mapped
    onto the rows dated ``date`` as ``pred_<signal>.fret``. The result is
    written to MODEL_NAME + '/forecast_<YYYYMMDD>.csv'.
    '''
    print(date)
    data = dataDF[dataDF['date'] <= date]
    if MAX_LOOKBACK_MONTHS > 0:
        min_ret_date = date - pandas.datetools.MonthEnd() * MAX_LOOKBACK_MONTHS
        data = data[data['date'] >= min_ret_date]
    if only_country:
        data = data[data['COUNTRY'] == only_country]

    # whether we want to contrast by creating a benchmark that doesnt use
    # priviledge info
    if BENCHMARK:
        del data[EPFR]

    # Context columns: sum/std ratio per factor plus its three previous values.
    for factor in SIGNALS + BARRA:
        t_col = factor + '.T'
        data[t_col] = data[factor + '.sum'] / data[factor + '.std']
        for prev in range(1, 4):
            data['prev{}_'.format(prev) + t_col] = data[t_col].shift(prev)

    # Can't use today's knowledge: rows dated ``date`` are only forecast.
    ix_today = data['date'] == date
    today = data[ix_today]
    data = data[-ix_today]

    # Grid side: multiples of 5 derived from the sample count, where anything
    # below 5 becomes 3 and anything above 10 becomes 10.
    N = len(data)
    clumps = float(N) / 30
    grid = int(np.sqrt(clumps) / 5.0) * 5
    grid = 3 if grid < 5 else min(grid, 10)
    print('N: {}, clumps: {}, grid size chosen: {}'.format(N, clumps, grid))

    for signal in SIGNALS:
        # Signal-relevant history first, then the BARRA factors.
        t_cols = [x for x in data.columns if x.endswith('.T')]
        use_cols = ([x for x in t_cols if signal in x] +
                    [x for x in t_cols if x in BARRA])
        train_data = data[use_cols].dropna()

        som1 = som.som(grid, grid, train_data.values, usePCA=False)
        som1.somtrain(train_data.values, TRAIN_STEPS)

        hood_col = 'hood_' + signal

        # Neighbourhood of every training row.
        data[hood_col] = None
        for cnt, row in train_data.iterrows():
            hood, act = som1.somfwd(row.values)
            data[hood_col].ix[cnt] = hood

        # Neighbourhood of every row dated ``date``.
        today[hood_col] = None
        for cnt, row in today[use_cols].iterrows():
            hood, act = som1.somfwd(row.values)
            today[hood_col].ix[cnt] = hood

        # Median forward return per neighbourhood, mapped back to securities.
        ret_col = signal + '.fret'
        hood_ret = data.groupby(hood_col)[ret_col].median().to_dict()
        today['pred_' + ret_col] = today[hood_col].apply(hood_ret.get)

    today.to_csv(MODEL_NAME + '/forecast_{:%Y%m%d}.csv'.format(date))
def run_som():
    '''
    Train a 20 x 20 SOM on the pollen data and plot the winning nodes.

    The current figure shows the winners of the training rows and figure 2
    those of the test rows. Every class label 0..12 gets its own marker.
    '''
    # Marker used for each class label; the position in the tuple is the label.
    class_markers = ('rs', 'rv', 'r^',
                     'bs', 'bv', 'b^',
                     'gs', 'gv', 'g^',
                     'ms', 'mv', 'm^',
                     'ys')

    def draw_winners(samples, targets):
        # Map every row onto the network, then plot all nodes as black dots
        # and the winners of each class in that class's marker.
        n_rows = np.shape(samples)[0]
        best = np.zeros(n_rows, dtype=int)
        for i in range(n_rows):
            best[i], _activation = net.somfwd(samples[i, :])

        pl.plot(net.map[0, :], net.map[1, :], 'k.', ms=15)
        for label, marker in enumerate(class_markers):
            where = pl.find(targets == label)
            pl.plot(net.map[0, best[where]], net.map[1, best[where]],
                    marker, ms=15)
        pl.axis([-0.1, 1.1, -0.1, 1.1])
        pl.axis('off')

    prep = preprocess.preprocess('Pollens')
    pollen = np.array(prep.create_one_file(SIMPLE_GRASS))
    pollen = prep.normalise_max(pollen)

    groups = prep.make_groups(
        pollen,
        LABEL_SIZE,
        algorithm='som',
        train_size=500,
        test_size=150,
        validation_size=0)
    (train_set, train_set_target,
     test_set, test_set_target,
     validation_set, validation_set_target) = groups

    net = som.som(20, 20, train_set)
    net.somtrain(train_set, 400)

    draw_winners(train_set, train_set_target)

    pl.figure(2)
    draw_winners(test_set, test_set_target)
    pl.show()
np.random.shuffle(order)
spam = spam[order, :]
target = target[order]

# Train / validation / test splits over the first 56 columns.
# NOTE(review): rows 3449 and 4029 fall between the slices and end up in no
# split - confirm whether that is intended.
train, traint = spam[0:3449, 0:56], target[0:3449]
valid, validt = spam[3450:4029, 0:56], target[3450:4029]
test, testt = spam[4030:4600, 0:56], target[4030:4600]

# k-means with 3 clusters, evaluated on the test rows.
net = kmeansnet.kmeans(3, train)
net.kmeanstrain(train)
cluster = net.kmeansfwd(test)

# 6 x 6 SOM trained for 400 iterations on the same training data.
net = som.som(6, 6, train)
net.somtrain(train, 400)

# Winning node of every training row.
best = np.zeros(np.shape(train)[0], dtype=int)
for i in range(np.shape(train)[0]):
    best[i], activation = net.somfwd(train[i, :])

# All nodes as black dots, then the winners of each class in its own marker.
pl.plot(net.map[0, :], net.map[1, :], 'k.', ms=15)
for _label, _marker in ((0, 'rs'), (1, 'gv'), (2, 'b^')):
    where = pl.find(traint == _label)
    pl.plot(net.map[0, best[where]], net.map[1, best[where]], _marker, ms=30)
pl.axis([-0.1, 1.1, -0.1, 1.1])
pl.axis('off')
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 27 13:29:14 2018

@author: Samqua
"""

import numpy as np
import datetime
from som import som

glass_train_data_kernel = np.genfromtxt(
    'glass_train_data_kernel.csv', delimiter=';', dtype=float)
glass_validation_data_kernel = np.genfromtxt(
    'glass_validation_data_kernel.csv', delimiter=';', dtype=float)

# Seven maps ("try 7 a night"), each named "new<i>" and started from its own
# random 100 x 100 x 7000 array.
soms = [
    som("new" + str(i), np.random.random((100, 100, 7000)))
    for i in range(7)
]

beginning = datetime.datetime.now()

for x in soms:
    x.train(glass_train_data_kernel, 65)  # this will take a while...
    x.dimreduce(glass_train_data_kernel)
    x.dimreduce(glass_validation_data_kernel, validation=True)

elapsed = datetime.datetime.now() - beginning
print("Complete runtime, including twofold dimreduce: " + str(elapsed))
def run(date, only_country=None):
    '''
    Train a SOM on signal/barra context counts and write a forecast CSV.

    Rows of the module-level ``dataDF`` up to ``date`` (optionally limited to
    MAX_LOOKBACK_MONTHS month-ends and to ``only_country``) are used. Each
    factor's sum/std ratio is turned into pos/neg/neut flags, the flags are
    summed per group into ``signal_*`` and ``barra_*`` columns, and those
    columns (with three shifted copies each) are the SOM inputs. For every
    column ending in 'fret', the per-neighbourhood median is mapped onto the
    rows dated ``date`` as ``pred_<column>``. The result is written to
    MODEL_NAME + '/forecast_<YYYYMMDD>.csv'.
    '''

    def context_columns():
        # Every column created from the signal_/barra_ counts so far.
        return [
            x for x in data.columns if ('signal_' in x) or ('barra_' in x)
        ]

    def row_total(values):
        return reduce(np.add, values)

    print(date)
    data = dataDF[dataDF['date'] <= date]
    if MAX_LOOKBACK_MONTHS > 0:
        min_ret_date = date - pandas.datetools.MonthEnd() * MAX_LOOKBACK_MONTHS
        data = data[data['date'] >= min_ret_date]
    if only_country:
        data = data[data['COUNTRY'] == only_country]

    # whether we want to contrast by creating a benchmark that doesnt use
    # priviledge info
    if BENCHMARK:
        del data[EPFR]

    for factor in SIGNALS + BARRA:
        t_col = factor + '.T'
        data[t_col] = data[factor + '.sum'] / data[factor + '.std']
        # add prev 3 weeks
        for prev in range(1, 4):
            data['prev{}_'.format(prev) + t_col] = data[t_col].shift(prev)
        # a weekly return vs a daily std: expect weekly return 1 sigma ~ 2
        # daily sigma
        data[factor + '.pos'] = data[t_col].apply(lambda x: 1 if x > 2 else 0)
        data[factor + '.neg'] = data[t_col].apply(lambda x: 1 if x < -2 else 0)
        data[factor + '.neut'] = data[t_col].apply(
            lambda x: 1 if np.abs(x) < 2 else 0)

    # Count the flags of each kind across the signals and the barra factors.
    for sign in ['pos', 'neg', 'neut']:
        suffix = '.' + sign
        data['signal_' + sign] = data[[x + suffix for x in SIGNALS]].apply(
            row_total, axis=1)
        data['barra_' + sign] = data[[x + suffix for x in BARRA]].apply(
            row_total, axis=1)

    # Three shifted copies of every count column.
    context_cols = context_columns()
    for cc in context_cols:
        for prev in range(1, 4):
            data['prev{}_'.format(prev) + cc] = data[cc].shift(prev)
    # expand to include prev:
    context_cols = context_columns()

    # Can't use today's knowledge: rows dated ``date`` are only forecast.
    ix_today = data['date'] == date
    today = data[ix_today]
    data = data[-ix_today]

    # Grid side: multiples of 5 derived from the sample count, where anything
    # below 5 becomes 3 and anything above 10 becomes 10.
    N = len(data)
    clumps = float(N) / 30
    grid = int(np.sqrt(clumps) / 5.0) * 5
    grid = 3 if grid < 5 else min(grid, 10)
    print('N: {}, clumps: {}, grid size chosen: {}'.format(N, clumps, grid))

    use_cols = context_cols
    train_data = data[use_cols].dropna()

    som1 = som.som(grid, grid, train_data.values, usePCA=False)
    som1.somtrain(train_data.values, TRAIN_STEPS)

    # Neighbourhood of every training row.
    data['hood'] = None
    for cnt, row in train_data.iterrows():
        hood, act = som1.somfwd(row.values)
        data['hood'].ix[cnt] = hood

    # Neighbourhood of every row dated ``date``.
    today['hood'] = None
    for cnt, row in today[use_cols].iterrows():
        hood, act = som1.somfwd(row.values)
        today['hood'].ix[cnt] = hood

    # Median forward return per neighbourhood, mapped back to securities.
    predict_cols = [x for x in data.columns if x.endswith('fret')]
    for ret_col in predict_cols:
        hood_ret = data.groupby('hood')[ret_col].median().to_dict()
        today['pred_' + ret_col] = today['hood'].apply(hood_ret.get)

    today.to_csv(MODEL_NAME + '/forecast_{:%Y%m%d}.csv'.format(date))
# Convert to a numpy array and remove the class column (the last one).
data = numpy.array(dataset)
data = numpy.delete(data, numpy.s_[-1], axis=1)
# ``numpy.float`` was only an alias of the builtin ``float``; it was
# deprecated in NumPy 1.20 and removed in 1.24, so use the builtin directly.
data = data.astype(float)  # from str to float

somCol = 6
somRow = 6
sigmaInitial = 3
radius = 3
# maxIterations = 500
maxIterations = 500 * (somRow * somCol)

# NOTE(review): the assignment below rebinds the name ``som`` from the
# constructor to the instance.
som = som(data, maxIterations, sigmaInitial, somCol, somRow, radius)

ans = som.trainmodel()
print('trained model is', ans)
print('processing Cluster Image')

# mapping 3 classes of iris dataset
# https://archive.ics.uci.edu/ml/datasets/iris
class_mapping = {
    0: [1, 0, 0],  # red
    1: [0, 1, 0],  # green
    2: [0, 0, 1]  # blue
}
penaltySum = 0
for i in range(10):
    # Reshuffle the data set and rebuild the train/validation/test splits.
    order = range(shape(iris)[0])
    random.shuffle(order)
    iris, target = iris[order, :], target[order, :]

    train, traint = iris[::2, 0:4], target[::2]
    valid, validt = iris[1::4, 0:4], target[1::4]
    test, testt = iris[3::4, 0:4], target[3::4]

    # Square SOM of side ``netsize`` trained for 400 iterations.
    net = som.som(netsize, netsize, train)
    net.somtrain(train, 400)

    # Winning node of every training row.
    # NOTE: this inner loop reuses the name ``i`` of the outer loop.
    best = zeros(shape(train)[0], dtype=int)
    for i in range(shape(train)[0]):
        best[i], activation = net.somfwd(train[i, :])

    # Map coordinates of the winners of the class-0 training rows, paired up
    # as [first coordinate, second coordinate] lists.
    where = find(traint == 0)
    rs11 = net.map[0, best[where]]
    rs12 = net.map[1, best[where]]
    rs1 = [[first, second] for first, second in zip(rs11, rs12)]
# NOTE(review): the leading `return` closes a function whose header lies
# outside this excerpt; only the driver statements after it are described here.
#
# Driver steps visible in this copy:
#   1. builds audio_dataset("10x10_audio_dataset"), reads the training and test
#      audio streams and prints how many of each were read;
#   2. creates som(FEATURE_VEC_LEN, NR_NEURONS, NR_CLASSES) and calls
#      initialize_neuron_weights_to_origin();
#   3. for TRAIN_STEPS iterations picks a random training stream and a random
#      start position (drawn from 0 .. len(stream) - RAW_DATA_LEN - 1) and
#      slices RAW_DATA_LEN values from it.
#
# NOTE(review): line breaks were lost in this copy and the loop body appears to
# continue past what is visible - what is done with `raw_data` cannot be told
# from here.
return train_audio_streams, test_audio_streams # 1. read in all the training audio files # and the test audio files my_dataset = audio_dataset("10x10_audio_dataset") train_audio_streams, test_audio_streams = my_dataset.read() nr_train_audio_streams = len(train_audio_streams) nr_test_audio_streams = len(test_audio_streams) print("I have read in ", nr_train_audio_streams, " audio streams for training.") print("I have read in ", nr_test_audio_streams, " audio streams for testing.") # 2. generate a SOM my_som = som(FEATURE_VEC_LEN, NR_NEURONS, NR_CLASSES) my_som.initialize_neuron_weights_to_origin() # 3. now train a SOM with vectors from the audio streams for train_step_nr in range(TRAIN_STEPS): # choose randomly one of the training audio streams audio_nr = np.random.randint(nr_train_audio_streams) # choose a random start position in that audio stream data = train_audio_streams[audio_nr] len_data = len(data) start_pos = np.random.randint(len_data - RAW_DATA_LEN) # get the data starting at that position raw_data = data[start_pos:start_pos + RAW_DATA_LEN]
def run(date):
    """Train a SOM on trailing month-end data and write today's predictions.

    Steps, in order:

    1. Load one CSV per month end from ``som/som.input_data/`` covering
       ``MAX_CLUSTER_LENGTH`` month ends up to ``date``; files that cannot
       be read are reported and skipped.
    2. Drop rows whose ``COUNTRY`` is ``'IND'``, then either pivot the
       country column (``MAKE_COUNTRY_PIVOT``) or delete it; delete the
       ``EPFR`` column when ``BENCHMARK`` is set.
    3. Split off the rows dated ``date`` ("today"); train the SOM on the
       remaining rows only.
    4. Assign every training row to its best-matching node, and take the
       median of ``RETURN_COL`` per node over rows dated on or after
       ``date - MAX_IMPULSE_LENGTH`` month ends.
    5. Assign today's rows to nodes, map each node to its median return and
       hand the result (indexed by ``BARRID``) to ``nu.write_alpha_files``.

    Open question carried over from the original author: how many lead days
    (data warm-up) do we need to allot?  For now, leave it at one year.

    Args:
        date: The run date; must support ``strftime`` and subtraction of a
            pandas ``MonthEnd`` offset.
    """
    print(date)

    data = pandas.DataFrame()
    dts = pandas.DateRange(
        date - pandas.datetools.MonthEnd() * MAX_CLUSTER_LENGTH,
        date,
        offset=pandas.datetools.MonthEnd())
    min_ret_date = date - pandas.datetools.MonthEnd() * MAX_IMPULSE_LENGTH

    for dt in dts:
        try:
            _tmp = pandas.read_csv(
                'som/som.input_data/%s.csv' % dt.strftime('%Y%m%d'))
            data = data.append(_tmp, ignore_index=True)
        except Exception:
            # Best effort: a missing or unreadable month is skipped.  Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            print('no data for %s' % dt)

    ix = data['COUNTRY'] != 'IND'
    data = data[ix]

    if MAKE_COUNTRY_PIVOT:
        data = make_country(data)
    else:
        del data['COUNTRY']

    # whether we want to contrast by creating a benchmark that doesn't use
    # privileged info
    if BENCHMARK:
        del data[EPFR]

    # can't use today's knowledge
    ix_today = data['date'] == int(date.strftime('%Y%m%d'))
    # `~` is the element-wise boolean NOT; unary minus on a boolean mask is
    # rejected by current numpy/pandas.  The copies make the later column
    # assignments operate on independent frames.
    today = data[ix_today].copy()
    data = data[~ix_today].copy()

    use_cols = data.columns.tolist()
    use_cols.remove('resid_c')
    use_cols.remove('BARRID')
    use_cols.remove('date')

    features = data[use_cols].values
    som1 = som.som(SOM_X, SOM_Y, features, usePCA=False)

    # Disabled debugging aid kept from the original source:
    #   if DEBUG:
    #       print 'train'
    #       data[use_cols].to_csv('som/som.train/%s.csv' % date.strftime('%Y%m%d'))
    #       return

    ## input data, number of iterations
    som1.somtrain(features, 30)

    if DEBUG:
        print('hood')
    ### model is now trained, walk through each row to get the neighborhood
    ### for a given row.  The column is assigned in one step rather than via
    ### chained ``data['res'].ix[cnt] = ...`` writes, which may update a
    ### temporary copy instead of the frame.
    data['res'] = [som1.somfwd(row)[0] for row in features]

    if DEBUG:
        print('group')
    ### group each neighborhood to get the forward return
    ix = data['date'] >= int(min_ret_date.strftime('%Y%m%d'))
    hood_ret = data[ix].groupby('res')[RETURN_COL].median()
    hood_ret = hood_ret.to_dict()

    if DEBUG:
        print('today')
    ### now take todays data, walk through, and place each row in a
    ### neighborhood
    today['res'] = [som1.somfwd(row)[0] for row in today[use_cols].values]

    ### map the return back to securities (nodes without a median map to None)
    today['pret'] = today['res'].apply(hood_ret.get)
    today = today[['BARRID', 'pret']].set_index('BARRID')
    nu.write_alpha_files(today, MODEL_NAME, date)

    if DEBUG:
        print('done')
from som import som
import numpy as numpy

# Fifteen three-component training vectors with every component in [0, 1]
# (presumably RGB colours - TODO confirm).  Named `samples` so the builtin
# `input` is not shadowed.
samples = numpy.array(
    [[1., 0., 0.],
     [1., 0., 1.],
     [0., 0., 0.5],
     [0.125, 0.529, 1.0],
     [0.33, 0.4, 0.67],
     [0.6, 0.5, 1.0],
     [0., 1., 0.],
     [1., 0., 0.],
     [0., 1., 1.],
     [1., 0., 1.],
     [1., 1., 0.],
     [1., 1., 1.],
     [.33, .33, .33],
     [.5, .5, .5],
     [.66, .66, .66]])

somCol = 2
somRow = 2

# The instance gets its own name so the imported `som` class stays usable.
# NOTE(review): the meaning of the positional arguments 12 and 4 is defined
# by the `som` class, which is not visible here.
model = som(samples, 12, 4, somCol, somRow)
ans = model.trainmodel()

# One pre-formatted string prints the same text under Python 2 and Python 3.
print('trained model is %s' % (ans,))
# Randomly split the data into two halves: the first half of the shuffled
# order trains the map, the second half is held out for testing.
# `shuffle` works in place, so the Python 3 `range` must become a list first.
order = list(range(shape(data)[0]))
random.shuffle(order)
split = int(round(shape(data)[0] / 2))

train = data[order[:split], :]
target = classes[order[:split], :]
test = data[order[split:], :]
# The test targets must come from the same rows as `test` (the second half);
# the original took the training half again.
ttarget = classes[order[split:], :]

net = som.som(15, 15, train,
              eta_b=0.3, eta_n=0.1, nSize=0.5, alpha=1,
              usePCA=1, useBCs=1,
              eta_bfinal=0.03, eta_nfinal=0.01, nSizefinal=0.05)
net.somtrain(train, 12000)

# Find the best-matching node for every *test* input.  The original indexed
# `train` here, which evaluated the wrong data and could run past the end of
# `train` when the test half is the larger one.
best = zeros(shape(test)[0], dtype=int)
for i in range(shape(test)[0]):
    best[i], activation = net.somfwd(test[i, :])
#print best
#print ttarget
# Stephen Marsland, 2008

# A simple example of using the SOM on a 2D dataset showing the neighbourhood connections

from numpy import *
from pylab import *
import som

nNodesEdge = 8
# 2000 points drawn uniformly from the square [-1, 1) x [-1, 1)
data = 2*(random.rand(2000,2)-0.5)

# Set up the network and decide on parameters
net = som.som(nNodesEdge,nNodesEdge,data,usePCA=0)
step = 0.2

figure(1)
plot(data[:,0],data[:,1],'.')

# Train the network for 0 iterations (to get the position of the nodes)
net.somtrain(data,0)

# For every node, draw a green segment to each node whose map distance is
# at most `step`.
for i in range(net.x*net.y):
    neighbours = where(net.mapDist[i,:]<=step)
    nLinks = shape(neighbours)[1]
    # Rows alternate between this node's weights (even rows) and one
    # neighbour's weights (odd rows), so the plotted line visits each
    # neighbour in turn.
    t = zeros((2*nLinks,shape(net.weights)[0]))
    t[::2,:] = tile(net.weights[:,i],(nLinks,1))
    t[1::2,:] = transpose(net.weights[:,neighbours[0]])
    plot(t[:,0],t[:,1],'g-')

axis('off')
def main():
    """Load a SOM, train it on masked depth frames and write HOJ3D sets.

    Steps, in order:

    1. Parse the command line via ``parseOpts(sys.argv)``.
    2. Load the SOM from ``som_path``.
    3. Read all skeleton frames from ``<skeleton_name>.skeleton``.
    4. Read all masked depth frames from the directory ``path_name``.
    5. Train the SOM with the masked depth frames.
    6. Compute one HOJ3D set per skeleton frame and write it under a
       zero-padded three-digit frame number.
    7. Optionally show the stream.

    If the SOM file, the skeleton file or the masked depth directory is
    missing, a message is printed and the script exits with status 0 (status
    kept from the original behaviour).
    """
    # Parse the command line options.
    path_name, skeleton_name, som_path, show_stream, verbose = parseOpts(
        sys.argv)

    # Build the skeleton filename string
    _skeleton_filename_ = skeleton_name + '.skeleton'

    # Build the associated masked depth directory pathname
    _masked_depth_pathname_ = path_name

    # ------------------------------------------------------------------
    # Load the initial SOM
    if not os.path.isfile(som_path):
        print("\nNo som file with name: ", som_path)
        print("Leave script now.\n")
        sys.exit(0)

    # Instantiate a new som and load its data from the specified file.
    _som_ = som.som()
    _som_.load_from_file(som_path)

    # ------------------------------------------------------------------
    # Open the skeleton file if it exists.
    if not os.path.isfile(_skeleton_filename_):
        print("\nNo skeleton file with name: ", _skeleton_filename_)
        print("Leave script now.\n")
        sys.exit(0)

    print("Skeleton file: ", _skeleton_filename_)
    # The context manager closes the handle, which the original left open.
    # NOTE(review): assumes read_skeleton_data consumes the file before
    # returning (not lazily) - confirm.
    with open(_skeleton_filename_, 'r') as _skeleton_fileHandler_:
        # Read the data from the skeleton file for the whole sequence
        all_skeleton_frames = l_S.read_skeleton_data(_skeleton_fileHandler_,
                                                     verbose)

    # ------------------------------------------------------------------
    # Open the associated masked depth files in their directory.
    if not os.path.isdir(_masked_depth_pathname_):
        print("\nNo depth mask directory with name: ",
              _masked_depth_pathname_)
        print("Leave script now.\n")
        sys.exit(0)

    print("Open masked depth files in: ", _masked_depth_pathname_)
    # Read the data from the masked depth directory
    all_masked_depth_frames = l_MD.read_masked_depth_data(
        _masked_depth_pathname_)

    # ------------------------------------------------------------------
    # Train the SOM with the data
    if _som_:
        _som_.train_som(all_masked_depth_frames)

    # ------------------------------------------------------------------
    # Hi Franz,
    # I have already defined the call to the Hoj3D function for you here.
    # You only need the skeleton data as input (as far as I remember).
    # Layout of the data:
    #
    #   all_skeleton_frames (list of frames)
    #     |_> each frame contains the frame header and a list of joints of
    #         the associated skeleton
    #
    #   -> frameHeader.py and joint.py should give you further information
    #   -> If more questions come up, send me an email; I will try to answer
    #      as quickly as possible despite being on holiday.
    #
    # Franz
    for i, frame in enumerate(all_skeleton_frames):
        list_of_joints = frame.get_ListOfJoints()

        # Joints taken from the paper, as list_of_joints index -> body part:
        #    3 head      5 l elbow   9 r elbow   6 l hand   10 r hand
        #   13 l knee   17 r knee   14 l feet   18 r feet
        #   12 l hip    16 r hip
        # Reference joints passed positionally:
        #   hip center, spine, hip right, hip left
        hoj3d_set = h3d.compute_hoj3d(
            list_of_joints,
            list_of_joints[0],
            list_of_joints[1],
            list_of_joints[16],
            list_of_joints[12],
            joint_indexes=[3, 5, 9, 6, 10, 13, 17, 14, 18, 12, 16],
            use_triangle_function=True)

        # Zero-padded three-digit frame number, e.g. "007".
        filename = "{0:0=3d}".format(i)
        h3d_t.write_hoj3d(filename, hoj3d_set)
        # break

    # ------------------------------------------------------------------
    # Let the show begin
    if show_stream:
        r_W.show_stream(all_masked_depth_frames, all_skeleton_frames, verbose)