def getRawLog(N, user_amount, item_amount, features):
    """Generate a synthetic (user, item, timestamp) interaction log.

    Creates ``user_amount`` User objects and ``item_amount`` Item objects
    (profiles/content vectors built from ``features``), emits N records per
    user with timestamps spaced 5 seconds apart, and shuffles the result.

    Returns a tuple ``(raw_log, all_user_dic, all_item_dic)``.
    """
    # Fix: keep the return shape consistent. The original returned a bare []
    # here while the normal path returns a 3-tuple, so callers unpacking
    # three values would crash on N <= 0.
    if N <= 0:
        return [], {}, {}
    raw_log = []
    all_user_dic = {}
    all_item_dic = {}
    for i in range(user_amount):
        user = User('user ' + str(i))
        user.createProfile(features)
        all_user_dic['user ' + str(i)] = user
    for j in range(item_amount):
        item = Item('item ' + str(j))
        item.createContentVector(features)
        all_item_dic['item ' + str(j)] = item
    moment = time.time()
    gap = 5
    j = 0
    for uid in all_user_dic.keys():
        for i in range(N):
            # Uniform item pick in [0, item_amount).
            random_item = 'item ' + str(int(random.random() * item_amount))
            random_time = moment + j * gap
            raw_log.append((uid, random_item, random_time))
            j += 1
    shuffle(raw_log)
    return raw_log, all_user_dic, all_item_dic
def spliDtata(self):
    """Shuffle the training data in place, then carve its tail off as the
    held-out test set according to ``self.splitline``."""
    shuffle(self.train_data)
    # Everything before the boundary stays for training; the rest is test.
    boundary = int(np.ceil(len(self.train_data) * self.splitline))
    self.test_data = self.train_data[boundary:]
    self.train_data = self.train_data[:boundary]
def test_shuffle_of_array_of_objects(self): # Test that permuting an array of objects will not cause # a segfault on garbage collection. # See gh-7719 random.seed(1234) a = np.array([np.arange(1), np.arange(4)]) for _ in range(1000): random.shuffle(a) # Force Garbage Collection - should not segfault. import gc gc.collect()
def test_shuffle_of_array_of_different_length_strings(self): # Test that permuting an array of different length strings # will not cause a segfault on garbage collection # Tests gh-7710 random.seed(1234) a = np.array(['a', 'a' * 1000]) for _ in range(100): random.shuffle(a) # Force Garbage Collection - should not segfault. import gc gc.collect()
def generateLog(self, N):
    """Fill ``self.raw_log`` with N shuffled (user, item, tag, time)
    records per user, timestamps spaced ``gap`` seconds apart.

    Fix: the original never advanced ``j``, so ``gap`` was dead code and
    every record carried the same timestamp; ``j`` now increments once per
    record, matching the other log generators in this file.
    """
    self.raw_log = []
    moment = time.time()
    gap = 2
    j = 0
    for u in range(self.user_amount):
        for i in range(N):
            random_item = 'item ' + str(int(random.random() * self.item_amount))
            random_tag = 'tag ' + str(int(random.random() * self.tag_amount))
            random_moment = moment + gap * j
            self.raw_log.append(('user ' + str(u), random_item, random_tag, random_moment))
            j += 1
    shuffle(self.raw_log)
def getRawLog(N, user_amount, item_amount, tag_amount):
    """Build a shuffled synthetic log of (user, item, tag) triples,
    N records per user. Returns [] when N is not positive."""
    if N <= 0:
        return []
    # Item and tag ids are drawn uniformly at random for each record.
    log = [
        ('user ' + str(uid),
         'item ' + str(int(random.random() * item_amount)),
         'tag ' + str(int(random.random() * tag_amount)))
        for uid in range(user_amount)
        for _ in range(N)
    ]
    shuffle(log)
    return log
def getRawLog(N, user_amount, item_amount, tag_amount):
    """Return a shuffled list of synthetic (user, item, tag) records.

    Emits N records for each of ``user_amount`` users; item and tag indices
    are drawn uniformly at random. An empty list is returned for N <= 0.
    """
    if N <= 0:
        return []
    records = []
    for user_index in range(user_amount):
        user_label = 'user ' + str(user_index)
        for _ in range(N):
            item_label = 'item ' + str(int(random.random() * item_amount))
            tag_label = 'tag ' + str(int(random.random() * tag_amount))
            records.append((user_label, item_label, tag_label))
    shuffle(records)
    return records
def autoGetRawLog(N, user_amount, item_amount):
    """Generate a shuffled (user, item, timestamp) log, N records per user.

    Timestamps start at the current time and advance by a fixed 5-second
    gap per record. Returns [] when N is not positive.
    """
    if N <= 0:
        return []
    base_time = time.time()
    step = 5
    entries = []
    counter = 0
    for user_index in range(user_amount):
        user_label = 'user ' + str(user_index)
        for _ in range(N):
            item_label = 'item ' + str(int(random.random() * item_amount))
            entries.append((user_label, item_label, base_time + counter * step))
            counter += 1
    shuffle(entries)
    return entries
def autoGetRawLog(N, user_amount, item_amount):
    """Produce a shuffled synthetic log of (user, item, time) triples.

    Each of the ``user_amount`` users contributes N records; timestamps
    are spaced exactly five seconds apart starting from "now". A
    non-positive N yields an empty list.
    """
    if N <= 0:
        return []
    start = time.time()
    gap_seconds = 5
    log = []
    seq = 0  # global record counter driving the timestamp offset
    for uid in range(user_amount):
        for _ in range(N):
            rand_item = 'item ' + str(int(random.random() * item_amount))
            log.append(('user ' + str(uid), rand_item, start + seq * gap_seconds))
            seq += 1
    shuffle(log)
    return log
def __init__( self, data_path=r'/home/xinye/workingdirectory/PyCodeFragment/data/resized_animal', splitline=0.9): """这个版本直将图片载入内存,对于4g图片,考虑一次只缓存图片的全路径,feed之前再读入图片""" # 各个图片文件夹的名字 animal_path_name = os.listdir(data_path) # 生成每个类别的编码 labels = np.zeros((len(animal_path_name), len(animal_path_name))) for i in range(len(labels)): labels[i][i] = 1 print(animal_path_name[i], '==>', labels[i]) # 图片文件夹全路径 animal_paths = [ os.path.join(data_path, filename) for filename in animal_path_name ] self.train_data = list() for i in range(len(animal_paths)): image_paths = self.getImagePaths(animal_paths[i]) print('获得%s' % animal_path_name[i]) for filename in image_paths: pic = Image.open(filename) self.train_data.append([np.asarray(pic).flatten(), labels[i]]) if len(self.train_data[-1][0]) != height * width * channel: print('图片大小异常(检查图片格式)%d--%s' % (len(self.train_data[-1][0]), filename)) pic.close() print('......\n图片提取完成\n') # 转换成numpy并打乱顺序 self.train_data = np.array(self.train_data) print(self.train_data.shape) shuffle(self.train_data) shuffle(self.train_data) # 切分出测试样本和训练样本 split_index = int(np.ceil(len(self.train_data) * splitline)) self.test_data = self.train_data[split_index:] self.train_data = self.train_data[:split_index] # 取数据标记 self.flag = 0
def generateLog(self, N):
    """Populate ``self.raw_log`` with N shuffled (user, item, tag, time)
    records per user.

    Fix: the original never incremented ``j``, so every record shared one
    timestamp and ``gap`` was dead code; ``j`` now advances per record so
    timestamps step by ``gap`` seconds, matching the companion generators.
    """
    self.raw_log = []
    moment = time.time()
    gap = 2
    j = 0
    for u in range(self.user_amount):
        for i in range(N):
            random_item = 'item ' + str(
                int(random.random() * self.item_amount))
            random_tag = 'tag ' + str(
                int(random.random() * self.tag_amount))
            random_moment = moment + gap * j
            self.raw_log.append(
                ('user ' + str(u), random_item, random_tag, random_moment))
            j += 1
    shuffle(self.raw_log)
def getRawLog(N, user_amount, item_amount):
    """Return a shuffled list of (user, item, timestamp) records, N per
    user, with timestamps advancing in fixed 5-second steps from now.
    Returns [] for non-positive N."""
    if N <= 0:
        return []
    now = time.time()
    interval = 5
    sequence_no = 0
    log = []
    for user_no in range(user_amount):
        for _ in range(N):
            item_id = 'item ' + str(int(random.random() * item_amount))
            log.append(('user ' + str(user_no), item_id,
                        now + interval * sequence_no))
            sequence_no += 1
    shuffle(log)
    return log
def get_text_pairs(self):
    """Build aligned sentence pairs for every ordered pair of datasets.

    Returns ``self.data_pairs``, mapping a cleaned "name1 - name2" key to a
    shuffled list of tab-separated parallel text lines.
    """
    self.get_datasets()
    k_pairs = list(permutations(self.datasets.keys(), 2))
    print('\nCreating pairs: ')
    print('Progress: #', end='')
    for p in k_pairs:
        # Strip the file-name suffixes and punctuation to form the key.
        # Fix: the second substitution previously re-started from str(p),
        # silently discarding the first one's result.
        key = re.sub(r'\s[-]\sBíblia Completa.csv', '', str(p))
        key = re.sub(r'\s[-]\sNovo Testamento.csv', '', key)
        key = re.sub(r'\(', '', key)
        key = re.sub(r'\)', '', key)
        key = re.sub(r'[,]', ' -', key)
        key = re.sub(r'[\']', '', key)
        pair_text = []
        print('#', end='')
        # NOTE(review): .align() returns new aligned objects; this call
        # discards the result — confirm whether the return value should be
        # captured before iterating below.
        self.datasets[p[0]]['Scripture'].align(
            self.datasets[p[1]]['Scripture'])
        for r_1, r_2 in zip(self.datasets[p[0]]['Scripture'],
                            self.datasets[p[1]]['Scripture']):
            try:
                pair_text.append(' '.join(str(r_1).split()) + '\t' +
                                 ' '.join(str(r_2).split()) + '\n')
            except AttributeError as err:
                # Fix: report the actual exception instance (the original
                # printed the AttributeError class) and drop the leftover
                # debugging breakpoint() that halted execution.
                print(err)
        shuffle(pair_text)
        self.data_pairs[key] = pair_text
    return self.data_pairs
def doArena(n: mcts2.INeuralNet, mcts: mcts2.MCTS, doTrain=True):
    """Pit the current network against the previous one for five games and,
    if requested, train on the examples those games produced, then save a
    checkpoint."""
    # Alternative opponents kept from the original for experimentation:
    # otherPlayer = Player("Marcel", lambda x: HumanPlayer().play(x))
    # otherPlayer = Player("random", lambda x: RandomPlayer().play(x))
    opponent = Player("neural OLD",
                      lambda x: np.argmax(mcts2.getActionProbabilities(x, 0)))
    challenger = Player("neural",
                        lambda x: np.argmax(mcts.getActionProbabilities(x, 0)))
    arena = Arena(challenger, opponent, moaraGame, moara.args, mcts)
    result = arena.playGames(5, verbose=False)
    if doTrain:
        # Flatten the per-game histories into one shuffled training set.
        examples = [sample
                    for history in arena.trainExamplesHistory
                    for sample in history]
        shuffle(examples)
        if examples != []:
            n.train(examples)
    n.save_checkpoint(folder=moara.args.checkpoint,
                      filename_no=moara.args.filename)
def __init__(self, data_path=r'/home/xinye/workingdirectory/PyCodeFragment/data/resized_animal', splitline=0.9): """这个版本直将图片载入内存,对于4g图片,考虑一次只缓存图片的全路径,feed之前再读入图片""" # 各个图片文件夹的名字 animal_path_name = os.listdir(data_path) # 生成每个类别的编码 labels = np.zeros((len(animal_path_name), len(animal_path_name))) for i in range(len(labels)): labels[i][i] = 1 print(animal_path_name[i], '==>', labels[i]) # 图片文件夹全路径 animal_paths = [os.path.join(data_path, filename) for filename in animal_path_name] self.train_data = list() for i in range(len(animal_paths)): image_paths = self.getImagePaths(animal_paths[i]) print('获得%s' % animal_path_name[i]) for filename in image_paths: pic = Image.open(filename) self.train_data.append([np.asarray(pic).flatten(), labels[i]]) if len(self.train_data[-1][0]) != height * width * channel: print('图片大小异常(检查图片格式)%d--%s' % (len(self.train_data[-1][0]), filename)) pic.close() print('......\n图片提取完成\n') # 转换成numpy并打乱顺序 self.train_data = np.array(self.train_data) print(self.train_data.shape) shuffle(self.train_data) shuffle(self.train_data) # 切分出测试样本和训练样本 split_index = int(np.ceil(len(self.train_data) * splitline)) self.test_data = self.train_data[split_index:] self.train_data = self.train_data[:split_index] # 取数据标记 self.flag = 0
def train_test_split(samples_list, train_test_ratio=0.5):
    """Split samples into training and test sets, grouped by condition.

    samples_list: numpy array whose third column (index 2) is one of
        'uncontrolled', 'insitu', or 'controlled'.
    train_test_ratio: fraction of all samples placed in the TEST set
        (default 0.5, preserving the original behavior).

    Returns ``(training_set_samples_list, test_set_samples_list)`` as lists
    of rows.

    Fix: the original hard-coded 0.5 and silently ignored the
    ``train_test_ratio`` parameter.
    """
    test_set_samples_count = int(len(samples_list) * train_test_ratio)
    # Partition by condition and shuffle each group independently, so the
    # head of the concatenated list (taken as the test set) is random
    # within each group while the group order stays fixed.
    uncontrolled_samples_list = samples_list[samples_list[:, 2] == 'uncontrolled']
    insitu_samples_list = samples_list[samples_list[:, 2] == 'insitu']
    controlled_samples_list = samples_list[samples_list[:, 2] == 'controlled']
    shuffle(uncontrolled_samples_list)
    shuffle(insitu_samples_list)
    shuffle(controlled_samples_list)
    rearranged_samples_list = []
    rearranged_samples_list.extend(uncontrolled_samples_list)
    rearranged_samples_list.extend(insitu_samples_list)
    rearranged_samples_list.extend(controlled_samples_list)
    test_set_samples_list = rearranged_samples_list[:test_set_samples_count]
    training_set_samples_list = rearranged_samples_list[
        test_set_samples_count:]
    return training_set_samples_list, test_set_samples_list
def cards():
    """Play out a full game of War between two auto-dealt hands.

    Relies on module-level globals defined elsewhere in the file: ``game``
    and ``check`` (control flags), ``w1``/``w2`` (passed through to chek but
    never read there), ``f1``/``f2`` (win counters) and a ``deck`` class.
    """
    global game
    global check
    feed = 0
    p1d = []  # player 1's winnings pile
    p2d = []  # player 2's winnings pile
    Bicicle = deck()
    # NOTE(review): 'shufffle' (three f's) must match the method name on the
    # deck class defined elsewhere — confirm before "fixing" the spelling.
    Bicicle.shufffle()
    purple = Bicicle.getdeck()
    p1 = []
    # Deal the first half of the deck to player 1...
    for x in range((int(len(purple) / 2))):
        p1.append(purple[0])
        del purple[0]
    print(p1)
    # ...and the remainder to player 2.
    p2 = purple
    print(p2)
    print(len(p1))
    print(len(p2))
    game = 'lol'  # 'lol' == game in progress; 'xd' == game over
    print('hi')

    def stack(player):
        # Recycle a player's winnings pile back into their hand.
        global game
        if player == 1:
            for asd in range(len(p1d)):
                p1.append(p1d[0])
                del p1d[0]
        if player == 2:
            for abc in range(len(p2d)):
                p2.append(p2d[0])
                del p2d[0]

    def chek(player, thresh, w1, w2):
        # End the game if `player` cannot field `thresh` cards even after
        # recycling their winnings; otherwise top up their hand if short.
        global game
        global f1
        global f2
        if player == 1:
            if len(p1) + len(p1d) < thresh:
                print("thresh: ", thresh)
                game = 'xd'
                print('PLAYER 2 WINS!!!')
                f2 = f2 + 1
                return
            if len(p1) < thresh:
                stack(1)
        if player == 2:
            if len(p2) + len(p2d) < thresh:
                game = 'xd'
                print("thresh", thresh)
                print('PLAYER 1 WINS!!!')
                f1 = f1 + 1
                return
            if len(p2) < thresh:
                stack(2)

    # Main loop: both players draw one card; the higher card takes both.
    # A tie triggers "war" rounds staking four cards at a time.
    while game == 'lol':
        if game != 'lol':
            warq = 1
        if check == 'q':  # external quit signal
            game = 'xd'
            gtry = 'no'
        chek(1, 1, w1, w2)
        chek(2, 1, w1, w2)
        if game == 'lol':
            drew1 = p1[0]
            drew2 = p2[0]
            del p1[0]
            del p2[0]
        if game != 'lol':
            warq = 1
            print(game)
        shuffle(p1)
        shuffle(p2)
        chek(1, 1, w1, w2)
        chek(2, 1, w1, w2)
        # print('Player 1 Drew: ', drew1)
        # print('Player 2 Drew: ', drew2)
        if drew1.getnum() > drew2.getnum():
            # Player 1 takes both cards.
            p1d.append(drew1)
            p1d.append(drew2)
            print(drew1, drew2)
            print('no')
        elif drew1.getnum() < drew2.getnum():
            # Player 2 takes both cards.
            p2d.append(drew1)
            p2d.append(drew2)
            print('yes')
        else:
            # Tie: enter war rounds.
            warq = 0
            stwar = 0
            pp1 = 0
            pp2 = 0
            stw1 = 0
            stw2 = 0
            drew1 = []
            drew2 = []
            while warq == 0:
                print('this is warq: ', warq)
                stwar = stwar + 4  # each war round stakes four more cards
                stw1 = stwar
                stw2 = stwar
                print(stwar)
                if game != 'lol':
                    warq = 1
                if game == 'lol':
                    chek(1, stwar + 1, w1, w2)
                    chek(2, stwar + 1, w1, w2)
                if game != 'lol':
                    warq = 1
                if game == 'lol':
                    # print('=====WAR!=====')
                    for x in range(stwar):
                        drew1.append(p1[0])
                        del p1[0]
                    for n in range(stwar):
                        drew2.append(p2[0])
                        del p2[0]
                    print(drew1)
                    print(drew2)
                    # print('Player 1 Drew: ', drew1.toString())
                    # print('Player 2 Drew: ', drew2.toString())
                    if len(drew1) != 0 and len(drew2) != 0:
                        if drew1[-1].getnum() > drew2[-1].getnum():
                            # NOTE(review): player 1 wins this war, yet the
                            # cards are appended to p2d — looks like a
                            # copy-paste bug; confirm the intended pile
                            # before changing behavior.
                            for jp in range(len(drew1)):
                                if game == 'lol':
                                    p2d.append(drew1[0])
                                    del drew1[0]
                            for jh in range(len(drew2)):
                                p2d.append(drew2[0])
                                del drew2[0]
                            warq = 1
                            print('boi')
                    if len(drew1) != 0 and len(drew2) != 0:
                        if drew1[-1].getnum() < drew2[-1].getnum():
                            # Player 2 wins the war and takes all staked cards.
                            for jz in range(len(drew1)):
                                if game == 'lol':
                                    p2d.append(drew1[0])
                                    del drew1[0]
                            for ja in range(len(drew2)):
                                p2d.append(drew2[0])
                                del drew2[0]
                            warq = 1
                            print('boi')
            chek(1, stw2 + 2, w1, w2)
            chek(2, stw2 + 2, w1, w2)
def play(self, game):
    """Return a uniformly random valid move for the current player by
    shuffling the legal moves and taking the first one."""
    candidate_moves = game.getValidMoves(game.getCrtPlayer())
    shuffle(candidate_moves)
    return candidate_moves[0]
def vectorize(normed):
    """Cut ``normed`` into overlapping windows of length
    ``slidingWindowSize`` (a module-level constant defined elsewhere) and
    return them in random order."""
    windows = []
    for start in range(len(normed) - slidingWindowSize):
        windows.append(normed[start:start + slidingWindowSize])
    shuffle(windows)
    return windows
# -*- coding: utf-8 -*- import numpy as np from numpy.random.mtrand import shuffle from sklearn.ensemble import RandomForestClassifier from sklearn import tree from sklearn.tree import export_graphviz import graphviz import matplotlib.pyplot as plt # 处理数据 filename = '../data/4.4.3-wine.csv' data = np.loadtxt(open(filename, "rb"), delimiter=",", skiprows=0) shuffle(data) X = data[:, :-1] y = data[:, -1] from sklearn.model_selection import train_test_split x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=99) # 训练模型 clf = RandomForestClassifier(n_estimators=3) #各种参数的含义和设置! clf.fit(x_train, y_train) label_predict = clf.predict(x_test) # print(clf.estimators_[0].tree_.n_node_samples) #模型评估 from sklearn.metrics import classification_report print(classification_report(y_test, label_predict))
def sgd(
    self,
    training_data,
    epochs=30,
    mini_batch_size=10,
    alpha=3.0,
    lmbda=0.1,
    evaluation_data=None,
    monitor_evaluation_cost=False,
    monitor_evaluation_accuracy=False,
    monitor_training_cost=False,
    monitor_training_accuracy=False
):
    """Train the neural network using mini-batch stochastic gradient
    descent.

    ``training_data`` is a list of ``(x, y)`` tuples of training inputs and
    desired outputs. ``alpha`` is the learning rate (decayed by 10% after
    each epoch) and ``lmbda`` the regularization parameter.
    ``evaluation_data`` is usually the validation or test data; the
    ``monitor_*`` flags select which per-epoch metrics are recorded.

    Returns a tuple of four lists: (per-epoch) costs on the evaluation
    data, accuracies on the evaluation data, costs on the training data,
    and accuracies on the training data — each empty when its flag is not
    set.
    """
    n_data = None
    if evaluation_data:
        n_data = len(evaluation_data)
    n = len(training_data)
    evaluation_cost, evaluation_accuracy = [], []
    training_cost, training_accuracy = [], []
    for j in range(epochs):
        shuffle(training_data)
        mini_batches = [
            training_data[k:k + mini_batch_size]
            for k in range(0, n, mini_batch_size)]
        for mini_batch in mini_batches:
            self.update_mini_batch(
                mini_batch, alpha, lmbda, len(training_data))
        print("Epoch %s training complete" % j)
        if monitor_training_cost:
            cost = self.total_cost(training_data, lmbda)
            training_cost.append(cost)
            print("Cost on training data: {}".format(cost))
        if monitor_training_accuracy:
            accuracy = self.accuracy(training_data, convert=True)
            training_accuracy.append(accuracy)
            print(
                "Accuracy on training data: {} / {}"
                .format(accuracy, n)
            )
        if monitor_evaluation_cost:
            cost = self.total_cost(evaluation_data, lmbda, convert=True)
            evaluation_cost.append(cost)
            print("Cost on evaluation data: {}".format(cost))
        if monitor_evaluation_accuracy:
            accuracy = self.accuracy(evaluation_data)
            evaluation_accuracy.append(accuracy)
            # Fix: reuse the accuracy computed above instead of running a
            # second full evaluation pass just for the print.
            print(
                "Accuracy on evaluation data: {} / {}"
                .format(accuracy, n_data)
            )
        alpha = alpha * 0.9  # slowly decrease alpha per training iteration
    return (
        evaluation_cost, evaluation_accuracy,
        training_cost, training_accuracy
    )