def main():
    learn.learn()
    # print(learn.perc)
    f = open("wdbcl.data", "rU")
    datas = []
    for line in f:
        data = line.split(",")
        del data[0]
        datas.append(data)
    for data in datas:
        test(data)
    print "tn", tn
    print "tp", tp
    print "fn", fn
    print "fp", fp
    p = float(tp) / (tp + fp)
    r = float(tp) / (tp + fn)
    f1 = 2 * p * r / (p + r)
    print("Precision = " + str(p))
    print("Recall = " + str(r))
    print("F1-metric = " + str(f1))
    f.close()

def main():
    try:
        parser = get_args_parser()
        args = parser.parse_args()
        if args.command == "predict_proba":
            predict_proba(args.input_path, args.model_path, args.device)
        elif args.command == "predict":
            print(predict(args.input_path, args.model_path, args.device))
        elif args.command == "learn":
            learn(args.location, args.num_samples, args.device)
        elif args.command == "crossval":
            crossval(path=args.model_path)
        elif args.command in ["locations", "ls"]:
            locations(args.model_path)
        elif args.command == "rename":
            rename_label(args.label, args.new_label)
            print("Retraining model...")
            train_model()
        elif args.command == "train":
            train_model(args.model_path)
        else:
            parser.print_help()
            parser.exit(1)
    except (KeyboardInterrupt, SystemExit):
        exit()

def random_reply():
    log.info('making random reply')
    # Choose a random submission from /r/all that is currently hot
    submission = random.choice(list(api.subreddit('all').hot()))
    sub_name = submission.subreddit.display_name
    brain = "{}/{}.db".format(DB_DIR, sub_name)
    if not glob.glob(brain):
        learn(sub_name)
    reply_brain = bot.Brain(brain)
    # Replace the "MoreReplies" with all of the submission replies
    submission.comments.replace_more(limit=0)
    # Choose a random top-level comment
    comment = random.choice(submission.comments.list())
    try:
        # Pass the user's comment to the chat brain, asking for a reply
        response = reply_brain.reply(comment.body)
    except Exception as e:
        log.error(e, exc_info=False)
        return  # no reply was generated, so give up
    try:
        # Reply to the same user's comment with the chat brain's reply
        reply = comment.reply(response)
    except Exception as e:
        log.error(e, exc_info=False)

def learn_tree(node, max_depth, min_size, depth):
    """node is a dict, obtained from the initial assignment or from the learn function.
    It contains: groups (the two row groups produced by the learn function's split),
    left (the left group; if it can be split further it becomes the next node,
    otherwise it becomes a leaf prediction) and right (same as left)."""
    # Take the left and right groups out and examine them separately
    print depth
    left, right = node['groups']
    print left, right
    print node['index'], node['value']
    # The data itself no longer matters; what we care about is how the data is split
    # (the split point). The current node already stores the previous split point;
    # now we only decide whether to keep splitting.
    del node['groups']
    # If either group is empty, the learn function could not split the data, so stop recursing
    if not left or not right:
        print 'empty vector'
        node['left'] = node['right'] = class_value(left + right)
        return
    # Believe it or not: this function has many returns, yet it returns no value.
    # If the maximum depth is reached, stop growing branches (recursing).
    # How do we pick the maximum depth? Black magic!
    if depth >= max_depth:
        print 'max depth'
        node['left'], node['right'] = class_value(left), class_value(right)
        return
    # If a group is smaller than the minimum split size, stop splitting that side;
    # recursion is just a further split of one side's group
    if len(left) <= min_size:
        print 'left min size'
        node['left'] = class_value(left)
    # Otherwise turn that side's group into the next node via the learn function,
    # which both splits a group and turns it into a node
    else:
        print 'left'
        node['left'] = learn(left)
        # Recurse to build the subtree for the left group
        learn_tree(node['left'], max_depth, min_size, depth + 1)
    if len(right) <= min_size:
        print 'right min size'
        node['right'] = class_value(right)
    else:
        print 'right'
        node['right'] = learn(right)
        learn_tree(node['right'], max_depth, min_size, depth + 1)

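class_value is not shown in this snippet. A minimal sketch of what such a leaf predictor
might look like, assuming each row carries its class label in the last column (this helper
is an assumption for illustration, not taken from the original project):

def class_value(group):
    # Hypothetical leaf predictor: return the most common class label in the group.
    labels = [row[-1] for row in group]
    return max(set(labels), key=labels.count)
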
def main():
    learn.learn()
    # print(learn.perc)
    f = open('wdbcl.data', 'rU')
    datas = []
    for line in f:
        data = line.split(',')
        del data[0]
        datas.append(data)
    for data in datas:
        test(data)
    print "tn", tn
    print "tp", tp
    print "fn", fn
    print "fp", fp
    p = float(tp) / (tp + fp)
    r = float(tp) / (tp + fn)
    f1 = 2 * p * r / (p + r)
    print("Precision = " + str(p))
    print("Recall = " + str(r))
    print("F1-metric = " + str(f1))
    f.close()

def random_reply():
    log.info('making random reply')
    # Choose a random submission from /r/all that is currently hot
    submission = random.choice(list(api.subreddit('all').hot()))
    submission.comments.replace_more(
        limit=0
    )  # Replace the "MoreReplies" with all of the submission replies
    sub_name = submission.subreddit.display_name
    brain = "{}/{}.db".format(DB_DIR, sub_name)
    if not glob.glob(brain):
        learn(sub_name)
    reply_brain = bot.Brain(brain)
    try:
        if prob(.35):  # There's a larger chance that we'll reply to a comment.
            log.info('replying to a comment')
            comment = random.choice(submission.comments.list())
            response = reply_brain.reply(comment.body)
            reply = comment.reply(response)
            log.info('Replied to comment: {}'.format(comment))
            log.info('Replied with: {}'.format(reply))
        else:
            log.info('replying to a submission')
            # Pass the submission title to the chat brain, asking for a reply
            response = reply_brain.reply(submission.title)
            submission.reply(response)
            log.info('Replied to Title: {}'.format(submission.title))
            log.info('Replied with: {}'.format(response))
    except Exception as e:
        log.error(e, exc_info=False)

def main():
    learn.learn()
    print(learn.perc)
    f = open('wdbcl.data', 'rU')
    datas = []
    for line in f:
        data = line.split(',')
        del data[0]
        datas.append(data)
    for data in datas:
        test(data)
    print "tn", tn
    print "tp", tp
    print "fn", fn
    print "fp", fp
    if tp == 0 and fp == 0:
        precis = 0.0
    else:
        precis = float(tp) / (tp + fp)
    if tp == 0 and fn == 0:
        rec = 0.0
    else:
        rec = float(tp) / (tp + fn)
    print("Precision = " + str(precis))
    print("Recall = " + str(rec))
    f.close()

def g(self, root):
    spamDict = {}
    hamDict = {}
    if not os.path.exists(root + "spamDict"):
        if not os.path.exists(self.user):
            os.mkdir(self.user)
        if not os.path.exists(self.user + "ham/"):
            os.mkdir(self.user + "ham/")
        if not os.path.exists(self.user + "spam/"):
            os.mkdir(self.user + "spam/")
        for filename in os.listdir(self.user + "ham/"):
            os.remove(self.user + "ham/" + filename)
        for filename in os.listdir(self.user + "spam/"):
            os.remove(self.user + "spam/" + filename)
        from learn import learn
        self.statusBar.setText("Wait, I'm learning .....")
        learn(root)
        self.statusBar.setText("learned")
    for line in open(root + "spamDict", "r+"):
        key, data = line.split()
        spamDict[key] = data
    for line in open(root + "hamDict", "r+"):
        key, data = line.split()
        hamDict[key] = data
    self.k = 1  # For Laplacian smoothing
    temp = shelve.open(root + "db")
    print "db", temp
    C = temp["pOfHam"]
    # lenHams = temp["lenHams"]
    # lenSpams = temp["lenSpams"]
    # total = lenHams + lenSpams
    temp.close()
    pOfHam = C  # (C*total + self.k) / (total + self.k*2)
    pOfSpam = 1 - C  # ((1-C)*total + self.k) / (total + self.k*2)
    # pOfSpam = (60 + self.k) / (90 + self.k*2)
    # pOfHam = (30 + self.k) / (90 + self.k*2)
    print "pOfHam, pOfSpam", pOfHam, pOfSpam
    sKeyList = spamDict.keys()
    sKeyList.sort()
    totalWordFrequencyOfSpams = 0
    for sKey in sKeyList:
        totalWordFrequencyOfSpams += int(spamDict[sKey])
    hKeyList = hamDict.keys()
    hKeyList.sort()
    totalWordFrequencyOfHams = 0
    for hKey in hKeyList:
        totalWordFrequencyOfHams += int(hamDict[hKey])
    return pOfHam, pOfSpam, hamDict, spamDict, totalWordFrequencyOfHams, totalWordFrequencyOfSpams

def main():
    choice = 'TP'
    prepare_dictionary()
    if choice == 'TP':
        global pathVars
        global pathToAuthors
        global gloveText
        for i, item in enumerate(pathVars):
            pathToFold = os.getcwd() + "/data/folds" + "/" + pathTopics[i] + "_folds"
            reorderedData = reorder(readData.readData(pathVars[i], pathToAuthors, pathTopics[i]), pathToFold)
            learn(reorderedData, pathTopics[i])
    elif choice == 'E':
        sentence = raw_input("Enter your sentence..")
        topic = raw_input(' What is the topic? ')
        predict(sentence.lower(), topic)

def buttonPushed():
    data = None
    word_thresh = 0.01
    objects = []
    verb_forms = {}
    spec_post = False
    with_preps = True
    print "success"
    newhognew(file1)
    svminput.svminputfn()
    svmfile.svmfilefn()
    # classi()
    # print file1
    # txt.insert(END, file1)
    data = yaml.load(file('C:/meenuneenu/project/libsvm-3.17/python/detail.txt', 'r'))
    pickle.dump(data, open("C:/meenuneenu/project/libsvm-3.17/python/pickled_files/data.pk", "wb"))
    learn_obj = learn(objects, word_thresh)
    gen_obj = Generator(learn_obj, data, spec_post, with_preps)
    final_sentence = gen_obj.run()
    for post_id in sorted(final_sentence):
        print "***", post_id
        for s_num in final_sentence[post_id]:
            for sentence in final_sentence[post_id][s_num]:
                # print sentence + '.' + '\n'
                txt.insert(END, sentence)
                txt.insert(END, '.\n')
    deletefiles()

def calculateData():
    inputsNumber = 2
    firstLayerNeuronsNumber = 2
    secondLayerNeuronsNumber = 1
    # Creation of a double-layer neural network.
    (firstLayerNetworkWeightsMatrixBeforeLearn,
     secondLayerNetworkWeightsMatrixBeforeLearn) = createNetworkWithRandomWeights(
        inputsNumber, firstLayerNeuronsNumber, secondLayerNeuronsNumber)
    # Learning the double-layer neural network.
    (firstLayerNetworkWeightsMatrixAfterLearn,
     secondLayerNetworkWeightsMatrixAfterLearn,
     firstLayerDataPlot,
     secondLayerDataPlot) = learn(firstLayerNetworkWeightsMatrixBeforeLearn,
                                  secondLayerNetworkWeightsMatrixBeforeLearn,
                                  trainStringInputs,
                                  trainStringOutputs,
                                  learnSteps,
                                  maxLearnSteps,
                                  supposedNetworkError,
                                  numberOfShownExamplesPerStep)
    return (firstLayerNetworkWeightsMatrixAfterLearn,
            secondLayerNetworkWeightsMatrixAfterLearn,
            firstLayerDataPlot,
            secondLayerDataPlot)

def model0():
    '''
    Version 0
    The most basic thing I can think of
    Read the whole directory and scan reviews
    Include negation and emoticon testing (latter is redundant)
    Average out scores
    Find standard deviation and remove band (mean-sigma, mean+sigma) (modify later)
    '''
    bar = progressbar.ProgressBar()
    data = tools.loaddir("/home/aditya/Desktop/project/aclImdb/train/pos/")
    data.extend(tools.loaddir("/home/aditya/Desktop/project/aclImdb/train/neg/"))
    print "model: BEGIN"
    print "model: " + "reading data and scores"
    reviews = []
    scores = []
    for w in bar(data):
        reviews.append(open(w[0]).read())
        scores.append(w[1])
    print "model: " + "read complete. now learning"
    bar = progressbar.ProgressBar()
    learn.learn(reviews, scores, bar=bar, verbose=True)
    print "model: learnt. now reviewing"
    connection = memory.gc_()
    cur = connection.cursor()
    cur.execute("insert into _dump select * from dump")
    print "model: dump backed up"
    cur.execute("select @av:= avg(times) from dump")
    cur.execute("delete from dump where times < @av")
    cur.execute("select @lim:=times from dump where element = 'good'")
    cur.execute("delete from dump where times > @lim")
    print "model: average filtering done"
    cur.execute(r"delete from dump where element like '%\'%\'%'")
    cur.execute(r"delete from dump where element like '%\'%' and element not like '%n\'t%';")
    print "model: filtering complete"
    cur.execute("select @rnf:=10/(max(score)-min(score)) from dump")
    cur.execute("update dump set score = score * @rnf")
    print "model: renormalization done. closing connection"
    connection.commit()
    connection.close()

# def test(testdir):
# def failure():

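The docstring above mentions removing the (mean - sigma, mean + sigma) band of scores,
but the body never implements it. A minimal sketch of that band filter, assuming plain
numeric score lists; the helper name remove_band and the use of numpy are illustrative
assumptions, not part of the original project:

import numpy as np

def remove_band(scores):
    # Hypothetical helper: keep only scores outside the one-sigma band
    # (mean - sigma, mean + sigma) described in the model0 docstring.
    scores = np.asarray(scores, dtype=float)
    mu, sigma = scores.mean(), scores.std()
    return scores[(scores <= mu - sigma) | (scores >= mu + sigma)].tolist()
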
def files_in_a_flash(path):
    """
    PARAMETERS:
    -----------
    path: the path to the location of the archived files in memory
    """
    probabilities = learn.learn(path + '/sorted')
    sort.sort(probabilities, path)

def main():
    choice = 'TP'
    prepare_dictionary()
    if choice == 'TP':
        global pathVars
        global pathToAuthors
        global gloveText
        for i, item in enumerate(pathVars):
            pathToFold = os.getcwd() + "/data/folds" + "/" + pathTopics[i] + "_folds"
            reorderedData = reorder(
                readData.readData(pathVars[i], pathToAuthors, pathTopics[i]),
                pathToFold)
            learn(reorderedData, pathTopics[i])
    elif choice == 'E':
        sentence = raw_input("Enter your sentence..")
        topic = raw_input(' What is the topic? ')
        predict(sentence.lower(), topic)

def run():
    config = tf.ConfigProto()
    # os.environ['CUDA_VISIBLE_DEVICES'] = "0"  # msg err
    # config.gpu_options.allow_growth = True
    with tf.Session(config=config):
        model.learn(
            policy=arch.A2CPolicy,
            env=SubprocVecEnv([*env.make_train(all_=True)]),
            n_steps=2048,
            total_timesteps=1000000,
            gamma=0.99,
            lam=0.95,
            lr=2e-4,
            log_interval=10,
            max_grad_norm=.5,
            vf_coef=0.5,
            ent_coef=0.01,
        )

def calculateData():
    # Creation of a single-layer neural network.
    networkWeightsMatrixBeforeLearn = createNetworkWithRandomWeights(inputsNumber, nueronsNumber)
    # Learning the single-layer neural network.
    networkWeightsMatrixAfterLearn, dataPlot = learn(networkWeightsMatrixBeforeLearn,
                                                     trainStringInputs,
                                                     trainStringOutputs,
                                                     learnSteps,
                                                     maxLearnSteps,
                                                     supposedNetworkError)
    # Obtaining the result of learning the single-layer neural network.
    # The result will be used to test the neural network.
    result = simulateNeuralNetwork(networkWeightsMatrixAfterLearn, trainStringInputs)
    return result, dataPlot

def run(self):
    # Create a converter instance
    converter = ConvertVector3()
    # Feed in the IDs of the selected actresses (label 1)
    for actor_id in self.actress:
        converter.getVector3(actor_id, 1)
    # Randomly pick other actresses to be labelled 0
    for other_id in self.actress:
        other_actor = converter.other_actor_random(other_id)
        converter.getVector3(other_actor, 0)
    # Data and labels
    self.datas = converter.datas
    self.labels = converter.labels
    # Build a model from the training data (data, labels)
    clf = learn(self.datas, self.labels)
    print(':', self.datas)
    print('-', self.labels)
    print(clf)

def testLearn2(self):
    sentence = u"てすと"
    learn.learn(sentence)

def testLearn1(self):
    sentence = u"テスト"
    learn.learn(sentence)

def random_reply(): log.info("making random reply") # Choose a random submission from /r/all that is currently hot if SUBREDDIT_LIST: subreddit = random.choice(SUBREDDIT_LIST) submission = random.choice(list(api.subreddit(subreddit).hot())) else: submission = random.choice(list(api.subreddit("all").hot())) submission.comments.replace_more( limit=0 ) # Replace the "MoreReplies" with all of the submission replies sub_name = submission.subreddit.display_name brain = "{}/{}.db".format(DB_DIR, sub_name) log.info(brain) if not glob.glob(brain): learn(sub_name) reply_brain = bot.Brain(brain) try: # if prob(.1): # small chance we advertise # content = share() # comment = random.choice(submission.comments.list()) # log.info('sharing - thanks for helping out!') # sharing = '{} {}'.format(content['comment'], content['url']) # reply = comment.reply(sharing) # log.info("Replied to comment: {}".format(comment.body)) # log.info("Replied with: {}".format(reply)) # return if prob(.35): # There's a larger chance that we'll reply to a comment. log.info("replying to a comment") comment = random.choice(submission.comments.list()) response = reply_brain.reply(comment.body) # We might not be able to learn enough from the subreddit to reply # If we don't, then pull a reply from the general database. if "I don't know enough to answer you yet!" in response: log.info( "I don't know enough from {}, using main brain db to reply" .format(sub_name)) brain = "{}/{}.db".format(DB_DIR, "brain") reply_brain = bot.Brain(brain) response = reply_brain.reply(comment.body) reply = comment.reply(response) log.info("Replied to comment: {}".format(comment.body)) log.info("Replied with: {}".format(response)) else: log.info("replying to a submission") # Pass the users comment to chatbrain asking for a reply response = reply_brain.reply(submission.title) # same as above. nobody will ever see this so it's fine. if "I don't know enough to answer you yet!" in response: log.info( "I don't know enough from {}, using main brain db to reply" .format(sub_name)) brain = "{}/{}.db".format(DB_DIR, "brain") reply_brain = bot.Brain(brain) response = reply_brain.reply(submission.title) submission.reply(response) log.info("Replied to Title: {}".format(submission.title)) log.info("Replied with: {}".format(response)) except praw.exceptions.APIException as e: raise e except Exception as e: log.error(e, exc_info=False)
    sys.exit()

# Set argument variables
training_file = sys.argv[1]
test_file = sys.argv[2]
max_instances = int(sys.argv[3])

# This will hold the list of attributes e.g. [wesley, romulan, poetry]
attributes = []

# These will hold all of the training / testing vectors
training_data = []
testing_data = []

# Load the data from file
(training_data, attributes) = util.load_data(training_file)
(testing_data, _) = util.load_data(test_file)

# Create the root node of our tree and start recursive learning
n = Node(training_data[:max_instances])
learn.learn(n, attributes)

# Print the resulting tree
util.printTree(n.leftNode, 0)
util.printTree(n.rightNode, 1)

# Print accuracy percentages
print "\nAccuracy on training set (%d instances): %.1f%%" % (
    len(training_data), learn.test(n, training_data) * 100)
print "\nAccuracy on test set (%d instances): %.1f%%" % (
    len(testing_data), learn.test(n, testing_data) * 100)

def main(matcher, combined=False):
    listOfTuples = []
    for file in glob.glob(matcher):
        if not combined:
            if "physical+" in file:
                continue
        # for file in glob.glob("happy_Trevor.csv"):
        if file == "listOfTuples.csv" or file == "output.csv":
            continue
        infile = open(file, "r")
        print file
        tag = get_tag(infile, binary=False)
        # Convert
        converter = Converter(infile, tag)
        # Build up list
        listOfTuples = listOfTuples + converter.generate()
    return listOfTuples
    # printToFile("listOfTuples.csv", listOfTuples)


# test = main("test/physical_pushups2.csv")
learn(main("ProComp_CSV/*.csv"))
# learn_with_test(main("ProComp_CSV/*.csv"), test)

# Plot
# plot(converter.skin, converter.filtered_skin, converter.heart, converter.filtered_heart)

            pos_mult = positive_count / positive_texts
        except ZeroDivisionError:
            pos_mult = 0
        try:
            neg_mult = negative_count / negative_texts
        except ZeroDivisionError:
            neg_mult = 0
        weight = count * pos_mult - count * neg_mult
        sum += weight
        print(f'{word}: {weight}')
    except DoesNotExist:
        print("Slow down here mate:", word)

print("Sum: ", sum)
tone = "positive" if sum > 0 else "negative"
print(f"This text is {tone}")

if args.hybrid:
    while True:
        response = input(f"Is this correct? (y/n):")
        if response == "" or response == "y":
            if sum > 0:
                learn(["+ " + text], db_username, db_password)
            else:
                learn(["- " + text], db_username, db_password)
            break
        elif response == "n":
            if sum <= 0:
                learn(["+ " + text], db_username, db_password)
            else:
                learn(["- " + text], db_username, db_password)
            break

def build_tree(dataset, max_depth, min_size, depth=1):
    root = learn(dataset)
    learn_tree(root, max_depth, min_size, depth)
    return root

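A hedged usage sketch for build_tree, assuming each row holds feature values with the
class label in the last column; the sample data below is invented for illustration:

# Hypothetical example data: two numeric features plus a 0/1 class label per row.
dataset = [
    [2.7, 1.0, 0],
    [1.3, 1.5, 0],
    [3.6, 2.9, 1],
    [7.5, 3.1, 1],
]
tree = build_tree(dataset, max_depth=3, min_size=1)
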
from time import time

start = time()

print 'Importing...',
import prepare
import learn
print 'done'

aAList = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLU', 'GLN', 'GLY', 'HIS', 'ILE',
          'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL']

working_directory = 'C:/university/biology'
prepare.prepare(working_directory)
learn.learn(working_directory, True)

print 'Total time taken: {} seconds'.format(time() - start)

def mainTestIter(withhold=0, params=None):
    from sklearn import cross_validation
    import learn

    # default value for params
    if params == None:
        params = {}
    params = dict({'withhold': 0,
                   'load': None,
                   'extractFile': None,
                   'trainFile': 'train.xml',
                   'testFile': 'testcases.xml',
                   'writePredict': False,
                   'outputFile': 'predictions.csv',
                   # arguments to `learn`
                   'options': {},
                   # the option to cycle through
                   'option': None,
                   # range of values to cycle through
                   'range': [],
                   # k-fold cross-validation
                   'n_folds': 10
                   }, **params)

    trainfile = "train.xml"
    testfile = "testcases.xml"

    # TODO put the names of the feature functions you've defined above in this list
    ffs = [metadata_feats, unigram_noStop]

    print
    print "extracting training/testing features..."
    time1 = time.clock()
    # X_train, y_train, train_ids, X_test, y_test, test_ids = test.loadData(params, withhold, ffs)
    X, y, ids, _, _, _ = test.loadData(params, withhold, ffs)
    time2 = time.clock()
    print "done extracting training/testing features", time2 - time1, "s"
    print "%d data, %d features" % X.shape
    print

    # options for the learning engine
    options = params['options']

    # array to store MAEs for various values of learning options
    MAEs = []

    print "iterating over values of %s from %s ... %s" % (params['option'], params['range'][0], params['range'][-1])
    print "================================================================================"

    # iterate through each value of `params['option']` in `params['range']`
    # and calculate the MAE for that value
    for (i, value) in enumerate(params['range']):
        print "%s = %s" % (params['option'], str(value))
        op = dict(options)
        op[params['option']] = value
        decomp = None

        # generate k cross-validation folds
        kf = cross_validation.KFold(len(y), n_folds=params['n_folds'], shuffle=True)
        print "k-fold cross-validation with %d folds" % params['n_folds']
        cv_mae = []

        # for each cv fold
        for train, tests in kf:
            # generate partition
            X_train, y_train, X_test, y_test = X[train], y[train], X[tests], y[tests]

            # train here, and return regression parameters
            print "learning..."
            time1 = time.clock()
            if 'reduction' in op and op['reduction'] != None:
                ((learned_w0, learned_w), decomp) = learn.learn(X_train, y_train, **op)
            else:
                (learned_w0, learned_w) = learn.learn(X_train, y_train, **op)
            time2 = time.clock()
            print "done learning, ", time2 - time1, "s"
            print

            # make predictions
            print "making predictions..."
            if decomp is None:
                preds = X_test.dot(learned_w) + learned_w0
            else:
                preds = decomp(X_test).dot(learned_w) + learned_w0
            print "done making predictions"

            # cross-validate
            cv_mae.append(testMAE(preds, y_test))
            print "MAE on withheld data: ", cv_mae[-1]
            print

        cv_mae_mean, cv_mae_std = np.mean(cv_mae), np.std(cv_mae)
        print
        print "Avg. MAE: %f" % cv_mae_mean
        print "Std. MAE: %f" % cv_mae_std
        MAEs.append((cv_mae_mean, cv_mae_std))
        print "--------------------------------------------------------------------------------"

    print "================================================================================"

    # tabulate results
    results = dict()
    print "Options:"
    print options
    print
    print "Results:"
    print "%18s \t MAE \t std" % params['option']
    for (i, value) in enumerate(params['range']):
        print "%18s \t %d \t %d" % (value, MAEs[i][0], MAEs[i][1])
        if isinstance(value, list):
            value = tuple(value)
        results[value] = MAEs[i]

    return results

def kcv(knl, kpar, filt, t_range, X, y, k, task, split_type):
    # KCV Perform K-Fold Cross Validation.
    #   [T_KCV_IDX, ERR_KCV] = KCV(KNL, KPAR, FILT, T_RANGE, X, Y, K, TASK, SPLIT_TYPE)
    #   performs k-fold cross validation to calculate the index of the
    #   regularization parameter 'T_KCV_IDX' within a range 'T_RANGE'
    #   which minimizes the average cross validation error 'AVG_ERR_KCV' given
    #   a kernel type 'KNL' and (if needed) a kernel parameter 'KPAR', a filter
    #   type 'FILT' and a dataset composed by the input matrix 'X[n,d]' and the
    #   output vector 'Y[n,1]'.
    #
    #   The allowed values for 'KNL' and 'KPAR' are described in the
    #   documentation given with the 'KERNEL' function. Moreover, it is possible
    #   to specify a custom kernel with 'KNL='cust'' and a 'KPAR[n,n]' matrix.
    #
    #   The allowed values for 'FILT' are:
    #       'rls'    - regularized least squares
    #       'land'   - iterative Landweber
    #       'tsvd'   - truncated SVD
    #       'nu'     - nu-method
    #       'cutoff' - spectral cut-off
    #
    #   The parameter 'T_RANGE' may be a range of values or a single value.
    #   In case 'FILT' equals 'land' or 'nu', 'T_RANGE' *MUST BE* a single
    #   integer value, because its value is interpreted as 'T_MAX' (see also
    #   LAND and NU documentation). Note that in case of 'land' the algorithm
    #   step size 'tau' will be automatically calculated (and printed).
    #
    #   According to the parameter 'TASK':
    #       'class' - classification
    #       'regr'  - regression
    #   the function minimizes the classification or regression error.
    #
    #   The last parameter 'SPLIT_TYPE' must be:
    #       'seq'  - sequential split (as default)
    #       'rand' - random split
    #   as indicated in the 'SPLITTING' function.
    #
    #   Example:
    #       [t_kcv_idx, avg_err_kcv] = kcv('lin', [], 'rls', logspace(-3, 3, 7), X, y, 5, 'class', 'seq')
    #       [t_kcv_idx, avg_err_kcv] = kcv('gauss', 2.0, 'land', 100, X, y, 5, 'regr', 'rand')
    #
    #   See also LEARN, KERNEL, SPLITTING, LEARN_ERROR
    k = math.ceil(k)
    if k <= 1:
        print 'The number of splits in KCV must be at least 2'

    ## Split of training set:
    k = int(k)
    sets = splitting(y, k, split_type)

    ## Starting Cross Validation
    err_kcv = []
    for i in range(0, k):
        err_kcv.append([])  # one series of errors for each split

    for split in range(0, k):
        print 'Split number', split
        test_idxs = sets[split]
        train_idxs = np.setdiff1d(np.arange(0, np.size(y, axis=0)), test_idxs)

        X_train = X[train_idxs, :]
        y_train = y[train_idxs, 0]
        y_train = np.reshape(y_train, (len(y_train), 1))

        X_test = X[test_idxs, :]
        y_test = y[test_idxs, 0]
        y_test = np.reshape(y_test, (len(y_test), 1))

        ## Learning
        alpha, err = learn(knl, kpar, filt, t_range, X_train, y_train, task)
        print err

        ## Test error estimation
        # Error estimation over the test set, using the parameters given by the
        # previous task
        K_test = KernelMatrix(X_test, X_train, knl, kpar)
        init_err_kcv = np.zeros((1, np.size(alpha, axis=1)))
        init_err_kcv = np.reshape(init_err_kcv, np.size(alpha, axis=1))
        init_err_kcv = list(init_err_kcv)

        # On each split we estimate the error with each t value in the range
        err_kcv[split] = init_err_kcv
        for t in range(0, np.size(alpha, axis=1)):
            y_learnt = np.dot(K_test, alpha[:, t])
            err_kcv[split][t] = learn_error(y_learnt, y_test, task)

    ## Average the error over different splits
    err_kcv = np.reshape(err_kcv, (np.size(err_kcv, axis=0), len(err_kcv[0])))
    avg_err_kcv = []
    for l in range(0, np.size(err_kcv, axis=1)):
        avg_err_kcv.append(np.median(err_kcv[:, l]))

    ## Calculate the minimum error w.r.t. the regularization parameter
    # min_err = inf
    t_kcv_idx = -1
    avg_err_kcv = np.reshape(avg_err_kcv, (1, len(avg_err_kcv)))
    ny = np.size(avg_err_kcv, axis=0)
    nx = np.size(avg_err_kcv, axis=1)
    for i in range(0, nx * ny):
        if i == 0:
            min_err = avg_err_kcv[0, i]
            t_kcv_idx = i
        elif avg_err_kcv[0, i] <= min_err:
            min_err = avg_err_kcv[0, i]
            t_kcv_idx = i
    # np.size(alpha[t_kcv_idx], axis=0)
    # np.size(alpha[t_kcv_idx], axis=1)
    # alpha[t_kcv_idx]

    return t_kcv_idx, avg_err_kcv

def get_obs():
    for i in range(1, 1001):
        fn = "../../logs/rand" + str(i) + ".log"
        fp = open(fn, "r")
        lines = fp.readlines()
        for j in range(len(lines) - 1):
            add_to_dict(lines[j], lines[j + 1])
        fp.close()


if __name__ == "__main__":
    print "Collecting observations ..."
    get_obs()
    obs = [obs1]
    print "Learning ..."
    model = l.learn(obs, gamma, 5, True, 0.001)
    print "Storing model"
    fp = open("model.pk", "wb")
    pk.dump(model, fp)
    fp.close()
    print "Done!"

def f(self, lock, server, username, password, statusBar, listWidgetInbox,
      listWidgetSpam, textBoxInbox, textBoxSpam, pOfHamLearned, pOfSpamLearned,
      hamDictLearned, spamDictLearned, hamWordFreqLearned, spamWordFreqLearned,
      pOfHamUser, pOfSpamUser, hamDictUser, spamDictUser, hamWordFreqUser,
      spamWordFreqUser, d):
    print "f"
    if not os.path.exists(self.user):
        os.makedirs(self.user)
    if not os.path.exists(self.user + "ham"):
        os.makedirs(self.user + "ham")
        # files = glob.glob(self.user + "user_inbox" + '/*')
        # for f in files:
        #     os.remove(f)
    if not os.path.exists(self.user + "spam"):
        os.makedirs(self.user + "spam")
        # files = glob.glob(self.user + "user_spam" + '/*')
        # for f in files:
        #     os.remove(f)
    conn = imaplib.IMAP4_SSL(server, 993)
    conn.login(username, password)
    statusBar.setText("Connected")
    print "Connected"
    unreadCount = re.search("UNSEEN (\d+)", conn.status("INBOX", "(UNSEEN)")[1][0]).group(1)
    conn.select()
    typ, data = conn.search(None, 'ALL')
    i = 0
    learn(self.user)
    if os.path.exists("spamFilter/user/"):
        if os.path.exists(self.user + "spam/"):
            for filename in os.listdir(self.user + "spam/"):
                self.listWidgetSpam.addItem(str(filename))
                i = max(int(filename), i)
        if os.path.exists(self.user + "ham/"):
            for filename in os.listdir(self.user + "ham/"):
                self.listWidgetInbox.addItem(str(filename))
                i = max(int(filename), i)
    for num in data[0].split()[i:]:
        if d.disconnectFlag == True:
            self.statusBar.setText("Disconnected")
            print "Disconnected"
            break
        typ, data = conn.fetch(num, '(RFC822)')
        print num
        time.sleep(10)
        message = ""
        message = data[0][1]
        try:
            lock.lockForWrite()
            h1, s1 = self.isHam(message, pOfHamLearned, pOfSpamLearned,
                                hamDictLearned, spamDictLearned,
                                hamWordFreqLearned, spamWordFreqLearned)
            h2, s2 = self.isHam(message, pOfHamUser, pOfSpamUser, hamDictUser,
                                spamDictUser, hamWordFreqUser, spamWordFreqUser)
            hBar = (h1 + (self.n - 1) * h2) / self.n
            sBar = (s1 + (self.n - 1) * s2) / self.n
            print "hBar,sBar", hBar, sBar
            if hBar - sBar > 0:
                listWidgetInbox.addItem(str(num))
                f = open(self.user + "ham/" + str(num), "w")
                f.write(message)
                f.close()
            else:
                listWidgetSpam.addItem(str(num))
                f = open(self.user + "spam/" + str(num), "w")
                f.write(message)
                f.close()
        finally:
            lock.unlock()
    conn.close()
    conn.logout()

def on_pushButtonNotSpam_clicked(self):
    i = self.listWidgetSpam.takeItem(self.listWidgetSpam.currentRow())
    text = i.text()
    shutil.move("spamFilter/user/spam/" + text, "spamFilter/user/ham/" + text)
    self.listWidgetInbox.addItem(i)
    learn("spamFilter/user/")

def mainPredict(params):
    import learn

    # default value for params
    if params == None:
        params = {}
    params = dict({'withhold': 0,
                   'load': None,
                   'extractFile': None,
                   'trainFile': 'train.xml',
                   'testFile': 'testcases.xml',
                   'writePredict': False,
                   'outputFile': 'predictions.csv',
                   # arguments to `learn`
                   'options': {},
                   # the option to cycle through
                   'option': None,
                   # range of values to cycle through
                   'range': [],
                   # k-fold cross-validation
                   'n_folds': 2
                   }, **params)

    trainfile = "train.xml"
    testfile = "testcases.xml"

    # TODO put the names of the feature functions you've defined above in this list
    ffs = [metadata_feats, unigram_noStop]

    print
    print "extracting training/testing features..."
    time1 = time.clock()
    # X_train, y_train, train_ids, X_test, y_test, test_ids = test.loadData(params, withhold, ffs)
    X_train, global_feat_dict, y_train, train_ids = extract_feats(ffs, params['trainFile'])
    time2 = time.clock()
    print "done extracting training/testing features", time2 - time1, "s"
    print

    # options for the learning engine
    options = params['options']
    op = dict(options)
    decomp = None

    # train here, and return regression parameters
    print "learning..."
    time1 = time.clock()
    if 'reduction' in op and op['reduction'] != None:
        ((learned_w0, learned_w), decomp) = learn.learn(X_train, y_train, **op)
    else:
        (learned_w0, learned_w) = learn.learn(X_train, y_train, **op)
    time2 = time.clock()
    print "done learning, ", time2 - time1, "s"
    print

    # load test features
    print "extracting test features..."
    X_test, _, y_ignore, test_ids = extract_feats(ffs, params['testFile'], global_feat_dict=global_feat_dict)
    print "done extracting test features"
    print

    # make predictions
    print "making predictions..."
    if decomp is None:
        preds = X_test.dot(learned_w) + learned_w0
    else:
        preds = decomp(X_test).dot(learned_w) + learned_w0
    print "done making predictions"
    print

    print "writing predictions..."
    util.write_predictions(preds, test_ids, params['outputFile'])
    print "done!"

log.info("user is " + str(reddit.api.user.me())) if __name__ == '__main__': log.info('db size size to start replying:' + str(bytesto(limit, 'm'))) while True: if os.path.isfile(MAIN_DB): size = os.path.getsize(MAIN_DB) log.info('db size: ' + str(bytesto(size, 'm'))) else: size = 0 if size < limit: # learn faster early on log.info('fast learning') learn() try: log.info('new db size: ' + str(bytesto(os.path.getsize(MAIN_DB), 'm'))) except: pass countdown(5) if size > limit: # once we learn enough start submissions and replies log.info('database size is big enough') if prob(0.02): # 2% chance we reply to someone reddit.random_reply() if prob(0.00): # 1% chance we make a random submission
    # Copy the current position, then play 256 random rollouts and report the most
    # frequent winning margin --> in essence this is how MCTS counts wins at a node
    score_list = []
    b_cpy = Board()
    # Run 256 simulations
    for i in range(256):
        b.copy(b_cpy)
        b_cpy.rollout(show_board=False)
        score_list.append(b_cpy.score())
    # Pick the score that appears most often in the score list, e.g.
    # [(74.0, 25), (-88.0, 15), (10.0, 13), (16.0, 11), (-2.0, 11), (4.0, 9), (-18.0, 9),
    #  (-16.0, 8), (6.0, 7), (8.0, 7), (14.0, 6), (22.0, 6), (-28.0, 6), (-26.0, 6),
    #  (-24.0, 6), (-6.0, 6), (2.0, 5), (-48.0, 5), (-22.0, 5), (-12.0, 5), (0.0, 4),
    #  (24.0, 4), (26.0, 4), (-20.0, 4), (-8.0, 4), (-4.0, 4), (20.0, 3), (-56.0, 3),
    #  (-46.0, 3), (-44.0, 3), (-40.0, 3), (-38.0, 3), (-36.0, 3), (-34.0, 3), (-32.0, 3),
    #  (-30.0, 3), (28.0, 2), (30.0, 2), (32.0, 2), (36.0, 2), (44.0, 2), (48.0, 2),
    #  (-54.0, 2), (-42.0, 2), (-10.0, 2), (34.0, 1), (38.0, 1), (42.0, 1), (46.0, 1),
    #  (50.0, 1), (-72.0, 1), (-62.0, 1), (-60.0, 1), (-58.0, 1), (-52.0, 1), (-50.0, 1),
    #  (56.0, 1), (-14.0, 1)]
    score = Counter(score_list).most_common(1)[0][0]
    if score == 0:
        result_str = "Draw"
    else:
        winner = "B" if score > 0 else "W"
        result_str = "%s+%.1f" % (winner, abs(score))
    sys.stderr.write("result: %s\n" % result_str)
else:
    # launch_mode == 2: Learn
    path = input('input the path of learned sgfs')
    learn.learn(3e-4, 0.5, sgf_dir="{}".format(path), use_gpu=use_gpu, gpu_cnt=1)

            move = b.random_play()
        elif quick:
            move = rv2ev(np.argmax(tree.evaluate(b)[0][0]))
            b.play(move, False)
        else:
            move, _ = tree.search(b, 0, clean=clean)
            b.play(move, False)
        b.showboard()
        if prev_move == PASS and move == PASS:
            break

    score_list = []
    b_cpy = Board()
    for i in range(256):
        b.copy(b_cpy)
        b_cpy.rollout(show_board=False)
        score_list.append(b_cpy.score())
    score = Counter(score_list).most_common(1)[0][0]
    if score == 0:
        result_str = "Draw"
    else:
        winner = "B" if score > 0 else "W"
        result_str = "%s+%.1f" % (winner, abs(score))
    sys.stderr.write("result: %s\n" % result_str)
else:
    learn.learn(3e-4, 0.5, sgf_dir="sgf/", use_gpu=use_gpu, gpu_cnt=1)

    feed = match.Feed()
    for i in range(100):
        print("%d total games / next epoch: %d " % (i * args.game_cnt, i + 1))
        acc = match.feed_match(feed, args.game_cnt, args.search_limit, ckpt_path,
                               args.initial_life, use_gpu=args.gpu, gpu_idx=0,
                               reuse=(i != 0), show_info=args.verbose)
        acc_list.append(acc)
        if len(acc_list) >= 3 and acc_list[-3:] == terminate_list:
            print("\naccuracy seems to be stable at 100%")
            break
        fp = match.FeedPicker(feed)
        learn.learn(fp, ckpt_path, 1e-4, use_gpu=args.gpu, gpu_cnt=args.gpu_cnt)
        ckpt_path = "ckpt/model"

# check if ckpt files exist
if glob.glob("ckpt/*.data*") == []:
    print("ckpt files not found.")
    print("use '--learn' option to start learning or copy files from 'pre-train/ckpt' to 'ckpt' directory.")
    exit(0)

# show game record
match.test_match(args.gpu, args.search_limit, args.initial_life, reuse=args.learn, show_info=args.verbose)

def init(): log.info("db size size to start replying:" + str(bytesto(MAIN_DB_MIN_SIZE, "m"))) reddit.shadow_check() # check if this is the first time running the bot set_user_info() check_first_run() set_db_size() while True: if get_db_size( ) < MAIN_DB_MIN_SIZE and not COMMENTS_DISABLED: # learn faster early on log.info(""" THE BOT IS WORKING. IT WILL TAKE ABOUT 8 HOURS FOR IT TO LEARN AND START COMMENTING. """) log.info("fast learning") learn() try: log.info("new db size: " + str(bytesto(get_db_size(), "m"))) except: pass set_db_size() countdown(2) if (get_db_size() > MAIN_DB_MIN_SIZE or COMMENTS_DISABLED ): # once we learn enough start submissions and replies log.info("database size is big enough") if USE_SLEEP_SCHEDULE: while should_we_sleep(): log.info("zzzzzzzz :snore:") time.sleep(60) for action in reddit_bot: if action.rate_limit_unlock_epoch != 0: if action.rate_limit_unlock_epoch > get_current_epoch(): log.info( "{} hit RateLimit recently we need to wait {} seconds with this" .format( action.name, action.rate_limit_unlock_epoch - get_current_epoch(), )) continue else: action._replace(rate_limit_unlock_epoch=0) else: if prob(action.probability): log.info("making a random {}".format(action.name)) try: action.action() except praw.exceptions.APIException as e: secs_to_wait = get_seconds_to_wait(str(e)) action._replace( rate_limit_unlock_epoch=(get_current_epoch() + secs_to_wait)) log.info( "{} hit RateLimit, need to sleep for {} seconds" .format(action.name, secs_to_wait)) except Exception as e: log.error("something weird happened, {}".format(e), exc_info=True) if prob(PROBABILITIES["LEARN"]): # chance we'll learn more log.info("going to learn") learn() # Wait 10 minutes to comment and post because of reddit rate limits countdown(1) log.info("end main loop")
from fileinput import read_cards, write_cards
from sys import argv
from learn import learn
from functools import partial
from cli import rate_fn, show_fn

if __name__ == '__main__':
    if len(argv) <= 1:
        print("Error: no file specified.")
        exit(1)
    filename = argv[1]
    if len(argv) >= 3:
        try:
            num_to_learn = int(argv[2])
        except ValueError:
            print("Invalid number of cards to learn.")
            exit(1)
    else:
        num_to_learn = -1
    cards = list(read_cards(filename))
    learn(cards,
          show_fn=show_fn,
          rate_fn=rate_fn,
          write_fn=partial(write_cards, filename, cards),
          num_cards=num_to_learn)

auth = lg_authority.AuthRoot()
auth__doc = "The object that serves authentication pages"

@cherrypy.expose
def index(self):
    output = ""
    output += getIndexContent()
    output = getPage(output, '')
    return output


if __name__ == '__main__':
    # cherrypy.config.update({'server.socket_port': index_port})
    cherrypy.config.update(cherry_settings)
    index = index()
    index.upload = upload.upload()
    index.manage = manage.manage()
    index.modify = modify.modify()
    index.download = download.download()
    index.learn = learn.learn()
    index.support = support.support()
    index.visualize = visualize.visualize()
    # index.dashboard = dashboard.dashboard()
    cherrypy.quickstart(index)

def train(dataset, k=20, link_func="Round"):
    import learn
    import learn_round
    import logging
    from time import localtime, strftime
    from os.path import expanduser

    fold = "0"
    mini_batch_size = 1000
    init_step = 1
    beta = 0.01
    data_path = "data/" + dataset + "/"
    debug_parent = "data/" + dataset + "/" + fold + "/" + link_func + "/"
    print debug_parent
    timef = strftime("%Y-%m-%d-%H-%M-%S", localtime())
    debug_dir = debug_parent + timef + "/" + str(k) + "/"
    ensure_dir(debug_dir)
    logging.basicConfig(format='%(levelname)s %(asctime)s: %(message)s', level=logging.INFO)

    import json
    param_f = open(debug_dir + "params.json", 'w')
    params = {
        "fold": fold,
        'k': k,
        'name': timef,
        'mini_batch_size': mini_batch_size,
        'beta': beta,
        'init_step': init_step,
        'link_func': link_func,
        'dataset': dataset
    }
    json.dump(params, param_f, indent=2)
    print json.dumps(params, indent=2)
    logging.info(json.dumps(params, indent=2))
    param_f.close()

    import cPickle as pickle
    import gzip
    U_f = gzip.open(debug_dir + "U." + str(iter) + ".pkl", 'wb')
    pickle.dump(beta, U_f)
    U_f.close()

    if link_func == "Round":
        link = learn_round.logistic
        g_link = learn.g_logistic
    else:
        (link, g_link) = learn.get_link(link_func)

    train_file = "data/" + 'u1.base'
    test_file = "data/" + 'u1.test'
    train, n1, m1, D1 = read_file(train_file)
    test, n2, m2, D2 = read_file(test_file)
    n = max(n1, n2)
    m = max(m1, m2)
    D = max(D1, D2)
    if link_func == "Linear":
        D = 2
    print n, m, D
    print "Train:", len(train)
    print "Test :", len(test)

    def iter_f(iter, U, V, B, errs):
        import cPickle as pickle
        import gzip
        U_f = gzip.open(debug_dir + "U." + str(iter) + ".pkl", 'wb')
        pickle.dump(U, U_f)
        U_f.close()
        V_f = gzip.open(debug_dir + "V." + str(iter) + ".pkl", 'wb')
        pickle.dump(V, V_f)
        V_f.close()
        B_f = gzip.open(debug_dir + "B." + str(iter) + ".pkl", 'wb')
        pickle.dump(B, B_f)
        B_f.close()
        errs_f = open(debug_dir + "errs.pkl", 'w')
        pickle.dump(errs, errs_f)
        errs_f.close()

    if link_func == "Round":
        U, V, B, errs, _ = learn_round.learn(train, n, m, k, link, g_link, D=D,
                                             beta=beta, init_step=init_step,
                                             mini_batch_size=mini_batch_size,
                                             iter_f=iter_f, iters=40)
    elif link_func == "Multi-sigmoid":
        U, V, B, errs = learn.learn(train, n, m, k, link, g_link, D=D,
                                    beta=beta, init_step=init_step,
                                    mini_batch_size=mini_batch_size,
                                    iter_f=iter_f, iters=40)
    else:
        U, V, B, errs = learn.learn(train, n, m, k, link, g_link, D=D,
                                    beta=beta, init_step=init_step,
                                    mini_batch_size=mini_batch_size,
                                    iter_f=iter_f, update_B=False, iters=40)
    return