def trainSystem(directory, unigrams, bigrams, twd, ss, ssb, akw, flag):
    allText = ""
    for filename in os.listdir(directory):
        if filename.startswith(".") or filename.startswith("_"):
            continue
        # Open the file and get its text
        infile = open(os.path.join(directory, filename))
        filetext = infile.read()
        infile.close()
        newfiletext = re.sub("_", " ", filetext)
        allText += re.sub("\n", ". ", newfiletext) + " "
        lines = filetext.split("\n")
        T.trainGrams(lines, unigrams, bigrams)
        T.trainStructures(lines, twd, ss, ssb)
    # Get TextRank keywords (only single-word keys are kept)
    if flag:
        keywords = tr.main(allText)
        for key in keywords:
            if " " not in key.encode('ascii', 'ignore'):
                akw.append(key.encode('ascii', 'ignore'))
    '''# Uncomment to make a most-important-words file
    for word in akw:
        print word
    exit(1)'''
    # Get conditional probabilities
    sentenceStructureProbs = CP.createSentenceProbs(ss, ssb)
    wordConditionalProbs = CP.getProbabilities(unigrams, bigrams)
    CP.sortConditionalProbs(wordConditionalProbs)
    return wordConditionalProbs, sentenceStructureProbs, akw
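# For reference, a minimal sketch of the quantity CP.getProbabilities is
# expected to produce (an assumption about the technique, not the project's
# actual implementation): the conditional probability of w2 following w1 is
# count(w1 w2) / count(w1).
def bigramConditionalProbs(unigrams, bigrams):
    # unigrams: {word: count}; bigrams: {(w1, w2): count}
    probs = {}
    for (w1, w2), count in bigrams.items():
        probs.setdefault(w1, {})[w2] = float(count) / unigrams[w1]
    return probs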
def main():
    # Seed sentence
    seed = sys.argv[1]
    # Directory the lyrics are in
    directory = sys.argv[2]
    # Path to a most-important-words file; "NONE" if you don't have one (optional)
    keywordsPath = sys.argv[3]

    # Key: word unigram; Value: word unigram frequency
    unigrams = {}
    # Key: word bigram; Value: word bigram frequency
    bigrams = {}
    # Key: word type t; Value: set() of words of type t
    typeWordDict = {}
    # Key: structure of a line (ex: "PRP VBP NN"); Value: frequency of the key
    sentenceStructures = {}
    # Key: structures of 2 consecutive lines separated by a comma
    #      (ex: "PRP VBP NN,NNP IN DT NN")
    # Value: frequency of the key
    sentenceStructureBigram = {}
    # Key: word unigram
    # Value: priority queue Q (highest probability at the top)
    #        Get highest probability: wordConditionalProbs[key].get().word
    wordConditionalProbs = {}
    # Key: sentence structure
    # Value: priority queue Q (highest probability at the top)
    #        Get highest probability: sentenceStructureProbs[key].get().word
    sentenceStructureProbs = {}
    # List of most important words
    keywords = []
    # Key: part-of-speech tag; Value: most frequent unigram with that POS
    tagToWord = {}
    # Key: part-of-speech tag; Value: most important unigram with that POS
    tagToWordImp = {}

    # Run TextRank only when no keywords file was supplied
    flag = keywordsPath == "NONE"

    # Train on the data
    wordConditionalProbs, sentenceStructureProbs, keywords = trainSystem(
        directory, unigrams, bigrams, typeWordDict, sentenceStructures,
        sentenceStructureBigram, keywords, flag)
    tagToWord = T.findMostPopPOS(typeWordDict, unigrams)
    if not flag:
        keywords = T.getListFromFile(keywordsPath)
    tagToWordImp = T.getKeywordsForTag(keywords, tagToWord)

    # Generate the song
    generateSong(seed, wordConditionalProbs, sentenceStructureProbs,
                 typeWordDict, tagToWord, tagToWordImp)
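# Usage sketch (the script name is hypothetical; the three positional
# arguments match the sys.argv reads in main above):
#     python SongGenerator.py "I walked alone" ./lyrics/ NONE
#     python SongGenerator.py "I walked alone" ./lyrics/ ./important_words.txt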
def findBestRegularization(s, x_sub, y_sub, x_cv, y_cv):
    # Sweep the regularization parameter and keep the value with the best
    # cross-validation accuracy.  (x_cv/y_cv are now explicit parameters;
    # the original body referenced them without defining them.)
    regs = np.linspace(0, 10, 20)
    reg_acc_cv = []
    reg_acc_train = []
    max_acc = 0
    best_reg = 0
    for r in regs:
        th1, th2 = Train.trainSciPy2(s, x_sub, y_sub, r)
        acc_cv = accuracy_score(y_cv, [SimpleNN2.predictClass(s, th1, th2, w) for w in x_cv])
        acc_train = accuracy_score(y_sub, [SimpleNN2.predictClass(s, th1, th2, w) for w in x_sub])
        reg_acc_cv.append(acc_cv)
        reg_acc_train.append(acc_train)
        if max_acc < acc_cv:
            max_acc = acc_cv
            best_reg = r
        print("Validating regularization parameter [{0}]; Train accuracy: [{1}] CV accuracy: [{2}]"
              .format(r, acc_train, acc_cv))
    print("Best reg param: {0} with accuracy on CV dataset: {1}".format(best_reg, max_acc))
    plt.plot(regs, reg_acc_cv)
    plt.plot(regs, reg_acc_train)
    plt.show()
    return best_reg
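# Usage sketch for findBestRegularization, assuming the same DataModel and
# SimpleNN2 helpers used in test3 below (the 2000-row subset is illustrative,
# chosen to keep each trainSciPy2 call cheap):
def sweepRegularizationExample():
    (x, y) = DataModel.loadData("..\\train.csv")
    (x_train, x_cv, y_train, y_cv) = DataModel.splitData(x, y)
    s = SimpleNN2.NeuralNetConfig(784, 70, 10)
    return findBestRegularization(s, x_train[:2000, :], y_train[:2000], x_cv, y_cv)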
def test1():
    (x, y) = DataModel.loadData("..\\train.csv")
    (x_train, x_cv, y_train, y_cv) = DataModel.splitData(x, y)
    x_sub = x_train[:500, :]
    y_sub = y_train[:500]
    s = SimpleNN.SimpleNN([784, 70, 10])
    #s = Train.trainGradientDescent(s, x_sub, y_sub, 5)
    s = Train.trainSciPy(s, x_sub, y_sub, 5)
    acc_cv = accuracy_score(y_cv, [s.predictClass(w) for w in x_cv])
    print("Accuracy on CV set: {0}".format(acc_cv))
def test3():
    (x, y) = DataModel.loadData("..\\train.csv")
    (x_train, x_cv, y_train, y_cv) = DataModel.splitData(x, y)
    x_sub = x_train[:20000, :]
    y_sub = y_train[:20000]
    s = SimpleNN2.NeuralNetConfig(784, 70, 10)
    regLambda = 6.84
    #s = Train.trainGradientDescent(s, x_sub, y_sub, 5)
    th1, th2 = Train.trainSciPy2(s, x_sub, y_sub, regLambda)
    #th1, th2 = Train.trainGradientDescent2(s, x_sub, y_sub, 5)
    acc_cv = accuracy_score(y_cv, [SimpleNN2.predictClass(s, th1, th2, w) for w in x_cv])
    print("Accuracy on CV set: {0}".format(acc_cv))
def trainFullAndSave():
    (x, y) = DataModel.loadData("..\\train.csv")
    (x_train, x_cv, y_train, y_cv) = DataModel.splitData(x, y)
    s = SimpleNN2.NeuralNetConfig(784, 70, 10)
    regLambda = 6.84
    print("Training neural network on full dataset")
    th1, th2 = Train.trainSciPy2(s, x_train, y_train, regLambda)
    print("Training complete, checking accuracy on CV data")
    acc_cv = accuracy_score(y_cv, [SimpleNN2.predictClass(s, th1, th2, w) for w in x_cv])
    print("Accuracy on CV set: {0}".format(acc_cv))
    SimpleNN2.saveNetwork(s, th1, th2, "..\\NeuralNetwork.bin")
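# Loading the saved network back is the mirror image of saveNetwork.  The
# on-disk format is not shown in this file, so this is only a sketch assuming
# SimpleNN2 pickles the (config, theta1, theta2) triple; adjust to whatever
# saveNetwork actually writes.
def loadAndPredictExample():
    import pickle
    with open("..\\NeuralNetwork.bin", "rb") as f:
        s, th1, th2 = pickle.load(f)  # assumed layout: (config, theta1, theta2)
    (x, y) = DataModel.loadData("..\\train.csv")
    # Predict on a handful of rows as a smoke test
    return [SimpleNN2.predictClass(s, th1, th2, w) for w in x[:10]]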
#coding=utf8
import Train, MS, DB

# Train the model.
# Arguments: (1) the classifier, (2) the size of the training set,
# (3) select at most n keywords, (4) the minimum threshold for a word
# to be judged a keyword.
Train.training(MS.MODEL_TreeClassifier, 0.70, 5, 0.6)

# Load the trained classifier from disk:
# clf = Train.getCLF(MS.MODEL_TreeClassifier)

# Print at most the top n keywords for the document with id x.
# Arguments: (1) the classifier, (2) the maximum number of keywords,
# (3) the minimum threshold for a word to be judged a keyword, (4) the id.
# Train.getTopProbability(clf, 5, 0.5, 106868)

# Write the keywords of every document to txt and MySQL:
# Train.outputALlDianPingKeyWords(clf, 5, 0.6)
def train():
    Train.initialise()
import numpy as np
from scipy.io import loadmat
from scipy.optimize import check_grad
import SimpleNN
import Train

# Load pre-computed weights and data (the ex4 files from Andrew Ng's ML course)
mat = loadmat('..\\ex4weights.mat')
s = SimpleNN.SimpleNN([400, 25, 10])
s.theta = [np.transpose(mat["Theta1"]), np.transpose(mat["Theta2"])]
data = loadmat("..\\ex4data1.mat")
x = data["X"][:500, :]
y = data["y"][:500]
#check_grad(func = lambda p: s.computeCost(s.combineTheta(p), x, y, 0.5),
#           grad = lambda p: s.computeGrad(s.combineTheta(p), x, y, 0.5),
#           x0 = s.combineTheta(s.theta))
#(cost, grad) = s.computeCostGrad(s.theta, x, y, 1)
s = Train.trainSciPy(s, x, y, 0.05)
predictions = [s.predictClass(w) for w in x]
err_rate = np.mean([1 if actual != pred else 0 for (actual, pred) in zip(y, predictions)])
print("Error rate with pre-computed parameters: {0}".format(err_rate))
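# What the commented-out check_grad call above verifies: SciPy compares the
# analytic gradient against a finite-difference estimate and returns the norm
# of the difference.  A self-contained toy check (should print a value near 0):
err = check_grad(func=lambda p: np.sum(p ** 2),
                 grad=lambda p: 2 * p,
                 x0=np.array([1.0, -2.0, 3.0]))
print("Finite-difference gradient error on toy quadratic: {0}".format(err))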
import csv
import sys
import pickle
from sklearn.metrics import precision_recall_fscore_support
import Train as Tr
import DataProcess as DP  # assumed module name; DP must provide read_file/clean_data

if __name__ == "__main__":
    # In the terminal, enter 'python Deploy.py filename modelname'
    filename = sys.argv[1]
    modelname = sys.argv[2]
    # Read the new data set
    df = DP.read_file(filename)
    # Clean the data
    processed_df = DP.clean_data(df)
    # Construct X and y, the predictors and the response variable
    X, y = Tr.construct_X_y(processed_df, 0.5)
    # Load the model trained by Train.py
    f = open(modelname, 'rb')
    mod = pickle.load(f)
    f.close()
    # Get predictions and evaluation metrics for the new records
    y_pred = mod.predict(X.values[:, 1:])
    accu = (y.values == y_pred).mean()
    precision, recall, fscore, support = precision_recall_fscore_support(y.values, y_pred)
    potential_saving = Tr.possible_saving(y_pred, y.values, X.values)
    res_str = ("Result: accuracy = {}, precision = {}, recall = {}, fscore = {}, "
               "num of TP = {}, PossibleMonthlyBilling = {}").format(
        accu, precision[1], recall[1], fscore[1],
        len(potential_saving['ClientID']), sum(potential_saving['AvgMonthlyBilling']))
    f = open('ChurnClient.csv', "wb")
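    # The original excerpt ends as ChurnClient.csv is opened; a plausible
    # completion (an assumption -- the real column layout is not shown)
    # writes out the clients flagged as likely churners:
    writer = csv.writer(f)
    writer.writerow(["ClientID", "AvgMonthlyBilling"])
    for cid, bill in zip(potential_saving['ClientID'],
                         potential_saving['AvgMonthlyBilling']):
        writer.writerow([cid, bill])
    f.close()
    print(res_str)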
def on_message(client, userdata, msg):
    print msg.topic, "->", str(msg.payload)
    o = json.loads(msg.payload)
    # debugging purposes
    print 'o=', o
    # playing with the Train class
    print 'sensorID = ', o["SensorID"]
    tid = int(o["TrainID"])  # string converted to int
    print 'trainID = ', tid
    liveness = False
    if o["Status"] == "On":
        liveness = True
    snid = int(o["SensorID"])
    sNodeName = o["SensorName"]
    sNode = SensorNode.SensorNode(snid, sNodeName, liveness)
    # Process the sensor modules inside a single sensor node
    if sNodeName == "TempHum":
        # A TempHum sensor node carries temperature and humidity sensor modules
        sensorModules = dict()
        sname = "temp"
        smeasurement = "farenheit"
        svalue = o["temp"]
        print 'temp=>', svalue
        sensorModules[sname] = Sensor.Sensor(sname, smeasurement, svalue)
        sname = "hum"
        smeasurement = "%"
        svalue = o["hum"]
        sensorModules[sname] = Sensor.Sensor(sname, smeasurement, svalue)
        sNode.setCurrentStatus(liveness, sensorModules)
        print 'sensorModules:', sensorModules
        print 'sNode is set', sNode.sensors
    if tid in Trains:
        print "use existing TRAIN------------------------"
        Trains[tid].setSensorNodeStatus(liveness, sNode)
    else:
        print 'Create new TRAIN++++++++++++++++++++++++++++'
        Trains[tid] = Train.Train(tid)
        Trains[tid].setSensorNodeStatus(liveness, sNode)
    print 'Trains->', Trains
    # Testing the wrapSubwayTotalInfo function:
    #jsondumps = wrapSubwayTotalInfo(Trains, bigTable, "15:03")
    #print 'jsondumps =>', jsondumps
    # MongoDB operations (temporarily off in the original, except the insert)
    userdata.insert(o)
    #results = userdata.find()
    #for record in results:
    #    print 'record = ', record
    if len(remote_web_socket_clients) > 0:
        for id in remote_web_socket_clients:
            remote_web_socket_clients[id].sendMessage(unicode(msg.payload))
        print 'Message sent to WebSocketClients'
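# Wiring sketch for the handler above (an assumption -- the original
# connection code is not shown in this excerpt).  It uses the standard
# paho-mqtt client API; the broker host, topic, and Mongo database /
# collection names are placeholders.
import paho.mqtt.client as mqtt
from pymongo import MongoClient

collection = MongoClient()["subway"]["readings"]   # hypothetical db/collection
client = mqtt.Client(userdata=collection)          # on_message inserts into it
client.on_message = on_message
client.connect("localhost", 1883)
client.subscribe("subway/sensors")                 # hypothetical topic
client.loop_forever()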
mymap = pygmaps.maps(37.7016, -121.9003, 16)
getSegments()
getGates()
totalDistance = 0
for gate in gateList:
    if gate.getGateStatStr() == 'open':
        mymap.addpoint(gate.getGateLat(), gate.getGateLon(), gate.getName(), utility.getColorCode("green"))
    else:
        mymap.addpoint(gate.getGateLat(), gate.getGateLon(), gate.getGatename(), utility.getColorCode("red"))
print "Number of segments: " + str(len(segmentList))
for seg in segmentList:
    dBegin = utility.getTraveledDistance(trackCoordinates, seg.getStartPoint())
    dEnd = utility.getTraveledDistance(trackCoordinates, seg.getEndPoint())
    print "\tSegment: " + seg.getSegName() + " starts at " + str(dBegin/1000) + "km and ends at " + str(dEnd/1000) + "km ---> Segment length: " + str(seg.getSegmentLength())
initialPos1 = utility.getPointAtDistanceInPath(trackCoordinates, 30)
train1 = Train.train(initialPos1, 0, 2, 1, segmentList, trackCoordinates, gateList)
trainList.append(train1)
initialPos2 = utility.getPointAtDistanceInPath(trackCoordinates, 50)
train2 = Train.train(initialPos2, 0, 2, 2, segmentList, trackCoordinates, gateList)
trainList.append(train2)
stationComputer = StationComputer.StationComputer(trainList, segmentList, trackCoordinates, gateList)
mymap.addTrain(initialPos1[0], initialPos1[1], 1)
mymap.addTrain(initialPos2[0], initialPos2[1], 2)
mymap.draw('./mymap.html')
#train.nextSegmentsInRange(10)
timer = simplegui.create_timer(100, update)
timer.start()
import pickle as pk
import argparse
import os
import random
import time
import warnings
from Train import *

parser = argparse.ArgumentParser()
log_dir = 'logs-interf2.pk'
parser.add_argument('--store_dir', default='test', help='dir for storing results')
parser.add_argument('--data_dir', default='../../Data/cifar-100-python/test-lv3', help='dir for test data')
args = parser.parse_args()

fo = open(log_dir, 'rb')
logs = pk.load(fo)
fo.close()

acc = []
for i in range(len(logs)):
    for j in range(len(logs[i])):
        testbed = Train()
        acc.append(testbed.test4seq(logs[i][j], args.data_dir))

fo = open('seq_acc-interf2-lv3.pk', 'wb')
pk.dump(acc, fo)
fo.close()
#TODO: Preprocess.  The original excerpt starts mid-function; the header below
# is reconstructed from the call site (loading `dataset` from `path` is still
# elided by the TODO, and the last argument appears unused in this excerpt).
def test(path, orig_label, center, max_dist, limit):
    positive = 0
    negative = 0
    # REAL TESTING
    for i in range(0, len(dataset)):
        # Squared Euclidean distance to the trained center (no sqrt;
        # max_dist is assumed to be on the same squared scale)
        dist = 0
        for j in range(0, len(dataset[i])):
            dist += (dataset[i][j] - center[j]) * (dataset[i][j] - center[j])
        if dist < max_dist:
            positive += 1
        else:
            negative += 1
    if orig_label:
        accuracy = positive / (positive + negative)
    else:
        accuracy = negative / (negative + positive)
    return accuracy

cen, maxd = Train.train(['Aakash\Aakash_1.csv'], 1000000)
# cen, maxd = Train.train(['Rahul\Ra_2.csv'], 1000000)
acc = test('Rahul\Ra_2.csv', False, cen, maxd, 100000)
# acc = test('Aakash\Aakash_2.csv', True, cen, maxd, 100000)
print(acc)
# (The excerpt opens partway through read_batch_data; batch_data and
# X_train_batch are loaded just above this point.)
y_train_batch = batch_data['y_train_batch']
X_CV_batch = batch_data['X_CV_batch']
y_CV_batch = batch_data['y_CV_batch']
X_test_batch = batch_data['X_test_batch']
y_test_batch = batch_data['y_test_batch']
X_merge_train_batch = np.append(X_train_batch, X_CV_batch, axis=0)
y_merge_train_batch = np.append(y_train_batch, y_CV_batch, axis=0)
np.savez(Config.FILE_ZERO_RBM_PATH, train_data_output=X_test_batch)

if __name__ == '__main__':
    #PreProcessData.pre_process_data(Config.FILE_ORIGIN_ZIP_DATA_PATH, Config.FILE_BATCH_DATA_PATH)
    read_batch_data()
    PreRBMTrain.pre_rbm_train()
    #Train.train(X_merge_train_batch, y_merge_train_batch, Config.FILE_TRAIN_PATH)
    Train.train(X_test_batch, y_test_batch, Config.FILE_TRAIN_PATH)
    #print ForwardPropagation.forward(X_merge_train_batch, y_merge_train_batch)
    print ForwardPropagation.forward(X_test_batch, y_test_batch)
# (The excerpt opens inside a try block; the `try:` restored here matches the
# `except:` that follows.)
try:
    if '-kern' in args:
        fullarg = args[args.index("-kern")+1]
        kerntype = fullarg[:fullarg.rfind('_')]
        print kerntype
        dist = float(fullarg[fullarg.rfind('_')+1:])
        print dist
    else:
        kerntype = 'quartic'
        dist = 900000.0
except:
    print "Kernel argument is not formatted correctly"
    print "it should be something like quartic_900000 or epanech_800000 (units must be meters)"
    print "run with -help for more options"
    sys.exit("Error")
Train.train(f, rf_obs_in, rf_std_in, wu_y_direct, ulist, kerntype, lam, b_direct)

if mode_arg.lower() == "morans_calc":
    import MCV1
    print "Beginning Morans Calc Process"
    if '-tf' in args:
        f = args[args.index("-tf")+1]
    try:
        if '-kern' in args:
            fullarg = args[args.index("-kern")+1]
            kerntype = fullarg[:fullarg.rfind('_')]
            print kerntype
            dist = float(fullarg[fullarg.rfind('_')+1:])
#!/usr/bin/env python3.6
# -*- coding:utf-8 -*-
"""
__author__ = "YYF"
__MTime__ = 18-11-26 11:22 AM
"""
import Nets
import Train

if __name__ == '__main__':
    net = Nets.PNet()
    trainer = Train.Trainer(net, './param/pnet.pt', r'/home/lievi/celeba_gen/12')
    trainer.train()