Example #1
def trainSystem(directory, unigrams, bigrams, twd, ss, ssb, akw, flag):
	allText = ""
	for filename in os.listdir(directory):
		if filename.startswith(".") or filename.startswith("_"):
			continue
		# Open File and get the Text
		infile = open(directory + filename)
		filetext = infile.read()
		infile.close()
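		# Strip underscores and turn line breaks into sentence breaks so the
		# accumulated text can be handed to TextRank below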
		newfiletext = re.sub("_"," ",filetext)
		allText += re.sub("\n", ". ", newfiletext) + " "

		lines = filetext.split("\n")

		T.trainGrams(lines, unigrams, bigrams)
		T.trainStructures(lines, twd, ss, ssb)
	# Get TextRank Keywords
	if flag:
		keywords = tr.main(allText)
		for key in keywords:
			if " " not in key.encode('ascii', 'ignore'):
				akw.append(key.encode('ascii', 'ignore'))

		'''# Uncomment to use for making a most important words file
		for word in akw:
			print word
		exit(1)'''
	# Get Conditional Probabilities
	sentenceStructureProbs = CP.createSentenceProbs(ss, ssb)
	wordConditionalProbs = CP.getProbabilities(unigrams, bigrams) 
	CP.sortConditionalProbs(wordConditionalProbs)

	return wordConditionalProbs, sentenceStructureProbs, akw
Example #2
def main():
	# Seed Sentence
	seed = sys.argv[1]
	# Directory that the lyrics are in
	directory = sys.argv[2]
	# Path to most important words file, NONE if you don't have one
	keywordsPath = sys.argv[3]
	# Optional
	# Key = Word Unigram
	# Value = Word Unigram Frequency
	unigrams = {}
	# Key = Word Bigram
	# Value = Word Bigram Frequency
	bigrams = {}
	# Key: Word Type, t
	# Value: Set() of words of type t
	typeWordDict = {} 
	# Key: Structure of a line (ex: "PRP VBP NN")
	# Value: Frequency of the key
	sentenceStructures = {}
	# Key: Structure of 2 consecutive lines separated by a comma 
	# 	- (ex: "PRP VBP NN,NNP IN DT NN")
	# Value: Frequency of the key
	sentenceStructureBigram = {}
	# Key = Word Unigram
	# Value = Priority Queue Q (Highest Probability at the top)
	# 	- Get Highest Probability: wordConditionalProbs[key].get().word
	wordConditionalProbs = {}
	# Key = Sentence Structure
	# Value = Priority Queue Q (Highest Probability at the top)
	# 	- Get Highest Probability: sentenceStructureProbs[key].get().word
	sentenceStructureProbs = {}
	# List of most important words
	keywords = []
	# Key = Part of speech tag
	# Value = Most frequent unigram of that POS
	tagToWord = {}
	# Key = Part of speech tag
	# Value = Most important unigram of that POS
	tagToWordImp = {}


	flag = keywordsPath == "NONE"
	# Train on the data
	wordConditionalProbs, sentenceStructureProbs, keywords = trainSystem(directory, unigrams, bigrams, typeWordDict, sentenceStructures, sentenceStructureBigram, keywords, flag)
	tagToWord = T.findMostPopPOS(typeWordDict, unigrams)	
	if not flag:
		keywords = T.getListFromFile(keywordsPath)
		tagToWordImp = T.getKeywordsForTag(keywords, tagToWord)

	# Generate the song
	generateSong(seed, wordConditionalProbs, sentenceStructureProbs, typeWordDict, tagToWord, tagToWordImp)
Example #3
def findBestRegularization(s, x_sub, y_sub):
    regs = np.linspace(0, 10, 20)
    reg_acc_cv = []
    reg_acc_train = []
    max_acc = 0
    best_reg = 0

    for r in regs:
        th1, th2 = Train.trainSciPy2(s, x_sub, y_sub, r)

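        # Score this reg value on both the CV and training splits to expose
        # over/under-fitting; x_cv and y_cv are presumably module-level
        # globals set by the caller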
        acc_cv = accuracy_score(y_cv, [SimpleNN2.predictClass(s, th1, th2, w) for w in x_cv])
        acc_train = accuracy_score(y_sub, [SimpleNN2.predictClass(s, th1, th2, w) for w in x_sub])
        reg_acc_cv.append(acc_cv)
        reg_acc_train.append(acc_train)

        if max_acc < acc_cv:
            max_acc = acc_cv
            best_reg = r


        print("Validating regularization parameter [{0}]; Train accuracy: [{1}] CV accuracy: [{2}]"
              .format(r, acc_train, acc_cv))

    print("Best reg param: {0} with accuracy on CV dataset: {1}".format(best_reg, max_acc))

    plt.plot(regs, reg_acc_cv)
    plt.plot(regs, reg_acc_train)
    plt.show()

    return best_reg
Example #4
def test1():
    (x, y) = DataModel.loadData("..\\train.csv")

    (x_train, x_cv, y_train, y_cv) = DataModel.splitData(x, y)

    x_sub = x_train[:500,:]
    y_sub = y_train[:500]

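    # Layer sizes: 784 inputs (28x28 pixel images), 70 hidden units, 10 output classes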
    s = SimpleNN.SimpleNN([784, 70, 10])

    #s = Train.trainGradientDescent(s, x_sub, y_sub, 5)
    s = Train.trainSciPy(s, x_sub, y_sub, 5)
    acc_cv = accuracy_score(y_cv, [s.predictClass(w) for w in x_cv])
    print("Accuracy on CV set: {0}", acc_cv)
Example #5
def test3():
    (x, y) = DataModel.loadData("..\\train.csv")

    (x_train, x_cv, y_train, y_cv) = DataModel.splitData(x, y)

    x_sub = x_train[:20000,:]
    y_sub = y_train[:20000]

    s = SimpleNN2.NeuralNetConfig(784, 70, 10)

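    # Regularization strength, presumably the best value found by a sweep
    # like findBestRegularization above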
    regLambda = 6.84
    #s = Train.trainGradientDescent(s, x_sub, y_sub, 5)
    th1, th2 = Train.trainSciPy2(s, x_sub, y_sub, regLambda)
    #th1, th2 = Train.trainGradientDescent2(s, x_sub, y_sub, 5)

    acc_cv = accuracy_score(y_cv, [SimpleNN2.predictClass(s, th1, th2, w) for w in x_cv])
    print("Accuracy on CV set: {0}".format(acc_cv))
Example #6
def trainFullAndSave():
    (x, y) = DataModel.loadData("..\\train.csv")

    (x_train, x_cv, y_train, y_cv) = DataModel.splitData(x, y)

    s = SimpleNN2.NeuralNetConfig(784, 70, 10)

    regLambda = 6.84
    
    print("Training neural network on full dataset")
    #s = Train.trainGradientDescent(s, x_sub, y_sub, 5)
    th1, th2 = Train.trainSciPy2(s, x_train, y_train, regLambda)
    #th1, th2 = Train.trainGradientDescent2(s, x_sub, y_sub, 5)

    print("Training complete, checking accuracy on CV data")

    acc_cv = accuracy_score(y_cv, [SimpleNN2.predictClass(s, th1, th2, w) for w in x_cv])
    print("Accuracy on CV set: {0}".format(acc_cv))

    SimpleNN2.saveNetwork(s, th1, th2, "..\\NeuralNetwork.bin")
Example #7
#coding=utf8
import Train, MS, DB

# Train the model
# Arguments: 1) the classifier, 2) the size of the training set, 3) select at most n keywords, 4) the minimum threshold for judging a word to be a keyword
Train.training(MS.MODEL_TreeClassifier, 0.70, 5, 0.6)

# Load the trained classifier from disk
# clf = Train.getCLF(MS.MODEL_TreeClassifier)

# Print at most the top n keywords for the text with the given id
# Arguments: 1) the classifier, 2) the maximum number of keywords, 3) the minimum threshold for judging a keyword, 4) the id
# Train.getTopProbability(clf, 5, 0.5, 106868)

# Write the keywords of every text to txt and MySQL
# Train.outputALlDianPingKeyWords(clf, 5, 0.6)
Example #8
def train():
    Train.initialise()
Example #9
import numpy as np
from scipy.io import loadmat
from scipy.optimize import check_grad

import SimpleNN
import Train

mat = loadmat('..\\ex4weights.mat')

s = SimpleNN.SimpleNN([400, 25, 10])

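# Pre-trained weights (Coursera ML ex4 format), transposed to this
# implementation's layout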
s.theta = [np.transpose(mat["Theta1"]), np.transpose(mat["Theta2"])]

data = loadmat("..\\ex4data1.mat")

x = data["X"][:500,:]
y = data["y"][:500]

#check_grad(func = lambda p: s.computeCost(s.combineTheta(p), x, y, 0.5),
#                grad = lambda p: s.computeGrad(s.combineTheta(p), x, y, 0.5),
#                x0 = s.combineTheta(s.theta))

#(cost, grad) = s.computeCostGrad(s.theta, x, y, 1)

s = Train.trainSciPy(s, x, y, 0.05)

predictions = [s.predictClass(w) for w in x]

err_rate = np.mean([1 if pred != check else 0 for (pred, check) in zip(predictions, y)])

print("Error rate with pre-computed paramaters: {0}".format(err_rate))

Example #10
import csv
import sys
import pickle

from sklearn.metrics import precision_recall_fscore_support
# DP and Tr are project-local modules (data preparation and Train.py);
# their imports are not shown in the original snippet

if __name__ == "__main__":
    
    #In the terminal, enter 'python Deploy.py filename modelname'
    filename = sys.argv[1]
    modelname = sys.argv[2]
    
    #Read new data set
    df = DP.read_file(filename)
    
    #Clean data
    processed_df = DP.clean_data(df)
    
    #Construct X and y, which stand for predictors and response variable 
    X, y = Tr.construct_X_y(processed_df, 0.5)
    
    #Load trained model from Train.py
    with open(modelname, 'rb') as f:
        mod = pickle.load(f)
    
    #Get predictions and evaluation metrics for new records
    y_pred = mod.predict(X.values[:,1:])
    accu = (y.values == y_pred).mean()
    precision, recall, fscore, threshold = precision_recall_fscore_support(y.values,y_pred)
    potential_saving = Tr.possible_saving(y_pred, y.values, X.values)
    res_str = "Result: accuracy = {}, precision = {}, recall = {}, fscore = {}, num of TP = {}, PossibleMonthlyBilling = {}"\
        .format(accu, precision[1], recall[1], fscore[1], len(potential_saving['ClientID']), sum(potential_saving['AvgMonthlyBilling']))
    
    f = open('ChurnClient.csv', "wb")
Example #11
def on_message(client, userdata, msg):
   print msg.topic,"->", str(msg.payload)
   o = json.loads(msg.payload)


   #debugging purpose
   print 'o=',o

   #playing with Train class
   print 'sensorID = ', o["SensorID"]
   tid = int(o["TrainID"]) #string type converted to int
   print 'trainID = ', tid

   liveness = False
   if o["Status"]=="On":
      liveness = True

   snid = int(o["SensorID"])
   sNodeName = o["SensorName"]
   sNode = SensorNode.SensorNode(snid, sNodeName, liveness)

   #process sensors modules inside single sensorNode
   if sNodeName=="TempHum": # if the the sensor node is TempHum then it caries temp and hum sensor modules

      sensorModules = dict()

      sname = "temp"
      smeasurement = "farenheit"
      svalue = o["temp"]
      print 'temp=>', svalue
      #tempSensor = Sensor(sname, smeasurement, svalue)
      sensorModules[sname] =  Sensor.Sensor(sname, smeasurement, svalue)

      sname = "hum"
      smeasurement = "%"
      svalue = o["hum"]
      #humSensor = Sensor(sname, smeasurement, svalue)
      sensorModules[sname] = Sensor.Sensor(sname, smeasurement, svalue)

      sNode.setCurrentStatus(liveness, sensorModules)
      print 'sensorModules:',sensorModules
      print 'sNode is set', sNode.sensors
      

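   # Trains is a module-level dict mapping train id -> Train.Train instance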
   if tid in Trains:
      print "use existing TRAIN------------------------"
      Trains[tid].setSensorNodeStatus(liveness, sNode)
   else:
      print 'Create new TRAIN++++++++++++++++++++++++++++'
      Trains[tid] = Train.Train(tid)
      Trains[tid].setSensorNodeStatus(liveness, sNode)

   print 'Trains->',Trains


   # testing wrapSubwayTotalInfo function
   #jsondumps = wrapSubwayTotalInfo(Trains, bigTable, "15:03")
   #print 'jsondumps =>',jsondumps

   """ Temperarily off """

   # mongoDB related operations
   userdata.insert(o)
   #results = userdata.find()
   #for record in results:
   #   print 'record = ', record

   if len(remote_web_socket_clients)>0:
      for id in remote_web_socket_clients:
         remote_web_socket_clients[id].sendMessage(unicode(msg.payload))
      print 'Message sent to WebSocketClients'
   """ """
Example #12
mymap = pygmaps.maps(37.7016, -121.9003, 16)
getSegments()
getGates()
totalDistance = 0
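# Draw each gate on the map: green marker if the gate is open, red otherwise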
for gate in gateList:
    if gate.getGateStatStr() == 'open':
        mymap.addpoint(gate.getGateLat(),gate.getGateLon(), gate.getName(), utility.getColorCode("green"))
    else:
        mymap.addpoint(gate.getGateLat(),gate.getGateLon(),gate.getGatename(), utility.getColorCode("red"))        
print "Number of segments: " + str(len(segmentList))
for seg in segmentList:
    dBegin = utility.getTraveledDistance(trackCoordinates,seg.getStartPoint())
    dEnd = utility.getTraveledDistance(trackCoordinates,seg.getEndPoint())
    print "\tSegment: " + seg.getSegName() + " starts at " + str(dBegin/1000) + "km and ends in " + str(dEnd/1000) + "km ---> Segment length:" + str(seg.getSegmentLength()) 
initialPos1 = utility.getPointAtDistanceInPath(trackCoordinates,30)
train1 = Train.train(initialPos1,0,2,1,segmentList,trackCoordinates,gateList) 
trainList.append(train1)
initialPos2 = utility.getPointAtDistanceInPath(trackCoordinates,50)
train2 = Train.train(initialPos2,0,2,2,segmentList,trackCoordinates,gateList) 
trainList.append(train2)

stationComputer = StationComputer.StationComputer(trainList,segmentList,trackCoordinates,gateList)
mymap.addTrain(initialPos1[0], initialPos1[1], 1)
mymap.addTrain(initialPos2[0], initialPos2[1], 2)
mymap.draw('./mymap.html')
#train.nextSegmentsInRange(10)

timer = simplegui.create_timer(100, update)
timer.start()

Example #13
import pickle as pk
import argparse
import os
import random
import time
import warnings

from Train import *

parser = argparse.ArgumentParser()

log_dir = 'logs-interf2.pk'
parser.add_argument('--store_dir', default='test', help='dir for store result')
parser.add_argument('--data_dir',
                    default='../../Data/cifar-100-python/test-lv3',
                    help='dir for test data')
args = parser.parse_args()

fo = open(log_dir, 'rb')
logs = pk.load(fo)
fo.close()
acc = []
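# Replay each logged sequence on a fresh Train instance and record the
# accuracy returned by test4seq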
for i in range(len(logs)):
    for j in range(len(logs[i])):
        testbed = Train()
        acc.append(testbed.test4seq(logs[i][j], args.data_dir))

fo = open('seq_acc-interf2-lv3.pk', 'wb')
pk.dump(acc, fo)
fo.close()
Example #14
def test(filename, orig_label, center, max_dist, n_samples):
    # Hypothetical signature, reconstructed from the calls below; the code
    # that loads `dataset` from `filename` is missing from the original snippet
    #TODO : Preprocess

    positive = 0
    negative = 0

    #REAL TESTING
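    # Classify each sample by its squared Euclidean distance to the learned
    # center: within max_dist counts as positive, otherwise negative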
    for i in range(0,len(dataset)):
        dist = 0
        for j in range(0,len(dataset[i])):
            dist += (dataset[i][j] - center[j])*(dataset[i][j] - center[j])
        if dist<max_dist:
            positive += 1
        else:
            negative += 1

    accuracy = 0

    if orig_label:
        accuracy = positive/(positive+negative)
    else:
        accuracy = negative/(negative+positive)

    return accuracy

cen, maxd = Train.train(['Aakash\Aakash_1.csv'],1000000)
# cen, maxd = Train.train(['Rahul\Ra_2.csv'],1000000)

acc = test('Rahul\Ra_2.csv', False, cen, maxd, 100000)

# acc = test('Aakash\Aakash_2.csv', True, cen, maxd,100000 )
print(acc)
Example #15
def read_batch_data():
    # Opening lines reconstructed: the original snippet begins mid-function.
    # How batch_data is loaded is an assumption (np.savez is used below, so
    # an .npz archive seems plausible).
    batch_data = np.load(Config.FILE_BATCH_DATA_PATH)
    X_train_batch = batch_data['X_train_batch']
    y_train_batch = batch_data['y_train_batch']

    X_CV_batch = batch_data['X_CV_batch']
    y_CV_batch = batch_data['y_CV_batch']
    
    X_test_batch = batch_data['X_test_batch']
    y_test_batch = batch_data['y_test_batch']
    
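    # Merged train+CV arrays; only referenced by the commented-out calls below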
    X_merge_train_batch = np.append(X_train_batch, X_CV_batch, axis = 0)
    y_merge_train_batch = np.append(y_train_batch, y_CV_batch, axis = 0)
    
    np.savez(Config.FILE_ZERO_RBM_PATH, train_data_output = X_test_batch)

    # Return the test batch so the __main__ block below can reference it
    return X_test_batch, y_test_batch
    
if __name__ == '__main__':
    
    #PreProcessData.pre_process_data(Config.FILE_ORIGIN_ZIP_DATA_PATH, Config.FILE_BATCH_DATA_PATH)
    
    X_test_batch, y_test_batch = read_batch_data()
    
    PreRBMTrain.pre_rbm_train()

    #Train.train(X_merge_train_batch, y_merge_train_batch, Config.FILE_TRAIN_PATH)
    Train.train(X_test_batch, y_test_batch, Config.FILE_TRAIN_PATH)
    
    #print ForwardPropagation.forward(X_merge_train_batch, y_merge_train_batch)

    print ForwardPropagation.forward(X_test_batch, y_test_batch)
Example #16
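            # A -kern value like 'quartic_900000' encodes the kernel type and
            # its bandwidth in meters, split on the final underscore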
            if '-kern' in args:
                fullarg = args[args.index("-kern")+1]
                kerntype = fullarg[:fullarg.rfind('_')]
                print kerntype
                dist = float(fullarg[fullarg.rfind('_')+1:])
                print dist
            else:
                kerntype = 'quartic'
                dist = 900000.0
        except Exception:
            print "Kernel Argument is not formatted correctly"
            print "it should be something like quartic_900000 or epanech_800000 (units must be meters)"
            print "run with -help for more options"
            sys.exit("Error")

        Train.train(f, rf_obs_in, rf_std_in, wu_y_direct, ulist, kerntype, lam, b_direct)

    if mode_arg.lower() == "morans_calc":
        import MCV1

        print "Beginning Morans Calc Process"

        if '-tf' in args:
            f = args[args.index("-tf")+1]

        try:
            if '-kern' in args:
                fullarg = args[args.index("-kern")+1]
                kerntype = fullarg[:fullarg.rfind('_')]
                print kerntype
                dist = float(fullarg[fullarg.rfind('_')+1:])
Example #17
#!/usr/bin/env python3.6
# -*- coding:utf-8 -*-
""" __author__ = "YYF" 
    __MTime__ = 18-11-26 上午11:22
"""
import Nets
import Train

if __name__ == '__main__':
    net = Nets.PNet()

    trainer = Train.Trainer(net, './param/pnet.pt',
                            r'/home/lievi/celeba_gen/12')
    trainer.train()