# ---------------------------------------------------------------------------
# '''pick trained word vec'''
# Load a previously trained statistical language model (with its word
# vectors) from disk instead of re-fitting it on the Brown corpus.
# NOTE: cPickle is Python 2 only; on Python 3 this would be `import pickle`.
import cPickle

dirs = "C:\\Users\\Administrator.NBJXUEJUN-LI\\Desktop\\project\\Python\\NLP\\savedObject\\brownCorpus\\"
# Context manager ensures the file handle is closed even if load() raises;
# the original `cPickle.load(open(...))` leaked the handle.
with open(dirs + "slm.pkl", "rb") as slm_file:
    slm = cPickle.load(slm_file)
# ---------------------------------------------------------------------------
''' hyper parameters '''
wordDim = 100       # word-embedding dimensionality
windowDim = 5       # convolution window size (words per window)
actfunc = 'tanh'    # hidden activation for the recurrent CNN layer
# Binary output layer: 100 inputs -> 1 sigmoid unit.
outlayer = baseNeuronLayer(100, 1, actfunc='sigmoid')
cnnlayer = recCnn(wordDim, windowDim, actfunc)
learning = 0.1      # SGD learning rate
l2 = 0.001          # L2 regularisation strength
# ---------------------------------------------------------------------------
''' training '''
pred = []   # collected predictions (presumably for monitoring — confirm)
true = []   # matching gold labels
ers = 0     # running error accumulator
# Infinite training loop (`while True` replaces the original `while 2 > 1`;
# identical behavior, idiomatic form).
while True:
    # Draw one random sentence index per iteration (mini-batch of size 1).
    for minibatch in np.random.choice(len(slm.codeCorps), 1):
        corp = slm.codeCorps[minibatch]
        # NOTE(review): the loop body continues beyond this chunk of the
        # file; the remainder is not visible here.
# NOTE(review): this chunk begins mid-logic — `sent`, `maxNgram`,
# `nchar2code`, `fakehashCorps` are all defined outside the visible region
# (presumably inside an enclosing loop over sentences).  The indentation
# below is reconstructed after newline loss; confirm the grouping under the
# `if` against the full file.
if len(sent) > 4:
    # Build a corrupted ("fake") context for negative sampling, then hash it
    # into character-ngram codes.
    sent = slm.getFakeContext(sent)
    hashcorp = []
    # Join words with '#' and wrap with start '&' / end '*' sentinels so
    # boundary ngrams are distinguishable from interior ones.
    sent = '#'.join(sent)
    sent = '&' + sent + '*'
    # Collect the code of every known character ngram (lengths 2..maxNgram-1)
    # occurring in the sentinel-wrapped string.
    for ngram in range(2, maxNgram):
        for idx in range(len(sent) - ngram + 1):
            if sent[idx:idx + ngram] in nchar2code:
                hashcorp.append(nchar2code[sent[idx:idx + ngram]])
    fakehashCorps.append(hashcorp)
# ---------------------------------------------------------------------------
from sklearn.metrics import roc_auc_score

# Random initial embedding table: one 100-dim row per known character ngram.
hashVec = np.random.uniform(0, 1, size=(len(nchar2code), 100))
outlayer = baseNeuronLayer(100, 1, actfunc='sigmoid')    # binary output layer
hiddenlayer = baseNeuronLayer(100, 100, actfunc='tanh')  # 100 -> 100 hidden
learning = 0.05     # SGD learning rate
l2 = 0.00001        # L2 regularisation strength
# Infinite training loop over the hashed corpus.
while 2 > 1:
    for senidx in range(0, len(hashCorps)):
        # One positive example (real sentence) and one negative (fake
        # sentence); each represented as the sum of its ngram embeddings.
        tmpx = np.array([
            np.sum(hashVec[hashCorps[senidx]], axis=0),
            np.sum(hashVec[fakehashCorps2[senidx]], axis=0)
        ])
        tmpy = np.array([[1], [0]])     # labels: real = 1, fake = 0
        # Ngram-code index lists for both examples (used later for the
        # embedding-gradient scatter, presumably — confirm downstream).
        tmpindx = [hashCorps[senidx], fakehashCorps2[senidx]]
        if senidx == 0:
            # First sentence initialises the stacked design matrix / labels.
            y = tmpy
            hashProj = tmpx
            # NOTE(review): the loop body continues past this chunk
            # (presumably an `else` stacking subsequent examples onto
            # y / hashProj); the remainder is not visible here.
# Project-local layers: convolutional word2vec layer and a statistical
# language model built over the corpus.
from DeepLearning.CnnNeuron import word2vecCovLayer
from NLP.statisticLanguageModel import statisLM
import numpy as np
# ---------------------------------------------------------------------------
# Build the training corpus from the Brown corpus (all categories) and fit
# the statistical language model with 50-dimensional word vectors.
# NOTE(review): `brown` and `baseNeuronLayer` are not imported in this chunk;
# presumably imported elsewhere in the file — confirm.
corps = brown.sents(categories=None)
corps = list(corps)
slm = statisLM(corps, 50)
# ---------------------------------------------------------------------------
window = 2              # convolution window size (words per window)
wordDim = 50            # word-embedding dimensionality
outDim = 50             # CNN output dimensionality
outs = 1                # single output unit (binary prediction)
hiddenFunc = 'tanh'     # hidden activation
outFunc = 'sigmoid'     # output activation
cnnlayer = word2vecCovLayer(window, wordDim, outDim, actfunc=hiddenFunc)
outlayer = baseNeuronLayer(outDim, outs, actfunc=outFunc)
# ---------------------------------------------------------------------------
'''if pickle from the save'''
# Alternative startup path: restore a previously trained model and layer
# weights from disk instead of fitting from scratch (kept commented out).
#import cPickle
#dirs = "C:\\Users\\Administrator.NBJXUEJUN-LI\\Desktop\\project\\Python\\NLP\\savedObject\\CompCorpus\\"
#slm = cPickle.load(open(dirs+"slm.pkl","rb"))
#cnnlayerPara = cPickle.load(open(dirs+"cnnlayer.pkl","rb"))
#outlayerPara = cPickle.load(open(dirs+"outlayer.pkl","rb"))
#cnnlayer.W,cnnlayer.b = cnnlayerPara
#outlayer.W,outlayer.b = outlayerPara
# ---------------------------------------------------------------------------
''' function '''
l2 = 0.001          # L2 regularisation strength
learning = 0.1      # SGD learning rate