def trainmodel():
    """Train a parsing model on the sample data and save it to disk."""
    fvocab = "data/sample/vocab.pickle.gz"
    fdata = "data/sample/trn.data"
    flabel = "data/sample/trn.label"
    # Load the vocabulary and the label-to-index map built by createtrndata()
    D = load(gzip.open(fvocab))
    vocab, labelidxmap = D['vocab'], D['labelidxmap']
    print('len(vocab) = {}'.format(len(vocab)))
    # Load the training matrix and label vector
    data = Data()
    trnM, trnL = data.loadmatrix(fdata, flabel)
    print('trnM.shape = {}'.format(trnM.shape))
    # Train the parsing model and write it to disk
    idxlabelmap = reversedict(labelidxmap)
    pm = ParsingModel(vocab=vocab, idxlabelmap=idxlabelmap)
    pm.train(trnM, trnL)
    pm.savemodel("model/parsing-model.pickle.gz")
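# Minimal sketch: read back the artifact written by pm.savemodel() above.
# Like the vocabulary file, it is a gzip-compressed pickle; what exactly is
# pickled (a dict, the classifier, or the whole model) is an assumption here,
# so adjust the unpacking to match ParsingModel.savemodel.
import gzip
import cPickle as pickle  # `import pickle` on Python 3

with gzip.open("model/parsing-model.pickle.gz") as fin:
    saved = pickle.load(fin)
print('loaded object of type {}'.format(type(saved)))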
def createtrndata(path="data/training/", topn=10000, bcvocab=None):
    """Build the training data, vocabulary, and feature matrices used by trainmodel()."""
    data = Data(bcvocab=bcvocab, withdp=WITHDP,
                fdpvocab="data/resources/word-dict.pickle.gz",
                fprojmat="data/resources/projmat.pickle.gz")
    data.builddata(path)
    # Keep only the topn most frequent features in the vocabulary
    data.buildvocab(topn=topn)
    data.buildmatrix()
    fdata = "data/sample/trn.data"
    flabel = "data/sample/trn.label"
    data.savematrix(fdata, flabel)
    data.savevocab("data/sample/vocab.pickle.gz")
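# Minimal sketch of the end-to-end pipeline built from the two helpers above.
# It assumes WITHDP (and any Brown-cluster vocabulary passed as bcvocab) is
# defined elsewhere in the module, as createtrndata() expects.

# 1) Build data/sample/trn.data, trn.label, and vocab.pickle.gz
createtrndata(path="data/training/", topn=10000, bcvocab=None)
# 2) Train on those files and write model/parsing-model.pickle.gz
trainmodel()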
if not os.path.exists(logPath):
    os.mkdir(logPath)
if not os.path.exists(paramSavePath):
    os.mkdir(paramSavePath)

# init config for tools
tf.set_random_seed(seed)
logger = set_logger(logPath, timestr, os.path.basename(__file__))
# np.set_printoptions(formatter={'float': '{: 0.3f}'.format})
# Basic setup finishes here ---------------------------------------------------|
# Graph building begins here --------------------------------------------------|
data = Data(dataPath=dataPath, savePath=savePath, paramSavePath=paramSavePath,
            logPath=logPath, debug=debug, split_percent=split_percent,
            batch_size=batch_size, timestr=timestr, timestep=timestep,
            window=window)
vocab_size = data.vocabSize

# Model inputs: z is a real-valued vector per example, x a padded sequence of
# token indices (timestep tokens plus (max(window) - 1) context on each side)
z = tf.placeholder(tf.float32, [batch_size, input_dim * len(window)])
x = tf.placeholder(tf.int32, [batch_size, timestep + 2 * (max(window) - 1)])

gen = Generator(timestep=timestep, window=window, batch_size=batch_size,
                vocab_size=vocab_size, paramSavePath=paramSavePath,
                logPath=logPath, input_dim=input_dim,
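# Minimal sketch: how the placeholders above would be fed per batch in a
# TF 1.x session once the graph is complete. The noise distribution for z
# and the source of the token batch are assumptions; the actual fetches
# depend on the rest of the graph (generator / training ops).
import numpy as np

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    z_batch = np.random.normal(
        size=(batch_size, input_dim * len(window))).astype(np.float32)
    # Real token indices would come from `data`; zeros are stand-ins here
    x_batch = np.zeros((batch_size, timestep + 2 * (max(window) - 1)), dtype=np.int32)
    feed_dict = {z: z_batch, x: x_batch}
    # sess.run(<generator output or training op>, feed_dict=feed_dict)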
    'yelp13-test': '../dataset/yelp-2013-seg-20-20.test.ss',
    'yelp13-train': '../dataset/yelp-2013-seg-20-20.train.ss',
    'yelp13-w2vec': '../WordEmbedding/yelp-2013-embedding-200d.txt',
    'yelp14-dev': '../dataset/yelp-2014-seg-20-20.dev.ss',
    'yelp14-test': '../dataset/yelp-2014-seg-20-20.test.ss',
    'yelp14-train': '../dataset/yelp-2014-seg-20-20.train.ss',
    'yelp14-w2vec': '../WordEmbedding/yelp-2014-embedding-200d.txt'
}

# Hyperparameters
max_doc_len = 40
max_sen_len = 50
learning_rate = 0.005
hidden_size = 100
batch_size = 100

# Load the Yelp-2013 splits, the pretrained embeddings, and the user/product dictionaries
train_data = Data(path['yelp13-train'])
test_data = Data(path['yelp13-test'])
dev_data = Data(path['yelp13-dev'])
all_doc = np.concatenate([train_data.t_docs, test_data.t_docs, dev_data.t_docs])
embedding_file, words_dict = load_word_embedding(path['yelp13-w2vec'], all_doc)
u_dict, p_dict = train_data.usr_prd_dict()

# Build the HUAPA model
huapa = HUAPA(embedding_file, hidden_size, max_doc_len, max_sen_len,
              batch_size, len(u_dict), len(p_dict), 5)

# Convert training documents into padded index matrices plus length information
train_X, sen_len, doc_len, outlier_index = transform(words_dict, train_data.t_docs,
                                                     max_doc_len, max_sen_len)
u, p = train_data.usr_prd(u_dict, p_dict)
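# Minimal sketch: iterate over the transformed training set in mini-batches of
# batch_size documents. iter_batches is a hypothetical helper; the placeholder
# names inside HUAPA are not shown in this file, so the sess.run() call is only
# indicated as a comment.
def iter_batches(X, u_ids, p_ids, s_len, d_len, size):
    for start in range(0, len(X), size):
        end = start + size
        yield (X[start:end], u_ids[start:end], p_ids[start:end],
               s_len[start:end], d_len[start:end])

for bX, bu, bp, bs, bd in iter_batches(train_X, u, p, sen_len, doc_len, batch_size):
    pass  # e.g. sess.run(train_op, feed_dict={...}) with HUAPA's placeholders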
        accuracies.append(accuracy)
        if verboes:
            print("epochs %d : %.2f" % (epoch_index + 1, accuracy * 100) + '%')
    return accuracies


if __name__ == "__main__":
    from code.data import Data

    def reluVar(a):
        # ReLU with a configurable positive-side slope: max(a * x, 0)
        return lambda x: tf.maximum(x * a, tf.constant(0.0, dtype=tf.float64))

    def transSigmoid(down):
        # Sigmoid shifted down by `down`, so outputs lie in (-down, 1 - down)
        return lambda x: tf.nn.sigmoid(x) - tf.constant(down, dtype=tf.float64)

    dtype = tf.float64
    x = tf.placeholder(dtype, [None, 784])
    # Two fully connected layers: 784 -> 100 (shifted sigmoid) -> 10 (softmax)
    layer01 = FullyConnectedLayer(784, 100, transSigmoid(0.5), x, dtype)
    layer12 = FullyConnectedLayer(100, 10, tf.nn.softmax, layer01.var_out, dtype)

    def cross_entropy(y, y_):
        return tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))

    net = Network([layer01, layer12], cross_entropy)
    mnist = Data().loadmnistdata()
    net.runmnist(100, 20000, 0.5, mnist)
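# Optional variant of cross_entropy, not part of the original network:
# tf.log(y) yields -inf (and NaN gradients) whenever the softmax output
# contains exact zeros, so a common guard is to clip y before taking the log.
def cross_entropy_clipped(y, y_, eps=1e-12):
    y_safe = tf.clip_by_value(y, eps, 1.0)
    return tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_safe), reduction_indices=[1]))

# net = Network([layer01, layer12], cross_entropy_clipped)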