Example #1
def trainmodel():
    # Paths to the prebuilt vocabulary and the training matrix / label files.
    fvocab = "data/sample/vocab.pickle.gz"
    fdata = "data/sample/trn.data"
    flabel = "data/sample/trn.label"
    # Load the pickled vocabulary and the label-to-index mapping
    # (assumes gzip, a pickle-style load(), Data, ParsingModel and
    #  reversedict are imported elsewhere in the project).
    D = load(gzip.open(fvocab))
    vocab, labelidxmap = D['vocab'], D['labelidxmap']
    print('len(vocab) = {}'.format(len(vocab)))
    # Load the training feature matrix and label vector.
    data = Data()
    trnM, trnL = data.loadmatrix(fdata, flabel)
    print('trnM.shape = {}'.format(trnM.shape))
    # Invert the label map so indices can be turned back into labels.
    idxlabelmap = reversedict(labelidxmap)
    pm = ParsingModel(vocab=vocab, idxlabelmap=idxlabelmap)
    pm.train(trnM, trnL)
    pm.savemodel("model/parsing-model.pickle.gz")
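The snippet relies on a project helper, reversedict(), to turn the label-to-index map into an index-to-label map. A minimal sketch of such a helper, inferred only from how it is used above and assuming the values are unique:

def reversedict(d):
    # Swap keys and values; assumes the values are unique.
    return {value: key for key, value in d.items()}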
Example #3
def createtrndata(path="data/training/", topn=10000, bcvocab=None):
    # Build the training data from the documents under `path`
    # (WITHDP is a module-level flag defined elsewhere in the project).
    data = Data(
        bcvocab=bcvocab,
        withdp=WITHDP,
        fdpvocab="data/resources/word-dict.pickle.gz",
        fprojmat="data/resources/projmat.pickle.gz",
    )
    data.builddata(path)
    # Keep only the topn most frequent vocabulary entries, then vectorise.
    data.buildvocab(topn=topn)
    data.buildmatrix()
    # Persist the training matrix, labels and vocabulary for trainmodel().
    fdata = "data/sample/trn.data"
    flabel = "data/sample/trn.label"
    data.savematrix(fdata, flabel)
    data.savevocab("data/sample/vocab.pickle.gz")
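A possible driver tying this example to Example #1, assuming both functions live in the same script and the resource files referenced above are already in place:

if __name__ == '__main__':
    createtrndata(path="data/training/", topn=10000, bcvocab=None)  # writes data/sample/*
    trainmodel()                                                    # trains and saves the parsing model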
Example #4
if not os.path.exists(logPath):
    os.mkdir(logPath)
if not os.path.exists(paramSavePath):
    os.mkdir(paramSavePath)

# init config for tools
tf.set_random_seed(seed)
logger = set_logger(logPath, timestr, os.path.basename(__file__))
# np.set_printoptions(formatter={'float': '{: 0.3f}'.format})

# Basic setup finishes here --------------------------------------------------------------------------------|
# Graph building begins here -------------------------------------------------------------------------------|
data = Data(dataPath=dataPath,
            savePath=savePath,
            paramSavePath=paramSavePath,
            logPath=logPath,
            debug=debug,
            split_percent=split_percent,
            batch_size=batch_size,
            timestr=timestr,
            timestep=timestep,
            window=window)
vocab_size = data.vocabSize

z = tf.placeholder(tf.float32, [batch_size, input_dim * len(window)])
x = tf.placeholder(tf.int32, [batch_size, timestep + 2 * (max(window) - 1)])
gen = Generator(timestep=timestep,
                window=window,
                batch_size=batch_size,
                vocab_size=vocab_size,
                paramSavePath=paramSavePath,
                logPath=logPath,
                input_dim=input_dim,
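The placeholder shapes follow directly from the hyperparameters: z has input_dim * len(window) columns and x has timestep + 2 * (max(window) - 1) columns. A quick check with assumed values (these numbers are illustrative, not the project's defaults):

timestep, window, input_dim, batch_size = 30, (3, 4, 5), 100, 64
z_shape = (batch_size, input_dim * len(window))            # (64, 300)
x_shape = (batch_size, timestep + 2 * (max(window) - 1))   # (64, 38)
print(z_shape, x_shape)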
Example #6
    'yelp13-test': '../dataset/yelp-2013-seg-20-20.test.ss',
    'yelp13-train': '../dataset/yelp-2013-seg-20-20.train.ss',
    'yelp13-w2vec': '../WordEmbedding/yelp-2013-embedding-200d.txt',
    'yelp14-dev': '../dataset/yelp-2014-seg-20-20.dev.ss',
    'yelp14-test': '../dataset/yelp-2014-seg-20-20.test.ss',
    'yelp14-train': '../dataset/yelp-2014-seg-20-20.train.ss',
    'yelp14-w2vec': '../WordEmbedding/yelp-2014-embedding-200d.txt'
}

max_doc_len = 40
max_sen_len = 50
learning_rate = 0.005
hidden_size = 100
batch_size = 100

train_data = Data(path['yelp13-train'])
test_data = Data(path['yelp13-test'])
dev_data = Data(path['yelp13-dev'])

all_doc = np.concatenate(
    [train_data.t_docs, test_data.t_docs, dev_data.t_docs])
embedding_file, words_dict = load_word_embedding(path['yelp13-w2vec'], all_doc)

u_dict, p_dict = train_data.usr_prd_dict()

huapa = HUAPA(embedding_file, hidden_size, max_doc_len, max_sen_len,
              batch_size, len(u_dict), len(p_dict), 5)
train_X, sen_len, doc_len, outlier_index = transform(words_dict,
                                                     train_data.t_docs,
                                                     max_doc_len, max_sen_len)
u, p = train_data.usr_prd(u_dict, p_dict)
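transform() appears to map the raw documents to fixed-size word-id tensors bounded by max_doc_len and max_sen_len. A hypothetical sketch of that kind of conversion for a single document (the helper name doc_to_matrix, the out-of-vocabulary id 0, and treating words_dict as a plain token-to-id dict are all assumptions, not the project's implementation):

import numpy as np

def doc_to_matrix(sentences, words_dict, max_doc_len=40, max_sen_len=50):
    # sentences: list of token lists; words_dict: token -> integer id.
    mat = np.zeros((max_doc_len, max_sen_len), dtype=np.int32)
    for i, sent in enumerate(sentences[:max_doc_len]):
        for j, word in enumerate(sent[:max_sen_len]):
            mat[i, j] = words_dict.get(word, 0)  # 0 used here as the OOV id
    return mat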
Example #7
                    accuracies.append(accuracy)
                    if verboes:
                        print("epochs %d : %.2f" %
                              (epoch_index + 1, accuracy * 100) + '%')

            return accuracies


if __name__ == "__main__":
    from code.data import Data

    def reluVar(a):
        # ReLU whose output is scaled by the constant a (a * relu(x) for a > 0).
        return lambda x: tf.maximum(x * a, tf.constant(0.0, dtype=tf.float64))

    def transSigmoid(down):
        # Sigmoid shifted down by a constant, so outputs lie in (-down, 1 - down).
        return lambda x: tf.nn.sigmoid(x) - tf.constant(down, dtype=tf.float64)

    dtype = tf.float64
    x = tf.placeholder(dtype, [None, 784])
    layer01 = FullyConnectedLayer(784, 100, transSigmoid(0.5), x, dtype)
    layer12 = FullyConnectedLayer(100, 10, tf.nn.softmax, layer01.var_out,
                                  dtype)

    def cross_entropy(y, y_):
        return tf.reduce_mean(
            -tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))

    net = Network([layer01, layer12], cross_entropy)
    mnist = Data().loadmnistdata()
    net.runmnist(100, 20000, 0.5, mnist)
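The cross_entropy above applies tf.log directly to the softmax output, which yields NaNs if any predicted probability reaches exactly zero. A common guard, shown here as an optional variant rather than part of the original example, is to clip y before taking the log:

def cross_entropy_safe(y, y_, eps=1e-10):
    # Clip the predicted probabilities away from 0 before taking the log.
    y = tf.clip_by_value(y, eps, 1.0)
    return tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))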