Example #1
import tensorflow as tf
import matplotlib.pyplot as plt

import load_data  # project-local module providing read_dataset

# img_height, img_width, BATCH_SIZE and MAX_STEP are module-level
# constants defined elsewhere in the project.


def train():
    filenames = tf.placeholder(tf.string, shape=[None])
    training_filenames = ["./train.records"]
    validation_filenames = ["./train.records"]  # unused in this snippet; points at the training file
    iterator = load_data.read_dataset(filenames, img_height, img_width, BATCH_SIZE)
    # Build the get_next op once, before entering the run loop.
    tra_img, tra_label = iterator.get_next()
    print(type(tra_img))
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(iterator.initializer, feed_dict={filenames: training_filenames})
        try:
            for step in range(MAX_STEP):
                tra_img1, tra_label1 = sess.run([tra_img, tra_label])
                for j in range(BATCH_SIZE):
                    print(step, tra_label1[j])
                    print(type(tra_label1))
                    print("-----------------------")
                    plt.imshow(tra_img1[j, :, :, :])
                    plt.show()
        except tf.errors.OutOfRangeError:
            print('done!')
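
The load_data.read_dataset helper itself is not shown. A minimal sketch of what it might look like, assuming each TFRecord stores a JPEG-encoded image under an 'image' key and an int64 label under a 'label' key (the feature keys and decode steps are assumptions, not the project's actual parser):

import tensorflow as tf

def read_dataset(filenames, img_height, img_width, batch_size):
    def _parse(example_proto):
        # Hypothetical schema: raw JPEG bytes plus an integer label.
        features = {
            'image': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.int64),
        }
        parsed = tf.parse_single_example(example_proto, features)
        img = tf.image.decode_jpeg(parsed['image'], channels=3)
        img = tf.image.resize_images(img, [img_height, img_width])
        img = tf.cast(img, tf.float32) / 255.0
        return img, parsed['label']

    dataset = tf.data.TFRecordDataset(filenames)
    dataset = dataset.map(_parse).batch(batch_size)
    # An initializable iterator lets train() switch between training and
    # validation files through feed_dict.
    return dataset.make_initializable_iterator()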
Example #2
import tensorflow as tf

import model  # project-local module defining the HAN network
from load_data import read_dataset  # assumed project-local data loader

tf.flags.DEFINE_string("data_dir", "data/data.dat", "data directory")
tf.flags.DEFINE_integer("vocab_size", 46960, "vocabulary size")
tf.flags.DEFINE_integer("num_classes", 5, "number of classes")
tf.flags.DEFINE_integer("embedding_size", 200, "Dimensionality of character embedding (default: 200)")
tf.flags.DEFINE_integer("hidden_size", 50, "Dimensionality of GRU hidden layer (default: 50)")
tf.flags.DEFINE_integer("batch_size", 32, "Batch Size (default: 64)")
tf.flags.DEFINE_integer("num_epochs", 10, "Number of training epochs (default: 50)")
tf.flags.DEFINE_integer("checkpoint_every", 100, "Save model after this many steps (default: 100)")
tf.flags.DEFINE_integer("num_checkpoints", 5, "Number of checkpoints to store (default: 5)")
tf.flags.DEFINE_integer("evaluate_every", 100, "evaluate every this many batches")
tf.flags.DEFINE_float("learning_rate", 0.01, "learning rate")
tf.flags.DEFINE_float("grad_clip", 5, "grad clip to prevent gradient explode")

FLAGS = tf.flags.FLAGS

train_x, train_y, dev_x, dev_y = read_dataset()
print "data load finished"

with tf.Session() as sess:
    han = model.HAN(vocab_size=FLAGS.vocab_size,
                    num_classes=FLAGS.num_classes,
                    embedding_size=FLAGS.embedding_size,
                    hidden_size=FLAGS.hidden_size)

    with tf.name_scope('loss'):
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=han.input_y,
                                                                      logits=han.out,
                                                                      name='loss'))
    with tf.name_scope('accuracy'):
        predict = tf.argmax(han.out, axis=1, name='predict')
        label = tf.argmax(han.input_y, axis=1, name='label')
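        # --- Sketch of a plausible continuation (not the project's code):
        # the snippet stops before the accuracy tensor and training op,
        # which the flags above (learning_rate, grad_clip) suggest.
        acc = tf.reduce_mean(tf.cast(tf.equal(predict, label), tf.float32),
                             name='accuracy')

    # Clip gradients to FLAGS.grad_clip before applying them.
    optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), FLAGS.grad_clip)
    train_op = optimizer.apply_gradients(zip(grads, tvars))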
Example #3
# https://blog.csdn.net/u012052268/article/details/79560768
# coding:utf-8

import os
import sys
from sklearn import feature_extraction
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import CountVectorizer
from load_data import read_dataset
from load_data import cut_dataset
from operator import itemgetter, attrgetter
import xlsxwriter

if __name__ == "__main__":
    cut_dataset('./data/filedata1.pickle')
    train, test = read_dataset('./data/filedata11.pickle')

    workbook = xlsxwriter.Workbook(
        './output/tfidf_output.xlsx'
    )  # Open an xlsx file (an existing file is emptied; a missing one is created)
    worksheet = workbook.add_worksheet(
    )  # Add a sheet (names default to Sheet1, Sheet2, ...; a custom name may be given)
    bold = workbook.add_format({'bold': True})
    wrap_format = workbook.add_format({'text_wrap': True})  # avoid shadowing the built-in format()
    worksheet.write('A1', 'word', bold)
    worksheet.write('B1', 'weight', bold)
    worksheet.set_column('A:B', 20)

    data_x = []
    for i, sent in enumerate(train):
        doc = []
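        # --- Sketch of a plausible continuation (the original snippet is
        # truncated here): join each tokenized sentence into one document,
        # compute TF-IDF weights, and write (word, weight) pairs to the
        # sheet sorted by weight. The per-item structure of train is an
        # assumption.
        doc.extend(sent)
        data_x.append(' '.join(doc))

    # Term counts -> TF-IDF weights.
    vectorizer = CountVectorizer()
    transformer = TfidfTransformer()
    tfidf = transformer.fit_transform(vectorizer.fit_transform(data_x))

    words = vectorizer.get_feature_names()
    weights = tfidf.toarray().sum(axis=0)  # aggregate weight per word

    # Highest-weighted words first, one pair per row under the headers.
    pairs = sorted(zip(words, weights), key=itemgetter(1), reverse=True)
    for row, (word, weight) in enumerate(pairs, start=1):
        worksheet.write(row, 0, word, wrap_format)
        worksheet.write(row, 1, weight)

    workbook.close()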
Example #4
import random

import numpy as np

from train_ens import *

# Set random seeds for reproducibility.
random.seed(2)
np.random.seed(2)

NUM_CLASSIFIERS = 10
MAX_LABELS = 20
# TOTAL_CLASSES=100
# NUM_TRAINING_SAMPLES=50*1000

from load_data import read_dataset, create_reverse_dict, data_statistics

dataset = read_dataset("mediamill")
metadata = dataset["metadata"]

num_points = metadata["num_points"]
num_features = metadata["num_features"]
num_labels = metadata["num_labels"]

#create training set
allX = dataset["points"]
allY = dataset["vector_labels"]
tr_split = dataset["train_splits"][0]

trainX = allX[tr_split]
trainY = allY[tr_split]
all_labels = [dataset["sparse_labels"][i] for i in tr_split]

reverse_dict = create_reverse_dict(all_labels)
statistics = data_statistics(all_labels, num_labels)
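
Neither create_reverse_dict nor data_statistics is shown here. Plausible implementations, assuming all_labels holds one list of label ids per training point (both functions are hypothetical sketches, not the project's code):

from collections import Counter

def create_reverse_dict(all_labels):
    # Hypothetical: map each label id to the indices of the training
    # points that carry it.
    reverse = {}
    for idx, labels in enumerate(all_labels):
        for lab in labels:
            reverse.setdefault(lab, []).append(idx)
    return reverse

def data_statistics(all_labels, num_labels):
    # Hypothetical: per-label frequency counts over the training split.
    counts = Counter(lab for labels in all_labels for lab in labels)
    return [counts.get(lab, 0) for lab in range(num_labels)]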
Example #5
File: train.py Project: ChingChingYa/HCAN
# PMF parameter
ratio = 0.8
lambda_U = 0.01
lambda_V = 0.01
latent_size = 6
learning_rate = 3e-5
iterations = 1000

lambda_value_list = []
lambda_value_list.append([0.01, 0.01])

# load_data, read_dataset, cut_data_len and PMF come from the project's
# own modules (imports not included in this snippet).
if __name__ == "__main__":

    alldata = load_data('./data/prouduct_rating_data_1.pickle')
    train, test = read_dataset('./data/prouduct_rating_data_11.pickle')
    num_users = cut_data_len(alldata, 'reviewerID')
    num_items = cut_data_len(alldata, 'asin')

    fp = open("log.txt", "a")
    fp.write("dataset:" + "Musical_Instruments_5" + "\n")
    fp.write("ratio:" + str(ratio) + "\n")
    fp.write("latent_factor:" + str(latent_size) + "\n")
    fp.write("learning_rate:" + str(learning_rate) + "\n")

    for lambda_value in lambda_value_list:
        lambda_U = lambda_value[0]
        lambda_V = lambda_value[1]
        # initialization
        pmf_model = PMF(U=None,
                        V=None,
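
The PMF constructor call is truncated in the source. For context, the objective that lambda_U and lambda_V weight in the standard PMF formulation looks roughly like this (a generic sketch, not this project's implementation):

import numpy as np

def pmf_loss(R, mask, U, V, lambda_U, lambda_V):
    # Squared error over observed ratings (mask is 1 where a rating
    # exists, 0 elsewhere) plus L2 penalties on the latent factors.
    err = mask * (R - np.dot(U, V.T))
    return (np.sum(err ** 2)
            + lambda_U * np.sum(U ** 2)
            + lambda_V * np.sum(V ** 2))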
Example #6
# test code
import torch
import random
from collections import defaultdict
from tqdm import tqdm

from load_data import get_HuffmanCodePath, read_dataset
from model import HierSoft_CBOW

nodes, hcodes, hpath = get_HuffmanCodePath('ptb')
#print('All Tree nodes is %d'%nodes[0])

w2i = defaultdict(lambda: len(w2i))  # auto-assigns a fresh id to each unseen word

train = list(read_dataset(w2i, 'ptb'))[:2]  # test run: only the first two sentences
i2w = {v: k for k, v in w2i.items()}

nwords = len(i2w)

EMB_SIZE = 20
ITERS = 10
WIN_SIZE = 2

model = HierSoft_CBOW(nwords, EMB_SIZE, nodes[0] + 1)
opt = torch.optim.Adam(model.parameters(), lr=1e-4)

data_type = torch.LongTensor
use_cuda = torch.cuda.is_available()

if use_cuda:
    data_type = torch.cuda.LongTensor
    model = model.cuda()  # keep the model on the same device as the inputs
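
The snippet ends before the training loop. A minimal sketch of one, assuming HierSoft_CBOW's forward pass takes the context-word ids plus the target word's Huffman code and path and returns a scalar loss, and that hcodes and hpath are keyed by word string (all of these signatures are guesses):

for epoch in range(ITERS):
    random.shuffle(train)
    total_loss = 0.0
    for sent in tqdm(train):
        for i in range(WIN_SIZE, len(sent) - WIN_SIZE):
            # Context: WIN_SIZE word ids on each side of position i.
            context = sent[i - WIN_SIZE:i] + sent[i + 1:i + WIN_SIZE + 1]
            ctx = torch.tensor(context).type(data_type)
            target = sent[i]
            # Hypothetical forward: loss along the target's Huffman path.
            loss = model(ctx, hcodes[i2w[target]], hpath[i2w[target]])
            opt.zero_grad()
            loss.backward()
            opt.step()
            total_loss += loss.item()
    print('epoch %d: loss %.4f' % (epoch, total_loss))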