import tensorflow as tf
import matplotlib.pyplot as plt

import load_data

# img_height, img_width, BATCH_SIZE and MAX_STEP are module-level constants
# defined elsewhere in the original script.


def train():
    filenames = tf.placeholder(tf.string, shape=[None])
    training_filenames = ["./train.records"]
    validation_filenames = ["./train.records"]
    iterator = load_data.read_dataset(filenames, img_height, img_width, BATCH_SIZE)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Feed the training files into the initializable iterator.
        sess.run(iterator.initializer, feed_dict={filenames: training_filenames})
        tra_img, tra_label = iterator.get_next()
        print(type(tra_img))
        try:
            for step in range(MAX_STEP):
                tra_img1, tra_label1 = sess.run([tra_img, tra_label])
                for j in range(BATCH_SIZE):
                    print(step, tra_label1[j])
                    print(type(tra_label1))
                    print("-----------------------")
                    plt.imshow(tra_img1[j, :, :, :])
                    plt.show()
        except tf.errors.OutOfRangeError:
            print('done!')
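# A minimal sketch of what load_data.read_dataset could look like for the
# snippet above -- an assumption, not the project's actual implementation.
# It assumes each TFRecord example stores a raw image under 'image_raw' and an
# integer label under 'label', and returns an initializable iterator driven by
# the `filenames` placeholder.
import tensorflow as tf


def read_dataset(filenames, img_height, img_width, batch_size):
    def _parse(example_proto):
        features = {
            'image_raw': tf.FixedLenFeature([], tf.string),  # assumed feature key
            'label': tf.FixedLenFeature([], tf.int64),       # assumed feature key
        }
        parsed = tf.parse_single_example(example_proto, features)
        image = tf.decode_raw(parsed['image_raw'], tf.uint8)
        image = tf.reshape(image, [img_height, img_width, 3])
        label = tf.cast(parsed['label'], tf.int32)
        return image, label

    dataset = tf.data.TFRecordDataset(filenames)
    dataset = dataset.map(_parse)
    dataset = dataset.batch(batch_size)
    return dataset.make_initializable_iterator()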
import tensorflow as tf

import model
from load_data import read_dataset

# Hyperparameters and training configuration.
tf.flags.DEFINE_string("data_dir", "data/data.dat", "data directory")
tf.flags.DEFINE_integer("vocab_size", 46960, "vocabulary size")
tf.flags.DEFINE_integer("num_classes", 5, "number of classes")
tf.flags.DEFINE_integer("embedding_size", 200, "Dimensionality of character embedding (default: 200)")
tf.flags.DEFINE_integer("hidden_size", 50, "Dimensionality of GRU hidden layer (default: 50)")
tf.flags.DEFINE_integer("batch_size", 32, "Batch Size (default: 32)")
tf.flags.DEFINE_integer("num_epochs", 10, "Number of training epochs (default: 10)")
tf.flags.DEFINE_integer("checkpoint_every", 100, "Save model after this many steps (default: 100)")
tf.flags.DEFINE_integer("num_checkpoints", 5, "Number of checkpoints to store (default: 5)")
tf.flags.DEFINE_integer("evaluate_every", 100, "evaluate every this many batches")
tf.flags.DEFINE_float("learning_rate", 0.01, "learning rate")
tf.flags.DEFINE_float("grad_clip", 5, "gradient clipping threshold to prevent exploding gradients")

FLAGS = tf.flags.FLAGS

train_x, train_y, dev_x, dev_y = read_dataset()
print("data load finished")

with tf.Session() as sess:
    han = model.HAN(vocab_size=FLAGS.vocab_size,
                    num_classes=FLAGS.num_classes,
                    embedding_size=FLAGS.embedding_size,
                    hidden_size=FLAGS.hidden_size)

    with tf.name_scope('loss'):
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=han.input_y,
                                                    logits=han.out,
                                                    name='loss'))

    with tf.name_scope('accuracy'):
        predict = tf.argmax(han.out, axis=1, name='predict')
        label = tf.argmax(han.input_y, axis=1, name='label')
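    # A minimal sketch (an assumption, not the original source) of how the rest
    # of this training step might be wired up: an accuracy op built from
    # `predict`/`label`, plus Adam with global-norm gradient clipping driven by
    # FLAGS.grad_clip and FLAGS.learning_rate.
    acc = tf.reduce_mean(tf.cast(tf.equal(predict, label), tf.float32), name='acc')

    global_step = tf.Variable(0, trainable=False, name='global_step')
    optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), FLAGS.grad_clip)
    train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=global_step)

    sess.run(tf.global_variables_initializer())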
# coding:utf-8
# Reference: https://blog.csdn.net/u012052268/article/details/79560768
import os
import sys
from operator import itemgetter, attrgetter

import xlsxwriter
from sklearn import feature_extraction
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import CountVectorizer

from load_data import read_dataset
from load_data import cut_dataset

if __name__ == "__main__":
    cut_dataset('./data/filedata1.pickle')
    train, test = read_dataset('./data/filedata11.pickle')

    # Open an xlsx file (an existing file is overwritten; otherwise a new one is created).
    workbook = xlsxwriter.Workbook('./output/tfidf_output.xlsx')
    # Add a worksheet (defaults to Sheet1 when no name is given; a custom name can be passed).
    worksheet = workbook.add_worksheet()
    bold = workbook.add_format({'bold': True})
    format = workbook.add_format({'text_wrap': True})
    worksheet.write('A1', 'word', bold)
    worksheet.write('B1', 'weight', bold)
    worksheet.set_column('A:B', 20)

    data_x = []
    for i, sent in enumerate(train):
        doc = []
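    # A hedged sketch (an assumption about the truncated remainder, not the
    # original code) of the TF-IDF step this excerpt builds toward, once data_x
    # holds one whitespace-joined string of tokens per training document:
    # CountVectorizer counts terms, TfidfTransformer re-weights them, and the
    # per-term weights are written into the 'word'/'weight' columns above.
    vectorizer = CountVectorizer()
    transformer = TfidfTransformer()
    tfidf = transformer.fit_transform(vectorizer.fit_transform(data_x))
    words = vectorizer.get_feature_names()
    weights = tfidf.toarray().sum(axis=0)

    row = 1
    for word, weight in sorted(zip(words, weights), key=itemgetter(1), reverse=True):
        worksheet.write(row, 0, word)
        worksheet.write(row, 1, float(weight))
        row += 1
    workbook.close()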
import random

import numpy as np

from train_ens import *
from load_data import read_dataset, create_reverse_dict, data_statistics

# set random seeds
random.seed(2)
np.random.seed(2)

NUM_CLASSIFIERS = 10
MAX_LABELS = 20
# TOTAL_CLASSES = 100
# NUM_TRAINING_SAMPLES = 50 * 1000

dataset = read_dataset("mediamill")
metadata = dataset["metadata"]
num_points = metadata["num_points"]
num_features = metadata["num_features"]
num_labels = metadata["num_labels"]

# create the training set from the first train/test split
allX = dataset["points"]
allY = dataset["vector_labels"]
tr_split = dataset["train_splits"][0]
trainX = allX[tr_split]
trainY = allY[tr_split]

all_labels = [dataset["sparse_labels"][i] for i in tr_split]
reverse_dict = create_reverse_dict(all_labels)
statistics = data_statistics(all_labels, num_labels)
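# A hedged guess (an assumption, not the repository's actual load_data code) at
# what the two helpers used above might compute: create_reverse_dict maps each
# label id to the training points that carry it, and data_statistics counts how
# often each of the num_labels labels occurs in the training split.
from collections import defaultdict


def create_reverse_dict_sketch(sparse_labels):
    """sparse_labels[i] is the list of label ids attached to training point i."""
    reverse = defaultdict(list)
    for point_idx, labels in enumerate(sparse_labels):
        for label in labels:
            reverse[label].append(point_idx)
    return reverse


def data_statistics_sketch(sparse_labels, num_labels):
    """Per-label occurrence counts over the training split."""
    counts = np.zeros(num_labels, dtype=np.int64)
    for labels in sparse_labels:
        for label in labels:
            counts[label] += 1
    return counts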
# PMF parameters
ratio = 0.8
lambda_U = 0.01
lambda_V = 0.01
latent_size = 6
learning_rate = 3e-5
iterations = 1000

lambda_value_list = []
lambda_value_list.append([0.01, 0.01])

if __name__ == "__main__":
    alldata = load_data('./data/prouduct_rating_data_1.pickle')
    train, test = read_dataset('./data/prouduct_rating_data_11.pickle')
    num_users = cut_data_len(alldata, 'reviewerID')
    num_items = cut_data_len(alldata, 'asin')

    fp = open("log.txt", "a")
    fp.write("dataset:" + "Musical_Instruments_5" + "\n")
    fp.write("ratio:" + str(ratio) + "\n")
    fp.write("latent_factor:" + str(latent_size) + "\n")
    fp.write("learning_rate:" + str(learning_rate) + "\n")

    for lambda_value in lambda_value_list:
        lambda_U = lambda_value[0]
        lambda_V = lambda_value[1]
        # initialization
        pmf_model = PMF(U=None, V=None,
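# A minimal sketch (an assumption, not the project's actual PMF class) of the
# SGD updates these hyperparameters drive.  PMF minimises
#   0.5 * sum over observed (i, j) of (R_ij - U_i . V_j)^2
#     + 0.5 * lambda_U * ||U||_F^2 + 0.5 * lambda_V * ||V||_F^2
# so each observed rating moves the user factor U[i] and item factor V[j]
# along the negative gradient with step size learning_rate.
import numpy as np


def pmf_sgd_epoch(U, V, ratings, learning_rate, lambda_U, lambda_V):
    """One pass over `ratings`, an iterable of (user_idx, item_idx, rating) triples."""
    for i, j, r in ratings:
        err = r - U[i].dot(V[j])
        grad_U = -err * V[j] + lambda_U * U[i]
        grad_V = -err * U[i] + lambda_V * V[j]
        U[i] -= learning_rate * grad_U
        V[j] -= learning_rate * grad_V
    return U, V


# Example initialisation with the hyperparameters above:
#   U = np.random.normal(0, 0.1, (num_users, latent_size))
#   V = np.random.normal(0, 0.1, (num_items, latent_size))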
# test code
import random
from collections import defaultdict

import torch
from tqdm import tqdm

from load_data import get_HuffmanCodePath, read_dataset
from model import HierSoft_CBOW

nodes, hcodes, hpath = get_HuffmanCodePath('ptb')
# print('All Tree nodes is %d' % nodes[0])

# Map each word to a fresh integer id on first sight.
w2i = defaultdict(lambda: len(w2i))
train = list(read_dataset(w2i, 'ptb'))[:2]
i2w = {v: k for k, v in w2i.items()}
nwords = len(i2w)

EMB_SIZE = 20
ITERS = 10
WIN_SIZE = 2

model = HierSoft_CBOW(nwords, EMB_SIZE, nodes[0] + 1)
opt = torch.optim.Adam(model.parameters(), lr=1e-4)

data_type = torch.LongTensor
use_cuda = torch.cuda.is_available()
if use_cuda:
    data_type = torch.cuda.LongTensor
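# A hedged sketch of a CBOW-style training loop over `train` (the original test
# driver is not shown).  It assumes each sentence is a list of word ids, that
# the context is the WIN_SIZE words on either side of the target, and that
# HierSoft_CBOW's forward pass takes the context ids together with the target's
# Huffman code and path and returns a scalar loss -- all of these are
# assumptions, not the repository's actual interface.
for it in range(ITERS):
    random.shuffle(train)
    total_loss = 0.0
    for sent in tqdm(train):
        for pos, target in enumerate(sent):
            context = (sent[max(0, pos - WIN_SIZE):pos]
                       + sent[pos + 1:pos + 1 + WIN_SIZE])
            if not context:
                continue
            ctx = torch.tensor(context).type(data_type)
            loss = model(ctx, hcodes[target], hpath[target])  # assumed signature
            opt.zero_grad()
            loss.backward()
            opt.step()
            total_loss += loss.item()
    print('iter %d: total loss %.4f' % (it, total_loss))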