def __init__(self, embedding_dim=100, batch_size=64, n_hidden=100, learning_rate=0.01,
             n_class=3, max_sentence_len=50, l2_reg=0., display_step=4, n_iter=100, type_=''):
    self.embedding_dim = embedding_dim
    self.batch_size = batch_size
    self.n_hidden = n_hidden
    self.learning_rate = learning_rate
    self.n_class = n_class
    self.max_sentence_len = max_sentence_len
    self.l2_reg = l2_reg
    self.display_step = display_step
    self.n_iter = n_iter
    self.type_ = type_

    self.word_id_mapping, self.w2v = load_word_embedding(
        FLAGS.word_id_file_path, FLAGS.embedding_file_path, self.embedding_dim)
    # self.word_embedding = tf.constant(self.w2v, dtype=tf.float32, name='word_embedding')
    self.word_embedding = tf.Variable(self.w2v, dtype=tf.float32, name='word_embedding')
    # self.word_id_mapping = load_word_id_mapping(FLAGS.word_id_file_path)
    # self.word_embedding = tf.Variable(
    #     tf.random_uniform([len(self.word_id_mapping), self.embedding_dim], -0.1, 0.1), name='word_embedding')
    self.aspect_id_mapping, self.aspect_embed = load_aspect2id(
        FLAGS.aspect_id_file_path, self.word_id_mapping, self.w2v, self.embedding_dim)
    self.aspect_embedding = tf.Variable(self.aspect_embed, dtype=tf.float32, name='aspect_embedding')

    self.keep_prob1 = tf.placeholder(tf.float32)
    self.keep_prob2 = tf.placeholder(tf.float32)

    with tf.name_scope('inputs'):
        self.x = tf.placeholder(tf.int32, [None, self.max_sentence_len], name='x')
        self.y = tf.placeholder(tf.int32, [None, self.n_class], name='y')
        self.sen_len = tf.placeholder(tf.int32, None, name='sen_len')
        self.aspect_id = tf.placeholder(tf.int32, None, name='aspect_id')

    with tf.name_scope('weights'):
        self.weights = {
            'softmax': tf.get_variable(
                name='softmax_w',
                shape=[self.n_hidden, self.n_class],
                initializer=tf.random_uniform_initializer(-0.01, 0.01),
                regularizer=tf.contrib.layers.l2_regularizer(self.l2_reg))
        }

    with tf.name_scope('biases'):
        self.biases = {
            'softmax': tf.get_variable(
                name='softmax_b',
                shape=[self.n_class],
                initializer=tf.random_uniform_initializer(-0.01, 0.01),
                regularizer=tf.contrib.layers.l2_regularizer(self.l2_reg))
        }

    self.W = tf.get_variable(
        name='W',
        shape=[self.n_hidden + self.embedding_dim, self.n_hidden + self.embedding_dim],
        initializer=tf.random_uniform_initializer(-0.01, 0.01),
        regularizer=tf.contrib.layers.l2_regularizer(self.l2_reg))
    self.w = tf.get_variable(
        name='w',
        shape=[self.n_hidden + self.embedding_dim, 1],
        initializer=tf.random_uniform_initializer(-0.01, 0.01),
        regularizer=tf.contrib.layers.l2_regularizer(self.l2_reg))
    self.Wp = tf.get_variable(
        name='Wp',
        shape=[self.n_hidden, self.n_hidden],
        initializer=tf.random_uniform_initializer(-0.01, 0.01),
        regularizer=tf.contrib.layers.l2_regularizer(self.l2_reg))
    self.Wx = tf.get_variable(
        name='Wx',
        shape=[self.n_hidden, self.n_hidden],
        initializer=tf.random_uniform_initializer(-0.01, 0.01),
        regularizer=tf.contrib.layers.l2_regularizer(self.l2_reg))
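# A minimal sketch (not part of the original file) of how the variables above are
# typically combined in AT-LSTM-style attention: `W` and `w` score each hidden state
# concatenated with the aspect embedding, and `Wp`, `Wx` mix the attended vector with
# the final hidden state. `H` ([batch, max_len, n_hidden]), `Va` (the aspect embedding
# tiled to [batch, max_len, embedding_dim]), and the method name are assumptions here.
def at_attention_sketch(self, H, Va, seq_len):
    batch = tf.shape(H)[0]
    HV = tf.reshape(tf.concat([H, Va], 2), [-1, self.n_hidden + self.embedding_dim])
    M = tf.tanh(tf.matmul(HV, self.W))                        # [batch * max_len, n_hidden + dim]
    scores = tf.reshape(tf.matmul(M, self.w), [batch, 1, self.max_sentence_len])
    alpha = tf.nn.softmax(scores)                             # attention over words; a full version
                                                              # would mask padded positions by seq_len
    r = tf.reshape(tf.matmul(alpha, H), [-1, self.n_hidden])  # attention-weighted sum of hiddens
    h_n = H[:, -1, :]                                         # last hidden state (sketch only)
    return tf.tanh(tf.matmul(r, self.Wp) + tf.matmul(h_n, self.Wx))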
def __init__(self, embedding_dim=100, batch_size=64, n_hidden=100, learning_rate=0.01,
             n_class=3, max_sentence_len=50, l2_reg=0., display_step=4, n_iter=100, type_=''):
    self.embedding_dim = embedding_dim        # 300
    self.batch_size = batch_size              # 25
    self.n_hidden = n_hidden                  # 300
    self.learning_rate = learning_rate        # 0.01
    self.n_class = n_class                    # 3
    self.max_sentence_len = max_sentence_len  # 80
    self.l2_reg = l2_reg                      # 0.001
    self.display_step = display_step          # 4
    self.n_iter = n_iter                      # 20
    self.type_ = type_                        # AT

    # args: word->id file path, embedding file path, embedding dim (300);
    # returns dict(3909) and a 3910 * 300 embedding matrix
    self.word_id_mapping, self.w2v = load_word_embedding(
        FLAGS.word_id_file_path, FLAGS.embedding_file_path, self.embedding_dim)
    # self.word_embedding = tf.constant(self.w2v, dtype=tf.float32, name='word_embedding')
    self.word_embedding = tf.Variable(
        self.w2v, dtype=tf.float32, name='word_embedding')  # define the word_embedding variable
    # self.word_id_mapping = load_word_id_mapping(FLAGS.word_id_file_path)
    # self.word_embedding = tf.Variable(
    #     tf.random_uniform([len(self.word_id_mapping), self.embedding_dim], -0.1, 0.1), name='word_embedding')
    self.aspect_id_mapping, self.aspect_embed = load_aspect2id(
        FLAGS.aspect_id_file_path, self.word_id_mapping, self.w2v,
        self.embedding_dim)  # dict(1219), 1220 * 300
    self.aspect_embedding = tf.Variable(
        self.aspect_embed, dtype=tf.float32, name='aspect_embedding')  # define the aspect_embedding variable

    # define dropout placeholders
    self.keep_prob1 = tf.placeholder(tf.float32, name="dropout_keep_prob1")
    self.keep_prob2 = tf.placeholder(tf.float32, name="dropout_keep_prob2")

    with tf.name_scope('inputs'):
        self.x = tf.placeholder(tf.int32, [None, self.max_sentence_len], name='x')  # 25 * 80
        self.y = tf.placeholder(tf.int32, [None, self.n_class], name='y')           # 25 * 3
        self.sen_len = tf.placeholder(tf.int32, None, name='sen_len')               # list(25)
        self.aspect_id = tf.placeholder(tf.int32, None, name='aspect_id')           # list(25)

    with tf.name_scope('weights'):
        self.weights = {
            'softmax': tf.get_variable(
                name='softmax_w',
                shape=[self.n_hidden, self.n_class],  # 300 * 3
                initializer=tf.random_uniform_initializer(-0.01, 0.01),
                regularizer=tf.contrib.layers.l2_regularizer(self.l2_reg))
        }

    with tf.name_scope('biases'):
        self.biases = {
            'softmax': tf.get_variable(
                name='softmax_b',
                shape=[self.n_class],  # 3
                initializer=tf.random_uniform_initializer(-0.01, 0.01),
                regularizer=tf.contrib.layers.l2_regularizer(self.l2_reg))
        }

    self.W = tf.get_variable(
        name='W',
        shape=[self.n_hidden + self.embedding_dim,
               self.n_hidden + self.embedding_dim],  # 600 * 600
        initializer=tf.random_uniform_initializer(-0.01, 0.01),
        regularizer=tf.contrib.layers.l2_regularizer(self.l2_reg))
    self.w = tf.get_variable(
        name='w',
        shape=[self.n_hidden + self.embedding_dim, 1],  # 600 * 1
        initializer=tf.random_uniform_initializer(-0.01, 0.01),
        regularizer=tf.contrib.layers.l2_regularizer(self.l2_reg))
    self.Wp = tf.get_variable(
        name='Wp',
        shape=[self.n_hidden, self.n_hidden],  # 300 * 300
        initializer=tf.random_uniform_initializer(-0.01, 0.01),
        regularizer=tf.contrib.layers.l2_regularizer(self.l2_reg))
    self.Wx = tf.get_variable(
        name='Wx',
        shape=[self.n_hidden, self.n_hidden],  # 300 * 300
        initializer=tf.random_uniform_initializer(-0.01, 0.01),
        regularizer=tf.contrib.layers.l2_regularizer(self.l2_reg))
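# A minimal usage sketch (not in the original file): the int placeholders above are
# resolved to dense vectors with tf.nn.embedding_lookup before entering the LSTM.
# The method name is hypothetical; shapes follow the comments above.
def lookup_sketch(self):
    inputs = tf.nn.embedding_lookup(self.word_embedding, self.x)             # [25, 80, 300]
    aspect = tf.nn.embedding_lookup(self.aspect_embedding, self.aspect_id)   # [25, 300]
    inputs = tf.nn.dropout(inputs, keep_prob=self.keep_prob1)                # word-level dropout
    return inputs, aspect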
def __init__(self, config):
    self.embedding_dim = config.embedding_dim
    self.batch_size = config.batch_size
    self.n_hidden = config.n_hidden
    self.learning_rate = config.learning_rate
    self.n_class = config.n_class
    self.max_len = config.max_len
    self.l2_reg = config.l2_reg
    self.display_step = config.display_step
    self.n_iter = config.n_iter
    self.embedding_file = config.embedding_file_path
    self.word2id_file = config.word_id_file_path
    self.aspect_id_file = config.aspect_id_file_path
    self.train_file = config.train_file_path
    self.test_file = config.test_file_path
    self.val_file = config.validate_file_path

    self.word2id, self.w2v = load_w2v(self.embedding_file, self.embedding_dim)
    self.word_embedding = tf.constant(self.w2v, name='word_embedding')
    self.aspect2id, self.a2v = load_aspect2id(
        self.aspect_id_file, self.word2id, self.w2v, self.embedding_dim)
    self.aspect_embedding = tf.constant(self.a2v, name='aspect_embedding')

    with tf.name_scope('inputs'):
        self.x = tf.placeholder(tf.int32, [None, self.max_len], name='x')
        self.y = tf.placeholder(tf.float32, [None, self.n_class], name='y')
        self.sen_len = tf.placeholder(tf.int32, None, name='sen_len')
        self.aspect_id = tf.placeholder(tf.int32, None, name='aspect_id')
        self.position = tf.placeholder(tf.int32, [None, self.max_len], name='position')

    with tf.name_scope('GRU'):
        self.w_r = tf.get_variable(
            name='W_r',
            shape=[2 * self.n_hidden + 1, self.n_hidden],
            dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer(uniform=True))
        self.u_r = tf.get_variable(
            name='U_r',
            shape=[self.n_hidden, self.n_hidden],
            dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer(uniform=True))
        self.w_z = tf.get_variable(
            name='W_z',
            shape=[2 * self.n_hidden + 1, self.n_hidden],
            dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer(uniform=True))
        self.u_z = tf.get_variable(
            name='U_z',
            shape=[self.n_hidden, self.n_hidden],
            dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer(uniform=True))
        self.w_x = tf.get_variable(
            name='W_x',
            shape=[self.n_hidden, self.n_hidden],
            dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer(uniform=True))
        self.w_g = tf.get_variable(
            name='W_g',
            shape=[2 * self.n_hidden + 1, self.n_hidden],
            dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer(uniform=True))
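# A hedged sketch (not in the original file) of one plausible way the GRU-scope
# matrices above compose into a single recurrent update. `i_al` ([batch, 2 * n_hidden + 1],
# e.g. an attended memory slice plus a scalar score), `e_prev` ([batch, n_hidden]),
# and the exact pairing of w_g/w_x in the candidate are assumptions, not the
# author's confirmed wiring.
def gru_step_sketch(self, i_al, e_prev):
    r = tf.sigmoid(tf.matmul(i_al, self.w_r) + tf.matmul(e_prev, self.u_r))         # reset gate
    z = tf.sigmoid(tf.matmul(i_al, self.w_z) + tf.matmul(e_prev, self.u_z))         # update gate
    e_tilde = tf.tanh(tf.matmul(i_al, self.w_g) + tf.matmul(r * e_prev, self.w_x))  # candidate state
    return (1.0 - z) * e_prev + z * e_tilde                                          # new episode state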
import tensorflow as tf
import numpy as np

from utils import load_w2v, batch_index, load_word_embedding, load_aspect2id, load_inputs_twitter_at

x_raw = ["$T$ is always fresh and hot - ready to eat !", "food"]
y_test = [1]

word_id_mapping, w2v = load_word_embedding(
    'data/restaurant/word_id_new.txt',
    'data/restaurant/rest_2014_word_embedding_300_new.txt',
    300)  # dict(3909), 3910 * 300
aspect_id_mapping, aspect_embed = load_aspect2id(
    'data/restaurant/aspect_id_new.txt', word_id_mapping, w2v, 300)  # dict(1219), 1220 * 300


def change_y_to_onehot(y):
    """Map raw sentiment labels {0, 1, -1} to one-hot rows of length n_class."""
    n_class = 3
    y_onehot_mapping = {0: 0, 1: 1, -1: 2}
    onehot = []
    for label in y:
        tmp = [0] * n_class
        tmp[y_onehot_mapping[label]] = 1
        onehot.append(tmp)
    return np.asarray(onehot, dtype=np.int32)
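# Example usage: each label becomes the one-hot row given by y_onehot_mapping.
if __name__ == '__main__':
    print(change_y_to_onehot([1, 0, -1]))
    # [[0 1 0]
    #  [1 0 0]
    #  [0 0 1]]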