Code Example #1
def _generate_mft_features(model, mft_axes, word_data):
    # For each MFT axis, compute two corpus-level features:
    #   'framing-bias': count-weighted mean cosine similarity between the
    #                   axis and each word's embedding
    #   'intensity':    count-weighted variance of those similarities
    mft_features = {
        k: {
            'framing-bias': 0,
            'intensity': 0
        }
        for k in mft_axes
    }
    for category in mft_axes:
        # reset per category; accumulating across categories would inflate
        # the denominator for every category after the first
        total_count = 0
        for word, attribs in word_data.items():
            if word not in model.wv:
                continue
            count = attribs['count']
            mft_features[category]['framing-bias'] += (
                count * cos_sim(mft_axes[category], model.wv.get_vector(word)))
            total_count += count
        if total_count == 0:
            mft_features[category]['framing-bias'] = 0
        else:
            mft_features[category]['framing-bias'] /= total_count
        for word, attribs in word_data.items():
            if word not in model.wv:
                continue
            count = attribs['count']
            mft_features[category]['intensity'] += (count * (
                (cos_sim(mft_axes[category], model.wv.get_vector(word)) -
                 mft_features[category]['framing-bias'])**2))
        if total_count == 0:
            mft_features[category]['intensity'] = 0
        else:
            mft_features[category]['intensity'] /= total_count
    return mft_features
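
This example relies on a project-local cos_sim helper, not shown here, that takes two 1-D vectors (an MFT axis and a word embedding from model.wv) and returns a scalar. A minimal NumPy sketch of the standard cosine-similarity formula it presumably implements:

import numpy as np

def cos_sim(a, b):
    # cosine similarity of two 1-D vectors; returns 0.0 if either is a zero vector
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    return float(np.dot(a, b) / denom) if denom else 0.0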
Code Example #2
	def emit_new_weight(self, inp_h, weight_h, memory_h):
		# content addressing: emit a key, a key strength (beta), and an
		# interpolation gate (g) from the controller output
		key = T.dot(inp_h, self.key_w) + self.key_b
		beta = T.nnet.softplus(T.dot(inp_h, self.beta_w) + self.beta_b)
		g = T.nnet.sigmoid(T.dot(inp_h, self.g_w) + self.g_b)

		# content-based weighting over the memory rows
		weight_c = tools.vector_softmax(beta * tools.cos_sim(key, memory_h))

		# location addressing: interpolate with the previous weighting,
		# then pass it through a learned location layer
		weight_g = g * weight_c + (1 - g) * weight_h
		weight_new = T.tanh(T.dot(weight_g, self.location_w) + self.location_b)

		# erase and add vectors for the subsequent memory write
		erase = T.nnet.sigmoid(T.dot(inp_h, self.erase_w) + self.erase_b)
		add = T.dot(inp_h, self.add_w) + self.add_b
		return weight_new, erase, add
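
Both NTM head examples (#2 and #4) depend on two helpers from the project's tools module, cos_sim and vector_softmax, whose implementations are not shown. A minimal Theano sketch of what they plausibly compute, following the standard content-addressing formula w_c = softmax(beta * cos(key, M_i)) from Graves et al. (2014); the 1e-6 epsilon is an assumption added for numerical stability, standing in for whatever small constant the project uses:

import theano.tensor as T

def cos_sim(key, memory):
    # cosine similarity between a key vector (M,) and each memory row (N, M)
    key_norm = T.sqrt(T.sum(key ** 2)) + 1e-6
    mem_norms = T.sqrt(T.sum(memory ** 2, axis=1)) + 1e-6
    return T.dot(memory, key) / (mem_norms * key_norm)  # shape (N,)

def vector_softmax(x):
    # numerically stable softmax over a 1-D tensor
    e = T.exp(x - T.max(x))
    return e / T.sum(e)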
Code Example #3
from tools import df_split, cos_sim
import random
import statsmodels.api as sm
import pandas as pd
import numpy as np

if __name__ == "__main__":
    # set random seed for consistency, load data
    random.seed(0)
    df_q = pd.read_csv('data/quora_duplicate_questions.tsv', sep="\t")
    q1_split, q1_full = df_split(df_q, 'question1')
    q2_split, q2_full = df_split(df_q, 'question2')
    # compute cosine similarity and question-length difference (the two model features)
    cos = cos_sim(q1_split, q2_split)
    len_diff = (q1_full.str.len() - q2_full.str.len()).values
    # prepare features
    x = np.vstack([cos, len_diff]).T
    y = df_q['is_duplicate'].values
    x = sm.add_constant(x)
    # randomly sample n/2 indices for the training set; the remainder form the test set (a 50/50 split)
    n = y.shape[0]
    train_ind = random.sample(range(0, n), int(n / 2))
    test_ind = list(set(range(0, n)) - set(train_ind))

    xtrain, xtest = x[train_ind], x[test_ind]
    ytrain, ytest = y[train_ind], y[test_ind]
    # train logistic regression, find accuracy on test set
    model = sm.Logit(ytrain, xtrain).fit()
    ypred = (model.predict(xtest) > 0.5).astype(int)
    accuracy = (ypred == ytest).sum() / len(ypred)
    print(accuracy)
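
Unlike the 1-D usage in example #1, cos_sim here apparently operates row-wise over two aligned collections of question representations, returning one similarity per question pair. Assuming df_split yields an (n, d) array with one vector per question (an assumption; the real df_split and cos_sim live in the project's tools module and are not shown), a vectorized NumPy sketch:

import numpy as np

def cos_sim(a, b, eps=1e-12):
    # row-wise cosine similarity between two aligned (n, d) matrices
    dots = np.einsum('ij,ij->i', a, b)
    norms = np.linalg.norm(a, axis=1) * np.linalg.norm(b, axis=1)
    return dots / np.maximum(norms, eps)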
Code Example #4
File: head.py Project: dandxy89/NTMtranslation
	def emit_new_weight(self, inp_h, weight_h, memory_h):
		# content addressing: emit a key, a key strength (beta), and an
		# interpolation gate (g) from the controller output
		key = T.dot(inp_h, self.key_w) + self.key_b
		beta = T.nnet.softplus(T.dot(inp_h, self.beta_w) + self.beta_b)
		g = T.nnet.sigmoid(T.dot(inp_h, self.g_w) + self.g_b)  # unused in this variant

		# content-based weighting over the memory rows
		weight_c = tools.vector_softmax(beta * tools.cos_sim(key, memory_h))

		# This variant omits the interpolation, shift, and sharpening stages
		# (all left commented out in the original source) and uses the
		# content-based weighting directly.
		weight_new = weight_c

		# erase and add vectors for the subsequent memory write
		erase = T.nnet.sigmoid(T.dot(inp_h, self.erase_w) + self.erase_b)
		add = T.dot(inp_h, self.add_w) + self.add_b
		return weight_new, erase, add
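
For context, the (weight_new, erase, add) triple returned by both head examples feeds the NTM memory write, M_t(i) = M_{t-1}(i) * (1 - w(i) * e) + w(i) * a, per Graves et al. (2014). A hypothetical consumer in the same Theano style (write_memory is not a function from either project, just a sketch of the write step):

def write_memory(memory_h, weight, erase, add):
    # weight: (N,) attention over memory rows; erase, add: (M,) per-feature vectors
    memory_erased = memory_h * (1.0 - T.outer(weight, erase))
    return memory_erased + T.outer(weight, add)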