def getAccSentiment(model, words, f, params=None):
    """
    Score every sentence of a tab-separated file with ``model`` and return
    the result of ``accSentiment`` on the predictions and gold labels.

    Each line of the file is split on tabs; field 0 is the sentence text and
    field 1 is the gold label. Sentences are converted with
    ``data_io.getSeq`` and scored in batches of 100.

    :param model: Object exposing ``scoring_function(x, m)``
    :param words: Vocabulary lookup handed to ``data_io.getSeq``
    :param f: Path of the file to evaluate
    :param params: Optional settings object. When it is truthy and its
        ``weightfile`` attribute is truthy, the mask is replaced by
        ``data_io.seq2weight(x, m, params.weight4ind)`` before scoring.
    :return: Whatever ``accSentiment(preds, golds)`` returns. ``golds`` holds
        the second field of each line exactly as read (no conversion).
    """
    batch_size = 100

    def _score_batch(batch):
        """Score one batch of index sequences and return a flat list."""
        x1, m1 = data_io.prepare_data(batch)
        if params and params.weightfile:
            m1 = data_io.seq2weight(x1, m1, params.weight4ind)
        scores = np.squeeze(model.scoring_function(x1, m1))
        # squeeze() collapses a one-sentence batch to a 0-d array, whose
        # tolist() is a bare scalar that cannot be passed to list.extend();
        # atleast_1d restores the batch dimension in that case only.
        return np.atleast_1d(scores).tolist()

    preds = []
    golds = []
    batch = []
    # The context manager closes the file even if scoring raises.
    with open(f, 'r') as handle:
        for line in handle:
            fields = line.split("\t")
            sentence = fields[0]
            score = fields[1]
            batch.append(data_io.getSeq(sentence, words))
            if len(batch) == batch_size:
                preds.extend(_score_batch(batch))
                batch = []
            golds.append(score)

    # Score whatever is left over after the last full batch.
    if batch:
        preds.extend(_score_batch(batch))
    return accSentiment(preds, golds)
def prepare_first_pc(We, words, weight4ind, generation_function, params, fpc):
    """
    Read the sentences in ``../data/<fpc>`` and hand their weighted
    representation to ``generation_function``.

    Every line of the file is converted with ``data_io.getSeq``; the
    sequences are batched with ``data_io.prepare_data`` and the resulting
    mask is replaced by ``data_io.seq2weight(x, m, weight4ind)``.

    :param We: Passed through unchanged as the first argument of
        ``generation_function``
    :param words: Vocabulary lookup handed to ``data_io.getSeq``
    :param weight4ind: Weights handed to ``data_io.seq2weight``
    :param generation_function: Callable invoked as
        ``generation_function(We, x, m, params, fpc)``; its return value is
        discarded
    :param params: Passed through unchanged to ``generation_function``
    :param fpc: File name, joined onto the relative directory ``../data/``
    :return: None
    """
    print("reading file: {}.".format(fpc))
    path = os.path.join("../data/", fpc)
    # Close the file as soon as its lines have been converted.
    with open(path, 'r') as handle:
        seq = [data_io.getSeq(line, words) for line in handle]
    x, m = data_io.prepare_data(seq)
    m = data_io.seq2weight(x, m, weight4ind)
    generation_function(We, x, m, params, fpc)
def sentences2idx(texts, words):
    """
    Take in data, output array of word indices that can be fed into the algorithms.

    Each text is stripped of surrounding whitespace and double quotes, run
    through ``utils.clean_text``, split on single spaces, filtered against
    ``utils.get_stopwords()``, truncated to the first ``MAX_WORDS`` tokens and
    converted with ``data_io.getSeq``.

    :param texts: List of texts
    :param words: Vocabulary lookup handed to ``data_io.getSeq``
    :return: x1, m1. x1[i, :] is the word indices in sentence i, m1[i,:] is the mask for sentence i (0 means no word at the location)
    """
    # Fetch the stopwords once instead of once per text.
    # NOTE(review): assumes utils.get_stopwords() returns the same collection
    # on every call - confirm.
    stopwords = utils.get_stopwords()

    seq = []
    for t in texts:
        # Doing some cleaning of the text
        text = t.strip().strip('"')
        text_clean = utils.clean_text(text)
        tokens = [w for w in text_clean.split(" ") if w not in stopwords]
        tokens = tokens[:MAX_WORDS]
        seq.append(data_io.getSeq(' '.join(tokens), words))
    x1, m1 = data_io.prepare_data(seq)
    return x1, m1