Example #1
0
def getAccSentiment(model,words,f, params=None):
    """Score the sentences of a tab-separated file and evaluate the predictions.

    Every line is split on tabs: field 0 is the sentence text and field 1 is
    kept, as the raw string read from the file, as its gold label. Sentences
    are converted with ``data_io.getSeq`` and scored in batches of 100 via
    ``model.scoring_function``.

    :param model: object exposing ``scoring_function(x, m)``.
    :param words: vocabulary object handed to ``data_io.getSeq``.
    :param f: path of the file to read.
    :param params: optional settings object. When it is truthy and
        ``params.weightfile`` is truthy, the mask returned by
        ``data_io.prepare_data`` is replaced with
        ``data_io.seq2weight(x, m, params.weight4ind)``.
    :return: the result of ``accSentiment(preds, golds)``.
    :raises IndexError: if a line contains no tab character.
    """
    batch_size = 100
    preds = []
    golds = []

    def score_batch(batch):
        # Score one batch of sequences and append its predictions to ``preds``.
        x1, m1 = data_io.prepare_data(batch)
        if params and params.weightfile:
            m1 = data_io.seq2weight(x1, m1, params.weight4ind)
        scores = model.scoring_function(x1, m1)
        # squeeze() collapses a one-sentence batch to a 0-d array, whose
        # tolist() is a bare scalar that extend() cannot consume; atleast_1d
        # keeps the result iterable without touching larger batches.
        scores = np.atleast_1d(np.squeeze(scores))
        preds.extend(scores.tolist())

    pending = []
    # The context manager guarantees the file is closed, even on error.
    with open(f, 'r') as handle:
        for line in handle:
            fields = line.split("\t")
            sentence = fields[0]
            score = fields[1]
            pending.append(data_io.getSeq(sentence, words))
            if len(pending) == batch_size:
                score_batch(pending)
                pending = []
            golds.append(score)

    # Score whatever is left over after the last full batch.
    if pending:
        score_batch(pending)
    return accSentiment(preds,golds)
Example #2
0
def prepare_first_pc(We, words, weight4ind, generation_function, params, fpc):
    """Read a sentence file and hand its weighted index matrix to a callback.

    The file ``fpc`` is looked up under ``../data/`` (relative to the current
    working directory). Each line is converted with ``data_io.getSeq``, the
    sequences are batched with ``data_io.prepare_data``, and the resulting
    mask is replaced by ``data_io.seq2weight(x, m, weight4ind)``.

    :param We: passed through unchanged to ``generation_function``.
    :param words: vocabulary object handed to ``data_io.getSeq``.
    :param weight4ind: weights handed to ``data_io.seq2weight``.
    :param generation_function: callable invoked as
        ``generation_function(We, x, m, params, fpc)``; its return value is
        ignored.
    :param params: passed through unchanged to ``generation_function``.
    :param fpc: file name, relative to ``../data/``.
    :return: None.
    """
    print("reading file: {}.".format(fpc))
    path = os.path.join("../data/", fpc)
    # Use a context manager so the handle is released once the lines are read.
    with open(path, 'r') as handle:
        seq = [data_io.getSeq(line, words) for line in handle]
    x, m = data_io.prepare_data(seq)
    m = data_io.seq2weight(x, m, weight4ind)
    generation_function(We, x, m, params, fpc)
def sentences2idx(texts, words):
    """
    Take in data, output arrays of word indices that can be fed into the algorithms.

    Each text is stripped of surrounding whitespace and double quotes, cleaned
    with ``utils.clean_text``, split on single spaces, filtered against
    ``utils.get_stopwords()`` and truncated to the first ``MAX_WORDS`` tokens
    before being converted with ``data_io.getSeq``.

    :param texts: List of texts
    :param words: vocabulary object handed to ``data_io.getSeq``
        (presumably a word -> index mapping; confirm against ``data_io``)
    :return: x1, m1 as produced by ``data_io.prepare_data``. x1[i, :] is the
        word indices in sentence i, m1[i, :] is the mask for sentence i
        (0 means no word at the location)
    """
    # The stopword collection does not depend on the text, so fetch it once
    # rather than once per sentence.
    stopwords = utils.get_stopwords()
    seq = []
    for t in texts:
        # Doing some cleaning of the text
        text = t.strip().strip('"')
        text_clean = utils.clean_text(text)
        tokens = [w for w in text_clean.split(" ") if w not in stopwords]
        tokens = tokens[:MAX_WORDS]
        seq.append(data_io.getSeq(' '.join(tokens), words))
    x1, m1 = data_io.prepare_data(seq)
    return x1, m1