def skipthoughts_articles(articles, max_title_sentences=None, max_article_sentences=None):
    """
    Filter articles so that we have at most `max_title_sentences` sentences in the
    title and `max_article_sentences` sentences in the body of the article. Then, add
    the skipthought vectors for all sentences in the titles and bodies of the articles
    under the `headline_vectors` and `article_vectors` keys.
    """
    article_vectors = []
    st_model = st.load_model(data_path=SKIPTHOUGHTS_DATA)
    for article in tqdm(articles, 'skipthoughts encoding articles'):
        title_sentences = nltk.sent_tokenize(article['Headline'])
        if max_title_sentences is not None and \
                len(title_sentences) > max_title_sentences:
            continue
        article_sentences = nltk.sent_tokenize(article['articleBody'])
        if max_article_sentences is not None and \
                len(article_sentences) > max_article_sentences:
            continue
        vectors = st.encode(st_model, title_sentences + article_sentences,
                            verbose=False, batch_size=128).astype('float16')
        N = len(title_sentences)
        article['headline_vectors'] = vectors[:N]
        article['article_vectors'] = vectors[N:]
        article_vectors.append(article)
    return article_vectors
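# Usage sketch (assumption, not part of the original source): `articles` is taken to be
# a list of dicts with 'Headline' and 'articleBody' string fields, and SKIPTHOUGHTS_DATA
# is assumed to point at the downloaded skip-thoughts model files. The helper name below
# is hypothetical.
def _example_skipthoughts_articles_usage():
    articles = [{'Headline': 'Example headline.',
                 'articleBody': 'First sentence. Second sentence.'}]
    encoded = skipthoughts_articles(articles, max_title_sentences=5, max_article_sentences=40)
    # Each entry now carries float16 matrices with one row per sentence
    # (4800-d with the default combine-skip model).
    return encoded[0]['headline_vectors'].shape, encoded[0]['article_vectors'].shape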
def __init__(self):
    self.trmodel = tools.load_model("data/trainer.npz", "data/dictionary_fry.pkl")
    print("===== Loaded Trained Model =====")
    self.stmodel = skipthoughts.load_model()
    print("===== Loaded Skipthoughts Model =====")
def skipthought_encode(answers):
    """
    Obtains sentence embeddings for each sentence in the answers.
    """
    num_answers = len(answers)
    enc_answers = [None] * num_answers
    cum_sum_sentences = [0]
    sent_count = 0
    for answer in answers:
        sent_count += len(answer)
        cum_sum_sentences.append(sent_count)
    all_sentences = [sent for answer in answers for sent in answer]
    print('Loading pre-trained models...')
    model = skipthoughts.load_model()
    encoder = skipthoughts.Encoder(model)
    print('Encoding sentences...')
    enc_sentences = encoder.encode(all_sentences, verbose=False)
    for i in range(num_answers):
        begin = cum_sum_sentences[i]
        end = cum_sum_sentences[i + 1]
        enc_answers[i] = enc_sentences[begin:end]
    return enc_answers
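# Usage sketch (assumption, not part of the original source): `answers` is a list of
# answers, each already split into a list of sentences; the result keeps one embedding
# matrix per answer, aligned with that grouping. The helper name is hypothetical.
def _example_skipthought_encode_usage():
    answers = [["First sentence.", "Second sentence."], ["Only sentence."]]
    enc_answers = skipthought_encode(answers)
    # enc_answers[0] has one row per sentence (4800-d with the default combine-skip model).
    return enc_answers[0].shape, enc_answers[1].shape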
def get_test_sent(test_file):
    with open(test_file, "r") as f:
        test_sent = []
        for row in f.read().splitlines():
            test_sent.append(row.split(",")[1])
    model = skipthoughts.load_model()
    vecs = skipthoughts.encode(model, test_sent)
    return vecs
def _init_skip_thoughts():
    global skip_thoughts_model
    if skip_thoughts_model:
        return
    global skipthoughts
    from skipthoughts import skipthoughts
    skip_thoughts_model = skipthoughts.load_model()
def build_imgs():
    with open('./data/tags_clean.csv', 'r') as tag_file:
        tag_reader = csv.reader(tag_file, delimiter='\t')
        img_objs = []
        colors = ["red", "orange", "yellow", "green", "blue", "purple",
                  "blonde", "pink", "black", "white", "brown"]
        num = 0
        print("generate captions from training tags.....")
        for row in tag_reader:
            img_id = row[0].split(',')[0]
            tag_row = [row[0].split(',')[1]] + row[1:]
            img = skimage.io.imread('./data/faces/{}.jpg'.format(int(img_id)))
            img = skimage.transform.resize(img, (64, 64))
            match_sent = []
            mismatch_sent = []
            tag_hair = []
            tag_eyes = []
            for tag in tag_row:
                tag = tag.split(':')[0]
                for color in colors:
                    if "{} hair".format(color) in tag:
                        tag_hair.append(tag)
                    if "{} eyes".format(color) in tag:
                        tag_eyes.append(tag)
            for t_h in tag_hair:
                for t_e in tag_eyes:
                    r = random.random()
                    if r > 0.5:
                        match_sent.append('{} {}'.format(t_h, t_e))
                    else:
                        match_sent.append('{} {}'.format(t_e, t_h))
            if match_sent:
                # print(match_sent)
                img_objs.append(realimg(img, match_sent))
                num += 1
                # if num >= 64: break

    model = skipthoughts.load_model()
    k = 0
    for idx, img_obj1 in enumerate(img_objs):
        find = 0
        for img_obj2 in img_objs[1:]:
            for sent in img_obj2.match_sent:
                if sent not in img_obj1.match_sent:
                    img_objs[idx].wimg.append(img_obj2.img)
                    img_obj1.mismatch_sent.append(sent)
                    find += 1
                    if find >= 1:
                        break
            if find >= 1:
                break
        img_obj1.sent2embed(model)
        print("{}/{}".format(k, len(img_objs)))
        k += 1

    with open("./train_data/img_objs_new.pk", "w") as f:
        pk.dump(img_objs, f)
def init(): """ Initialise the Sent2Vec encoder. This includes loading the model, which may take several minutes! This function must be called before any other function in this module. """ global _encoder model = skipthoughts.load_model() _encoder = skipthoughts.Encoder(model)
def build_model(self):
    self.build_memory()
    # embed()
    self.skip_model = skip.load_model()
    # Use keyword arguments so the labels/logits order is unambiguous across TF versions.
    self.loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=self.target, logits=self.logits)
    self.opt = tf.train.GradientDescentOptimizer(self.lr)
    grads = self.opt.compute_gradients(self.loss)
    inc_op = self.global_step.assign_add(1)
    with tf.control_dependencies([inc_op]):
        self.apply_grad_op = self.opt.apply_gradients(grads)
    tf.initialize_all_variables().run()
def load_data():
    ret_data = []
    mapping = pd.ExcelFile("transcriptVoiceMap.xlsx").parse(0)
    # load skipthoughts encoder
    model = st.load_model()
    encoder = st.Encoder(model)
    all_fst_transcript_vectors = encoder.encode([str(x) for x in mapping["fst transcript"]])
    all_snd_transcript_vectors = encoder.encode([str(x) for x in mapping["snd transcript"]])
    # load voice emotions model
    emotions_model = pickle.load(open("emotions_model.sav", 'rb'))
    all_fst_wav_vectors = get_activations(
        emotions_model,
        np.array([get_emotions_vec(x) for x in mapping["fst wav name"]]))
    all_snd_wav_vectors = get_activations(
        emotions_model,
        np.array([get_emotions_vec(x) for x in mapping["snd wav name"]]))
    for idx in range(len(mapping)):
        transcript_vec = all_fst_transcript_vectors[idx]
        snd_transcript_vec = all_snd_transcript_vectors[idx]
        transcript_vec = np.concatenate(
            (np.abs(transcript_vec - snd_transcript_vec),
             transcript_vec * snd_transcript_vec))
        # load agent execution flag (0=executed, 1=not executed):
        output = mapping["output"][idx]
        emotions_vec = all_fst_wav_vectors[idx]
        emotions_vec = np.concatenate((emotions_vec, all_snd_wav_vectors[idx]))
        fst_wav = mapping["fst wav name"][idx]
        snd_wav = mapping["snd wav name"][idx]
        frames_with_speech = count_frames_with_speech(fst_wav)
        frames_with_speech = count_frames_with_speech(snd_wav) - frames_with_speech
        label_idx = mapping["label"][idx]
        label = np.zeros(categories)
        label[label_idx] = 1
        ret_data.append((transcript_vec, [output], emotions_vec,
                         [frames_with_speech], label))
    return ret_data
def create_embedding_npy(json_file=''):
    model = st.load_model()
    eyes_color_list = ['gray', 'aqua', 'orange', 'red', 'blue', 'black',
                       'pink', 'green', 'brown', 'purple', 'yellow']
    hair_color_list = ['gray', 'aqua', 'pink', 'white', 'red', 'purple',
                       'blue', 'black', 'green', 'brown', 'orange']
    fidx2arridx_dict = {}
    jobj = json.load(open(json_file, 'r'))
    tag_strs = []
    count = 0
    for fidx, color_d in jobj.items():
        if len(color_d['eyes']) == 1 and len(color_d['hair']) == 1:
            eyes_color = eyes_color_list[color_d['eyes'][0]]
            hair_color = hair_color_list[color_d['hair'][0]]
            tag_str = ' '.join([hair_color, 'hair', eyes_color, 'eyes'])
            tag_strs.append(tag_str)
            fidx2arridx_dict[fidx] = count
            count += 1
    tag_embeddings = st.encode(model, tag_strs)
    print(tag_embeddings.shape)
    print(len(fidx2arridx_dict))
    with open('fidx2arridx.json', 'w') as f:
        json.dump(fidx2arridx_dict, f)
    np.save('tags_embedding.npy', tag_embeddings)
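# Read-back sketch (assumption, not part of the original source): how the files written
# above could be consumed. `some_fidx` is a hypothetical key from the tag json, and the
# helper name is invented for illustration.
def _example_load_tag_embedding(some_fidx):
    tag_embeddings = np.load('tags_embedding.npy')
    with open('fidx2arridx.json', 'r') as f:
        fidx2arridx = json.load(f)
    # Row of the embedding matrix for the image whose file index is `some_fidx`.
    return tag_embeddings[fidx2arridx[some_fidx]]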
def main():
    caption_file = "captions.txt"
    training_image_file = "train_images4.txt"
    captions = []
    with open(caption_file) as f:
        line_list = f.read().split("\n")
    line_list = line_list[7500:9000]
    f1 = open(training_image_file, "w")
    for i in range(len(line_list)):
        img = line_list[i].split("\t")[0]
        cap = line_list[i].split("\t")[1]
        if len(cap) > 0:
            captions.append(cap)
            f1.write(img + "\n")
    f1.close()
    model = skipthoughts.load_model()
    caption_vectors = skipthoughts.encode(model, captions)
    h = h5py.File("/content/drive/MyDrive/train_caption_vectors4.hdf5", "w")
    h.create_dataset("vectors", data=caption_vectors)
    h.close()
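# Read-back sketch (assumption, not part of the original source): loading the caption
# vectors written by main() above from the same HDF5 path. The helper name is hypothetical.
def _example_load_caption_vectors():
    with h5py.File("/content/drive/MyDrive/train_caption_vectors4.hdf5", "r") as h:
        caption_vectors = h["vectors"][:]
    return caption_vectors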
def main():
    parser = argparse.ArgumentParser(
        description='Evaluate a model on the quora question-pair dataset.')
    parser.add_argument('--quora-data-dir', required=True,
                        help='path to the directory containing the quora data')
    parser.add_argument('--st-model-dir', required=True,
                        help='path to the directory containing the skipthoughts model')
    parser.add_argument('--output-dir', default='.',
                        help='path to the directory to write to')
    parser.add_argument('-v', '--verbose', action='store_true')
    args = parser.parse_args()

    output_file = os.path.join(args.output_dir, 'oov_stats.txt')
    with log.FileWriterStdoutPrinter(output_file) as writer:
        print("Loading skipthoughts model...")
        model = st.load_model(args.st_model_dir)
        print("Initializing skipthoughts word dict...")
        word_dict = st.init_word_dict(model)
        print("Analyzing word dict...")
        analyze_dict(word_dict, writer)
        print("Loading training set...")
        train = du.load_csv(os.path.join(args.quora_data_dir, 'train.csv'))
        writer.emit_line("Analyzing word counts in train.csv...")
        analyze_oov(word_dict, train, writer, args.output_dir, 'train')
        # Be sure to write data to disk for train before moving on to test,
        # which is much bigger.
        print("Loading test set...")
        test = du.load_csv(os.path.join(args.quora_data_dir, 'test.csv'))
        writer.emit_line("Analyzing word counts in test.csv...")
        analyze_oov(word_dict, test, writer, args.output_dir, 'test')
def load_encoder(model_dir):
    model = st.load_model(model_dir)
    en = st.Encoder(model)
    return en
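# Usage sketch (assumption, not part of the original source): the path below is
# hypothetical and should point at a downloaded skip-thoughts model directory; the
# helper name is invented for illustration.
def _example_encode_with_loaded_encoder():
    en = load_encoder('/path/to/skipthoughts/data')
    # Encoder.encode returns one embedding per input sentence.
    return en.encode(['a sentence to embed', 'another sentence'])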
def gen_model(): """ Get the Skipthoughts model to be used in encoding """ model = skipthoughts.load_model() return model
from __future__ import print_function

from gutenburg import Bookshelf
from skipthoughts import skipthoughts
import numpy as np
from unidecode import unidecode
import progressbar as PB
import os
import time
import string
from nltk import sent_tokenize

skipthoughts_model = skipthoughts.load_model(
    data_path="/home/micha/work/tldr/skipthoughts/data/"
)
EOP = np.ones(4800)
EOC = -1 * np.ones(4800)


def null(*args, **kwargs):
    pass


print = null


def skipthoughts_encode(sentences, model=skipthoughts_model):
    vectors = None
    print(time.time(), len(sentences), "encoding")
    vectors = skipthoughts.encode(
        model, sentences, preprocess=lambda x: x, use_norm=False, verbose=False
    )
    print(time.time(), vectors.shape, "done")
    for i, sent in enumerate(sentences):
import skipthoughts.skipthoughts as skipthoughts

model = skipthoughts.load_model()
encoder = skipthoughts.Encoder(model)

import skipthoughts.eval_sick2 as eval_sick2
from keras.models import load_model
import gensim
import os
import collections
import smart_open
import random
import time
import numpy as np
import sys

# limit gpu usage
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.3
set_session(tf.Session(config=config))

prefix = sys.argv[1]
q_file = prefix + '_q.txt.flat.filter'
a_file = prefix + '_a.txt.flat.filter'
# q_speaker = prefix + '_q_speaker.txt'
# a_speaker = prefix + '_a_speaker.txt'
# speaker = sys.argv[2]
top = int(sys.argv[2])
# q_para_file = prefix + '_q_paragraph.txt'