Ejemplos de deepmoji_feature_encoding en Python, ejemplos de deepmoji.model_def.deepmoji_feature_encoding en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: test_finetuning.py Proyecto: Dobatymo/DeepMoji

def test_encode_texts():
    """ Text encoding is stable.

    Tokenizes a fixed list of sentences, feeds them to the model returned by
    deepmoji_feature_encoding and checks the first five dimensions of the
    encoding averaged over axis 0 (rounded to 3 decimals) against reference
    values.
    """

    TEST_SENTENCES = [u'I love mom\'s cooking',
                      u'I love how you never reply back..',
                      u'I love cruising with my homies',
                      u'I love messing with yo mind!!',
                      u'I love you and now you\'re just gone..',
                      u'This is shit',
                      u'This is the shit']

    # The same length is handed to the tokenizer and to the model builder.
    maxlen = 30

    with open(VOCAB_PATH, 'r') as f:
        vocabulary = json.load(f)
    st = SentenceTokenizer(vocabulary, maxlen)
    # Only the first of the three returned values is needed here.
    tokenized, _, _ = st.tokenize_sentences(TEST_SENTENCES)

    model = deepmoji_feature_encoding(maxlen, PRETRAINED_PATH)

    encoding = model.predict(tokenized)
    # Average over axis 0, keep the first five entries, round to 3 decimals
    # so the comparison matches the precision of the reference values.
    avg_across_sentences = np.around(np.mean(encoding, axis=0)[:5], 3)
    assert np.allclose(avg_across_sentences, np.array([-0.023, 0.021, -0.037, -0.001, -0.005]))

Ejemplo n.º 2

0

Mostrar archivo

def test_encode_texts():
    """ Text encoding is stable.

    Runs a fixed set of sentences through the tokenizer and the model built
    by deepmoji_feature_encoding, then compares the first five dimensions of
    the axis-0 mean of the output (rounded to 3 decimals) with hard-coded
    reference numbers.
    """

    TEST_SENTENCES = [
        u'I love mom\'s cooking', u'I love how you never reply back..',
        u'I love cruising with my homies', u'I love messing with yo mind!!',
        u'I love you and now you\'re just gone..', u'This is shit',
        u'This is the shit'
    ]

    # Shared by the tokenizer and the model builder below.
    maxlen = 30

    with open(VOCAB_PATH, 'r') as f:
        vocabulary = json.load(f)
    st = SentenceTokenizer(vocabulary, maxlen)
    # tokenize_sentences returns three values; only the first is used.
    tokenized, _, _ = st.tokenize_sentences(TEST_SENTENCES)

    model = deepmoji_feature_encoding(maxlen, PRETRAINED_PATH)

    encoding = model.predict(tokenized)
    # Round to 3 decimals so the result is comparable with the 3-decimal
    # reference values in the assertion.
    avg_across_sentences = np.around(np.mean(encoding, axis=0)[:5], 3)
    assert np.allclose(avg_across_sentences,
                       np.array([-0.023, 0.021, -0.037, -0.001, -0.005]))

Ejemplo n.º 3

0

Mostrar archivo

Archivo: generate_deepmoji_feats.py Proyecto: derekhoward/CLP_shared_task_code

def main():
    """Build post-level DeepMoji features and write them to a CSV file.

    Reads '../data/interim/sentences.csv' (the ``body`` and ``post_id``
    columns are used), tokenizes every sentence, runs the tokens through the
    model from deepmoji_feature_encoding and the model from deepmoji_emojis,
    aggregates both outputs per ``post_id`` (mean, max, min), merges the two
    tables on their index and writes the result to
    '../data/interim/all_sent_level_deepmoji.csv'.

    NOTE(review): uses the ``unicode`` builtin, so this presumably targets
    Python 2 -- confirm before running under Python 3.
    """
    df = pd.read_csv('../data/interim/sentences.csv')

    # Passed to the tokenizer and to both model builders.
    maxlen = 30

    print('Tokenizing using dictionary from {}'.format(VOCAB_PATH))
    with open(VOCAB_PATH, 'r') as f:
        vocabulary = json.load(f)

    st = SentenceTokenizer(vocabulary, maxlen)

    sentences = []
    for sent in df.body.tolist():
        sent = unicode(str(sent), "utf-8")
        if sent.strip() == "":
            # Whitespace-only sentences are replaced by a placeholder word.
            sent = u'blank'
        sentences.append(sent)

    # Only the first of the three returned values is used.
    tokenized, _, _ = st.tokenize_sentences(sentences)

    # generate full deepmoji features for sentences
    print('Loading model from {}.'.format(PRETRAINED_PATH))
    model = deepmoji_feature_encoding(maxlen, PRETRAINED_PATH)
    model.summary()

    print('Encoding texts with deepmoji features...')
    encoding = model.predict(tokenized)

    deepmoji_encodings = pd.DataFrame(encoding)
    # Index rows by post id so groupby('post_id') can group on the index.
    deepmoji_encodings.index = df.post_id

    deepmoji_post_scores = deepmoji_encodings.groupby('post_id').agg(
        ['mean', 'max', 'min'])
    deepmoji_post_scores = flatten_cols(deepmoji_post_scores)
    deepmoji_post_scores = deepmoji_post_scores.add_prefix('deepmoji_')

    # generate 64 emoji encodings
    print('Loading model from {}.'.format(PRETRAINED_PATH))
    model = deepmoji_emojis(maxlen, PRETRAINED_PATH)
    model.summary()

    print('Running emoji predictions...')
    prob = model.predict(tokenized)
    emoji_scores = pd.DataFrame(prob)
    # Here the prefix is applied before aggregation, unlike the block above.
    emoji_scores = emoji_scores.add_prefix('emoji_')
    emoji_scores.index = df.post_id

    emoji_post_scores = emoji_scores.groupby('post_id').agg(
        ['mean', 'max', 'min'])
    emoji_post_scores = flatten_cols(emoji_post_scores)

    print('deepmoji features shape: {}'.format(deepmoji_post_scores.shape))
    print('emoji features shape: {}'.format(emoji_post_scores.shape))
    # Both tables are indexed by post_id, so join them on the index.
    total_feats = deepmoji_post_scores.merge(emoji_post_scores,
                                             left_index=True,
                                             right_index=True)
    print('total features shape: {}'.format(total_feats.shape))
    total_feats.to_csv('../data/interim/all_sent_level_deepmoji.csv')

Ejemplo n.º 4

0

Mostrar archivo

Archivo: deep_moji.py Proyecto: ysenarath/opinion-lab

 def initialize(self):
     """Load the vocabulary and build the tokenizer and encoding model.

     Reads 'vocabulary.json' from ``self.model_path`` and stores a
     SentenceTokenizer in ``self._st_``; stores the result of
     deepmoji_feature_encoding, given the path of 'deepmoji_weights.hdf5'
     in the same directory, in ``self._model_``.
     """
     weights_file = os.path.join(self.model_path, 'deepmoji_weights.hdf5')
     vocab_file = os.path.join(self.model_path, 'vocabulary.json')

     with open(vocab_file, 'r') as handle:
         token_index = json.load(handle)

     self._st_ = SentenceTokenizer(token_index, self.max_len)
     self._model_ = deepmoji_feature_encoding(
         self.max_len, weights_file, self.return_attention)

Ejemplo n.º 5

0

Mostrar archivo

def use_deepmoji(maxlen=MAXLEN,
                 vocab_path=DEEPMOJI_VOCAB_FILE,
                 weights_path=DEEPMOJI_WEIGHT_FILE):
    """Create the sentence tokenizer and the feature-encoding model.

    Args:
        maxlen: Length passed to both SentenceTokenizer and
            deepmoji_feature_encoding.
        vocab_path: Path of the JSON vocabulary file to load.
        weights_path: Path handed to deepmoji_feature_encoding.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    print('Tokenizing using dictionary from {}'.format(vocab_path))
    with open(vocab_path, 'r') as vocab_file:
        token_index = json.load(vocab_file)
    tokenizer = SentenceTokenizer(token_index, maxlen)

    print('Loading model from {}.'.format(weights_path))
    encoder = deepmoji_feature_encoding(maxlen, weights_path)
    # Print the model summary before handing the model back.
    encoder.summary()

    return tokenizer, encoder

Ejemplo n.º 6

0

Mostrar archivo

Archivo: encode_texts.py Proyecto: jslow421/DeepMoji-Python3

from deepmoji.global_variables import PRETRAINED_PATH, VOCAB_PATH

# Example script: tokenize a handful of sentences, encode them with the
# model returned by deepmoji_feature_encoding and print part of the result.

# Sentences to encode.
TEST_SENTENCES = [
    u"I love mom's cooking",
    u"I love how you never reply back..",
    u"I love cruising with my homies",
    u"I love messing with yo mind!!",
    u"I love you and now you're just gone..",
    u"This is shit",
    u"This is the shit",
]

maxlen = 30  # passed to the tokenizer and to the model builder
batch_size = 32

# --- Tokenization -----------------------------------------------------------
print('Tokenizing using dictionary from {}'.format(VOCAB_PATH))
with open(VOCAB_PATH, 'r') as f:
    vocabulary = json.load(f)

st = SentenceTokenizer(vocabulary, maxlen)
# Only the first of the three returned values is used.
tokenized, _, _ = st.tokenize_sentences(TEST_SENTENCES)

# --- Model ------------------------------------------------------------------
print('Loading model from {}.'.format(PRETRAINED_PATH))
model = deepmoji_feature_encoding(maxlen, PRETRAINED_PATH)
model.summary()

# --- Encoding ---------------------------------------------------------------
print('Encoding texts..')
encoding = model.predict(tokenized)

# Show the first five values of the first sentence's encoding.
print('First 5 dimensions for sentence: {}'.format(TEST_SENTENCES[0]))
print(encoding[0, :5])

# From here the encodings can be visualized to inspect differences,
# used as input to a logistic regression classifier,
# or processed in any other way.

Ejemplo n.º 7

0

Mostrar archivo

Archivo: encode_texts.py Proyecto: Dobatymo/DeepMoji

# Example script: encode a fixed list of sentences with the model built by
# deepmoji_feature_encoding and print the start of the first encoding.

# Input sentences.
TEST_SENTENCES = [
    u"I love mom's cooking", u"I love how you never reply back..",
    u"I love cruising with my homies", u"I love messing with yo mind!!",
    u"I love you and now you're just gone..", u"This is shit",
    u"This is the shit"
]

# maxlen is shared by the tokenizer and the model builder.
maxlen = 30
batch_size = 32

# Step 1: load the vocabulary and tokenize the sentences.
print('Tokenizing using dictionary from {}'.format(VOCAB_PATH))
with open(VOCAB_PATH, 'r') as f:
    vocabulary = json.load(f)
st = SentenceTokenizer(vocabulary, maxlen)
# tokenize_sentences returns three values; only the first is kept.
tokenized, _, _ = st.tokenize_sentences(TEST_SENTENCES)

# Step 2: build the model and print its summary.
print('Loading model from {}.'.format(PRETRAINED_PATH))
model = deepmoji_feature_encoding(maxlen, PRETRAINED_PATH)
model.summary()

# Step 3: run the tokenized input through the model.
print('Encoding texts..')
encoding = model.predict(tokenized)

# Step 4: print the first five values of the encoding for sentence 0.
print('First 5 dimensions for sentence: {}'.format(TEST_SENTENCES[0]))
print(encoding[0, :5])

# The encodings can now be visualized to compare sentences,
# fed to a logistic regression classifier,
# or used for whatever else is needed.