# coding: utf-8
import sys

sys.path.append('..')
from common.util import preprocess, create_co_matrix, most_similar

# Toy one-sentence corpus. The final word is spelled "hello" so the
# vocabulary contains the intended token rather than a misspelled one.
text = 'You say goodbye and I say hello.'

# preprocess yields three values; from the names they are presumably the
# id-encoded corpus and the word<->id lookup tables (confirm in common.util).
corpus, word_to_id, id_to_word = preprocess(text)
vocab_size = len(word_to_id)

# Co-occurrence matrix built from the corpus and the vocabulary size.
C = create_co_matrix(corpus, vocab_size)

# Query the helper for the entries most similar to 'you', limited via top=5.
most_similar('you', word_to_id, id_to_word, C, top=5)
Beispiel #2
0
import sys
from common.trainer import Trainer
from common.optimizer import Adam
from common.util import preprocess, create_contexts_target, convert_one_hot
from my_ch03.simple_cbow import SimpleCBOW

# Hyperparameters for the toy CBOW run.
window_size, hidden_size = 1, 3
batch_size, max_epoch = 3, 1000

# One-sentence corpus and its vocabulary lookups.
text = "You say goodbye and I say hello."
corpus, w2i, i2w = preprocess(text)
vocab_size = len(w2i)

# Derive (contexts, target) from the corpus, then one-hot encode each of
# them against the vocabulary size.
contexts, target = create_contexts_target(corpus, window_size)
target = convert_one_hot(target, vocab_size)
contexts = convert_one_hot(contexts, vocab_size)

# Model and optimizer are created in the same order as before, then handed
# to the trainer.
model, optimizer = SimpleCBOW(vocab_size, hidden_size), Adam()
trainer = Trainer(model, optimizer)

# Train, then ask the trainer to plot its result.
trainer.fit(contexts, target, max_epoch, batch_size)
trainer.plot()
Beispiel #3
0
import sys
sys.path.append('..')
import numpy as np
import matplotlib.pyplot as plt
from common.util import preprocess,create_to_matrix,cos_similarity,most_similar,ppmi
from dataset import ptb
text = 'You say goodbye and I say hello.'
corpus, wordtoid, idtoword = preprocess(text)

# Hand-built 7x7 co-occurrence matrix, written out row by row.
C = np.array(
    [
        [0, 1, 0, 0, 0, 0, 0],
        [1, 0, 1, 0, 1, 1, 0],
        [0, 1, 0, 1, 0, 0, 0],
        [0, 0, 1, 0, 1, 0, 0],
        [0, 1, 0, 1, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 1],
        [0, 0, 0, 0, 0, 1, 0],
    ],
    dtype=np.int32,
)

# Show two rows by position and one row looked up through the vocabulary.
print(C[0])
print(C[4])
print(C[wordtoid['goodbye']])

# Replace the hand-built matrix with one computed from the corpus.
# NOTE(review): the helper is imported as create_to_matrix; confirm that
# common.util really exports that name (it may be spelled create_co_matrix).
vocab_size = len(wordtoid)
C = create_to_matrix(corpus, vocab_size, window_size=1)

# Cosine similarity between the matrix rows for 'you' and 'i'.
c0 = C[wordtoid['you']]
c1 = C[wordtoid['i']]
print(cos_similarity(c0, c1))
Beispiel #4
0
import sys
sys.path.append('..')
from common.trainer import Trainer
from common.optimizer import Adam
from simple_cbow import SimpleCBOW
from simple_skip_gram import SimpleSkipGram
from common.util import preprocess, create_contexts_target, convert_one_hot

# Hyperparameters for the toy word2vec setup.
window_size = 1
hidden_size = 5
batch_size = 3
max_epoch = 1000

text = "You say goodbye and I say hello."
# corpus: the label (id) of every word in the sentence, '.' included.
corpus, word_to_id, id_to_word = preprocess(text)

# Number of distinct word types in the sentence (7 per the original note).
vocab_size = len(word_to_id)

# Contexts of each word within window_size, plus the word labels.
contexts, target = create_contexts_target(corpus, window_size)

# One-hot encode against vocab_size; the original notes give the resulting
# shapes as (6, 7) for target and (6, 2, 7) for contexts.
target = convert_one_hot(target, vocab_size)
contexts = convert_one_hot(contexts, vocab_size)

# Debug stop: dump the encoded contexts and end the script with status 1.
# sys.exit is used instead of the bare exit() helper, which is injected by
# the site module for interactive sessions and is absent under `python -S`.
print(contexts)
sys.exit(1)

# NOTE: everything below is unreachable while the debug stop above remains.
#model = SimpleSkipGram(vocab_size, hidden_size)
model = SimpleCBOW(vocab_size, hidden_size)
optimizer = Adam()
trainer = Trainer(model, optimizer)
Beispiel #5
0
import sys, os
sys.path.append(os.pardir)

from common.np import np
from common.util import preprocess, create_co_matrix, ppmi

# One-sentence corpus -> co-occurrence matrix -> PPMI matrix.
text = 'you say goodbye and I say hello.'
corpus, w2id, id2w = preprocess(text)
vocab_size = len(w2id)
C = create_co_matrix(corpus, vocab_size)
W = ppmi(C)

# Factorize the PPMI matrix.
# NOTE(review): if common.np re-exports NumPy, linalg.svd returns the third
# factor already transposed (Vh), so V here would be V^T -- confirm.
U, S, V = np.linalg.svd(W)

# Report each array's shape followed by its values; W is printed as-is,
# the SVD factors are rounded to three decimals.
print('W=>{}'.format(W.shape))
print(W)
print('U=>{}'.format(U.shape))
print(np.round(U, 3))
print('S=>{}'.format(S.shape))
print(np.round(S, 3))
print('V=>{}'.format(V.shape))
print(np.round(V, 3))
Beispiel #6
0
    def testPreprocess(self):
        expected_result = r"""# 1 "test_files\\main.c"
# 1 "<built-in>"
# 1 "<command-line>"
# 1 "test_files\\main.c"
# 1 "D:\\Machine Learning\\SyntaxErrorRecoveryFramework\\pycparser\\utils\\fake_libc_include/stdio.h" 1
# 1 "D:\\Machine Learning\\SyntaxErrorRecoveryFramework\\pycparser\\utils\\fake_libc_include/_fake_defines.h" 1
# 41 "D:\\Machine Learning\\SyntaxErrorRecoveryFramework\\pycparser\\utils\\fake_libc_include/_fake_defines.h"
typedef int va_list;
# 2 "D:\\Machine Learning\\SyntaxErrorRecoveryFramework\\pycparser\\utils\\fake_libc_include/stdio.h" 2
# 1 "D:\\Machine Learning\\SyntaxErrorRecoveryFramework\\pycparser\\utils\\fake_libc_include/_fake_typedefs.h" 1



typedef int size_t;
typedef int __builtin_va_list;
typedef int __gnuc_va_list;
typedef int __int8_t;
typedef int __uint8_t;
typedef int __int16_t;
typedef int __uint16_t;
typedef int __int_least16_t;
typedef int __uint_least16_t;
typedef int __int32_t;
typedef int __uint32_t;
typedef int __int64_t;
typedef int __uint64_t;
typedef int __int_least32_t;
typedef int __uint_least32_t;
typedef int __s8;
typedef int __u8;
typedef int __s16;
typedef int __u16;
typedef int __s32;
typedef int __u32;
typedef int __s64;
typedef int __u64;
typedef int _LOCK_T;
typedef int _LOCK_RECURSIVE_T;
typedef int _off_t;
typedef int __dev_t;
typedef int __uid_t;
typedef int __gid_t;
typedef int _off64_t;
typedef int _fpos_t;
typedef int _ssize_t;
typedef int wint_t;
typedef int _mbstate_t;
typedef int _flock_t;
typedef int _iconv_t;
typedef int __ULong;
typedef int __FILE;
typedef int ptrdiff_t;
typedef int wchar_t;
typedef int __off_t;
typedef int __pid_t;
typedef int __loff_t;
typedef int u_char;
typedef int u_short;
typedef int u_int;
typedef int u_long;
typedef int ushort;
typedef int uint;
typedef int clock_t;
typedef int time_t;
typedef int daddr_t;
typedef int caddr_t;
typedef int ino_t;
typedef int off_t;
typedef int dev_t;
typedef int uid_t;
typedef int gid_t;
typedef int pid_t;
typedef int key_t;
typedef int ssize_t;
typedef int mode_t;
typedef int nlink_t;
typedef int fd_mask;
typedef int _types_fd_set;
typedef int clockid_t;
typedef int timer_t;
typedef int useconds_t;
typedef int suseconds_t;
typedef int FILE;
typedef int fpos_t;
typedef int cookie_read_function_t;
typedef int cookie_write_function_t;
typedef int cookie_seek_function_t;
typedef int cookie_close_function_t;
typedef int cookie_io_functions_t;
typedef int div_t;
typedef int ldiv_t;
typedef int lldiv_t;
typedef int sigset_t;
typedef int __sigset_t;
typedef int _sig_func_ptr;
typedef int sig_atomic_t;
typedef int __tzrule_type;
typedef int __tzinfo_type;
typedef int mbstate_t;
typedef int sem_t;
typedef int pthread_t;
typedef int pthread_attr_t;
typedef int pthread_mutex_t;
typedef int pthread_mutexattr_t;
typedef int pthread_cond_t;
typedef int pthread_condattr_t;
typedef int pthread_key_t;
typedef int pthread_once_t;
typedef int pthread_rwlock_t;
typedef int pthread_rwlockattr_t;
typedef int pthread_spinlock_t;
typedef int pthread_barrier_t;
typedef int pthread_barrierattr_t;
typedef int jmp_buf;
typedef int rlim_t;
typedef int sa_family_t;
typedef int sigjmp_buf;
typedef int stack_t;
typedef int siginfo_t;
typedef int z_stream;


typedef int int8_t;
typedef int uint8_t;
typedef int int16_t;
typedef int uint16_t;
typedef int int32_t;
typedef int uint32_t;
typedef int int64_t;
typedef int uint64_t;


typedef int int_least8_t;
typedef int uint_least8_t;
typedef int int_least16_t;
typedef int uint_least16_t;
typedef int int_least32_t;
typedef int uint_least32_t;
typedef int int_least64_t;
typedef int uint_least64_t;


typedef int int_fast8_t;
typedef int uint_fast8_t;
typedef int int_fast16_t;
typedef int uint_fast16_t;
typedef int int_fast32_t;
typedef int uint_fast32_t;
typedef int int_fast64_t;
typedef int uint_fast64_t;


typedef int intptr_t;
typedef int uintptr_t;


typedef int intmax_t;
typedef int uintmax_t;


typedef _Bool bool;

typedef int va_list;


typedef void* MirEGLNativeWindowType;
typedef void* MirEGLNativeDisplayType;
typedef struct MirConnection MirConnection;
typedef struct MirSurface MirSurface;
typedef struct MirSurfaceSpec MirSurfaceSpec;
typedef struct MirScreencast MirScreencast;
typedef struct MirPromptSession MirPromptSession;
typedef struct MirBufferStream MirBufferStream;
typedef struct MirPersistentId MirPersistentId;
typedef struct MirBlob MirBlob;
typedef struct MirDisplayConfig MirDisplayConfig;


typedef struct xcb_connection_t xcb_connection_t;
typedef uint32_t xcb_window_t;
typedef uint32_t xcb_visualid_t;
# 3 "D:\\Machine Learning\\SyntaxErrorRecoveryFramework\\pycparser\\utils\\fake_libc_include/stdio.h" 2
# 2 "test_files\\main.c" 2

int main()
{
    printf("a test\n");
    return 0;
}
"""
        self.assertEqual(
            util.preprocess(os.path.join('test_files', 'main.c'), ),
            expected_result, "preprocess failed")