texts, target = text_helpers.load_movie_data(data_folder_name)

# Normalize text
print('Normalizing Text Data')
texts = text_helpers.normalize_text(texts, stops)

# Texts must contain more words than the window size
target = [target[ix] for ix, x in enumerate(texts) if len(x.split()) > window_size]
texts = [x for x in texts if len(x.split()) > window_size]
assert(len(target) == len(texts))

# Build our data set and dictionaries
print('Creating Dictionary')
word_dictionary = text_helpers.build_dictionary(texts, vocabulary_size)
word_dictionary_rev = dict(zip(word_dictionary.values(), word_dictionary.keys()))
text_data = text_helpers.text_to_numbers(texts, word_dictionary)

# Get validation word keys
valid_examples = [word_dictionary[x] for x in valid_words]

print('Creating Model')
# Define Embeddings:
embeddings = tf.Variable(tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
doc_embeddings = tf.Variable(tf.random_uniform([len(texts), doc_embedding_size], -1.0, 1.0))

# NCE loss parameters
nce_weights = tf.Variable(tf.truncated_normal([vocabulary_size, concatenated_size],
                                              stddev=1.0 / np.sqrt(concatenated_size)))
nce_biases = tf.Variable(tf.zeros([vocabulary_size]))

# Create data/target placeholders
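# A minimal sketch of the placeholders that typically follow here, mirroring the full
# doc2vecRun definition later in this section; the extra input column carries the document
# index next to the window words (the exact shapes are an assumption, not the original code):
x_inputs = tf.placeholder(tf.int32, shape=[None, window_size + 1])  # plus 1 for doc index
y_target = tf.placeholder(tf.int32, shape=[None, 1])
valid_dataset = tf.constant(valid_examples, dtype=tf.int32)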
test_indices = np.array(list(set(range(len(target))) - set(train_indices)))
texts_train = [x for ix, x in enumerate(texts) if ix in train_indices]
texts_test = [x for ix, x in enumerate(texts) if ix in test_indices]
target_train = np.array([x for ix, x in enumerate(target) if ix in train_indices])
target_test = np.array([x for ix, x in enumerate(target) if ix in test_indices])

# Load dictionary and embedding matrix
dict_file = os.path.join('..', '05_Working_With_CBOW_Embeddings', 'temp', 'movie_vocab.pkl')
with open(dict_file, 'rb') as f:
    word_dictionary = pickle.load(f)

# Convert texts to lists of indices
text_data_train = np.array(text_helpers.text_to_numbers(texts_train, word_dictionary))
text_data_test = np.array(text_helpers.text_to_numbers(texts_test, word_dictionary))

# Pad/crop movie reviews to a specific length
text_data_train = np.array([x[0:max_words] for x in [y + [0] * max_words for y in text_data_train]])
text_data_test = np.array([x[0:max_words] for x in [y + [0] * max_words for y in text_data_test]])

################################################################################
# Model
################################################################################
x_data = tf.placeholder(shape=[None, max_words], dtype=tf.int32)
y_target = tf.placeholder(shape=[None, 1], dtype=tf.float32)
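# A hedged sketch (not the original author's exact model) of how these placeholders are
# commonly wired up for sentiment prediction on top of restored CBOW embeddings: look up
# each word, average over the review, and feed the result to a logistic layer.
# `embeddings` is assumed to be a [vocabulary_size, embedding_size] variable restored from
# the saved CBOW checkpoint; A and b are hypothetical names for the logistic-layer weights.
embed = tf.nn.embedding_lookup(embeddings, x_data)            # [None, max_words, embedding_size]
embed_avg = tf.reduce_mean(embed, 1)                          # average word vector per review
A = tf.Variable(tf.random_normal(shape=[embedding_size, 1]))  # logistic-regression weights
b = tf.Variable(tf.random_normal(shape=[1, 1]))               # logistic-regression bias
model_output = tf.add(tf.matmul(embed_avg, A), b)
loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=model_output, labels=y_target))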
print('Loading Data')
texts, target = text_helpers.load_movie_data()

# Normalize text
print('Normalizing Text Data')
texts = text_helpers.normalize_text(texts, stops)

# Texts must contain at least 3 words
target = [target[ix] for ix, x in enumerate(texts) if len(x.split()) > 2]
texts = [x for x in texts if len(x.split()) > 2]

# Build our data set and dictionaries
print('Creating Dictionary')
word_dictionary = text_helpers.build_dictionary(texts, vocabulary_size)
word_dictionary_rev = dict(zip(word_dictionary.values(), word_dictionary.keys()))
text_data = text_helpers.text_to_numbers(texts, word_dictionary)

# Get validation word keys
valid_examples = [word_dictionary[x] for x in valid_words]

print('Creating Model')
# Define Embeddings:
embeddings = tf.Variable(tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))

# NCE loss parameters
nce_weights = tf.Variable(tf.truncated_normal([vocabulary_size, embedding_size],
                                              stddev=1.0 / np.sqrt(embedding_size)))
nce_biases = tf.Variable(tf.zeros([vocabulary_size]))

# Create data/target placeholders
x_inputs = tf.placeholder(tf.int32, shape=[batch_size, 2 * window_size])
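# A minimal sketch of the remaining inputs this CBOW setup typically declares
# (the same pattern appears in the annotated CBOW snippet below): an integer label per
# batch row and a constant holding the validation word indices.
y_target = tf.placeholder(tf.int32, shape=[batch_size, 1])
valid_dataset = tf.constant(valid_examples, dtype=tf.int32)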
stops = stopwords.words('english')

# We pick some test words. We are expecting synonyms to appear
valid_words = ['love', 'hate', 'happy', 'sad', 'man', 'woman']

# 4. Call the helper functions wrapped in step one: the data loader and the text
#    normalizer. In this example, we keep only movie reviews with at least three words.
texts, target = text_helpers.load_movie_data(data_folder_name)
texts = text_helpers.normalize_text(texts, stops)

# Texts must contain at least 3 words
target = [target[ix] for ix, x in enumerate(texts) if len(x.split()) > 2]
texts = [x for x in texts if len(x.split()) > 2]

# 5. Build a word dictionary so we can look words up. We also need a reverse dictionary
#    that maps indices back to words; we use it when printing the nearest neighbours of
#    each word in the validation set.
word_dictionary = text_helpers.build_dictionary(texts, vocabulary_size)
word_dictionary_rev = dict(zip(word_dictionary.values(), word_dictionary.keys()))
text_data = text_helpers.text_to_numbers(texts, word_dictionary)

# Get validation word keys
valid_examples = [word_dictionary[x] for x in valid_words]

# 6. Initialize the word embeddings to be fit and declare the model's data placeholders.
embeddings = tf.Variable(tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))

# Create data/target placeholders
x_inputs = tf.placeholder(tf.int32, shape=[batch_size, 2 * window_size])
y_target = tf.placeholder(tf.int32, shape=[batch_size, 1])
valid_dataset = tf.constant(valid_examples, dtype=tf.int32)

# 7. Handle the word embeddings. Because the CBOW model adds the embeddings of the words
#    in the context window together, we create a loop that sums all the window embeddings.
# Lookup the word embeddings and
# add together window embeddings:
embed = tf.zeros([batch_size, embedding_size])
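# A minimal sketch of the summation loop described in step 7, assuming x_inputs holds the
# 2*window_size context-word indices: each column is looked up and accumulated into `embed`.
for element in range(2 * window_size):
    embed += tf.nn.embedding_lookup(embeddings, x_inputs[:, element])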
# answer_texts = [answer for idx, (question, answer) in enumerate(zip(question_texts, answer_texts))
#                 if len(question.split()) > window_size and len(answer.split()) > window_size]
# question_texts = [x for x in question_texts if len(x.split()) > window_size]
# answer_texts = [x for x in answer_texts if len(x.split()) > window_size]
print(len(target), len(question_texts), len(answer_texts))
assert(len(target) == len(question_texts) == len(answer_texts))

# Build our data set and dictionaries
print('Creating Dictionary')
word_dictionary = text_helpers.build_dictionary(question_texts + answer_texts, vocab_size)
word_dictionary_rev = dict(zip(word_dictionary.values(), word_dictionary.keys()))
# text_data = text_helpers.text_to_numbers(texts, word_dictionary)
question_data = text_helpers.text_to_numbers(question_texts, word_dictionary)
answer_data = text_helpers.text_to_numbers(answer_texts, word_dictionary)

# Get validation word keys
valid_examples = [word_dictionary[x] for x in valid_words]

print('Creating Model')
# Define Embeddings:
# embeddings = tf.Variable(tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
# doc_embeddings = tf.Variable(tf.random_uniform([len(texts), doc_embedding_size], -1.0, 1.0))
word_embeddings = tf.Variable(tf.random_uniform([vocab_size, word_emb_size], -1.0, 1.0))
question_embeddings = tf.Variable(tf.random_uniform([len(question_texts), doc_emb_size], -1.0, 1.0))
answer_embeddings = tf.Variable(tf.random_uniform([len(answer_texts), doc_emb_size], -1.0, 1.0))
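# A hedged sketch (an assumption, not taken from the original snippet) of how these three
# embedding tables could be wired together doc2vec-style before an NCE loss: sum the window
# word vectors, look up the question and answer document vectors by index, and concatenate.
# batch_size and window_size are assumed to be declared earlier in this script.
x_inputs = tf.placeholder(tf.int32, shape=[None, window_size + 2])  # window words + question idx + answer idx
y_target = tf.placeholder(tf.int32, shape=[None, 1])

embed = tf.zeros([batch_size, word_emb_size])
for element in range(window_size):
    embed += tf.nn.embedding_lookup(word_embeddings, x_inputs[:, element])

q_embed = tf.nn.embedding_lookup(question_embeddings, x_inputs[:, window_size])
a_embed = tf.nn.embedding_lookup(answer_embeddings, x_inputs[:, window_size + 1])
final_embed = tf.concat([embed, q_embed, a_embed], 1)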
def doc2vecRun(window_size=3, embedding_size=64, dataName='slantour_data.txt'):
    import tensorflow as tf
    import numpy as np
    import random
    import os
    import pickle
    import text_helpers
    from tensorflow.python.framework import ops
    ops.reset_default_graph()

    os.chdir(os.path.dirname(os.path.realpath(__file__)))

    # Make a saving directory if it doesn't exist
    data_folder_name = 'data'
    if not os.path.exists(data_folder_name):
        os.makedirs(data_folder_name)

    # Start a graph session
    sess = tf.Session()

    # Declare model parameters
    batch_size = 32
    vocabulary_size = 7500
    generations = 500000
    model_learning_rate = 0.1
    # embedding_size = 64                 # Word embedding size (function argument)
    doc_embedding_size = embedding_size   # Document embedding size
    concatenated_size = embedding_size + doc_embedding_size
    num_sampled = int(batch_size / 2)     # Number of negative examples to sample.
    # window_size = 3                     # How many words to consider to the left (function argument)

    # Add checkpoints to training
    save_embeddings_every = 50000
    print_valid_every = 50000
    print_loss_every = 1000

    # Declare stop words (Czech stop-word list)
    # stops = stopwords.words('english')
    stops = [
        "a", "aby", "ahoj", "aj", "ale", "anebo", "ani", "aniž", "ano", "asi", "aspoň",
        "atd", "atp", "az", "ačkoli", "až", "bez", "beze", "blízko", "bohužel", "brzo",
        "bude", "budem", "budeme", "budes", "budete", "budeš", "budou", "budu", "by",
        "byl", "byla", "byli", "bylo", "byly", "bys", "byt", "být", "během", "chce",
        "chceme", "chcete", "chceš", "chci", "chtít", "chtějí", "chut'", "chuti", "ci",
        "clanek", "clanku", "clanky", "co", "coz", "což", "cz", "daleko", "dalsi",
        "další", "den", "deset", "design", "devatenáct", "devět", "dnes", "do", "dobrý",
        "docela", "dva", "dvacet", "dvanáct", "dvě", "dál", "dále", "děkovat",
        "děkujeme", "děkuji", "email", "ho", "hodně", "i", "jak", "jakmile", "jako",
        "jakož", "jde", "je", "jeden", "jedenáct", "jedna", "jedno", "jednou", "jedou",
        "jeho", "jehož", "jej", "jeji", "jejich", "její", "jelikož", "jemu", "jen",
        "jenom", "jenž", "jeste", "jestli", "jestliže", "ještě", "jež", "ji", "jich",
        "jimi", "jinak", "jine", "jiné", "jiz", "již", "jsem", "jses", "jseš", "jsi",
        "jsme", "jsou", "jste", "já", "jí", "jím", "jíž", "jšte", "k", "kam", "každý",
        "kde", "kdo", "kdy", "kdyz", "když", "ke", "kolik", "kromě", "ktera", "ktere",
        "kteri", "kterou", "ktery", "která", "které", "který", "kteři", "kteří", "ku",
        "kvůli", "ma", "mají", "mate", "me", "mezi", "mi", "mit", "mne", "mnou", "mně",
        "moc", "mohl", "mohou", "moje", "moji", "možná", "muj", "musí", "muze", "my",
        "má", "málo", "mám", "máme", "máte", "máš", "mé", "mí", "mít", "mě", "můj",
        "může", "na", "nad", "nade", "nam", "napiste", "napište", "naproti", "nas",
        "nasi", "načež", "naše", "naši", "ne", "nebo", "nebyl", "nebyla", "nebyli",
        "nebyly", "nechť", "nedělají", "nedělá", "nedělám", "neděláme", "neděláte",
        "neděláš", "neg", "nejsi", "nejsou", "nemají", "nemáme", "nemáte", "neměl",
        "neni", "není", "nestačí", "nevadí", "nez", "než", "nic", "nich", "nimi",
        "nove", "novy", "nové", "nový", "nula", "ná", "nám", "námi", "nás", "náš",
        "ní", "ním", "ně", "něco", "nějak", "někde", "někdo", "němu", "němuž", "o",
        "od", "ode", "on", "ona", "oni", "ono", "ony", "osm", "osmnáct", "pak",
        "patnáct", "po", "pod", "podle", "pokud", "potom", "pouze", "pozdě", "pořád",
        "prave", "pravé", "pred", "pres", "pri", "pro", "proc", "prostě", "prosím",
        "proti", "proto", "protoze", "protože", "proč", "prvni", "první", "práve",
        "pta", "pět", "před", "přede", "přes", "přese", "při", "přičemž", "re",
        "rovně", "s", "se", "sedm", "sedmnáct", "si", "sice", "skoro", "smí", "smějí",
        "snad", "spolu", "sta", "sto", "strana", "sté", "sve", "svych", "svym",
        "svymi", "své", "svých", "svým", "svými", "svůj", "ta", "tady", "tak", "take",
        "takhle", "taky", "takze", "také", "takže", "tam", "tamhle", "tamhleto",
        "tamto", "tato", "te", "tebe", "tebou", "ted'", "tedy", "tema", "ten", "tento",
        "teto", "ti", "tim", "timto", "tipy", "tisíc", "tisíce", "to", "tobě", "tohle",
        "toho", "tohoto", "tom", "tomto", "tomu", "tomuto", "toto", "trošku", "tu",
        "tuto", "tvoje", "tvá", "tvé", "tvůj", "ty", "tyto", "téma", "této", "tím",
        "tímto", "tě", "těm", "těma", "těmu", "třeba", "tři", "třináct", "u", "určitě",
        "uz", "už", "v", "vam", "vas", "vase", "vaše", "vaši", "ve", "vedle", "večer",
        "vice", "vlastně", "vsak", "vy", "vám", "vámi", "vás", "váš", "více", "však",
        "všechen", "všechno", "všichni", "vůbec", "vždy", "z", "za", "zatímco", "zač",
        "zda", "zde", "ze", "zpet", "zpravy", "zprávy", "zpět", "čau", "či", "článek",
        "článku", "články", "čtrnáct", "čtyři", "šest", "šestnáct", "že"
    ]

    # Load the data
    print('Loading Data')
    texts = text_helpers.load_slantour_data(data_folder_name, dataName)

    # Normalize text
    print('Normalizing Text Data')
    texts = text_helpers.normalize_text(texts, stops)
    print(len(texts))

    # Texts must contain at least 3 words
    # target = [target[ix] for ix, x in enumerate(texts) if len(x.split()) > window_size]
    # texts = [x for x in texts if len(x.split()) > window_size]
    # assert(len(target) == len(texts))

    # Build our data set and dictionaries
    print('Creating Dictionary')
    word_dictionary = text_helpers.build_dictionary(texts, vocabulary_size)
    word_dictionary_rev = dict(zip(word_dictionary.values(), word_dictionary.keys()))
    text_data = text_helpers.text_to_numbers(texts, word_dictionary)

    # Get validation word keys
    valid_words = [word_dictionary_rev[1], word_dictionary_rev[10],
                   word_dictionary_rev[100], word_dictionary_rev[1000]]
    valid_examples = [word_dictionary[x] for x in valid_words]

    print('Creating Model')
    # Define Embeddings:
    embeddings = tf.Variable(tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
    doc_embeddings = tf.Variable(tf.random_uniform([len(texts), doc_embedding_size], -1.0, 1.0))

    # NCE loss parameters
    nce_weights = tf.Variable(tf.truncated_normal([vocabulary_size, concatenated_size],
                                                  stddev=1.0 / np.sqrt(concatenated_size)))
    nce_biases = tf.Variable(tf.zeros([vocabulary_size]))
    # Create data/target placeholders
    x_inputs = tf.placeholder(tf.int32, shape=[None, window_size + 1])  # plus 1 for doc index
    y_target = tf.placeholder(tf.int32, shape=[None, 1])
    valid_dataset = tf.constant(valid_examples, dtype=tf.int32)

    # Lookup the word embeddings and
    # add together element embeddings in window:
    embed = tf.zeros([batch_size, embedding_size])
    for element in range(window_size):
        embed += tf.nn.embedding_lookup(embeddings, x_inputs[:, element])

    doc_indices = tf.slice(x_inputs, [0, window_size], [batch_size, 1])
    doc_embed = tf.nn.embedding_lookup(doc_embeddings, doc_indices)

    # Concatenate word and document embeddings
    final_embed = tf.concat([embed, tf.squeeze(doc_embed, [1])], 1)

    # Get loss from prediction
    # loss = tf.reduce_mean(tf.nn.nce_loss(nce_weights, nce_biases, final_embed, y_target, num_sampled, vocabulary_size))
    loss = tf.reduce_mean(tf.nn.nce_loss(weights=nce_weights,
                                         biases=nce_biases,
                                         inputs=final_embed,
                                         labels=y_target,
                                         num_sampled=num_sampled,
                                         num_classes=vocabulary_size))

    # Create optimizer
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=model_learning_rate)
    train_step = optimizer.minimize(loss)

    # Cosine similarity between words
    norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
    normalized_embeddings = embeddings / norm
    valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings, valid_dataset)
    similarity = tf.matmul(valid_embeddings, normalized_embeddings, transpose_b=True)

    # Create model saving operation
    saver = tf.train.Saver({"embeddings": embeddings, "doc_embeddings": doc_embeddings})

    # Add variable initializer.
    init = tf.global_variables_initializer()
    sess.run(init)

    # Run the doc2vec model.
    print('Starting Training')
    loss_vec = []
    loss_x_vec = []
    for i in range(generations):
        batch_inputs, batch_labels = text_helpers.generate_batch_data(text_data, batch_size,
                                                                      window_size, method='doc2vec')
        feed_dict = {x_inputs: batch_inputs, y_target: batch_labels}

        # Run the train step
        sess.run(train_step, feed_dict=feed_dict)

        # Return the loss
        if (i + 1) % print_loss_every == 0:
            loss_val = sess.run(loss, feed_dict=feed_dict)
            loss_vec.append(loss_val)
            loss_x_vec.append(i + 1)
            print('Loss at step {} : {}'.format(i + 1, loss_val))

        # Validation: Print some random words and top 5 related words
        if (i + 1) % print_valid_every == 0:
            sim = sess.run(similarity, feed_dict=feed_dict)
            for j in range(len(valid_words)):
                valid_word = word_dictionary_rev[valid_examples[j]]
                top_k = 5  # number of nearest neighbors
                nearest = (-sim[j, :]).argsort()[1:top_k + 1]
                log_str = 'Nearest to {}:'.format(valid_word)
                for k in range(top_k):
                    close_word = word_dictionary_rev[nearest[k]]
                    log_str = '{} {},'.format(log_str, close_word)
                print(log_str)

        # Save dictionary + embeddings
        if (i + 1) % save_embeddings_every == 0:
            # Save vocabulary dictionary
            with open(os.path.join(data_folder_name, 'movie_vocab.pkl'), 'wb') as f:
                pickle.dump(word_dictionary, f)

            # Save embeddings
            model_checkpoint_path = os.path.join(os.getcwd(), data_folder_name,
                                                 'doc2vec_movie_embeddings.ckpt')
            save_path = saver.save(sess, model_checkpoint_path)
            print('Model saved in file: {}'.format(save_path))

    final_embeddings = sess.run(doc_embeddings)
    embeddingsFname = "embeds/embed_doc2vec_" + str(window_size) + "_" + str(embedding_size) + ".csv"
    np.savetxt(embeddingsFname, final_embeddings, fmt="%.6e")
    return final_embeddings
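# A hedged usage sketch (assumes an existing "embeds/" directory for the CSV written above):
#   doc_embeds = doc2vecRun(window_size=3, embedding_size=64)
#   print(doc_embeds.shape)   # (number of documents, embedding_size)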
def word2vecRun(window_size=3, embedding_size=64, dataName='user_data_woIndex.txt'):
    import tensorflow as tf
    import numpy as np
    import random
    import os
    import text_helpers
    from tensorflow.python.framework import ops
    ops.reset_default_graph()

    os.chdir(os.path.dirname(os.path.realpath(__file__)))

    # Make a saving directory if it doesn't exist
    data_folder_name = 'data'
    if not os.path.exists(data_folder_name):
        os.makedirs(data_folder_name)

    # Start a graph session
    sess = tf.Session()

    # Declare model parameters
    batch_size = 32
    vocabulary_size = 10000
    generations = 500000
    model_learning_rate = 0.01
    # embedding_size = 64          # Word embedding size (function argument)
    # doc_embedding_size = 64      # Document embedding size
    # concatenated_size = embedding_size + doc_embedding_size
    num_sampled = int(batch_size / 2)  # Number of negative examples to sample.
    # window_size = 3              # How many words to consider to the left (function argument)

    # Add checkpoints to training
    save_embeddings_every = 50000
    print_valid_every = 50000
    print_loss_every = 1000

    # Declare stop words
    # stops = stopwords.words('english')
    stops = []

    # Load the data
    print('Loading Data')
    texts = text_helpers.load_slantour_data(data_folder_name, dataName)

    # Texts must contain at least 3 words
    # target = [target[ix] for ix, x in enumerate(texts) if len(x.split()) > window_size]
    # texts = [x for x in texts if len(x.split()) > window_size]
    # assert(len(target) == len(texts))

    # Build our data set and dictionaries
    print('Creating Dictionary')
    word_dictionary = text_helpers.build_dictionary(texts, vocabulary_size)
    word_dictionary_rev = dict(zip(word_dictionary.values(), word_dictionary.keys()))
    text_data = text_helpers.text_to_numbers(texts, word_dictionary)
    vocabulary_size = len(word_dictionary)
    print("Actual vocabulary size: " + str(vocabulary_size))

    # Get validation word keys
    valid_words = [word_dictionary_rev[1], word_dictionary_rev[10],
                   word_dictionary_rev[100], word_dictionary_rev[1000]]
    valid_examples = [word_dictionary[x] for x in valid_words]

    # Define Embeddings:
    embeddings = tf.Variable(tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))

    # NCE loss parameters
    nce_weights = tf.Variable(tf.truncated_normal([vocabulary_size, embedding_size],
                                                  stddev=1.0 / np.sqrt(embedding_size)))
    nce_biases = tf.Variable(tf.zeros([vocabulary_size]))

    # Create data/target placeholders
    x_inputs = tf.placeholder(tf.int32, shape=[batch_size])
    y_target = tf.placeholder(tf.int32, shape=[batch_size, 1])
    valid_dataset = tf.constant(valid_examples, dtype=tf.int32)

    # Lookup the word embedding:
    embed = tf.nn.embedding_lookup(embeddings, x_inputs)

    # Get NCE loss from prediction
    loss = tf.reduce_mean(tf.nn.nce_loss(weights=nce_weights,
                                         biases=nce_biases,
                                         inputs=embed,
                                         labels=y_target,
                                         num_sampled=num_sampled,
                                         num_classes=vocabulary_size))

    # Create optimizer
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0).minimize(loss)

    # Cosine similarity between words
    norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
    normalized_embeddings = embeddings / norm
    valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings, valid_dataset)
    similarity = tf.matmul(valid_embeddings, normalized_embeddings, transpose_b=True)

    # Add variable initializer.
    init = tf.global_variables_initializer()
    sess.run(init)

    # Run the skip gram model.
    loss_vec = []
    loss_x_vec = []
    for i in range(generations):
        batch_inputs, batch_labels = text_helpers.generate_batch_data(text_data, batch_size, window_size)
        feed_dict = {x_inputs: batch_inputs, y_target: batch_labels}

        # Run the train step
        sess.run(optimizer, feed_dict=feed_dict)

        # Return the loss
        if (i + 1) % print_loss_every == 0:
            loss_val = sess.run(loss, feed_dict=feed_dict)
            loss_vec.append(loss_val)
            loss_x_vec.append(i + 1)
            print("Loss at step {} : {}".format(i + 1, loss_val))

        # Validation: Print some random words and top 5 related words
        if (i + 1) % print_valid_every == 0:
            sim = sess.run(similarity, feed_dict=feed_dict)
            for j in range(len(valid_words)):
                valid_word = word_dictionary_rev[valid_examples[j]]
                top_k = 5  # number of nearest neighbors
                nearest = (-sim[j, :]).argsort()[1:top_k + 1]
                log_str = "Nearest to {}:".format(valid_word)
                for k in range(top_k):
                    close_word = word_dictionary_rev[nearest[k]]
                    log_str = "%s %s," % (log_str, close_word)
                print(log_str)

    final_embeddings = sess.run(embeddings)
    embeddingsFname = "embeds/embed_word2vec_" + str(window_size) + "_" + str(embedding_size) + ".csv"
    np.savetxt(embeddingsFname, final_embeddings, fmt="%.6e")
    return (final_embeddings, word_dictionary_rev, word_dictionary)
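# A hedged usage sketch: word2vecRun returns the trained embedding matrix plus both
# dictionaries, so nearest neighbours can be inspected offline (an "embeds/" directory is
# assumed to exist for the CSV written inside the function; 'praha' is a hypothetical query).
if __name__ == '__main__':
    import numpy as np
    embeds, rev_dict, word_dict = word2vecRun(window_size=3, embedding_size=64)
    normalized = embeds / np.linalg.norm(embeds, axis=1, keepdims=True)
    query = word_dict.get('praha', 0)
    sims = normalized @ normalized[query]
    nearest = (-sims).argsort()[1:6]  # top 5 neighbours, skipping the word itself
    print([rev_dict[int(i)] for i in nearest])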