from typing import Set, Dict, List
import random
import numpy as np
import os
import pickle

from qanta.util.io import safe_open
from qanta.config import conf
from qanta import logging

log = logging.get(__name__)


def create_embeddings(vocab: Set[str], expand_glove=False, mask_zero=False):
    """
    Create embeddings
    :param vocab: words in the vocabulary
    :param expand_glove: Whether or not to expand embeddings past pre-trained ones
    :param mask_zero: if True, then 0 is reserved as a sequence length mask (distinct from UNK)
    :return:
    """
    # Accumulators for the embedding matrix rows and the word -> row-index map.
    embeddings = []
    embedding_lookup = {}
    # Read pre-trained word vectors from the path given in the project config.
    with open(conf['word_embeddings']) as f:
        i = 0
        line_number = 0
        n_bad_embeddings = 0
        if mask_zero:
            # Reserve row 0 as an all-zero vector so index 0 can act as a
            # padding/sequence-length mask distinct from the UNK token.
            emb = np.zeros((conf['embedding_dimension']))
            embeddings.append(emb)
            # NOTE(review): the function body is truncated at this point in the
            # visible source; the remainder (parsing the embedding file,
            # populating embedding_lookup, handling expand_glove, and the
            # return value) is not shown here -- do not infer it.
import time
import pickle
import numpy as np

from qanta import logging
from qanta.guesser.util import gen_util
from qanta.util.io import safe_open
from qanta.guesser.classify.learn_classifiers import evaluate, compute_vectors
from qanta.guesser.util.adagrad import Adagrad
from qanta.guesser.util.functions import relu, drelu
from qanta.util.constants import (DEEP_WE_TARGET, DEEP_DAN_PARAMS_TARGET, DEEP_TRAIN_TARGET,
                                  DEEP_DEV_TARGET, DEEP_DAN_TRAIN_OUTPUT, DEEP_DAN_DEV_OUTPUT)

log = logging.get(__name__)


def objective_and_grad(data, params, d, len_voc, word_drop=0.3, rho=1e-5):
    """
    Compute the training objective and parameter gradients over *data* for a
    three-layer network (see the deep=3 unroll below).

    :param data: iterable of (qs, ans) training examples; qs is a mapping whose
        values are indexed by key (presumably position -> word indices; verify
        against the caller)
    :param params: flat parameter vector, unrolled via gen_util.unroll_params
    :param d: embedding/hidden dimension (inferred from the (d, 1) reshape)
    :param len_voc: vocabulary size (number of columns of the lookup matrix L)
    :param word_drop: presumably a word-dropout probability -- confirm in the
        truncated remainder of the body
    :param rho: presumably an L2-regularization coefficient -- confirm likewise
    """
    # Unroll the flat vector into three weight/bias pairs plus the word
    # embedding lookup matrix L (shape d x len_voc, given L[:, idx] below).
    params = gen_util.unroll_params(params, d, len_voc, deep=3)
    (W, b, W2, b2, W3, b3, L) = params
    # Gradient accumulators with the same layout as params.
    grads = gen_util.init_grads(d, len_voc, deep=3)
    error_sum = 0.0
    for qs, ans in data:
        # answer vector
        comp = L[:, ans[0]].reshape((d, 1))
        history = []
        for dist in qs:
            sent = qs[dist]
            # NOTE(review): the function body is truncated at this point in the
            # visible source; the forward pass, backprop, and return value are
            # not shown here -- do not infer them.