def initialise_one(name, verbose=True):
    """
    Initialise data structures with no extra variables.

    Calls ``initialise`` with ``dims=1`` and ``sparse=False``, reloads the
    result through ``load(name)``, and then tunes component 0 of the
    embedding of every item that is not in ``known``: each candidate value is
    tried in turn and the one with the lowest score is kept, where the score
    is ``objective(points, embed, W, reg_emb=0) + abs(candidate)``.  Five
    sweeps over all items are made before the parameters are written back
    with ``save``.

    name    -- parameter name handed to ``initialise``, ``load`` and ``save``
    verbose -- if true, print every item whose value changed in a sweep
    """
    initialise(dims=1, param_name=name, sparse=False)
    _data, rel, known, A, B, T, embed = load(name)
    W = (A, B, T)
    # Candidate values tried for component 0 of each unknown item.
    options = [-1, -0.5, -0.2, -0.1, 0, 0.1, 0.2, 0.5, 1]

    for _ in range(5):
        for item, points in rel.items():
            if item in known:
                # Items listed in ``known`` are left untouched.
                continue

            old = embed[item][0]
            minscore = float('inf')
            best = None
            for sent in options:
                embed[item][0] = sent
                # abs(sent) acts as an explicit penalty on the candidate,
                # since the objective itself is called with reg_emb=0.
                new = objective(points, embed, W, reg_emb=0) + abs(sent)
                if new < minscore:  # strict: the first of tied candidates wins
                    minscore = new
                    best = sent

            if best is None:
                # No candidate compared below infinity (e.g. every score was
                # NaN): keep the previous value rather than storing None.
                best = old
            embed[item][0] = best

            if verbose and best != old:
                print(item, best)
        # NOTE(review): separator assumed to be printed once per sweep,
        # regardless of ``verbose`` -- confirm against the intended layout.
        print('\n...\n')

    save(A, B, T, embed, name)
def initialise(dims=3,
               lex_file='../../data/sentimerge_nospin.pk',
               morph_file='../../data/morph/preproc_nopos.pk',
               data_file='../../data/morph/data.pk',
               param_name='out',
               sparse=True):
    """
    Initialise data structures for learning.

    Loads a lexicon and a morphological analysis, builds the training data,
    creates randomly initialised parameters A, B (dims x dims) and
    T (dims x dims x dims), optionally sparsifies parameters and embeddings,
    and writes everything out.

    dims       -- size of every axis of A, B and T
    lex_file   -- pickle file holding a pair ``(full_lex, full_lex_weight)``
    morph_file -- pickle file holding a mapping from a whole form to its parts
    data_file  -- output pickle file, receives ``(data, relevant, known)``
    param_name -- name passed to ``save`` together with A, B, T and embed
    sparse     -- if true, greedily set values to zero wherever doing so does
                  not make the objective worse

    Both input files are read with ``pickle``, so they must come from a
    trusted source.
    """
    with open(lex_file, 'rb') as f:
        full_lex, full_lex_weight = pickle.load(f)
    # Only the lexicon itself is needed here; the weights are discarded.
    lex, _ = remove_pos(full_lex, full_lex_weight)

    with open(morph_file, 'rb') as f:
        full_morph = pickle.load(f)
    # Keep only entries that are in the lexicon and have more than one part.
    morph = {whole: parts
             for whole, parts in full_morph.items()
             if whole in lex and len(parts) > 1}

    # For every atom, the (lexicon value, parts) pairs of each entry it is in.
    relevant = {}
    for whole, parts in morph.items():
        for atom in parts:
            relevant.setdefault(atom, []).append((lex[whole], parts))

    data = [(lex[whole], parts) for whole, parts in morph.items()]
    items = sorted(relevant)
    # NOTE(review): presumably featurise fills component 0 from the lexicon
    # for atoms in ``known`` and adds dims-1 extra components -- confirm.
    embed, known = featurise(items, lex, dims-1)

    # Random initial parameters; A, B, T are drawn in this order.
    A = array([[random() for _ in range(dims)] for _ in range(dims)])
    B = array([[random() for _ in range(dims)] for _ in range(dims)])
    T = array([[[random() for _ in range(dims)]
                for _ in range(dims)]
               for _ in range(dims)])
    # The first entry of each parameter array gets a fixed starting value.
    A[0, 0] = 0.4
    B[0, 0] = 0.7
    T[0, 0, 0] = 0.4

    # Make the initialisation sparse:
    # NOTE(review): both loops below are assumed to be inside ``if sparse``.
    if sparse:
        for arr in (A, B, T):
            it = nditer(arr, flags=['multi_index'], op_flags=['readwrite'])
            for value in it:
                # ``value`` is a writable view, so copy before overwriting.
                old_val = value.copy()
                old_obj = objective(data, embed, (A, B, T))
                value[...] = 0
                new_obj = objective(data, embed, (A, B, T))
                if old_obj < new_obj:
                    # Zeroing made the objective worse: restore and report.
                    value[...] = old_val
                    print(it.multi_index)

        for atom in items:
            # Component 0 of atoms in ``known`` is never zeroed.
            r = range(1, dims) if atom in known else range(dims)
            vec = embed[atom]
            for i in r:
                old_val = vec[i].copy()
                old_obj = objective(relevant[atom], embed, (A, B, T), reg_emb=0)
                # Explicit penalty on the current value; the zeroed
                # alternative has a penalty of 0.
                old_obj += abs(old_val)
                vec[i] = 0
                new_obj = objective(relevant[atom], embed, (A, B, T), reg_emb=0)
                if old_obj < new_obj:
                    vec[i] = old_val
                    print(atom, i)

    # NOTE(review): assumed to be printed whether or not ``sparse`` is set.
    print(A, end='\n\n\n')
    print(B, end='\n\n\n')
    print(T)

    with open(data_file, 'wb') as f:
        pickle.dump((data, relevant, known), f)
    save(A, B, T, embed, param_name)