def sequence_ngram(n, entries, out_dim=10):
    """Create sequence-based n-gram.

    Starts from a random window of ``n`` symbols drawn from ``[0, out_dim)``
    and repeatedly slides it by one position (drop the oldest symbol, append
    one freshly drawn symbol). A random weight is stored under every window
    visited, until ``ngram.size()`` reaches ``entries``.

    Args:
        n: Order of the n-gram, i.e. the length of every index tuple.
        entries: Value of ``ngram.size()`` at which filling stops.
        out_dim: Exclusive upper bound of the randomly drawn symbols.

    Returns:
        The result of ``ngram.norm()`` on the filled ``Ngram``.
    """
    ngram = Ngram(n)
    window = np.random.randint(0, out_dim, n)
    while ngram.size() < entries:
        ngram[tuple(window)] = np.random.random()
        # Slide the window: consecutive entries overlap in n - 1 symbols.
        window = np.append(window[1:], np.random.randint(0, out_dim))
    # Previously the function fell off the end and returned None, discarding
    # the n-gram it had just built.
    # NOTE(review): returning ngram.norm() mirrors randomized_ngram; confirm
    # that the sequence-based variant is meant to be normalized as well.
    return ngram.norm()
def randomized_ngram(n, entries, out_dim=10):
    """Create randomized n-gram whose indices mention every symbol.

    Fills an ``Ngram`` of order ``n`` with random weights under random index
    tuples until ``ngram.size()`` reaches ``entries``. The draw is accepted
    only if the symbols appearing in its indices number exactly ``out_dim``;
    otherwise a completely new n-gram is drawn.

    Rejected draws are retried in a loop rather than by recursion, so a long
    run of rejections can no longer raise ``RecursionError``. The function
    does not return if no draw can ever be accepted.

    Args:
        n: Order of the n-gram, i.e. the length of every index tuple.
        entries: Value of ``ngram.size()`` at which filling stops.
        out_dim: Exclusive upper bound of the randomly drawn symbols, and the
            number of distinct symbols an accepted n-gram must contain.

    Returns:
        The result of ``ngram.norm()`` on the accepted ``Ngram``.
    """
    while True:
        ngram = Ngram(n)
        while ngram.size() < entries:
            # Draw the weight before the index: that is the order in which
            # the one-line assignment evaluated them, so seeded runs keep
            # producing the same n-gram.
            weight = np.random.random()
            ngram[tuple(np.random.randint(0, out_dim, n))] = weight

        seen = {symbol for idx in ngram for symbol in idx}
        if len(seen) == out_dim:
            return ngram.norm()
def randomized_ngram(n, size, out_dim=10, min_var=0):
    """Create randomized n-gram with full symbol coverage and a variance floor.

    Fills an ``Ngram`` of order ``n`` with random weights under random index
    tuples until ``ngram.size()`` reaches ``size``. A draw is accepted only
    if both of the following hold; otherwise a completely new n-gram is drawn:

    * the symbols appearing in its indices number exactly ``out_dim``;
    * after ``ngram.norm()``, the variance of ``ngram.values()`` (computed
      with ``size`` as the divisor) is at least ``min_var``.

    Rejected draws are retried in a loop rather than by recursion, so a
    demanding ``min_var`` can no longer raise ``RecursionError``. The function
    does not return if no draw can ever be accepted.

    Args:
        n: Order of the n-gram, i.e. the length of every index tuple.
        size: Value of ``ngram.size()`` at which filling stops; also the
            divisor used for the mean and variance.
        out_dim: Exclusive upper bound of the randomly drawn symbols, and the
            number of distinct symbols an accepted n-gram must contain.
        min_var: Smallest acceptable variance of the values after ``norm()``.

    Returns:
        The accepted ``Ngram``, after ``norm()`` has been called on it.

    Raises:
        ZeroDivisionError: If ``size`` is 0 and the coverage check passes.
    """
    while True:
        ngram = Ngram(n)
        while ngram.size() < size:
            # Draw the weight before the index: that is the order in which
            # the one-line assignment evaluated them, so seeded runs keep
            # producing the same n-gram.
            weight = np.random.random()
            ngram[tuple(np.random.randint(0, out_dim, n))] = weight

        seen = {symbol for idx in ngram for symbol in idx}
        if len(seen) != out_dim:
            continue  # some symbol never occurs; draw again

        ngram.norm()
        mean = sum(ngram.values()) / size
        variance = sum((x - mean) ** 2 for x in ngram.values()) / size
        if variance >= min_var:
            return ngram
def randomized_ngram(n, entries, out_dim=10):
    """Create randomized n-gram.

    Stores random weights under random index tuples of length ``n``, whose
    symbols are drawn from ``[0, out_dim)``, until ``size()`` of the n-gram
    reaches ``entries``.

    Args:
        n: Order of the n-gram, i.e. the length of every index tuple.
        entries: Value of ``size()`` at which filling stops.
        out_dim: Exclusive upper bound of the randomly drawn symbols.

    Returns:
        The result of calling ``norm()`` on the filled ``Ngram``.
    """
    table = Ngram(n)
    while True:
        if table.size() >= entries:
            break
        # Weight first, index second: the same draw order as a single
        # ``table[index] = weight`` statement with both calls inline.
        weight = np.random.random()
        index = tuple(np.random.randint(0, out_dim, n))
        table[index] = weight
    return table.norm()