def __init__(self, path, normalize=True):
    """Load the vocabularies and matrix stored under ``path``.

    The word vocabulary is read from ``path + '.words.vocab'`` and the
    context vocabulary from ``path + '.contexts.vocab'``. The matrix comes
    from ``load_matrix(path)`` and its ``data`` array is replaced by its
    element-wise natural logarithm.

    Args:
        path: Common prefix of the vocabulary files; also handed to
            ``load_matrix`` unchanged.
        normalize: Stored as ``self.normal``. When truthy,
            ``self.normalize()`` is called after everything is loaded.
    """
    words_file = path + '.words.vocab'
    self.wi, self.iw = load_vocabulary(words_file)

    contexts_file = path + '.contexts.vocab'
    self.ci, self.ic = load_vocabulary(contexts_file)

    self.m = load_matrix(path)
    # Only the matrix's ``data`` array is transformed.
    self.m.data = np.log(self.m.data)

    self.normal = normalize
    if not normalize:
        return
    self.normalize()
def load(cls, path, normalize=True, restricted_context=None, **kwargs):
    """Build an instance of ``cls`` from the matrix found at ``path``.

    Args:
        path: Passed to ``load_matrix`` and, together with the loaded
            matrix, to ``load_vocabulary``.
        normalize: Forwarded to ``cls`` as the ``normalize`` keyword.
        restricted_context: Forwarded to ``cls`` as the
            ``restricted_context`` keyword.
        **kwargs: Accepted but not used by this function.

    Returns:
        The object returned by calling ``cls``.
    """
    matrix = load_matrix(path)
    words, contexts = load_vocabulary(matrix, path)
    options = {
        'normalize': normalize,
        'restricted_context': restricted_context,
    }
    return cls(matrix, words, contexts, **options)
def __init__(self, path, normalize=True, k=1):
    """Initialise via ``Explicit.__init__`` and then shift the matrix by ``log(k)``.

    After the base initialiser runs (with its third argument set to
    ``False``), the vocabularies and the matrix are loaded from ``path``
    and ``np.log(k)`` is subtracted from the matrix's ``data`` array.

    Args:
        path: Common prefix of the ``.words.vocab`` / ``.contexts.vocab``
            files; also handed to ``load_matrix`` and ``Explicit.__init__``.
        normalize: When truthy, ``self.normalize()`` is called at the end.
        k: Value whose natural logarithm is subtracted from the matrix data.
    """
    Explicit.__init__(self, path, False)

    # NOTE(review): the vocabularies and matrix are loaded here even though
    # Explicit.__init__ was just called with the same path -- confirm
    # whether this second load is required.
    words_file = path + '.words.vocab'
    contexts_file = path + '.contexts.vocab'
    self.wi, self.iw = load_vocabulary(words_file)
    self.ci, self.ic = load_vocabulary(contexts_file)

    self.m = load_matrix(path)
    shifted = self.m.data - np.log(k)
    self.m.data = shifted

    # NOTE(review): self.normal is not assigned in this method (the
    # assignment was commented out in the original), so it keeps whatever
    # Explicit.__init__ set.
    if normalize:
        self.normalize()
def __init__(self, path, normalize=True, glen=5):
    """Load vocabularies, counts and the matrix stored under ``path``.

    The word and context vocabularies are read from
    ``path + '.words.vocab'`` and ``path + '.contexts.vocab'``.
    ``self.load_counts(path)`` supplies ``self.sz`` and ``self.ng_freqs``.
    The matrix comes from ``load_matrix(path)`` and its ``data`` array is
    replaced by its element-wise natural logarithm.

    Args:
        path: Common prefix / location of all input files.
        normalize: Stored as ``self.normal``. When truthy,
            ``self.normalize()`` is called after loading.
        glen: Stored as ``self.glen``.
    """
    words_file = path + '.words.vocab'
    self.wi, self.iw = load_vocabulary(words_file)

    contexts_file = path + '.contexts.vocab'
    self.ci, self.ic = load_vocabulary(contexts_file)

    count_info = self.load_counts(path)
    self.sz, self.ng_freqs = count_info

    self.m = load_matrix(path)
    self.m.data = np.log(self.m.data)

    self.normal, self.glen = normalize, glen
    if normalize:
        self.normalize()
def __init__(self, path, normalize=True):
    """Initialise via ``Explicit.__init__`` and then binarise the logged matrix.

    After the base initialiser runs (with its third argument set to
    ``False``), the vocabularies and the matrix are loaded from ``path``.
    The matrix's ``data`` array is replaced by its natural logarithm, then
    entries ``<= 0`` are set to 0 and entries ``> 0`` are set to 1.

    Args:
        path: Common prefix of the ``.words.vocab`` / ``.contexts.vocab``
            files; also handed to ``load_matrix`` and ``Explicit.__init__``.
        normalize: When truthy, ``self.normalize()`` is called at the end.
    """
    Explicit.__init__(self, path, False)

    # NOTE(review): the vocabularies and matrix are loaded here even though
    # Explicit.__init__ was just called with the same path -- confirm
    # whether this second load is required.
    words_file = path + '.words.vocab'
    contexts_file = path + '.contexts.vocab'
    self.wi, self.iw = load_vocabulary(words_file)
    self.ci, self.ic = load_vocabulary(contexts_file)

    self.m = load_matrix(path)
    self.m.data = np.log(self.m.data)

    # Threshold in two in-place passes: non-positive logs first, then
    # positive ones. The order matters only in that the zeros written by
    # the first pass are not matched by the second.
    values = self.m.data
    values[values <= 0] = 0
    values[values > 0] = 1

    # NOTE(review): self.normal is not assigned in this method (the
    # assignment was commented out in the original), so it keeps whatever
    # Explicit.__init__ set.
    if normalize:
        self.normalize()
def load(cls, path, normalize=True, restricted_context=None, thresh=None, neg=1):
    """Build an instance of ``cls`` from the matrix found at ``path``.

    Args:
        path: Passed to ``load_matrix`` and, together with the loaded
            matrix, to ``load_vocabulary``.
        normalize: Forwarded to ``cls`` as its fourth positional argument.
        restricted_context: Forwarded to ``cls`` as its fifth positional
            argument.
        thresh: Passed to ``load_matrix`` as its second argument.
        neg: Forwarded to ``cls`` as the ``neg`` keyword.

    Returns:
        The object returned by calling ``cls``.
    """
    matrix = load_matrix(path, thresh)
    words, contexts = load_vocabulary(matrix, path)
    positional = (matrix, words, contexts, normalize, restricted_context)
    return cls(*positional, neg=neg)
def load(cls, path, normalize=True, restricted_context=None, thresh=None, neg=1):
    """Build an instance of ``cls`` from the matrix found at ``path``.

    Args:
        path: Passed to ``load_matrix`` and, together with the loaded
            matrix, to ``load_vocabulary``.
        normalize: Forwarded to ``cls`` as its fourth positional argument.
        restricted_context: Forwarded to ``cls`` as its fifth positional
            argument.
        thresh: Currently unused; ``load_matrix`` is called with ``path``
            only.
        neg: Forwarded to ``cls`` as the ``neg`` keyword.

    Returns:
        The object returned by calling ``cls``.
    """
    # load_matrix takes only one argument, so ``thresh`` is not forwarded
    # (passing it produced an error).
    matrix = load_matrix(path)
    words, contexts = load_vocabulary(matrix, path)
    positional = (matrix, words, contexts, normalize, restricted_context)
    return cls(*positional, neg=neg)
def main(proc_num, queue, out_dir, in_dir, context_size):
    """Worker loop: write per-word counts for every year pulled from ``queue``.

    For each year, the matrix ``in_dir + str(year) + ".bin"`` and the pickled
    index ``in_dir + str(year) + "-index.pkl"`` are loaded. Each matrix row is
    summed and divided by ``2 * context_size``, and a ``{word: int}`` dict is
    written to ``out_dir + "/" + str(year) + "-counts.pkl"``.

    Args:
        proc_num: Identifier of this worker; only used in progress messages.
        queue: Queue of years. It is polled without blocking and the loop
            ends the first time it raises ``Empty``.
        out_dir: Output directory, created through ``ioutils.mkdir``.
        in_dir: Prefix of the input files. It is concatenated with the year
            directly (no separator is inserted), so callers presumably pass
            it with a trailing path separator -- verify against callers.
        context_size: Row sums are divided by twice this value.
    """
    ioutils.mkdir(out_dir)
    # Single-argument print(...) calls produce the same output whether
    # ``print`` is the Python 2 statement or the Python 3 function.
    print("%s Start loop" % (proc_num,))
    while True:  # one iteration per year
        try:
            year = queue.get(block=False)
        except Empty:
            print("%s Finished" % (proc_num,))
            break

        print("%s - Loading mat for year %s" % (proc_num, year))
        year_prefix = in_dir + str(year)
        year_mat = load_matrix(year_prefix + ".bin")
        index = ioutils.load_pickle(year_prefix + "-index.pkl")

        print("%s - Processing data for year %s" % (proc_num, year))
        # Row sums scaled by the window width (2 * context_size).
        row_totals = year_mat.sum(1) / (2 * context_size)
        num_rows = len(row_totals)

        counts = {}
        for word in index:
            row = index[word]
            # Words whose index lies beyond the matrix rows are skipped.
            if row < num_rows:
                counts[word] = int(row_totals[row])

        ioutils.write_pickle(counts, out_dir + "/" + str(year) + "-counts.pkl")