def get(self):
    # Dates arrive as dd-mm-yyyy; reversing the parts yields (year, month, day).
    start_date = self.request.get('start_date').split("-")
    end_date = self.request.get('end_date').split("-")
    start_date.reverse()
    end_date.reverse()
    start = dt.date(*map(int, start_date))
    end = dt.date(*map(int, end_date))
    graph = facebook.GraphAPI(self.current_user['access_token'])
    extended_at = graph.extend_access_token(FACEBOOK_APP_ID, FACEBOOK_APP_SECRET)
    # utils.generateBdayTasks(start, end, {'at': extended_at})
    # The Graph API doesn't allow posting to a friend's wall.
    utils.generateTasks(start, end, {'at': extended_at})
    self.response.out.write("Done!")
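# utils.generateTasks is not shown in this excerpt. Since the handler writes to
# self.response.out, this looks like webapp2 on Google App Engine, so a hedged
# sketch of such a helper could fan out one push-queue task per day in the range.
# The worker URL, payload keys, and one-task-per-day granularity are assumptions,
# not the repo's actual implementation.
import datetime as dt

from google.appengine.api import taskqueue


def generateTasks(start, end, params):
    # Enqueue one task per day in [start, end]; each task carries the extended
    # access token under the 'at' key, mirroring the call site above.
    day = start
    while day <= end:
        taskqueue.add(
            url='/tasks/process_day',  # assumed worker endpoint
            params={'date': day.isoformat(), 'at': str(params['at'])},
        )
        day += dt.timedelta(days=1)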
def train(self, sentences, alpha=0.001, min_alpha=0.001, batches=1000, workers=4):
    print('Start training...')
    self.alpha = alpha
    self.min_alpha = min_alpha
    count = 0
    # The barrier is used to sync the parent and all workers.
    barrier = utils.getBarrier(workers + 1)
    lock1 = Lock()
    lock2 = Lock()
    queue = Queue(workers)
    # delta_c_raw holds the context-weight gradients for each position. They live
    # in shared memory, so every child process can add its deltas to them.
    # delta_c is a numpy wrapper that lets the parent process handle them easily.
    delta_c_raw = [utils.getSharedArray('f', self.dim * self.dim)
                   for i in range(self.context)]
    delta_c = [utils.toNumpyArray(delta_c_raw[i], np.float32, (self.dim, self.dim))
               for i in range(self.context)]
    delta_r_raw = utils.getSharedArray('f', len(self.vocab) * self.dim)
    delta_r = utils.toNumpyArray(delta_r_raw, np.float32, (len(self.vocab), self.dim))

    '''
    vocab: dictionary mapping each word to its index; it is copied into each
    child process. model.wordEm, model.contextW, model.biases, self_delta_c and
    self_delta_r point to data that is shared between parent and children.
    '''
    def worker(model, self_delta_c, self_delta_r, barrier, lock1, lock2, queue):
        self_delta_r = utils.toNumpyArray(self_delta_r, np.float32,
                                          (len(model.vocab), model.dim))
        self_delta_c = [utils.toNumpyArray(self_delta_c[i], np.float32,
                                           (model.dim, model.dim))
                        for i in range(model.context)]
        # delta_c and delta_r are local to a child process and accumulate its
        # deltas. After finishing a task, the child adds them to their shared
        # counterparts via self_delta_c and self_delta_r.
        delta_c = [np.zeros((model.dim, model.dim), np.float32)
                   for i in range(model.context)]
        delta_r = np.zeros((len(model.vocab), model.dim), np.float32)
        # the index of the rare-word placeholder
        RARE = model.vocab['<>']
        # work_d and work_v are scratch buffers reused in train_sentence_fast
        work_d = np.empty(model.dim, np.float32)
        work_v = np.empty(len(model.vocab), np.float32)
        while True:
            task = queue.get()
            if task is None:
                break
            for sentence in task:
                # Null padding has a special index of -1; unknown words map to RARE.
                # A list comprehension (rather than a bare map, which returns an
                # iterator on Python 3) keeps np.asarray correct on both versions.
                indices = [-1 if w == '<_>' else model.vocab.get(w, RARE)
                           for w in sentence]
                indices = np.asarray(indices, np.int32)
                train_sentence(model, indices, delta_c, delta_r, work_d, work_v)
            lock1.acquire()
            for i in range(model.context):
                self_delta_c[i] += delta_c[i]
            lock1.release()
            lock2.acquire()
            self_delta_r += delta_r
            lock2.release()
            barrier.sync()
            for i in range(model.context):
                delta_c[i].fill(0)
            delta_r.fill(0)

    args = (self, delta_c_raw, delta_r_raw, barrier, lock1, lock2, queue)
    pool = [Process(target=worker, args=args) for i in range(workers)]
    for p in pool:
        p.daemon = True
        p.start()

    distributor = utils.generateTasks(iter(sentences), self.l_pad, self.r_pad,
                                      workers, batches)
    start = time.time()
    for tasks in distributor:
        for i in range(workers):
            queue.put(tasks[i], block=False)
        count += batches
        # Linearly anneal the learning rate from alpha down to min_alpha.
        alpha = self.min_alpha + (self.alpha - self.min_alpha) * (1 - 1.0 * count / self.total)
        barrier.sync()
        # At this point all child processes have finished their tasks, so the
        # parent can apply the accumulated (L2-regularised) updates safely.
        for i in range(self.context):
            self.contextW[i] -= (delta_c[i] + 1e-5 * self.contextW[i]) * alpha
        self.wordEm -= (delta_r + 1e-4 * self.wordEm) * alpha
        for i in range(self.context):
            delta_c[i].fill(0)
        delta_r.fill(0)
        elapsed = time.time() - start
        print('visited {0} words, with {1:.2f} Ws/s, alpha: {2}.'.format(
            count, count / elapsed, alpha))
    # Notify the worker processes to exit.
    for i in range(workers):
        queue.put(None)
    for p in pool:
        p.join()
    print('Training is finished!')
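# The utils helpers referenced above (getBarrier, getSharedArray, toNumpyArray)
# are not shown in this excerpt. Below is a plausible, minimal sketch of them
# built on multiprocessing shared memory; the repo's actual implementation may
# differ. A hand-rolled reusable barrier is included because Python 2's
# multiprocessing (which this code appears to target) has no Barrier class.
import multiprocessing as mp

import numpy as np


def getSharedArray(typecode, size):
    # Raw, lock-free shared memory; train() synchronises access with its own
    # explicit Lock objects, so no per-array lock is needed here.
    return mp.RawArray(typecode, size)


def toNumpyArray(raw, dtype, shape):
    # Wrap the shared buffer without copying, so parent and children see the
    # same underlying memory.
    return np.frombuffer(raw, dtype=dtype).reshape(shape)


class Barrier(object):
    # Classic two-phase (reusable) barrier built from a counter and two semaphores.
    def __init__(self, n):
        self.n = n
        self.count = mp.Value('i', 0)
        self.turnstile1 = mp.Semaphore(0)
        self.turnstile2 = mp.Semaphore(0)

    def sync(self):
        # Phase 1: block until all n parties have arrived.
        with self.count.get_lock():
            self.count.value += 1
            if self.count.value == self.n:
                for _ in range(self.n):
                    self.turnstile1.release()
        self.turnstile1.acquire()
        # Phase 2: block until all n parties have left, so the barrier is reusable.
        with self.count.get_lock():
            self.count.value -= 1
            if self.count.value == 0:
                for _ in range(self.n):
                    self.turnstile2.release()
        self.turnstile2.acquire()


def getBarrier(n):
    return Barrier(n)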
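# The trainer's utils.generateTasks is a different helper from the App Engine
# one above: judging by its call site, it pads each sentence with l_pad/r_pad,
# splits the stream into per-worker task lists, and yields one round at a time.
# A hedged sketch under those assumptions; the round-robin assignment and the
# dropping of a trailing partial round are guesses, not the repo's behaviour.
def generateTasks(sentences, l_pad, r_pad, workers, batches):
    # l_pad and r_pad are assumed to be lists of '<_>' padding tokens, and each
    # sentence a list of word strings, matching the '<_>' handling in worker().
    tasks = [[] for _ in range(workers)]
    for n, sentence in enumerate(sentences):
        tasks[n % workers].append(l_pad + sentence + r_pad)
        if (n + 1) % batches == 0:
            # One round covers `batches` sentences in total: train() calls
            # queue.put(tasks[i]) once per worker, then adds `batches` to count.
            yield tasks
            tasks = [[] for _ in range(workers)]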