def run(self, callable, data):
    # Clear Queue
    self.clear_tasks()
    time.sleep(1)

    # Create all distributed tasks in the queue
    print("Creating tasks")
    tasks = [callable.delay(datum) for datum in data]
    t = tqdm(total=len(tasks), unit="task")
    results = ResultSet(tasks, app=self.app)
    start_time = time.time()

    # Wait for all distributed tasks to finish
    last_completed = 0
    while True:
        if time.time() - start_time > 3600:  # Will happen every hour
            start_time = time.time()
            self.spawn_workers()  # Restart all slaves
        try:
            if results.ready():
                break
            completed = results.completed_count()
            t.update(completed - last_completed)
            last_completed = completed
        except Exception:
            time.sleep(10)
        time.sleep(1)

    t.update(results.completed_count() - last_completed)
    return self
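# --- Hedged usage sketch (not from the original source): the same
# dispatch-then-poll pattern as run() above, written as a standalone script.
# The broker/backend URLs, the app name and the doubling task are assumptions;
# the celery/tqdm calls themselves (delay, ResultSet, completed_count, join,
# tqdm.update) are real API.
from celery import Celery
from celery.result import ResultSet
from tqdm import tqdm
import time

app = Celery("demo", broker="redis://localhost:6379/0",
             backend="redis://localhost:6379/0")

@app.task
def process_item(x):
    # Illustrative task only.
    return x * 2

def run_all(data):
    # Fire off one task per datum, then poll the ResultSet until done.
    tasks = [process_item.delay(d) for d in data]
    results = ResultSet(tasks, app=app)
    with tqdm(total=len(tasks), unit="task") as bar:
        done = 0
        while not results.ready():
            now = results.completed_count()
            bar.update(now - done)
            done = now
            time.sleep(1)
        bar.update(results.completed_count() - done)
    return results.join()  # gather return values in dispatch order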
def saveResultsAndCleanUp(self):
    """
    Executes after the retrieval is done.
    """
    if self.use_celery:
        print("Waiting for tasks to complete...")
        res = ResultSet(self.tasks)
        while not res.ready():
            try:
                time.sleep(7)
            except KeyboardInterrupt:
                print("Cancelled waiting")
                break
        print("All tasks finished.")

    for writer in self.writers:
        self.writers[writer].saveAsJSON(
            os.path.join(self.exp["exp_dir"], self.writers[writer].table_name + ".json"))
def get_result(request):
    ret = {
        'status': 'error',
        'result': '',
        'messages': ['', ],
    }
    if request.method == 'POST' and request.user:
        try:
            user = MDBUser.objects.get(username=request.user.username)
        except Exception as e:
            ret['messages'][0] = "<strong>FATAL</strong>(get_result.user): %s" % e
        else:
            # Note: this is NOT status of tasks, 'success' here means that
            # get_result() request was processed correctly
            ret['status'] = 'success'
            async_res = AsyncResult(request.POST['task_id'])
            if async_res.ready():
                # Get all subtasks spawned by parent
                # (the helper that fetches the stored subtask ids is commented out;
                #  subtasks must be populated here or the loop below fails on None)
                subtasks = None  # ust_get_ids(user)
                # Create list of AsyncResults from list of task_ids
                async_results = []
                for task_id in subtasks:
                    async_results.append(AsyncResult(task_id))
                # And also ResultSet for convenience
                async_res_set = ResultSet(async_results)
                ret['messages'][0] = 'parent task %s: %d of %d subtasks completed' % \
                    (request.POST['task_id'][:8],
                     async_res_set.completed_count(),
                     async_res_set.total,
                     )
                # All tasks completed?
                if async_res_set.ready():
                    # All tasks done, forget about those task ids
                    # ust_clear_ids(user)
                    # Any of them failed?
                    if async_res_set.failed():
                        ret['result'] = 'FAILURE'
                        for async_res in async_results:
                            if async_res.state == 'FAILURE':
                                ret['messages'].append(
                                    "<strong>ERROR</strong>(get_result.FAILURE): '%s':'%s'" %
                                    (async_res.task_id[:8], async_res.result, ))
                    else:
                        ret['result'] = 'SUCCESS'
                else:
                    ret['result'] = 'PENDING'
            else:
                ret['result'] = 'PENDING'
                ret['messages'][0] = 'parent task %s: PENDING' % \
                    (request.POST['task_id'], )
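# --- Hedged sketch (not from the original source) of the same polling pattern
# outside of Django: rebuild a ResultSet from stored task ids and summarise its
# state. The celery.result calls (AsyncResult, ResultSet, ready(), failed(),
# completed_count()) are real; where the ids come from is an assumption.
from celery.result import AsyncResult, ResultSet

def poll_subtasks(stored_task_ids):
    rs = ResultSet([AsyncResult(tid) for tid in stored_task_ids])
    if not rs.ready():
        return 'PENDING', rs.completed_count()
    return ('FAILURE' if rs.failed() else 'SUCCESS'), rs.completed_count()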
def saveResultsAndCleanUp(self):
    """
    Executes after the retrieval is done. Should the results be saved?
    """
    # super().saveResultsAndCleanUp()
    if self.use_celery:
        print("Waiting for tasks to complete...")
        res = ResultSet(self.tasks)
        while not res.ready():
            try:
                time.sleep(7)
            except KeyboardInterrupt:
                print("Cancelled waiting")
                break
        print("All tasks finished.")

    if self.options.get("list_missing_files", False):
        self.saveMissingFiles()
    elif filename != "":
        result.add(processFile.delay(filename, 2))
        # sponsored = train.loc[train['file'] == openfile]
        # if not sponsored.empty:
        #     result.add(processFile.delay(openfile, data, int(sponsored['sponsored'])))
        # testing = sample.loc[sample['file'] == openfile]
        # if not testing.empty:
        #     result.add(processFile.delay(openfile, data, int(sponsored['sponsored'])))
    bar.numerator = k
    print("Sending out processes ", bar, end='\r')
    sys.stdout.flush()

bar = ProgressBar(len(train) + len(test_files), max_width=40)
while not result.ready():
    time.sleep(5)
    bar.numerator = result.completed_count()
    print("Waiting for return results ", bar, end='\r')
    sys.stdout.flush()

results = result.join()  # wait for jobs to finish
df_full = pd.DataFrame(list(results))

print('--- Training random forest')
clf = RandomForestClassifier(n_estimators=150, n_jobs=-1, random_state=0)
train_data = df_full[df_full.sponsored.notnull()].fillna(0)
test = df_full[df_full.sponsored.isnull() & df_full.file.isin(test_files)].fillna(0)
clf.fit(train_data.drop(['file', 'sponsored'], 1), train_data.sponsored)
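# --- Hedged continuation sketch (not part of the original snippet): after
# fitting, predictions for the held-out files would typically come from
# predict_proba(); the output file name and column layout are assumptions
# based on the 'file'/'sponsored' columns built above.
submission = pd.DataFrame({
    'file': test['file'],
    'sponsored': clf.predict_proba(test.drop(['file', 'sponsored'], 1))[:, 1],
})
submission.to_csv('predictions.csv', index=False)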