def run(self, callable, data):
    """Dispatch one distributed task per datum and block until all finish.

    Parameters
    ----------
    callable : a celery task object; ``callable.delay(datum)`` schedules one
        task per element of *data*.  (The name shadows the builtin
        ``callable`` but is kept unchanged for backward compatibility with
        keyword callers.)
    data : iterable of per-task arguments.

    Returns
    -------
    self, for fluent chaining.
    """
    # Drain any stale tasks left in the queue before dispatching new ones.
    self.clear_tasks()
    time.sleep(1)

    # Create all distributed tasks in the queue.
    print("Creating tasks")
    tasks = [callable.delay(datum) for datum in data]
    progress = tqdm(total=len(tasks), unit="task")
    results = ResultSet(tasks, app=self.app)

    start_time = time.time()
    last_completed = 0
    try:
        # Poll roughly once per second until every task has completed.
        while True:
            if time.time() - start_time > 3600:
                # Once per hour, restart all slave workers -- guards
                # against wedged/leaked workers during very long runs.
                start_time = time.time()
                self.spawn_workers()
            try:
                if results.ready():
                    break
                completed = results.completed_count()
                progress.update(completed - last_completed)
                last_completed = completed
            except Exception as exc:
                # Transient broker/backend error: report it and back off
                # instead of silently swallowing it.
                print("Polling error (retrying in 10s): %s" % exc)
                time.sleep(10)
            time.sleep(1)
        # Account for tasks that finished between the last poll and ready().
        progress.update(results.completed_count() - last_completed)
    finally:
        progress.close()  # always release the progress bar
    return self
def get_result(request):
    """AJAX endpoint: report completion status of a parent celery task and
    the subtasks it spawned.

    Expects a POST with a ``task_id`` field.  Builds a dict with keys
    ``status`` (whether this request itself was processed), ``result``
    ('SUCCESS' / 'FAILURE' / 'PENDING' for the task set) and ``messages``
    (HTML fragments for display).

    NOTE(review): this chunk may be truncated -- no ``return`` of ``ret``
    is visible here.  Python 2 syntax (``except Exception, e``).
    """
    ret = {
        'status': 'error',
        'result': '',
        'messages': [
            '',  # slot 0 is reserved for the primary status/error message
        ],
    }
    if request.method == 'POST' and request.user:
        try:
            user = MDBUser.objects.get(username=request.user.username)
        except Exception, e:
            # Could not resolve the authenticated user to an MDBUser row.
            ret['messages'][0] = "<strong>FATAL</strong>(get_result.user): %s" % e
        else:
            # Note: this is NOT status of tasks, 'success' here means that
            # get_result() request was processed correctly
            ret['status'] = 'success'
            async_res = AsyncResult(request.POST['task_id'])
            if async_res.ready():
                # Get all subtasks spawned by parent.
                # NOTE(review): the real lookup is commented out, so
                # ``subtasks`` is None and the ``for`` below will raise
                # TypeError -- restore ust_get_ids(user) before use.
                subtasks = None #ust_get_ids(user)
                # Create list of AsyncResults from list of task_ids
                async_results = []
                for task_id in subtasks:
                    async_results.append(AsyncResult(task_id))
                # And also ResultSet for convenience
                async_res_set = ResultSet(async_results)
                ret['messages'][0] = 'parent task %s: %d of %d subtasks completed' %\
                    (request.POST['task_id'][:8],
                     async_res_set.completed_count(),
                     async_res_set.total,
                     )
                # All tasks completed ?
                if async_res_set.ready():
                    # All tasks done, forget about those task ids
                    #ust_clear_ids(user)
                    # Any of them failed ?
                    if async_res_set.failed():
                        ret['result'] = 'FAILURE'
                        # Append one error message per failed subtask.
                        for async_res in async_results:
                            if async_res.state == 'FAILURE':
                                ret['messages'].append("<strong>ERROR</strong>(get_result.FAILURE): '%s':'%s'" %\
                                    (async_res.task_id[:8],
                                     async_res.result,
                                     ))
                    else:
                        ret['result'] = 'SUCCESS'
                else:
                    # Parent done but some subtasks still running.
                    ret['result'] = 'PENDING'
            else:
                # Parent task itself has not finished yet.
                ret['result'] = 'PENDING'
                ret['messages'][0] = 'parent task %s: PENDING' % \
                    (request.POST['task_id'],
                     )
# Report dispatch progress for the enclosing loop (``k`` and ``bar`` are
# defined by the dispatch loop above this fragment).
bar.numerator = k
print("Sending out processes ", bar, end='\r')
sys.stdout.flush()

# Fresh progress bar sized to the full task set, then poll the celery
# ResultSet every 5 seconds until every worker has reported back.
bar = ProgressBar(len(train) + len(test_files), max_width=40)
while not result.ready():
    time.sleep(5)
    bar.numerator = result.completed_count()
    print("Waiting for return results ", bar, end='\r')
    sys.stdout.flush()
results = result.join()  # wait for jobs to finish and collect their outputs

df_full = pd.DataFrame(list(results))

print('--- Training random forest')
clf = RandomForestClassifier(n_estimators=150, n_jobs=-1, random_state=0)
# Rows with a known label form the training set; unlabeled rows that belong
# to the test file list form the prediction set.
train_data = df_full[df_full.sponsored.notnull()].fillna(0)
test = df_full[df_full.sponsored.isnull() & df_full.file.isin(test_files)].fillna(0)
# Explicit axis= keyword: passing axis positionally to DataFrame.drop was
# deprecated and removed in pandas 2.0.
clf.fit(train_data.drop(['file', 'sponsored'], axis=1), train_data.sponsored)

print('--- Create predictions and submission')
# Report dispatch progress for the enclosing loop (``k`` and ``bar`` are
# defined by the dispatch loop above this fragment).
bar.numerator = k
print("Sending out processes ", bar, end='\r')
sys.stdout.flush()

# Fresh progress bar sized to the full task set, then poll the celery
# ResultSet every 5 seconds until every worker has reported back.
bar = ProgressBar(len(train) + len(test_files), max_width=40)
while not result.ready():
    time.sleep(5)
    bar.numerator = result.completed_count()
    print("Waiting for return results ", bar, end='\r')
    sys.stdout.flush()
results = result.join()  # wait for jobs to finish and collect their outputs

df_full = pd.DataFrame(list(results))

print('--- Training random forest')
clf = RandomForestClassifier(n_estimators=150, n_jobs=-1, random_state=0)
# Rows with a known label form the training set; unlabeled rows that belong
# to the test file list form the prediction set.
train_data = df_full[df_full.sponsored.notnull()].fillna(0)
test = df_full[df_full.sponsored.isnull() & df_full.file.isin(test_files)].fillna(0)
# Explicit axis= keyword: passing axis positionally to DataFrame.drop was
# deprecated and removed in pandas 2.0.
clf.fit(train_data.drop(['file', 'sponsored'], axis=1), train_data.sponsored)

print('--- Create predictions and submission')
# Submission frame keyed by file name, predictions to be attached below.
submission = test[['file']].reset_index(drop=True)