Example #1
0
    def run(self, callable, data):
        """Dispatch one Celery task per datum and block until all finish.

        Clears any leftover queue entries first, shows a tqdm progress bar
        while polling, and restarts the worker pool once per hour of
        waiting.  Returns ``self`` so calls can be chained.
        """
        # Drop whatever is left over from a previous run before enqueueing.
        self.clear_tasks()
        time.sleep(1)

        print("Creating tasks")
        pending = [callable.delay(item) for item in data]
        progress = tqdm(total=len(pending), unit="task")
        result_set = ResultSet(pending, app=self.app)

        cycle_started = time.time()
        done_so_far = 0
        while True:
            # Once an hour, bounce the worker processes.
            if time.time() - cycle_started > 3600:
                cycle_started = time.time()
                self.spawn_workers()

            try:
                if result_set.ready():
                    break
                now_done = result_set.completed_count()
                progress.update(now_done - done_so_far)
                done_so_far = now_done
            except Exception:
                # Broker hiccup while polling: back off, then keep waiting.
                time.sleep(10)

            time.sleep(1)

        # Account for tasks that completed between the last poll and ready().
        progress.update(result_set.completed_count() - done_so_far)

        return self
    def saveResultsAndCleanUp(self):
        """Wait for outstanding Celery tasks, then dump every writer to JSON.

        Executes after the retrieval is done.  A Ctrl-C during the wait
        abandons the polling loop but the writers are still saved.
        """
        if self.use_celery:
            print("Waiting for tasks to complete...")
            outstanding = ResultSet(self.tasks)
            while not outstanding.ready():
                try:
                    time.sleep(7)
                except KeyboardInterrupt:
                    print("Cancelled waiting")
                    break
            print("All tasks finished.")

        # One JSON file per writer, named after its table, in the experiment dir.
        for writer in self.writers.values():
            target = os.path.join(self.exp["exp_dir"], writer.table_name + ".json")
            writer.saveAsJSON(target)
Example #3
0
def get_result(request):
    """Django view helper: report the state of a parent task and its subtasks.

    Expects an authenticated POST carrying ``task_id``.  Populates ``ret``:

      * ``status``   -- 'success' when this request itself was processed,
                        'error' when the user lookup failed or the request
                        is not an authenticated POST
      * ``result``   -- 'PENDING', 'SUCCESS' or 'FAILURE' (task state)
      * ``messages`` -- progress / error strings for display

    NOTE(review): this excerpt ends without returning ``ret``; the original
    presumably wrapped it in an HTTP/JSON response -- confirm upstream.
    """
    ret = { 'status': 'error', 'result': '', 'messages': [ '', ],  }
    if request.method == 'POST' and request.user:
        try:
            # Lookup only validates that the user exists; failure is reported.
            user = MDBUser.objects.get(username=request.user.username)
        except Exception as e:  # fixed Python-2-only `except Exception, e`
            ret['messages'][0] = "<strong>FATAL</strong>(get_result.user): %s" % e
        else:
            # Note: this is NOT status of tasks, 'success' here means that
            # get_result() request was processed correctly
            ret['status'] = 'success'
            async_res = AsyncResult(request.POST['task_id'])
            if async_res.ready():
                # Get all subtasks spawned by parent.
                # BUGFIX: this was `subtasks = None`, which made the loop
                # below raise TypeError.  Default to an empty list until
                # ust_get_ids(user) is reinstated.
                subtasks = []  # ust_get_ids(user)
                # Create list of AsyncResults from list of task_ids
                async_results = [AsyncResult(task_id) for task_id in subtasks]
                # And also ResultSet for convenience
                async_res_set = ResultSet(async_results)
                ret['messages'][0] = 'parent task %s: %d of %d subtasks completed' %\
                                     (request.POST['task_id'][:8],
                                      async_res_set.completed_count(),
                                      async_res_set.total,
                                     )
                # All tasks completed ?
                if async_res_set.ready():
                    # All tasks done, forget about those task ids
                    #ust_clear_ids(user)
                    # Any of them failed ?
                    if async_res_set.failed():
                        ret['result'] = 'FAILURE'
                        for async_res in async_results:
                            if async_res.state == 'FAILURE':
                                ret['messages'].append("<strong>ERROR</strong>(get_result.FAILURE): '%s':'%s'" %\
                                                       (async_res.task_id[:8], async_res.result, ))
                    else:
                        ret['result'] = 'SUCCESS'
                else:
                    ret['result'] = 'PENDING'
            else:
                ret['result'] = 'PENDING'
                ret['messages'][0] = 'parent task %s: PENDING' % \
                    (request.POST['task_id'], )
Example #4
0
    def saveResultsAndCleanUp(self):
        """Block until queued Celery tasks finish, then persist all writers.

        Executes after the retrieval is done.  KeyboardInterrupt during the
        wait stops the polling loop; the writers are saved either way.
        """
        if self.use_celery:
            print("Waiting for tasks to complete...")
            pending = ResultSet(self.tasks)
            interrupted = False
            while not (interrupted or pending.ready()):
                try:
                    time.sleep(7)
                except KeyboardInterrupt:
                    print("Cancelled waiting")
                    interrupted = True
            print("All tasks finished.")

        # Each writer ends up as <exp_dir>/<table_name>.json
        for key in self.writers:
            w = self.writers[key]
            w.saveAsJSON(os.path.join(self.exp["exp_dir"], w.table_name + ".json"))
Example #5
0
    def saveResultsAndCleanUp(self):
        """Wait out any outstanding Celery tasks after retrieval finishes.

        When the ``list_missing_files`` option is set, also records the
        files that could not be retrieved.

        Should the results be saved?
        """
        # super().saveResultsAndCleanUp()

        if self.use_celery:
            print("Waiting for tasks to complete...")
            watched = ResultSet(self.tasks)
            while True:
                if watched.ready():
                    break
                try:
                    time.sleep(7)
                except KeyboardInterrupt:
                    print("Cancelled waiting")
                    break
            print("All tasks finished.")

        if self.options.get("list_missing_files", False):
            self.saveMissingFiles()
Example #6
0
    elif filename != "":
        result.add(processFile.delay(filename, 2))

    #sponsored = train.loc[train['file'] == openfile]
    #if not sponsored.empty:
    #result.add(processFile.delay(openfile, data, int(sponsored['sponsored'])))
    #testing = sample.loc[sample['file'] == openfile]
    #if not testing.empty:
    #result.add(processFile.delay(openfile, data, int(sponsored['sponsored'])))

    bar.numerator = k
    print("Sending out processes ", bar, end='\r')
    sys.stdout.flush()

# Poll the Celery ResultSet with a progress bar, gather the per-file
# feature rows, then fit a random forest on the labelled portion.
bar = ProgressBar(len(train) + len(test_files), max_width=40)
while not result.ready():
    time.sleep(5)
    # completed_count() is the number of finished subtasks; it drives the bar.
    bar.numerator = result.completed_count()
    print("Waiting for return results ", bar, end='\r')
    sys.stdout.flush()

results = result.join()  #wait for jobs to finish

df_full = pd.DataFrame(list(results))

print('--- Training random forest')
clf = RandomForestClassifier(n_estimators=150, n_jobs=-1, random_state=0)
# Rows with a known 'sponsored' label form the training set; NaNs -> 0.
train_data = df_full[df_full.sponsored.notnull()].fillna(0)
# Unlabelled rows belonging to the held-out file list form the test set.
test = df_full[df_full.sponsored.isnull()
               & df_full.file.isin(test_files)].fillna(0)
# Fit on all feature columns except the file identifier and the label.
# NOTE(review): positional `axis` in DataFrame.drop is deprecated; prefer axis=1.
clf.fit(train_data.drop(['file', 'sponsored'], 1), train_data.sponsored)
		result.add(processFile.delay(filename, 2))
	
	#sponsored = train.loc[train['file'] == openfile]
	#if not sponsored.empty:
		#result.add(processFile.delay(openfile, data, int(sponsored['sponsored'])))
	#testing = sample.loc[sample['file'] == openfile]
	#if not testing.empty:
		#result.add(processFile.delay(openfile, data, int(sponsored['sponsored'])))


	bar.numerator = k
	print("Sending out processes ", bar, end='\r')
	sys.stdout.flush()

# Poll the Celery ResultSet with a progress bar, collect the returned
# per-file rows, and train the classifier on the labelled rows.
bar = ProgressBar(len(train)+len(test_files), max_width=40)
while not result.ready():
	time.sleep(5)
	bar.numerator = result.completed_count()
	print("Waiting for return results ", bar, end='\r')
	sys.stdout.flush()

results = result.join() #wait for jobs to finish

df_full = pd.DataFrame(list(results))

print('--- Training random forest')
clf = RandomForestClassifier(n_estimators=150, n_jobs=-1, random_state=0)
# Labelled rows train the model; unlabelled rows from test_files are held out.
train_data = df_full[df_full.sponsored.notnull()].fillna(0)
test = df_full[df_full.sponsored.isnull() & df_full.file.isin(test_files)].fillna(0)
# NOTE(review): positional `axis` in DataFrame.drop is deprecated; prefer axis=1.
clf.fit(train_data.drop(['file', 'sponsored'], 1), train_data.sponsored)