def launch_tasks():
    """Launch one task of each kind in the background and wait for all of them.

    Every launched task gets ``report`` attached through ``then`` right after
    it is submitted; the function blocks on a ``ResultSet`` join before
    printing ``Done``.
    """

    def _start(task, *args, **kwargs):
        # Submit the task and hook the reporting callback before moving on,
        # so the submit/callback order stays the same for every task.
        handle = background(task, *args, **kwargs)
        handle.then(report)
        return handle

    print('Launching different types of tasks')

    pending = [
        _start(tasks.fast_task, x=5, y=10),
        _start(tasks.long_task, 42),
        _start(tasks.default_task),
        _start(tasks.elasticsearch_task),
    ]

    # Block until every launched task has finished.
    ResultSet(pending).join()

    print('Done')
def launch_elasticsearch():
    """Launch the two indexing tasks in the background and print their results."""
    # Imported lazily, exactly as before, so importing this module does not
    # pull in the elasticsearch task module.
    from celery_routing.elasticsearch.tasks import index_users, index_tweets

    print('Launching indexing')

    pending = [background(job) for job in (index_users, index_tweets)]
    outcome = ResultSet(pending).join()
    print(outcome)

    print('Done')
def test_get(self):
    # Checks that ResultSet.get() calls join() when the first result's
    # backend reports supports_native_join = False, and join_native() once
    # that flag is flipped to True.
    #
    # A real list is built here instead of passing map(...): on Python 3
    # map() is a lazy iterator, and indexing ``x.results[0]`` below needs a
    # sequence.
    x = ResultSet([AsyncResult(task_id) for task_id in [1, 2, 3]])

    # Replace the backend of the first result with a mock we can steer.
    b = x.results[0].backend = Mock()
    b.supports_native_join = False

    # Stub out both join flavours so nothing actually blocks.
    x.join_native = Mock()
    x.join = Mock()

    x.get()
    self.assertTrue(x.join.called)

    b.supports_native_join = True
    x.get()
    self.assertTrue(x.join_native.called)
def test_get(self):
    # Verifies the dispatch done by ResultSet.get(): join() is used while the
    # first result's backend has supports_native_join = False, join_native()
    # after the flag is set to True.
    #
    # The results are materialised as a list rather than handed over as
    # map(...), because map() returns a lazy iterator on Python 3 and
    # ``x.results[0]`` below requires indexing.
    x = ResultSet([AsyncResult(task_id) for task_id in [1, 2, 3]])

    # Swap in a mock backend on the first result so the flag can be toggled.
    b = x.results[0].backend = Mock()
    b.supports_native_join = False

    # Both join implementations are mocked; only the call is observed.
    x.join_native = Mock()
    x.join = Mock()

    x.get()
    self.assertTrue(x.join.called)

    b.supports_native_join = True
    x.get()
    self.assertTrue(x.join_native.called)
def testbuf(padbytes=0, megabytes=0):
    """Submit 32 ``sleeping`` tasks with a padded keyword argument and print the joined results.

    The padding size is ``padbytes`` plus ``megabytes`` mebibytes; it is sent
    as a string of ``'x'`` characters in the ``kw`` argument of every task.
    """
    padding = float(padbytes) + 2 ** 20 * float(megabytes)
    print('> padding: %r' % (padding, ))

    # The payload is the same for every task, so build it once.
    payload = 'x' * int(padding)

    pending = []
    for _ in range(8 * 4):
        pending.append(sleeping.delay(1, kw=payload))
        # Short pause between submissions.
        time.sleep(0.01)

    print(ResultSet(pending).join())
def testbuf(padbytes=0, megabytes=0):
    """Send 32 padded ``sleeping`` tasks and print what they return.

    ``padbytes`` and ``megabytes`` (multiplied by 2**20) are added together to
    give the length of the ``'x'`` string passed as the ``kw`` argument.
    """
    task_count = 8 * 4
    padding = float(padbytes) + 2 ** 20 * float(megabytes)
    print('> padding: %r' % (padding, ))

    # Identical for every submission; computed a single time.
    filler = 'x' * int(padding)

    submitted = []
    for _ in range(task_count):
        async_result = sleeping.delay(1, kw=filler)
        submitted.append(async_result)
        time.sleep(0.01)  # brief gap between submissions

    joined = ResultSet(submitted).join()
    print(joined)
def divide_work(start_angle, stop_angle, num_angles, num_nodes, levels, NACA,
                num_samples, viscosity, speed, sim_time):
    """Run one ``airfoil`` task per angle, reuse known results, and archive the output.

    The angle range is split into ``num_angles`` integer steps (``num_angles + 1``
    angles are processed, matching runme.sh). Each angle is identified by the
    key from ``_key_string``, which is also used as the Celery task id, so an
    earlier run of the same parameters can be recognised.

    Returns the archive file name produced by ``_result_string``; the archive
    itself is written under ``/home/ubuntu/results/``.

    Raises ``ZeroDivisionError`` if ``num_angles`` is 0.
    """
    anglediff = (stop_angle - start_angle) // num_angles

    # http://docs.celeryproject.org/en/latest/reference/celery.result.html#celery.result.ResultSet
    in_progress = ResultSet([])
    results = []

    # +1 to match runme.sh
    for i in range(num_angles + 1):
        angle = start_angle + anglediff * i
        key = _key_string(angle, num_nodes, levels, NACA, num_samples,
                          viscosity, speed, sim_time)

        # Query the result backend for this key.
        # Possibly add something more to check if the result is queued and
        # pending. Simultaneous queries will cause double tasks to be added to
        # the queue, since the tasks are pending. Possibly add a global queue
        # that we append in-progress results to, which we check if status is
        # pending.
        result = airfoil.AsyncResult(key)

        # Read the state exactly once. Every access to ``status`` asks the
        # backend again, so a task finishing between two comparisons used to
        # fall through to the "FAILURE" branch and get queued a second time.
        status = result.status

        if status == 'SUCCESS':
            results.append(result.get())
            continue

        if status in ('STARTED', 'RETRY'):
            in_progress.add(result)
            continue

        if status != 'PENDING':
            # Any other state is treated as a failure and the task is re-run.
            print('Task status FAILURE.')

        result = airfoil.apply_async(
            (angle, num_nodes, levels, NACA, num_samples, viscosity, speed,
             sim_time, key),
            task_id=key)
        in_progress.add(result)

    # Waiting for all results.
    # in_progress.join_native() is supposed to be more efficient, but it seems
    # to hang.
    results.extend(in_progress.join())

    # list of all results
    result_file = _result_string(start_angle, stop_angle, num_angles,
                                 num_nodes, levels, NACA, num_samples,
                                 viscosity, speed, sim_time)
    results_archive = '/home/ubuntu/results/' + result_file

    # NOTE(review): the task results are passed to tar as member names after
    # ``-C``, so they are presumably paths relative to
    # /home/ubuntu/sync_results - confirm against the airfoil task.
    tar_cmd = [
        'tar', '-zcvf', results_archive, '-C', '/home/ubuntu/sync_results'
    ]
    tar_cmd.extend(results)
    # The exit status of tar is not checked (unchanged behaviour).
    subprocess.call(tar_cmd)

    return result_file
def initialise_fb_user(domain_uri, access_token):
    """Process a Facebook user's photos, upload the filtered ones and train on them.

    When ``settings.USE_ASYNC`` is set, photo processing and uploading are
    fanned out as tasks and joined through a ``ResultSet``; otherwise the same
    functions are called directly. The user's id is used as the group name.
    """
    fb_user = get_fb_user(access_token)
    group_name = fb_user.id
    photos = get_fb_photos(access_token)

    # Step 1: process every photo entry; each call yields a list of photos.
    if settings.USE_ASYNC:
        pending = ResultSet([
            process_fb_photo.delay(d, access_token) for d in photos['data']
        ])
        batches = pending.join()
    else:
        batches = [process_fb_photo(d, access_token) for d in photos['data']]

    # Flatten the per-entry lists into a single list.
    processed_photos = [p for batch in batches for p in batch]

    filtered_photos = filter_fb_photos_for_training(processed_photos)
    media_uri = urlparse.urljoin(domain_uri, settings.MEDIA_URL)

    # Step 2: upload - one task per photo when async, one direct call otherwise.
    if settings.USE_ASYNC:
        uploads = ResultSet([
            upload_fb_photos_for_training.delay([p], group_name, media_uri)
            for p in filtered_photos
        ])
        uploads.join()
    else:
        upload_fb_photos_for_training(filtered_photos, group_name, media_uri)

    # Step 3: train once everything has been uploaded.
    train_fb_photos(group_name)
def generate_image_samplings_from_kaltura(entry_id):
    """Generate thumbnails at regular offsets of a Kaltura video.

    The entry metadata is polled up to 5 times until it contains a
    ``duration`` (in seconds). Thumbnails are then generated for every offset
    in ``range(0, duration, step)``, as Celery tasks when
    ``settings.USE_ASYNC`` is set and through direct calls otherwise.

    Returns the list of thumbnail results.

    Raises ``Exception`` when no duration was found after all attempts.
    """
    duration = None  # duration in seconds

    for attempts_left in range(5, 0, -1):
        data = get_entry_metadata(entry_id)
        if 'duration' in data:
            duration = data['duration']
            break
        # Wait longer after each failure (12s, 15s, 20s, 30s). There is no
        # sleep after the final attempt: it only delayed the error below by
        # another 60 seconds.
        if attempts_left > 1:
            time.sleep(60.0 / attempts_left)

    if duration is None:
        raise Exception('Cannot find video on Kaltura: "%s"' % entry_id)

    # number of samples should be proportion to the video duration
    # NOTE(review): max(5, ...) is always >= 5, so min(1, ...) always yields 1
    # and one thumbnail per second is generated. The clamp looks inverted, but
    # the intended bounds are not clear from here, so behaviour is unchanged.
    step = min(1, max(5, int(duration / 5.0)))

    # NOTE(review): range() needs an integer duration - confirm what
    # get_entry_metadata returns.
    offsets = range(0, duration, step)

    if settings.USE_ASYNC:
        results = ResultSet([
            generate_thumbnail_at_time_from_kaltura.delay(entry_id, i)
            for i in offsets
        ])
        return results.join()

    return [generate_thumbnail_at_time_from_kaltura(entry_id, i) for i in offsets]
# NOTE(review): this excerpt begins part-way through a script. `k`, `result`,
# `train`, `test_files` and the first `bar` are defined before it, and the
# three statements right below the commented-out code look like the tail of
# the loop that dispatches the tasks - confirm their indentation against the
# full file.
#testing = sample.loc[sample['file'] == openfile]
#if not testing.empty:
#result.add(processFile.delay(openfile, data, int(sponsored['sponsored'])))
bar.numerator = k
print("Sending out processes ", bar, end='\r')
sys.stdout.flush()

# Poll the result set every 5 seconds and show how many tasks have completed.
bar = ProgressBar(len(train) + len(test_files), max_width=40)
while not result.ready():
    time.sleep(5)
    bar.numerator = result.completed_count()
    print("Waiting for return results ", bar, end='\r')
    sys.stdout.flush()

results = result.join()  # wait for jobs to finish

df_full = pd.DataFrame(list(results))

print('--- Training random forest')
clf = RandomForestClassifier(n_estimators=150, n_jobs=-1, random_state=0)

# Rows with a `sponsored` value are the training set; rows without one that
# belong to `test_files` are the ones to predict.
train_data = df_full[df_full.sponsored.notnull()].fillna(0)
test = df_full[df_full.sponsored.isnull() & df_full.file.isin(test_files)].fillna(0)

# `axis` is passed by keyword: pandas 2.0 no longer accepts it positionally,
# so drop([...], 1) raises TypeError there.
clf.fit(train_data.drop(['file', 'sponsored'], axis=1), train_data.sponsored)

print('--- Create predictions and submission')
submission = test[['file']].reset_index(drop=True)
# Column 1 of predict_proba is kept as the `sponsored` score.
submission['sponsored'] = clf.predict_proba(test.drop(['file', 'sponsored'], axis=1))[:, 1]
submission.to_csv('native_btb_basic_submission.csv', index=False)
def gather_scrub_work():
    """
    Retrieve and execute scrub work

    Every reachable Storage Router owning a SCRUB partition gets an equal
    slice of the BASE virtual disks. The slice of the local Storage Router is
    scrubbed in-process, the others are dispatched as tasks routed to
    ``sr.<machine_id>`` and joined afterwards.

    :return: None
    :raises RuntimeError: when no scrub location is found, or when the guids
                          reported as processed do not match the guids that
                          had to be scrubbed
    """
    logger.info('Gather Scrub - Started')

    # Map every reachable Storage Router to the path of its first SCRUB partition
    scrub_locations = {}
    for storage_driver in StorageDriverList.get_storagedrivers():
        for partition in storage_driver.partitions:
            if DiskPartition.ROLES.SCRUB == partition.role:
                logger.info('Gather Scrub - Storage Router {0:<15} has SCRUB partition at {1}'.format(storage_driver.storagerouter.ip, partition.path))
                if storage_driver.storagerouter not in scrub_locations:
                    try:
                        # Only used as a reachability check
                        _ = SSHClient(storage_driver.storagerouter)
                        scrub_locations[storage_driver.storagerouter] = str(partition.path)
                    except UnableToConnectException:
                        logger.warning('Gather Scrub - Storage Router {0:<15} is not reachable'.format(storage_driver.storagerouter.ip))

    if len(scrub_locations) == 0:
        raise RuntimeError('No scrub locations found')

    # Collect the guids of all BASE virtual disks, with or without a vMachine
    vdisk_guids = set()
    for vmachine in VMachineList.get_customer_vmachines():
        for vdisk in vmachine.vdisks:
            if vdisk.info['object_type'] == 'BASE':
                vdisk_guids.add(vdisk.guid)
    for vdisk in VDiskList.get_without_vmachine():
        if vdisk.info['object_type'] == 'BASE':
            vdisk_guids.add(vdisk.guid)

    logger.info('Gather Scrub - Checking {0} volumes for scrub work'.format(len(vdisk_guids)))
    local_machineid = System.get_my_machine_id()
    local_storage_router = None
    local_scrub_location = None
    local_vdisks_to_scrub = []
    result_set = ResultSet([])
    storage_router_list = []

    # Materialise the guids once so every slice is taken from the same ordering
    all_guids = list(vdisk_guids)
    nr_of_guids = len(all_guids)
    nr_of_locations = len(scrub_locations)

    for index, scrub_info in enumerate(scrub_locations.items()):
        # Floor division: slice bounds must be integers, and '/' yields floats
        # on Python 3 or with 'from __future__ import division'
        start_index = index * nr_of_guids // nr_of_locations
        end_index = (index + 1) * nr_of_guids // nr_of_locations
        storage_router = scrub_info[0]
        vdisk_guids_to_scrub = all_guids[start_index:end_index]
        local = storage_router.machine_id == local_machineid
        logger.info('Gather Scrub - Storage Router {0:<15} ({1}) - Scrubbing {2} virtual disks'.format(storage_router.ip, 'local' if local is True else 'remote', len(vdisk_guids_to_scrub)))

        if local is True:
            local_storage_router = storage_router
            local_scrub_location = scrub_info[1]
            local_vdisks_to_scrub = vdisk_guids_to_scrub
        else:
            result_set.add(ScheduledTaskController._execute_scrub_work.s(scrub_location=scrub_info[1],
                                                                         vdisk_guids=vdisk_guids_to_scrub).apply_async(
                routing_key='sr.{0}'.format(storage_router.machine_id)
            ))
            # Kept in the same order as result_set so failures can be attributed below
            storage_router_list.append(storage_router)

    # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish
    processed_guids = []
    if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0:
        try:
            processed_guids = ScheduledTaskController._execute_scrub_work(scrub_location=local_scrub_location,
                                                                          vdisk_guids=local_vdisks_to_scrub)
        except Exception as ex:
            # A local failure is logged only; it surfaces through the guid comparison at the end
            logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(local_storage_router.ip, ex))

    all_results = result_set.join(propagate=False)  # Propagate False makes sure all jobs are waited for even when 1 or more jobs fail
    for index, result in enumerate(all_results):
        if isinstance(result, list):
            processed_guids.extend(result)
        else:
            # Anything that is not a list is treated as the error of that remote job
            logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(storage_router_list[index].ip, result))

    if len(processed_guids) != len(vdisk_guids) or set(processed_guids).difference(vdisk_guids):
        raise RuntimeError('Scrubbing failed for 1 or more storagerouters')
    logger.info('Gather Scrub - Finished')
# NOTE(review): this excerpt begins part-way through a script. `k`, `result`,
# `train`, `test_files` and the first `bar` come from code before it, and the
# three statements following the commented-out lines appear to be the end of
# the task-dispatch loop body - confirm their indentation against the full
# file.
#testing = sample.loc[sample['file'] == openfile]
#if not testing.empty:
#result.add(processFile.delay(openfile, data, int(sponsored['sponsored'])))
bar.numerator = k
print("Sending out processes ", bar, end='\r')
sys.stdout.flush()

# Wait for the dispatched tasks, refreshing the progress bar every 5 seconds.
bar = ProgressBar(len(train)+len(test_files), max_width=40)
while not result.ready():
    time.sleep(5)
    bar.numerator = result.completed_count()
    print("Waiting for return results ", bar, end='\r')
    sys.stdout.flush()

results = result.join()  # wait for jobs to finish

df_full = pd.DataFrame(list(results))

print('--- Training random forest')
clf = RandomForestClassifier(n_estimators=150, n_jobs=-1, random_state=0)

# Training rows carry a `sponsored` value; test rows have none and must be
# listed in `test_files`.
train_data = df_full[df_full.sponsored.notnull()].fillna(0)
test = df_full[df_full.sponsored.isnull() & df_full.file.isin(test_files)].fillna(0)

# Keyword `axis=1` instead of a positional 1: pandas 2.0 made every argument
# of DataFrame.drop after `labels` keyword-only.
clf.fit(train_data.drop(['file', 'sponsored'], axis=1), train_data.sponsored)

print('--- Create predictions and submission')
submission = test[['file']].reset_index(drop=True)
# Second column of predict_proba is stored as the `sponsored` score.
submission['sponsored'] = clf.predict_proba(test.drop(['file', 'sponsored'], axis=1))[:, 1]
submission.to_csv('native_btb_basic_submission.csv', index=False)
def gather_scrub_work():
    """
    Retrieve and execute scrub work

    The BASE virtual disks are divided evenly over all reachable Storage
    Routers that have a SCRUB partition. The local share is scrubbed directly,
    the remote shares are sent out as tasks (routing key ``sr.<machine_id>``)
    and waited for.

    :return: None
    :raises RuntimeError: if there is no scrub location, or if the processed
                          guids differ from the guids that had to be scrubbed
    """
    logger.info('Gather Scrub - Started')

    # Storage Router -> path of its first SCRUB partition (reachable routers only)
    scrub_locations = {}
    for storage_driver in StorageDriverList.get_storagedrivers():
        for partition in storage_driver.partitions:
            if DiskPartition.ROLES.SCRUB == partition.role:
                logger.info(
                    'Gather Scrub - Storage Router {0:<15} has SCRUB partition at {1}'
                    .format(storage_driver.storagerouter.ip, partition.path))
                if storage_driver.storagerouter not in scrub_locations:
                    try:
                        # Constructed purely to verify the router can be reached
                        _ = SSHClient(storage_driver.storagerouter)
                        scrub_locations[
                            storage_driver.storagerouter] = str(
                                partition.path)
                    except UnableToConnectException:
                        logger.warning(
                            'Gather Scrub - Storage Router {0:<15} is not reachable'
                            .format(storage_driver.storagerouter.ip))

    if len(scrub_locations) == 0:
        raise RuntimeError('No scrub locations found')

    # Guids of all BASE virtual disks, attached to a vMachine or not
    vdisk_guids = set()
    for vmachine in VMachineList.get_customer_vmachines():
        for vdisk in vmachine.vdisks:
            if vdisk.info['object_type'] == 'BASE':
                vdisk_guids.add(vdisk.guid)
    for vdisk in VDiskList.get_without_vmachine():
        if vdisk.info['object_type'] == 'BASE':
            vdisk_guids.add(vdisk.guid)

    logger.info(
        'Gather Scrub - Checking {0} volumes for scrub work'.format(
            len(vdisk_guids)))
    local_machineid = System.get_my_machine_id()
    local_storage_router = None
    local_scrub_location = None
    local_vdisks_to_scrub = []
    result_set = ResultSet([])
    storage_router_list = []

    # One list, built once, so that all slices share the same ordering
    guid_list = list(vdisk_guids)
    guid_count = len(guid_list)
    location_count = len(scrub_locations)

    for index, scrub_info in enumerate(scrub_locations.items()):
        # '//' keeps the bounds integral; with '/' they become floats on
        # Python 3 (or under 'from __future__ import division') and the slice
        # below raises TypeError
        start_index = index * guid_count // location_count
        end_index = (index + 1) * guid_count // location_count
        storage_router = scrub_info[0]
        vdisk_guids_to_scrub = guid_list[start_index:end_index]
        local = storage_router.machine_id == local_machineid
        logger.info(
            'Gather Scrub - Storage Router {0:<15} ({1}) - Scrubbing {2} virtual disks'
            .format(storage_router.ip, 'local'
                    if local is True else 'remote',
                    len(vdisk_guids_to_scrub)))

        if local is True:
            local_storage_router = storage_router
            local_scrub_location = scrub_info[1]
            local_vdisks_to_scrub = vdisk_guids_to_scrub
        else:
            result_set.add(
                ScheduledTaskController._execute_scrub_work.s(
                    scrub_location=scrub_info[1],
                    vdisk_guids=vdisk_guids_to_scrub).apply_async(
                        routing_key='sr.{0}'.format(
                            storage_router.machine_id)))
            # Same order as result_set, used to name the router of a failed job
            storage_router_list.append(storage_router)

    # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish
    processed_guids = []
    if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0:
        try:
            processed_guids = ScheduledTaskController._execute_scrub_work(
                scrub_location=local_scrub_location,
                vdisk_guids=local_vdisks_to_scrub)
        except Exception as ex:
            # Logged only; the final guid comparison turns it into a RuntimeError
            logger.error(
                'Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'
                .format(local_storage_router.ip, ex))

    all_results = result_set.join(
        propagate=False
    )  # Propagate False makes sure all jobs are waited for even when 1 or more jobs fail
    for index, result in enumerate(all_results):
        if isinstance(result, list):
            processed_guids.extend(result)
        else:
            # A non-list result is handled as the error of that remote job
            logger.error(
                'Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'
                .format(storage_router_list[index].ip, result))

    if len(processed_guids) != len(vdisk_guids) or set(
            processed_guids).difference(vdisk_guids):
        raise RuntimeError('Scrubbing failed for 1 or more storagerouters')
    logger.info('Gather Scrub - Finished')
#!/usr/bin/python
# Queue `trials` Search tasks for consecutive game numbers produced by
# beggar.GameNo (starting from `start`) and print the largest result.
import beggar
import worker
from celery.result import ResultSet

start = 343685886
trials = 4

d = beggar.GameNo(start=start)
rs = ResultSet([])

# Count `trials` down to zero, queuing one search per game number.
while trials > 0:
    trials -= 1
    gameno = d.next()
    rs.add(worker.Search.delay(gameno))
    #print "{}".format(Search(gameno))

# Block until every search has finished, then report the maximum.
print(max(rs.join()))

#try:
#    from numpy import histogram
#    (hist, bins) = histogram(turnlist, bins=50, range=(0,5000))
#    for (v,b) in zip(hist, [int(i) for i in bins]):
#        print "{0},{1}".format(b,v)
#except ImportError:
#    print "No Numpy module"
def gather_scrub_work():
    """
    Divide the scrub work over the Storage Routers with a SCRUB partition and execute it

    BASE virtual disks without child vDisks are split evenly over all
    reachable Storage Routers that own a SCRUB partition. The local share is
    scrubbed in-process, the remote shares are dispatched as tasks routed to
    ``sr.<machine_id>`` and joined; remote failures are logged.

    :return: None
    :raises RuntimeError: when no scrub location is found
    """
    logger.info("Divide scrubbing work among allowed Storage Routers")

    # Storage Router -> path of its first SCRUB partition (reachable routers only)
    scrub_locations = {}
    for storage_driver in StorageDriverList.get_storagedrivers():
        for partition in storage_driver.partitions:
            if DiskPartition.ROLES.SCRUB == partition.role:
                logger.info(
                    "Scrub partition found on Storage Router {0}: {1}".format(storage_driver.name, partition.folder)
                )
                if storage_driver.storagerouter not in scrub_locations:
                    try:
                        # Only used as a reachability check
                        _ = SSHClient(storage_driver.storagerouter.ip)
                        scrub_locations[storage_driver.storagerouter] = str(partition.path)
                    except UnableToConnectException:
                        logger.warning("StorageRouter {0} is not reachable".format(storage_driver.storagerouter.ip))

    if len(scrub_locations) == 0:
        raise RuntimeError("No scrub locations found")

    # Guids of all BASE virtual disks that have no child vDisks
    vdisk_guids = set()
    for vmachine in VMachineList.get_customer_vmachines():
        for vdisk in vmachine.vdisks:
            if vdisk.info["object_type"] in ["BASE"] and len(vdisk.child_vdisks) == 0:
                vdisk_guids.add(vdisk.guid)
    for vdisk in VDiskList.get_without_vmachine():
        if vdisk.info["object_type"] in ["BASE"] and len(vdisk.child_vdisks) == 0:
            vdisk_guids.add(vdisk.guid)

    logger.info("Found {0} virtual disks which need to be check for scrub work".format(len(vdisk_guids)))
    local_machineid = System.get_my_machine_id()
    local_scrub_location = None
    local_vdisks_to_scrub = []
    result_set = ResultSet([])
    storage_router_list = []

    # Materialise the guids once so every slice is taken from the same ordering
    all_guids = list(vdisk_guids)
    nr_of_guids = len(all_guids)
    nr_of_locations = len(scrub_locations)

    for index, scrub_info in enumerate(scrub_locations.items()):
        # Floor division: slice bounds must be integers, and '/' yields floats
        # on Python 3 or with 'from __future__ import division'
        start_index = index * nr_of_guids // nr_of_locations
        end_index = (index + 1) * nr_of_guids // nr_of_locations
        storage_router = scrub_info[0]
        vdisk_guids_to_scrub = all_guids[start_index:end_index]
        local = storage_router.machine_id == local_machineid
        logger.info(
            "Executing scrub work on {0} Storage Router {1} for {2} virtual disks".format(
                "local" if local is True else "remote", storage_router.name, len(vdisk_guids_to_scrub)
            )
        )

        if local is True:
            local_scrub_location = scrub_info[1]
            local_vdisks_to_scrub = vdisk_guids_to_scrub
        else:
            result_set.add(
                ScheduledTaskController._execute_scrub_work.s(
                    scrub_location=scrub_info[1], vdisk_guids=vdisk_guids_to_scrub
                ).apply_async(routing_key="sr.{0}".format(storage_router.machine_id))
            )
            # Kept in the same order as result_set so failures can be attributed below
            storage_router_list.append(storage_router)
            logger.info("Launched scrub task on Storage Router {0}".format(storage_router.name))

    # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish
    # NOTE(review): an exception raised by the local scrub propagates from
    # here, so the remote tasks are not joined in that case - confirm this is
    # intended.
    if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0:
        ScheduledTaskController._execute_scrub_work(
            scrub_location=local_scrub_location, vdisk_guids=local_vdisks_to_scrub
        )

    all_results = result_set.join(
        propagate=False
    )  # Propagate False makes sure all jobs are waited for even when 1 or more jobs fail
    for index, result in enumerate(all_results):
        # Any non-None result is reported as the error of that remote job
        if result is not None:
            logger.error(
                "Scrubbing failed on Storage Router {0} with error {1}".format(
                    storage_router_list[index].name, result
                )
            )