def getHashes(roms):
    resultSet = ResultSet([])
    for rom in roms:
        # Need the full path for the MD5 hash function to operate on the file
        fullPath = settings.ROMS_FOLDER + "/" + rom['filename']
        resultSet.add(getMD5Hash.delay(fullPath))
    return resultSet.get()
def test_add_discard(self):
    x = ResultSet([])
    x.add(AsyncResult('1'))
    self.assertIn(AsyncResult('1'), x.results)
    x.discard(AsyncResult('1'))
    x.discard(AsyncResult('1'))
    x.discard('1')
    self.assertNotIn(AsyncResult('1'), x.results)
    x.update([AsyncResult('2')])
def divide_work(start_angle, stop_angle, num_angles, num_nodes, levels, NACA,
                num_samples, viscosity, speed, sim_time):
    anglediff = (stop_angle - start_angle) // num_angles

    # http://docs.celeryproject.org/en/latest/reference/celery.result.html#celery.result.ResultSet
    in_progress = ResultSet([])
    results = []

    # +1 to match runme.sh
    for i in range(0, num_angles + 1):
        angle = start_angle + anglediff * i
        key = _key_string(angle, num_nodes, levels, NACA, num_samples,
                          viscosity, speed, sim_time)

        # query database
        # Possibly add something more to check if the result is queued and pending.
        # Simultaneous queries will cause double tasks to be added to the queue,
        # since the tasks are pending. Possibly add a global queue that we append
        # in-progress results to, which we check if status is pending.
        result = airfoil.AsyncResult(key)
        if result.status == 'SUCCESS':
            alrdy_queued = True
            results.append(result.get())
        elif result.status in ['STARTED', 'RETRY']:
            alrdy_queued = True
            in_progress.add(result)
        elif result.status == 'PENDING':
            alrdy_queued = False
        else:
            print('Task status FAILURE.')
            alrdy_queued = False

        if not alrdy_queued:
            result = airfoil.apply_async(
                (angle, num_nodes, levels, NACA, num_samples, viscosity,
                 speed, sim_time, key),
                task_id=key)
            in_progress.add(result)

    # waiting for all results
    # in_progress.join_native() is supposed to be more efficient, but it seems to hang
    results.extend(in_progress.join())  # list of all results

    result_file = _result_string(start_angle, stop_angle, num_angles, num_nodes,
                                 levels, NACA, num_samples, viscosity, speed,
                                 sim_time)
    results_archive = '/home/ubuntu/results/' + result_file
    tar_cmd = [
        'tar', '-zcvf', results_archive, '-C', '/home/ubuntu/sync_results'
    ]
    tar_cmd.extend(results)
    subprocess.call(tar_cmd)
    return result_file
def convert_file(is_video, input_path, output_format, video_length, part_length):
    rs = ResultSet([])
    for i in range(get_total_workers()):
        start_at = i * part_length
        stop_at = start_at + part_length if i != get_total_workers() - 1 else video_length
        print("worker {} will process from {}s to {}s".format(i + 1, start_at, stop_at))
        rs.add(process_part.delay(is_video, input_path, output_format, start_at, stop_at))
    return rs.get()
def distributetasks(self, dnsublist, targetdnlist, workername=""):
    # Send the task to the Celery queue
    import ltecpxx.mrosimpleexecutor as r

    taskid = uuid.uuid1().int  # create a unique main task id
    self.tasklist.append({taskid: ''})
    resultset = ResultSet([])
    for sourcedns in dnsublist:
        # send sub-tasks for the main task to Celery
        args = (sourcedns, targetdnlist)
        kwargs = {'workername': workername}
        celery = Celery()
        celery.conf.CELERY_ALWAYS_EAGER = True
        result = r.doTargetprefilter.apply_async(args, kwargs)
        resultset.add(result)
        # print("Result Is Done %s Value is %d Is Done Now %s" % (result.ready(), result.get(), result.ready()))
        print("Result Is Done %s" % result.ready())
    self.tasklist[-1][taskid] = resultset
    print("Task List Contents", self.tasklist)
    # return the status of the operation
    resp = {'TaskId': taskid,
            'NumberofSubtasks': len(dnsublist),
            'RunState': str(RunState.Submitted)}
    return resp
def obj_get_list(self, bundle, **kwargs):
    query = bundle.request.GET.get('q')
    if not query:
        response = {'status': 0, 'message': 'Empty query'}
    else:
        # from my_task.tasks import google, duck_duck_go, twitter
        from my_task.tasks import google, duck_duck_go, twitter

        # Async process
        from celery.result import ResultSet

        # A collection of results.
        rs = ResultSet([])
        # Add AsyncResult as a new member of the set.
        rs.add(google.delay(query))
        rs.add(duck_duck_go.delay(query))
        rs.add(twitter.delay(query))
        response = rs.get()  # waiting for the results

        url = "http://127.0.0.1:8000/my_resources/v1/search/?q={query}".format(query=query)
        try:
            response = {
                'query': query,
                'results': {
                    'google': {'text': response[0], 'url': url},
                    'duckduckgo': {'text': response[1], 'url': url},
                    'twitter': {'text': response[2], 'url': url}
                }
            }
        except AttributeError:
            response = {'status': 0, 'message': 'Result Timeout'}

    # For immediate response
    raise ImmediateHttpResponse(response=HttpCreated(
        content=json.dumps(response),
        content_type='application/json; charset=UTF-8'))
import pandas as pd
# https://pypi.python.org/pypi/etaprogress/
from etaprogress.progress import ProgressBar

print('--- Read training labels')
train = pd.read_csv('./data/train_v2.csv')
train_keys = dict([a[1] for a in train.iterrows()])
test_files = set(pd.read_csv('./data/sampleSubmission_v2.csv').file.values)

print("--- Started processing")
result = ResultSet([])
bar = ProgressBar(len(train) + len(test_files), max_width=40)
# https://celery.readthedocs.org/en/latest/reference/celery.result.html#celery.result.ResultSet
for k, filename in enumerate(list(train['file']) + list(test_files)):
    if filename in train_keys:
        result.add(processFile.delay(filename, train_keys[filename]))
    elif filename != "":
        result.add(processFile.delay(filename, 2))
    # sponsored = train.loc[train['file'] == openfile]
    # if not sponsored.empty:
    #     result.add(processFile.delay(openfile, data, int(sponsored['sponsored'])))
    # testing = sample.loc[sample['file'] == openfile]
    # if not testing.empty:
    #     result.add(processFile.delay(openfile, data, int(sponsored['sponsored'])))
    bar.numerator = k
    print("Sending out processes ", bar, end='\r')
    sys.stdout.flush()

bar = ProgressBar(len(train) + len(test_files), max_width=40)
def gather_scrub_work():
    logger.info("Divide scrubbing work among allowed Storage Routers")

    scrub_locations = {}
    for storage_driver in StorageDriverList.get_storagedrivers():
        for partition in storage_driver.partitions:
            if DiskPartition.ROLES.SCRUB == partition.role:
                logger.info("Scrub partition found on Storage Router {0}: {1}".format(storage_driver.name, partition.folder))
                if storage_driver.storagerouter not in scrub_locations:
                    try:
                        _ = SSHClient(storage_driver.storagerouter.ip)
                        scrub_locations[storage_driver.storagerouter] = str(partition.path)
                    except UnableToConnectException:
                        logger.warning("StorageRouter {0} is not reachable".format(storage_driver.storagerouter.ip))

    if len(scrub_locations) == 0:
        raise RuntimeError("No scrub locations found")

    vdisk_guids = set()
    for vmachine in VMachineList.get_customer_vmachines():
        for vdisk in vmachine.vdisks:
            if vdisk.info["object_type"] in ["BASE"] and len(vdisk.child_vdisks) == 0:
                vdisk_guids.add(vdisk.guid)
    for vdisk in VDiskList.get_without_vmachine():
        if vdisk.info["object_type"] in ["BASE"] and len(vdisk.child_vdisks) == 0:
            vdisk_guids.add(vdisk.guid)

    logger.info("Found {0} virtual disks which need to be checked for scrub work".format(len(vdisk_guids)))
    local_machineid = System.get_my_machine_id()
    local_scrub_location = None
    local_vdisks_to_scrub = []
    result_set = ResultSet([])
    storage_router_list = []

    for index, scrub_info in enumerate(scrub_locations.items()):
        start_index = index * len(vdisk_guids) / len(scrub_locations)
        end_index = (index + 1) * len(vdisk_guids) / len(scrub_locations)
        storage_router = scrub_info[0]
        vdisk_guids_to_scrub = list(vdisk_guids)[start_index:end_index]
        local = storage_router.machine_id == local_machineid
        logger.info("Executing scrub work on {0} Storage Router {1} for {2} virtual disks".format(
            "local" if local is True else "remote", storage_router.name, len(vdisk_guids_to_scrub)))

        if local is True:
            local_scrub_location = scrub_info[1]
            local_vdisks_to_scrub = vdisk_guids_to_scrub
        else:
            result_set.add(ScheduledTaskController._execute_scrub_work.s(
                scrub_location=scrub_info[1],
                vdisk_guids=vdisk_guids_to_scrub).apply_async(routing_key="sr.{0}".format(storage_router.machine_id)))
            storage_router_list.append(storage_router)
            logger.info("Launched scrub task on Storage Router {0}".format(storage_router.name))

    # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish
    if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0:
        ScheduledTaskController._execute_scrub_work(scrub_location=local_scrub_location,
                                                    vdisk_guids=local_vdisks_to_scrub)
    all_results = result_set.join(propagate=False)  # Propagate False makes sure all jobs are waited for even when 1 or more jobs fail
    for index, result in enumerate(all_results):
        if result is not None:
            logger.error("Scrubbing failed on Storage Router {0} with error {1}".format(storage_router_list[index].name, result))
def test_add(self):
    x = ResultSet([1])
    x.add(2)
    self.assertEqual(len(x), 2)
    x.add(2)
    self.assertEqual(len(x), 2)
def gather_scrub_work():
    """
    Retrieve and execute scrub work
    :return: None
    """
    logger.info('Gather Scrub - Started')

    scrub_locations = {}
    for storage_driver in StorageDriverList.get_storagedrivers():
        for partition in storage_driver.partitions:
            if DiskPartition.ROLES.SCRUB == partition.role:
                logger.info('Gather Scrub - Storage Router {0:<15} has SCRUB partition at {1}'.format(storage_driver.storagerouter.ip, partition.path))
                if storage_driver.storagerouter not in scrub_locations:
                    try:
                        _ = SSHClient(storage_driver.storagerouter)
                        scrub_locations[storage_driver.storagerouter] = str(partition.path)
                    except UnableToConnectException:
                        logger.warning('Gather Scrub - Storage Router {0:<15} is not reachable'.format(storage_driver.storagerouter.ip))

    if len(scrub_locations) == 0:
        raise RuntimeError('No scrub locations found')

    vdisk_guids = set()
    for vmachine in VMachineList.get_customer_vmachines():
        for vdisk in vmachine.vdisks:
            if vdisk.info['object_type'] == 'BASE':
                vdisk_guids.add(vdisk.guid)
    for vdisk in VDiskList.get_without_vmachine():
        if vdisk.info['object_type'] == 'BASE':
            vdisk_guids.add(vdisk.guid)

    logger.info('Gather Scrub - Checking {0} volumes for scrub work'.format(len(vdisk_guids)))
    local_machineid = System.get_my_machine_id()
    local_storage_router = None
    local_scrub_location = None
    local_vdisks_to_scrub = []
    result_set = ResultSet([])
    storage_router_list = []

    for index, scrub_info in enumerate(scrub_locations.items()):
        start_index = index * len(vdisk_guids) / len(scrub_locations)
        end_index = (index + 1) * len(vdisk_guids) / len(scrub_locations)
        storage_router = scrub_info[0]
        vdisk_guids_to_scrub = list(vdisk_guids)[start_index:end_index]
        local = storage_router.machine_id == local_machineid
        logger.info('Gather Scrub - Storage Router {0:<15} ({1}) - Scrubbing {2} virtual disks'.format(storage_router.ip, 'local' if local is True else 'remote', len(vdisk_guids_to_scrub)))

        if local is True:
            local_storage_router = storage_router
            local_scrub_location = scrub_info[1]
            local_vdisks_to_scrub = vdisk_guids_to_scrub
        else:
            result_set.add(ScheduledTaskController._execute_scrub_work.s(scrub_location=scrub_info[1],
                                                                         vdisk_guids=vdisk_guids_to_scrub).apply_async(
                routing_key='sr.{0}'.format(storage_router.machine_id)
            ))
            storage_router_list.append(storage_router)

    # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish
    processed_guids = []
    if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0:
        try:
            processed_guids = ScheduledTaskController._execute_scrub_work(scrub_location=local_scrub_location,
                                                                          vdisk_guids=local_vdisks_to_scrub)
        except Exception as ex:
            logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(local_storage_router.ip, ex))
    all_results = result_set.join(propagate=False)  # Propagate False makes sure all jobs are waited for even when 1 or more jobs fail
    for index, result in enumerate(all_results):
        if isinstance(result, list):
            processed_guids.extend(result)
        else:
            logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(storage_router_list[index].ip, result))
    if len(processed_guids) != len(vdisk_guids) or set(processed_guids).difference(vdisk_guids):
        raise RuntimeError('Scrubbing failed for 1 or more storagerouters')
    logger.info('Gather Scrub - Finished')
#!/usr/bin/python
import beggar
import worker
from celery.result import ResultSet

start = 343685886
trials = 4

d = beggar.GameNo(start=start)
rs = ResultSet([])
while trials > 0:
    trials -= 1
    gameno = d.next()
    rs.add(worker.Search.delay(gameno))
    # print "{}".format(Search(gameno))

print max(rs.join())

# try:
#     from numpy import histogram
#     (hist, bins) = histogram(turnlist, bins=50, range=(0, 5000))
#     for (v, b) in zip(hist, [int(i) for i in bins]):
#         print "{0},{1}".format(b, v)
# except ImportError:
#     print "No Numpy module"
def compile_and_copy(self, design_name, hashes, jobinfo, userjobconfig):
    if userjobconfig.enableEMAIL:
        email_start(userjobconfig, jobinfo, design_name, self.request.hostname)

    rs = ResultSet([])
    rl = RedisLogger(design_name, jobinfo, userjobconfig.logging_on)
    rl2 = RedisLoggerStream(design_name, jobinfo, userjobconfig.logging_on)

    #isfbox = re.match("^f[0-9][0-9]+", self.request.hostname)
    #isfbox = isfbox is not None
    base_dir = "/scratch/"
    #if isfbox:
    #    base_dir = "/data/"

    # create scratch space on this node for compiling the design, then clone
    # for now, do not delete the scratch space, just keep making new ones
    # 1) preserve work dir for debugging
    # 2) let a user run multiple jobs at once
    design_dir = base_dir + userjobconfig.username + '/celery-temp/' + jobinfo + "/" + design_name

    # remove old results for that design if they exist
    #rl.local_logged('rm -rf ' + design_dir)
    rl.local_logged('mkdir -p ' + design_dir)
    with lcd(design_dir):
        rl.local_logged('git clone ' + userjobconfig.rocket_chip_location + " rocket-chip")

    rc_dir = design_dir + '/rocket-chip'
    with lcd(rc_dir):
        # checkout the correct hash
        rl.local_logged('git checkout ' + hashes['rocket-chip'])
        rl.local_logged('git submodule update --init')
        # copy the design's scala config file
        configs_dir = 'src/main/scala/config'
        rl.local_logged('mkdir -p ' + configs_dir)
        rl.local_logged('cp ' + userjobconfig.distribute_rocket_chip_loc + '/' + jobinfo + '/' + userjobconfig.CONF + '.scala ' + configs_dir + '/')

    with lcd(rc_dir + '/vlsi'):
        rl.local_logged('git submodule update --init --recursive')

    # now, apply patches
    apply_recursive_patches(userjobconfig.distribute_rocket_chip_loc + '/' + jobinfo + '/patches', rc_dir)

    # at this point, design_dir/rocket-chip will contain everything we need to
    # do the various compiles
    shell_env_args_conf = copy(userjobconfig.shell_env_args)
    shell_env_args_conf['CONFIG'] = design_name
    cpp_emu_name = 'emulator-' + userjobconfig.MODEL + '-' + design_name
    vsim_emu_name = 'simv-' + userjobconfig.MODEL + '-' + design_name

    # make C++ emulator
    # NOTE: This is currently required to get the dramsim2_ini directory
    # and get list of tests to run
    # TODO: do we need to get list of tests to run per environment?
    if 'emulator' in userjobconfig.tests:
        with lcd(rc_dir + '/emulator'), shell_env(**shell_env_args_conf):
            rl2.local_logged('make ' + cpp_emu_name + ' 2>&1')
    else:
        with lcd(rc_dir + '/emulator'), shell_env(**shell_env_args_conf), settings(warn_only=True):
            # even if emulator is broken, need dramsim
            rl2.local_logged('make ' + cpp_emu_name + ' 2>&1')

    with lcd(rc_dir + '/emulator'), shell_env(**shell_env_args_conf):
        rl.local_logged('cp -Lr ../emulator ' + userjobconfig.distribute_rocket_chip_loc + '/' + jobinfo + '/' + design_name + '/emulator/')

    testslist = read_tests(rc_dir + '/emulator/generated-src/', design_name)
    print("running tests:")
    print(testslist)

    """ Run C++ emulator """
    if 'emulator' in userjobconfig.tests:
        for y in testslist:
            rs.add(emulatortest.apply_async([design_name, y, jobinfo, userjobconfig], queue='test'))

    """ Run vsim """
    if 'vsim' in userjobconfig.tests:
        # make vsim, copy
        with lcd(rc_dir + '/vsim'), shell_env(**shell_env_args_conf), prefix('source ' + vlsi_bashrc):
            rl2.local_logged('make ' + vsim_emu_name + ' 2>&1')
            rl.local_logged('cp -Lr ../vsim ' + userjobconfig.distribute_rocket_chip_loc + '/' + jobinfo + '/' + design_name + '/vsim/')
        # copy dramsim2_ini directory for vsim
        with lcd(userjobconfig.distribute_rocket_chip_loc + '/' + jobinfo + '/' + design_name):
            rl.local_logged('cp -r emulator/emulator/dramsim2_ini vsim/vsim/')
        # start vsim tasks
        for y in testslist:
            rs.add(vsimtest.apply_async([design_name, y, jobinfo, userjobconfig], queue='test'))

    """ Run vcs-sim-rtl """
    if 'vcs-sim-rtl' in userjobconfig.tests:
        # make vcs-sim-rtl, copy
        with lcd(rc_dir + '/vlsi/vcs-sim-rtl'), shell_env(**shell_env_args_conf), prefix('source ' + vlsi_bashrc):
            rl2.local_logged('make ' + vsim_emu_name + ' 2>&1')
            rl.local_logged('cp -Lr ../vcs-sim-rtl ' + userjobconfig.distribute_rocket_chip_loc + '/' + jobinfo + '/' + design_name + '/vcs-sim-rtl/')
        # copy dramsim2_ini directory for vcs-sim-rtl
        with lcd(userjobconfig.distribute_rocket_chip_loc + '/' + jobinfo + '/' + design_name):
            rl.local_logged('cp -r emulator/emulator/dramsim2_ini vcs-sim-rtl/vcs-sim-rtl/')
        for y in testslist:
            rs.add(vcs_sim_rtl_test.apply_async([design_name, y, jobinfo, userjobconfig], queue='test'))

    """ run dc-syn """
    if 'dc-syn' in userjobconfig.tests:
        with lcd(rc_dir + '/vlsi'), shell_env(**shell_env_args_conf), prefix('source ' + vlsi_bashrc):
            rl2.local_logged('make dc 2>&1')
        # vlsi, dc
        with lcd(rc_dir + '/vlsi/dc-syn'), shell_env(**shell_env_args_conf), prefix('source ' + vlsi_bashrc):
            # TODO: what does -jN do here?
            #rl2.local_logged('make 2>&1')
            rl.local_logged('cp -r current-dc/reports ' + userjobconfig.distribute_rocket_chip_loc + '/' + jobinfo + '/' + design_name + '/dc-syn/')
            rl.local_logged('cp -r current-dc/results ' + userjobconfig.distribute_rocket_chip_loc + '/' + jobinfo + '/' + design_name + '/dc-syn/')
        rl.clear_log()  # if we made it this far, clear the redis log list

    if 'vcs-sim-gl-syn' in userjobconfig.tests:
        with lcd(rc_dir + '/vlsi/vcs-sim-gl-syn'), shell_env(**shell_env_args_conf), prefix('source ' + vlsi_bashrc):
            # todo actually use the name
            rl2.local_logged('make 2>&1')
            # todo copy
            rl.local_logged('cp -Lr ../vcs-sim-gl-syn ' + userjobconfig.distribute_rocket_chip_loc + '/' + jobinfo + '/' + design_name + '/vcs-sim-gl-syn/')
        # copy dramsim2_ini directory for vcs-sim-gl-syn
        with lcd(userjobconfig.distribute_rocket_chip_loc + '/' + jobinfo + '/' + design_name):
            rl.local_logged('cp -r emulator/emulator/dramsim2_ini vcs-sim-gl-syn/vcs-sim-gl-syn/')
        for y in testslist:
            rs.add(vcs_sim_gl_syn_test.apply_async([design_name, y, jobinfo, userjobconfig], queue='test'))

    """ run icc-par """
    if 'icc-par' in userjobconfig.tests:
        with lcd(rc_dir + '/vlsi'), shell_env(**shell_env_args_conf), prefix('source ' + vlsi_bashrc):
            rl2.local_logged('make icc 2>&1')
        # vlsi, icc
        with lcd(rc_dir + '/vlsi/icc-par'), shell_env(**shell_env_args_conf), prefix('source ' + vlsi_bashrc):
            # TODO: what does -jN do here?
            #rl2.local_logged('make 2>&1')
            rl.local_logged('cp -r current-icc/reports ' + userjobconfig.distribute_rocket_chip_loc + '/' + jobinfo + '/' + design_name + '/icc-par/')
            rl.local_logged('cp -r current-icc/results ' + userjobconfig.distribute_rocket_chip_loc + '/' + jobinfo + '/' + design_name + '/icc-par/')

    if 'vcs-sim-gl-par' in userjobconfig.tests:
        with lcd(rc_dir + '/vlsi/vcs-sim-gl-par'), shell_env(**shell_env_args_conf), prefix('source ' + vlsi_bashrc):
            # todo actually use the name
            rl2.local_logged('make 2>&1')
            # todo copy
            rl.local_logged('cp -Lr ../vcs-sim-gl-par ' + userjobconfig.distribute_rocket_chip_loc + '/' + jobinfo + '/' + design_name + '/vcs-sim-gl-par/')
        # copy dramsim2_ini directory for vcs-sim-gl-par
        with lcd(userjobconfig.distribute_rocket_chip_loc + '/' + jobinfo + '/' + design_name):
            rl.local_logged('cp -r emulator/emulator/dramsim2_ini vcs-sim-gl-par/vcs-sim-gl-par/')
        for y in testslist:
            rs.add(vcs_sim_gl_par_test.apply_async([design_name, y, jobinfo, userjobconfig], queue='test'))

    rl.clear_log()  # clear the redis log list
    return rs
class manager(server):
    def __init__(self, config, pool, manager_conf, pick_conf, cleaner):
        super(manager, self).__init__(manager_conf, pick_conf)
        self.path = getenv('MW_HOME')
        assert self.path is not None
        self.tasks = ResultSet([])
        self.backoff = int(config['retry_backoff'])
        self.powlim = int(config['max_backoff_power'])
        # backend codes to retry
        # CONFIRM: we're not retrying with other codes
        codes = []
        if config['retry_forever_list'] != '':
            codes += [int(c) for c in config['retry_forever_list'].split(',')]
        if config['retry_sometime_list'] != '':
            codes += [int(c) for c in config['retry_sometime_list'].split(',')]
        self.backend_retry = set(codes)
        # thresholds
        self.thresholds = defaultdict(lambda: {})
        self.thresholds['audio']['score'] = int(config["fp_audio_score"])
        self.thresholds['video']['score'] = int(config["fp_video_score"])
        self.thresholds['audio']['duration'] = int(config["fp_audio_duration"])
        self.thresholds['video']['duration'] = int(config["fp_video_duration"])
        self.task_set_join_timeout = int(config['task_set_join_timeout'])
        self.pool = pool
        self.cleaner = cleaner
        self.taskm = defaultdict(dict)

    @catch_and_die('mwtm_manager')
    def run(self):
        while True:
            self.loop()

    @staticmethod
    def record(t, pid=0):
        yield db_execute(BEGIN_QUERY, pid, t.id)
        #c, r = yield db_insert(QUERY_EVENT, t.uuid, pid)
        #if c <= 0:
        #    yield db_result()
        #else:
        #    yield db_result(r)

    def one_request(self, wait=False):
        with self.req_cond:
            return self.reqq.get(wait)

    def task_check(self, t, accs, backends):
        if t.account not in accs:
            self.logger.debug("account %s maybe deleted" % t.account)
            return False
        for b in accs[t.account].backends:
            if b.backend not in backends:
                self.logger.warning('backend %s for account %s '
                                    'inconsistent with backends in db',
                                    b.backend, t.account)
                return False
        return True

    def buf_tasks(self, reqs):
        accs = self.accounts()
        backends = self.backends()
        for t in reqs:
            try:
                self.logger.info("receive task from picker, task_uuid: %s, "
                                 "site_asset_id: %s" % (t.uuid, t.site_asset_id))
                self.logger.debug("receive task info: %s" % t._asdict())
                g_logger.info(trans2json(message="site_asset_id:%s, "
                                         "task_uuid:%s, external_id:%s"
                                         % (t.site_asset_id, t.uuid, t.external_id),
                                         action="receive picked task"))
                if not self.task_check(t, accs, backends):
                    self.reply(t)
                    continue
                acc = accs[t.account]._asdict()
                acc["backends"] = [v._asdict() for v in acc["backends"]]
                backs = {}
                for k, v in backends.iteritems():
                    backs[k] = v._asdict()
                self.logger.debug("add task's account: %s, backends: %s" % (acc, backs))
                ct = Task().query.delay(t._asdict(), acc, backs)
                self.taskm[ct.task_id]['celery_task'] = ct
                self.taskm[ct.task_id]['task'] = t
                self.tasks.add(ct)
                self.logger.info("add task to celery, task_uuid: %s, "
                                 "site_asset_id: %s, celery_uuid: %s" %
                                 (t.uuid, t.site_asset_id, ct.task_id))
                g_logger.info(trans2json(message="site_asset_id:%s, "
                                         "task_uuid:%s, external_id:%s"
                                         % (t.site_asset_id, t.uuid, t.external_id),
                                         action="add task to celery"))
            except Exception:
                self.reply(t)
                self.logger.error("catch exception from buf tasks, "
                                  "task_uuid: %s, site_asset_id: %s"
                                  % (t.uuid, t.site_asset_id), exc_info=True)
                continue
            try:
                db_txn(self.pool, partial(self.record, t))
            except Exception:
                self.logger.error("failed to record execution for task %s" % t.uuid)
# sanity check that the riscv-tests hash is not the same as the riscv-tools hash
# (this probably means that you forgot to do git submodule update --init inside riscv-tools)
if hashes['riscv-tests'] == hashes['riscv-tools']:
    print(bcolors.FAIL + "riscv-tests hash matches riscv-tools hash. Did you forget to init the\nriscv-tests submodule?" + bcolors.ENDC)
    exit(1)

with lcd(userjobconfig.install_dir + '/tests-installs'), shell_env(**userjobconfig.shell_env_args), settings(warn_only=True):
    local('git clone ' + userjobconfig.tests_location + ' ' + hashes['riscv-tests'])
    local('cd ' + hashes['riscv-tests'] + ' && git checkout ' + hashes['riscv-tests'])
    local('cd ' + hashes['riscv-tests'] + ' && git submodule update --init')
    local('cd ' + hashes['riscv-tests'] + '/isa && make -j32')
    local('cd ' + hashes['riscv-tests'] + '/benchmarks && make -j32')

do_jackhammer()
build_riscv_tests()

compiles = ResultSet([])
for x in designs:
    compiles.add(compile_and_copy.apply_async([x, hashes, jobdirname, userjobconfig], queue='build'))

print(bcolors.OKBLUE + "Your job has been launched. You can monitor it at fbox:8080" + bcolors.ENDC)
print(bcolors.OKGREEN + "Your job id is " + jobdirname + bcolors.ENDC)

# TODO generate job run report
# 1 whether or not new tests/tools were installed
# 2 where to find outputs
# 3 how to use watch script
# 4 jobid
# 5 write it to file so that the watch script can use it