def make_group(number):
    """Create a job group for `number`, fill it with fast jobs, and chain
    creation of the next group to run once this one completes."""
    group = Group.objects.create(reference=unicode(number))
    total = number * 100
    for job_number in range(1, total):
        schedule(fast_job, args=[number, job_number, total], group=group)
    next_group_job = schedule(make_group, args=[number + 1])
    group.on_completion(next_group_job)
def make_group(number):
    """Build the group of fast jobs for this number.

    Once every job in the group has run, a job creating the next
    (larger) group is fired via the group's completion hook.
    """
    group = Group.objects.create(reference=unicode(number))
    upper = number * 100
    for counter in range(1, upper):
        schedule(fast_job, args=[number, counter, upper], group=group)
    group.on_completion(schedule(make_group, args=[number + 1]))
def pull_down(model, callback, callback_kwargs=None, **kwargs):
    """Start a job pulling data from latest to beginning instance.

    `delay` is forced to zero so the monitor does not reschedule itself.
    """
    kwargs['callback_kwargs'] = {} if callback_kwargs is None else callback_kwargs
    kwargs['delay'] = 0
    schedule('pubsubpull.async.pull_monitor',
             args=[model, callback], kwargs=kwargs)
def pull(model, callback, **kwargs):
    """Start a job pulling data from one service to this one.

    If ``pull_priority`` is present in `kwargs` it is also used as the
    priority of the scheduled monitor job itself.
    """
    # dict.has_key() was removed in Python 3; the `in` operator is the
    # equivalent (and preferred even on Python 2).
    if 'pull_priority' in kwargs:
        schedule('pubsubpull.async.pull_monitor', args=[model, callback],
            kwargs=kwargs, priority=kwargs['pull_priority'])
    else:
        schedule('pubsubpull.async.pull_monitor', args=[model, callback],
            kwargs=kwargs)
def pull_up(model, callback, callback_kwargs=None, **kwargs):
    """Start a job monitoring new instance from latest instance."""
    if callback_kwargs is None:
        callback_kwargs = {}
    kwargs['callback_kwargs'] = callback_kwargs
    # Fetch the first page of instances from the remote service and use the
    # leading primary key as the floor, so only newer instances are pulled.
    remote_model = get_model(model)
    _, json_data = get(remote_model._operations['instances'])
    page = json_data['page']
    kwargs['floor'] = page[0]['pk'] if page else 0
    schedule('pubsubpull.async.pull_monitor',
             args=[model, callback], kwargs=kwargs)
def test_final_when_added_first(self):
    """The final job runs last even when created before the member jobs."""
    self.j1 = schedule(do_job)
    self.group = Group.objects.create(reference='final-job', final=self.j1)
    self.j2 = schedule(do_job, group=self.group)
    self.j3 = schedule(do_job, group=self.group)
    management.call_command('flush_queue')
    final, first_member, second_member = (
        Job.objects.get(pk=scheduled.pk)
        for scheduled in (self.j1, self.j2, self.j3))
    self.assertLess(first_member.executed, second_member.executed)
    self.assertLess(second_member.executed, final.executed)
def pull(model, callback, callback_kwargs=None, **kwargs):
    """Start a job pulling data from one service to this one."""
    kwargs['callback_kwargs'] = {} if callback_kwargs is None else callback_kwargs
    # Only forward a job priority when the caller supplied one.
    schedule_kwargs = dict(args=[model, callback], kwargs=kwargs)
    if 'pull_priority' in kwargs:
        schedule_kwargs['priority'] = kwargs['pull_priority']
    schedule('pubsubpull.async.pull_monitor', **schedule_kwargs)
def run_spider(node_name, project_name, spider):
    """Schedule `spider` on the named node and print the outcome."""
    domain = NODES[node_name]
    response = api.schedule(domain, project_name, spider)
    if response.get("status") == "ok":
        print("run spider %s success, jobid is %s" % (spider, response["jobid"]))
    else:
        print("run spider %s failed" % spider)
def schedule(*a, **kw): """Wrapper for async.schedule.schedule that allow coverage. """ # Redefining name 'schedule' from outer scope # pylint: disable=W0621 from async.api import schedule return schedule(*a, **kw)
def async_monitor(update_log_url, update_log_model_url):
    """Schedule the jobs registered in ChangeSubscription for the
    update log described by `update_log_model_url`."""
    json_data = _get_data_from_slumber(update_log_url)
    fields = json_data['fields']
    relative_url = fields['instance_url']['data']
    table_name = fields['table']['data']
    # The instance URL in the payload is relative; resolve it against the
    # base of the update log's own URL.
    base = _get_base_url(from_slumber_scheme(update_log_url))
    full_instance_url = urljoin(base, relative_url)
    matching = ChangeSubscription.objects.filter(
        update_log_model_url=update_log_model_url, table=table_name)
    for sub in matching:
        schedule(sub.callback, args=[full_instance_url],
                 kwargs=json.loads(sub.callback_kwargs))
def async_monitor(update_log_url, update_log_model_url):
    """Schedule jobs in ChangeSubscription model corresponding to
    update_log_model_url."""
    data = _get_data_from_slumber(update_log_url)
    # Resolve the relative instance URL against the update log's base URL.
    instance_url = urljoin(
        _get_base_url(from_slumber_scheme(update_log_url)),
        data['fields']['instance_url']['data'])
    for subscription in ChangeSubscription.objects.filter(
            update_log_model_url=update_log_model_url,
            table=data['fields']['table']['data']):
        schedule(subscription.callback,
                 args=[instance_url],
                 kwargs=json.loads(subscription.callback_kwargs))
def test_health_for_errors(self):
    """health() must report the total number of recorded job errors."""
    executed = timezone.now()
    job1 = api.schedule('job-1', group=None)
    # The Error rows only need to exist; the created objects are not used
    # again, so don't bind them to unused locals.
    Error.objects.create(job=job1, executed=executed,
        exception="First", traceback="None")
    Error.objects.create(job=job1,
        executed=executed + datetime.timedelta(seconds=10),
        exception="Second", traceback="None")
    queue_errors = api.health().get('errors', None)
    # assertEquals is a deprecated alias of assertEqual.
    self.assertEqual(queue_errors['number'], 2)
def pull_monitor(model_url, callback, delay=None, page_url=None, floor=0,
        pull_priority=5, job_priority=5):
    """Used to look for instances that need to be pulled.

    This only works with models who use an auto-incremented primary key.

    `delay` defaults to ``dict(minutes=1)`` and controls how long to wait
    before looking for new instances again; `floor` is the highest primary
    key already pulled.
    """
    # A mutable default argument (dict) would be shared across calls and
    # could be corrupted by mutation -- build it fresh per call instead.
    if delay is None:
        delay = dict(minutes=1)
    if not page_url:
        model = get_model(model_url)
        instances_url = model._operations['instances']
    else:
        instances_url = page_url
    # Renamed from `json` to avoid shadowing the stdlib json module.
    _, json_data = get(instances_url)
    latest, highest = None, floor
    for item in json_data['page']:
        highest = max(item['pk'], highest)
        latest = item['pk']
        if latest > floor:
            schedule(callback, args=[urljoin(instances_url, item['data'])],
                priority=job_priority)
    # dict.has_key() was removed in Python 3; use the `in` operator.
    if 'next_page' in json_data and latest > floor:
        schedule('pubsubpull.async.pull_monitor',
            args=[model_url, callback],
            kwargs=dict(delay=delay, floor=floor,
                page_url=urljoin(instances_url, json_data['next_page']),
                pull_priority=pull_priority, job_priority=job_priority),
            priority=pull_priority)
        print("Got another page to process", json_data['next_page'], floor)
    if not page_url:
        # First (non-paging) invocation: reschedule ourselves to look for
        # instances above the highest key seen so far.
        run_after = timezone.now() + timedelta(**delay)
        schedule('pubsubpull.async.pull_monitor', run_after=run_after,
            args=[model_url, callback],
            kwargs=dict(delay=delay, floor=highest,
                pull_priority=pull_priority, job_priority=job_priority),
            priority=pull_priority)
        print("Looking for new instances above", highest)
def pull_monitor(model_url, callback, delay=None, page_url=None, floor=0, pull_priority=5, job_priority=5, callback_kwargs=None):
    """Used to look for instances that need to be pulled.

    This only works with models who use an auto-incremented primary key.

    `delay` defaults to ``dict(minutes=1)`` (timedelta keyword arguments)
    and controls when the monitor reschedules itself; `floor` is the
    highest primary key already processed; `page_url`, when set, marks a
    paging continuation rather than a fresh monitoring run.
    """
    # Avoid mutable default arguments -- build fresh dicts per call.
    if callback_kwargs is None:
        callback_kwargs = {}
    if delay is None:
        delay = dict(minutes=1)
    # On a fresh run look up the instances endpoint from the model's
    # operations; on a paging continuation use the page URL directly.
    if not page_url:
        model = get_model(model_url)
        instances_url = model._operations['instances']
    else:
        instances_url = page_url
    _, json = get(instances_url or page_url)
    latest, highest = None, floor
    # Schedule a pull job for every instance on this page that is above
    # the floor, tracking the highest primary key seen.
    for item in json['page']:
        highest = max(item['pk'], highest)
        latest = item['pk']
        if latest > floor:
            schedule(callback, args=[urljoin(instances_url, item['data'])], kwargs=callback_kwargs, priority=job_priority)
    # More pages and still above the floor: continue paging via a new job.
    if 'next_page' in json and latest > floor:
        schedule('pubsubpull.async.pull_monitor', args=[model_url, callback],
            kwargs=dict(callback_kwargs=callback_kwargs, delay=delay, floor=floor, job_priority=job_priority,
                page_url=urljoin(instances_url, json['next_page']), pull_priority=pull_priority),
            priority=pull_priority)
        print("Got another page to process", json['next_page'], floor)
    # Only the original (non-paging) invocation reschedules the monitor;
    # an explicit falsy delay disables rescheduling.
    if not page_url and delay:
        run_after = timezone.now() + timedelta(**delay)
        schedule('pubsubpull.async.pull_monitor', args=[model_url, callback], run_after=run_after, priority=pull_priority,
            kwargs=dict(callback_kwargs=callback_kwargs, delay=delay, floor=highest, job_priority=job_priority, pull_priority=pull_priority))
        print("Looking for new instances above", highest)
def create_job(jid, group=None):
    """Schedule a job named after `jid` and attach a dummy error to it."""
    scheduled = api.schedule('job-%s' % jid, group=group)
    Error.objects.create(job=scheduled, exception='Error',
                         traceback='code stack')
    return scheduled
def test_only_has_final_job(self):
    """A group whose only job is the final one still executes it."""
    self.j1 = schedule(do_job)
    self.group = Group.objects.create(reference='final-job', final=self.j1)
    management.call_command('flush_queue')
    self.assertIsNotNone(Job.objects.get(pk=self.j1.pk).executed)
#!/usr/bin/env python import os import sys os.environ.setdefault("DJANGO_SETTINGS_MODULE", "django_1_4.settings") # Add the top level to the path so we can find async_exec sys.path.append('../..') from async .api import schedule from async .models import Group def fast_job(*args): pass def make_group(number): group = Group.objects.create(reference=unicode(number)) outof = number * 100 for index in range(1, outof): schedule(fast_job, args=[number, index, outof], group=group) last = schedule(make_group, args=[number + 1]) group.on_completion(last) if __name__ == "__main__": schedule('speed.make_group', args=[1])
def setUp(self):
    """Create a group containing two jobs for the tests to exercise."""
    self.group = Group.objects.create(reference='1of2')
    self.j1 = schedule(do_job, group=self.group)
    self.j2 = schedule(do_job, group=self.group)
#!/usr/bin/env python import os import sys os.environ.setdefault("DJANGO_SETTINGS_MODULE", "django_1_4.settings") # Add the top level to the path so we can find async_exec sys.path.append('../..') from async.api import schedule from async.models import Group def fast_job(*args): pass def make_group(number): group = Group.objects.create(reference=unicode(number)) outof = number * 100 for index in range(1, outof): schedule(fast_job, args=[number, index, outof], group=group) last = schedule(make_group, args=[number+1]) group.on_completion(last) if __name__ == "__main__": schedule('speed.make_group', args=[1])