def test_group_to_chord(self):
    """Check that a group in the middle of a chain is upgraded to a chord.

    The three tasks chained after the group are expected in the chord
    body; a group at the very end of a chain must stay a plain group.
    """
    c = (
        self.add.s(5) |
        group([self.add.s(i, i) for i in range(5)], app=self.app) |
        self.add.s(10) |
        self.add.s(20) |
        self.add.s(30)
    )
    c._use_link = True
    tasks, _ = c.prepare_steps((), c.tasks)

    # prepare_steps returns the steps in reverse order: the first task
    # of the chain is the last element.
    assert tasks[-1].args[0] == 5
    upgraded = tasks[-2]
    assert isinstance(upgraded, chord)
    assert len(upgraded.tasks) == 5

    body = upgraded.body
    assert len(body.tasks) == 3
    for position, first_arg in enumerate((10, 20, 30)):
        assert body.tasks[position].args[0] == first_arg

    c2 = self.add.s(2, 2) | group(self.add.s(i, i) for i in range(10))
    c2._use_link = True
    tasks2, _ = c2.prepare_steps((), c2.tasks)
    assert isinstance(tasks2[0], group)
def test_group_to_chord(self):
    """Check chord upgrade plus parent/root ids and links of each step."""
    c = (
        self.add.s(5) |
        group([self.add.s(i, i) for i in range(5)], app=self.app) |
        self.add.s(10) |
        self.add.s(20) |
        self.add.s(30)
    )
    c._use_link = True
    tasks, _ = c.prepare_steps((), c.tasks)

    # Steps come back reversed, so the chain's first task is tasks[-1].
    head = tasks[-1]
    assert head.args[0] == 5

    upgraded = tasks[-2]
    assert isinstance(upgraded, chord)
    assert len(upgraded.tasks) == 5
    assert upgraded.parent_id == head.id
    assert upgraded.root_id == head.id

    body = upgraded.body
    assert body.args[0] == 10
    assert body.parent_id == upgraded.id

    after_body = tasks[-3]
    assert after_body.args[0] == 20
    assert after_body.root_id == head.id
    assert after_body.parent_id == body.id

    final = tasks[-4]
    assert final.args[0] == 30
    assert final.parent_id == after_body.id
    assert final.root_id == head.id

    # The remaining steps must be linked off the chord body.
    assert body.options['link']
    assert body.options['link'][0].options['link']

    c2 = self.add.s(2, 2) | group(self.add.s(i, i) for i in range(10))
    c2._use_link = True
    tasks2, _ = c2.prepare_steps((), c2.tasks)
    assert isinstance(tasks2[0], group)
def destroy(self, request, **kwargs):
    """Destroy the app, then converge its formation and the controller.

    Waits for both converge tasks to finish before answering HTTP 204.
    """
    app = self.get_object()
    app.destroy()
    converge_jobs = [
        tasks.converge_formation.si(app.formation),
        tasks.converge_controller.si(),
    ]  # @UndefinedVariable
    pending = group(*converge_jobs).apply_async()
    pending.join()  # @UndefinedVariable
    return Response(status=status.HTTP_204_NO_CONTENT)
def test_single_task(self):
    """A group of one task has one member, whether given as list or bare."""
    from_list = group([self.add.s(1, 1)])
    assert isinstance(from_list, group)
    assert len(from_list.tasks) == 1

    from_signature = group(self.add.s(1, 1))
    assert isinstance(from_signature, group)
    assert len(from_signature.tasks) == 1
def scale_layers(self, **kwargs):
    """Scale layers up or down to match requested.

    Compares the requested node count of every layer in ``self.layers``
    with the nodes that exist, terminating the oldest or creating new
    ones as needed, then recalculates the formation.

    :returns: the databag produced by ``self.calculate()``.
    """
    requested_by_layer = self.layers.copy()
    pending = []
    for layer_id, requested in requested_by_layer.items():
        layer = self.layer_set.get(id=layer_id)
        nodes = list(layer.node_set.all().order_by('created'))
        if len(nodes) == requested:
            continue
        # too many nodes: terminate from the front (oldest by 'created')
        while len(nodes) > requested:
            pending.append(nodes.pop(0).terminate)
        # too few nodes: create the missing ones and queue their launch
        while len(nodes) < requested:
            fresh = Node.objects.new(self, layer)
            nodes.append(fresh)
            pending.append(fresh.launch)
    # http://docs.celeryproject.org/en/latest/userguide/canvas.html#groups
    job = [func() for func in pending]
    # balance containers
    containers_balanced = self._balance_containers()
    # launch/terminate nodes in parallel
    if job:
        group(*job).apply_async().join()
    # once nodes are in place, recalculate the formation and update the data bag
    databag = self.calculate()
    # force-converge nodes if there were changes
    if job or containers_balanced:
        self.converge(databag)
    # save the formation with updated layers
    self.save()
    return databag
def test_group_to_chord(self):
    """Check chord upgrade plus parent/root ids and links of each step."""
    c = (
        self.add.s(5) |
        group([self.add.s(i, i) for i in range(5)], app=self.app) |
        self.add.s(10) |
        self.add.s(20) |
        self.add.s(30)
    )
    c._use_link = True
    tasks, _ = c.prepare_steps((), c.tasks)

    # Steps come back reversed, so the chain's first task is tasks[-1].
    head = tasks[-1]
    self.assertEqual(head.args[0], 5)

    upgraded = tasks[-2]
    self.assertIsInstance(upgraded, chord)
    self.assertEqual(len(upgraded.tasks), 5)
    self.assertEqual(upgraded.parent_id, head.id)
    self.assertEqual(upgraded.root_id, head.id)

    body = upgraded.body
    self.assertEqual(body.args[0], 10)
    self.assertEqual(body.parent_id, upgraded.id)

    after_body = tasks[-3]
    self.assertEqual(after_body.args[0], 20)
    self.assertEqual(after_body.root_id, head.id)
    self.assertEqual(after_body.parent_id, body.id)

    final = tasks[-4]
    self.assertEqual(final.args[0], 30)
    self.assertEqual(final.parent_id, after_body.id)
    self.assertEqual(final.root_id, head.id)

    # The remaining steps must be linked off the chord body.
    self.assertTrue(body.options['link'])
    self.assertTrue(body.options['link'][0].options['link'])

    c2 = self.add.s(2, 2) | group(self.add.s(i, i) for i in range(10))
    c2._use_link = True
    tasks2, _ = c2.prepare_steps((), c2.tasks)
    self.assertIsInstance(tasks2[0], group)
def converge_formation(formation):
    """Converge every node of *formation* in one group and wait for all."""
    subtasks = [converge_node.si(node) for node in formation.node_set.all()]
    group(*subtasks).apply_async().join()
def test_run_task_groups(self):
    """run_task_groups runs one signature per record as a single group.

    The recorded expectations: a signature is built per record, the
    signatures are handed to ``canvas.group``, the group is called and
    its results fetched with ``propagate=False``.
    """
    # define mocks
    mock_task_recs = [
        {"store_id": store_id, "context": self.context}
        for store_id in (0, 1)
    ]
    mock_results = [
        {"requires_gp7": False, "trade_area_id": "billy"},
        {"requires_gp7": True, "trade_area_id": "willy"},
    ]
    mock_task = self.mox.CreateMockAnything()
    mock_group = self.mox.CreateMockAnything()
    mock_group_results = self.mox.CreateMockAnything()

    # stub out some stuff
    self.mox.StubOutWithMock(canvas, "group")

    # begin recording
    signatures = ["shalom", "hello"]
    for record, signature in zip(mock_task_recs, signatures):
        mock_task.s(record).AndReturn(signature)
    canvas.group(signatures).AndReturn(mock_group)
    mock_group().AndReturn(mock_group_results)
    mock_group_results.get(propagate=False).AndReturn(mock_results)

    # replay all
    self.mox.ReplayAll()

    # go!
    results = run_task_groups(mock_task_recs, mock_task)

    # make sure results are correct
    self.assertEqual(results, mock_results)
def destroy_formation(formation):
    """Tear down a formation: apps and nodes first, then layers.

    Each stage runs as a task group that is waited on before the next;
    afterwards the formation is purged from CM and deleted.
    """
    app_jobs = [destroy_app.si(app) for app in formation.app_set.all()]
    node_jobs = [destroy_node.si(node) for node in formation.node_set.all()]
    layer_jobs = [destroy_layer.si(layer) for layer in formation.layer_set.all()]

    # stage 1: apps and nodes together
    first_stage = group(app_jobs + node_jobs)
    first_stage.apply_async().join()

    # stage 2: layers, only once stage 1 has finished
    second_stage = group(layer_jobs)
    second_stage.apply_async().join()

    CM.purge_formation(formation.flat())
    formation.delete()
def converge(self, **kwargs):
    """Publish the databag, converge all nodes in parallel and return it."""
    databag = self.publish()
    subtasks = [tasks.converge_node.si(node) for node in self.node_set.all()]
    group(*subtasks).apply_async().join()
    return databag
def destroy(self, *args, **kwargs):
    """Tear down this formation and then converge the controller.

    Apps and nodes are destroyed in one group, layers in a second one;
    each group is waited on. The formation is then purged from CM and
    deleted before the controller converge task is run and awaited.
    """
    app_jobs = [tasks.destroy_app.si(app) for app in self.app_set.all()]
    node_jobs = [tasks.destroy_node.si(node) for node in self.node_set.all()]
    layer_jobs = [tasks.destroy_layer.si(layer) for layer in self.layer_set.all()]

    for stage in (app_jobs + node_jobs, layer_jobs):
        # layers are only destroyed after apps and nodes are gone
        group(stage).apply_async().join()

    CM.purge_formation(self.flat())
    self.delete()
    tasks.converge_controller.apply_async().wait()
def destroy(self, *args, **kwargs):
    """Destroy the formation's apps, nodes and layers, then the formation.

    Apps are destroyed one by one; nodes and layers each run as a task
    group that is waited on before moving to the next step.
    """
    for app in self.app_set.all():
        app.destroy()

    node_jobs = [tasks.destroy_node.si(node) for node in self.node_set.all()]
    layer_jobs = [tasks.destroy_layer.si(layer) for layer in self.layer_set.all()]

    for stage in (node_jobs, layer_jobs):
        # layers are only destroyed after the nodes are gone
        group(stage).apply_async().join()

    CM.purge_formation(self.flat())
    self.delete()
def load_feeds(user=None):
    """
    Update all feeds, or only the feeds of one user when a user is given.

    :param user: user whose feeds should be updated; every user's feeds
                 are updated if not set
    """
    subscriptions = UserFeed.objects.all()
    if user:
        subscriptions = subscriptions.filter(user=user)
    # several users may share a feed; a set loads each feed only once
    feeds = {subscription.feed for subscription in subscriptions}
    logger.debug('found %s feeds to update for user %s.', len(feeds), user)
    job = group(load_feed_task.s(feed) for feed in feeds)
    job.delay()
def converge(self, controller=False, **kwargs):
    """Publish the databag and converge all nodes in parallel.

    :param controller: when exactly ``True``, the controller converge
        task is added to the same group.
    :returns: the databag returned by ``self.publish()``.
    """
    databag = self.publish()
    subtasks = [tasks.converge_node.si(node) for node in self.node_set.all()]
    if controller is True:
        subtasks.append(tasks.converge_controller.si())
    group(*subtasks).apply_async().join()
    return databag
def handle(self, *args, **options):
    """Publish documents, in bulk as chunked tasks or by explicit path.

    With ``all`` or ``locale`` set, the matching document ids are split
    into chunks and published through a group of asynchronous tasks.
    Otherwise every entry of ``paths`` is resolved to a document id and
    the documents are published synchronously.

    :raises CommandError: if neither bulk option nor any path is given.
    """
    Logger = namedtuple('Logger', 'info, error')
    log = Logger(info=self.stdout.write, error=self.stderr.write)

    if not (options['all'] or options['locale']):
        paths = options['paths']
        if not paths:
            raise CommandError('Need at least one document path to publish')

        doc_pks = []
        get_doc_pk = Document.objects.values_list('id', flat=True).get
        for path in paths:
            # expected shape: [/]<locale>/[docs/]<slug>
            if path.startswith('/'):
                path = path[1:]
            locale, _, slug = path.partition('/')
            head, _, tail = slug.partition('/')
            if head == 'docs':
                slug = tail
            try:
                doc_pk = get_doc_pk(locale=locale, slug=slug)
            except Document.DoesNotExist:
                msg = 'Document with locale={} and slug={} does not exist'
                log.error(msg.format(locale, slug))
            else:
                doc_pks.append(doc_pk)

        publish(doc_pks, log=log)
        return

    filters = {}
    if options['locale'] and not options['all']:
        locale = options['locale']
        log.info('Publishing all documents in locale {}'.format(locale))
        filters.update(locale=locale)
    else:
        log.info('Publishing all documents')

    chunk_size = max(options['chunk_size'], 1)
    docs = Document.objects.filter(**filters)
    doc_pks = docs.values_list('id', flat=True)
    num_docs = len(doc_pks)
    num_tasks = int(ceil(num_docs / float(chunk_size)))
    log.info('...found {} documents.'.format(num_docs))

    # Let's publish the documents in a group of chunks, where the
    # tasks in the group can be run in parallel.
    tasks = []
    for position, chunk in enumerate(chunked(doc_pks, chunk_size), 1):
        message = 'Published chunk #{} of {}'.format(position, num_tasks)
        tasks.append(publish.si(chunk, completion_message=message))

    if num_tasks == 1:
        msg = ('Launching a single task handling '
               'all {} documents.'.format(num_docs))
    else:
        msg = ('Launching {} paralellizable tasks, each handling '
               'at most {} documents.'.format(num_tasks, chunk_size))
    log.info(msg)
    group(*tasks).apply_async()
def scale(self, **kwargs):  # noqa
    """Scale containers up or down to match requested.

    Reads the requested count per container type from ``self.structure``,
    creates new containers or selects existing ones for removal so each
    type matches its count, then starts/stops them in one task group and
    waits for that group to finish.

    :returns: True if at least one container type changed, else False.
    :raises EnvironmentError: if a requested type other than ``cmd`` is
        not present in the latest release's procfile.
    """
    requested_containers = self.structure.copy()
    release = self.release_set.latest()
    # test for available process types
    available_process_types = release.build.procfile or {}
    for container_type in requested_containers:
        if container_type == 'cmd':
            continue  # allow docker cmd types in case we don't have the image source
        if container_type not in available_process_types:
            raise EnvironmentError(
                'Container type {} does not exist in application'.format(container_type))
    msg = 'containers scaled ' + ' '.join(
        "{}={}".format(k, v) for k, v in requested_containers.items())
    # iterate and scale by container type (web, worker, etc)
    changed = False
    to_add, to_remove = [], []
    # Read the counts without popping them: removing keys from the dict
    # while looping over it raises RuntimeError on Python 3.
    for container_type, requested in requested_containers.items():
        containers = list(self.container_set.filter(type=container_type).order_by('created'))
        # increment new container nums off the most recent container
        results = self.container_set.filter(type=container_type).aggregate(Max('num'))
        container_num = (results.get('num__max') or 0) + 1
        diff = requested - len(containers)
        if diff == 0:
            continue
        changed = True
        while diff < 0:
            # remove from the end of the 'created' ordering
            c = containers.pop()
            to_remove.append(c)
            diff += 1
        while diff > 0:
            c = Container.objects.create(owner=self.owner,
                                         app=self,
                                         release=release,
                                         type=container_type,
                                         num=container_num)
            to_add.append(c)
            container_num += 1
            diff -= 1
    if changed:
        subtasks = []
        if to_add:
            subtasks.append(tasks.start_containers.s(to_add))
        if to_remove:
            subtasks.append(tasks.stop_containers.s(to_remove))
        group(*subtasks).apply_async().join()
        log_event(self, msg)
        self.log(msg)
    return changed
def test_run_geoprocessing_and_cci_update__success(self):
    """Group results are copied onto the plan's gp*/cci result attributes."""
    self.plan_b.banner_ids = "banner_ids"
    self.plan_b.context = "context"
    self.mox.StubOutWithMock(canvas, "group")

    input_rec = {
        "company_ids": self.plan_b.banner_ids,
        "context": self.plan_b.context,
    }

    # record: both workflow signatures are built and passed to canvas.group
    group_mock = self.mox.CreateMockAnything()
    canvas.group([
        self.plan_b.core_tasks[self.plan_b.gp_workflow_name].s(input_rec).AndReturn(None),
        self.plan_b.core_tasks[self.plan_b.cci_workflow_name].s(input_rec).AndReturn(None),
    ]).AndReturn(group_mock)
    result_mock = self.mox.CreateMockAnything()
    group_mock().AndReturn(result_mock)

    # one distinct result dict per geoprocessing step, plus the CCI one
    gp_results = {
        "gp7_results": {"succeeded": [], "failed": []},
        "gp9_results": {"succeeded": [], "failed": []},
        "gp14_results": {"succeeded": [], "failed": []},
        "gp16_results": {"succeeded": [], "failed": []},
    }
    cci_res = {"succeeded": [], "failed": []}
    result_mock.get().AndReturn([gp_results, cci_res])

    # replay all
    self.mox.ReplayAll()

    CompanyAnalyticsPlanB._run_geoprocessing_and_cci_update(self.plan_b)

    for attribute in ("gp7_results", "gp9_results", "gp14_results", "gp16_results"):
        self.assertDictEqual(getattr(self.plan_b, attribute), gp_results[attribute])
    self.assertDictEqual(self.plan_b.cci_results, cci_res)
def test_group_to_chord__protocol_2(self):
    """With ``_use_link`` off, a leading group still ends up as a chord."""
    c = (
        group([self.add.s(i, i) for i in range(5)], app=self.app) |
        self.add.s(10) |
        self.add.s(20) |
        self.add.s(30)
    )
    c._use_link = False
    prepared, _ = c.prepare_steps((), c.tasks)
    self.assertIsInstance(prepared[-1], chord)

    # a group at the end of the chain is left as a group
    c2 = self.add.s(2, 2) | group(self.add.s(i, i) for i in range(10))
    c2._use_link = False
    prepared2, _ = c2.prepare_steps((), c2.tasks)
    self.assertIsInstance(prepared2[0], group)
def test_group_to_chord__protocol_2(self):
    """chain() starting with a group yields a chord with a chain body."""
    c = chain(
        group([self.add.s(i, i) for i in range(5)], app=self.app),
        self.add.s(10),
        self.add.s(20),
        self.add.s(30),
    )
    assert isinstance(c, chord)
    body = c.body
    assert isinstance(body, _chain)
    assert len(body.tasks) == 3

    # a group at the end of the chain is left as a group
    c2 = self.add.s(2, 2) | group(self.add.s(i, i) for i in range(10))
    c2._use_link = False
    prepared2, _ = c2.prepare_steps((), c2.tasks)
    assert isinstance(prepared2[0], group)
def _deploy_all(deployment, search_params, next_task=None):
    """
    Deploys all enabled services for a given deployment.

    Returns an empty list without scheduling anything when the ``app``
    template is disabled.

    :param deployment: Deployment parameters
    :type deployment: dict
    :param search_params: Search parameters, passed through to the tasks
    :param next_task: passed through to the start-and-wait task
    :return: Result of invoking the chord (empty list if nothing to do)
    """
    security_profile = deployment.get('security', {}).get('profile', 'default')

    if not deployment['templates']['app']['enabled']:
        return []

    name = deployment['deployment']['name']
    version = deployment['deployment']['version']
    nodes = deployment['deployment']['nodes']

    # header: one deploy task per enabled template
    header = group(
        _fleet_deploy.si(search_params, name, version, nodes, service_type,
                         template, security_profile)
        for service_type, template in deployment['templates'].items()
        if template['enabled']
    )
    # body: start the units once every deploy task has completed
    body = _fleet_start_and_wait.si(deployment, search_params,
                                    next_task=next_task)
    return chord(header, body, options=DEFAULT_CHORD_OPTIONS)()
def assert_group_to_chord_parent_ids(self, freezefun):
    """Assert parent/root ids across a chain whose group became a chord.

    :param freezefun: callable applied to the chain; it must return the
        prepared steps as an indexable sequence (first task last). The
        assertions expect parent id ``'foo'`` and root id ``'root'`` on
        the first task.
    """
    c = (
        self.add.s(5, 5) |
        group([self.add.s(i, i) for i in range(5)], app=self.app) |
        self.add.si(10, 10) |
        self.add.si(20, 20) |
        self.add.si(30, 30)
    )
    tasks = freezefun(c)

    # first task of the chain
    self.assertEqual(tasks[-1].parent_id, 'foo')
    self.assertEqual(tasks[-1].root_id, 'root')

    # the chord and its body
    self.assertEqual(tasks[-2].parent_id, tasks[-1].id)
    self.assertEqual(tasks[-2].root_id, 'root')
    self.assertEqual(tasks[-2].body.parent_id, tasks[-2].tasks.id)
    self.assertEqual(tasks[-2].body.parent_id, tasks[-2].id)
    self.assertEqual(tasks[-2].body.root_id, 'root')

    # every header task hangs off the first task
    for member in range(5):
        self.assertEqual(tasks[-2].tasks.tasks[member].parent_id, tasks[-1].id)
        self.assertEqual(tasks[-2].tasks.tasks[member].root_id, 'root')

    # tasks following the chord body
    self.assertEqual(tasks[-3].parent_id, tasks[-2].body.id)
    self.assertEqual(tasks[-3].root_id, 'root')
    self.assertEqual(tasks[-4].parent_id, tasks[-3].id)
    self.assertEqual(tasks[-4].root_id, 'root')
def run(self, header, body, partial_args=(), interval=None, countdown=1,
        max_retries=None, propagate=None, eager=False, **kwargs):
    """Apply the chord header and register the body with the backend.

    :param header: a group, or an iterable of signatures (possibly
        serialized) that is rebuilt into a group of cloned signatures.
    :param body: passed to ``_prepare_member`` and to the backend's
        ``on_chord_apply``.
    :param partial_args: positional arguments applied to the header.
    :param propagate: falls back to ``default_propagate`` when None.
    :param eager: when true the header is applied inline and its result
        returned immediately.
    :returns: the result of applying/calling the header group.
    """
    app = self.app
    if propagate is None:
        propagate = default_propagate
    group_id = uuid()
    AsyncResult = app.AsyncResult
    prepare_member = self._prepare_member

    # - convert back to group if serialized
    members = header.tasks if isinstance(header, group) else header
    header = group([maybe_signature(sig, app=app).clone() for sig in members])

    # - eager applies the group inline
    if eager:
        return header.apply(args=partial_args, task_id=group_id)

    results = []
    for member in header.tasks:
        results.append(AsyncResult(prepare_member(member, body, group_id)))

    # - fallback implementations schedules the chord_unlock task here
    app.backend.on_chord_apply(
        group_id, body,
        interval=interval,
        countdown=countdown,
        max_retries=max_retries,
        propagate=propagate,
        result=results,
    )
    # - call the header group, returning the GroupResult.
    return header(*partial_args, task_id=group_id)
def test_kwargs_apply_async(self):
    """kwargs given to apply_async reach every task of the group."""
    self.app.conf.task_always_eager = True
    x = group([self.add.s(), self.add.s()])
    group_result = x.apply_async(kwargs={'x': 1, 'y': 1})
    res = self.helper_test_get_delay(group_result)
    assert res == [2, 2]
def test_link_error(self):
    """link_error on a group forwards an immutable clone to its tasks."""
    members = (Mock(name='t1'), Mock(name='t2'))
    g1 = group(*members, app=self.app)
    sig = Mock(name='sig')
    g1.link_error(sig)
    first_member = g1.tasks[0]
    first_member.link_error.assert_called_with(
        sig.clone().set(immutable=True),
    )
def run(self, header, body, partial_args=(), interval=1, max_retries=None,
        propagate=False, eager=False, **kwargs):
    """Apply the chord header and register the body with the backend.

    :param header: a group, or an iterable of (possibly serialized)
        signatures that is converted back into a group.
    :param body: passed to ``_prepare_member`` and ``on_chord_apply``.
    :param partial_args: positional arguments applied to the header.
    :param eager: when true the header is applied inline and its result
        returned immediately.
    :returns: the result of applying/calling the header group.
    """
    # NOTE(review): ``app`` below is not bound in this block; presumably
    # it comes from an enclosing scope -- confirm.
    group_id = uuid()
    AsyncResult = self.app.AsyncResult
    prepare_member = self._prepare_member

    # - convert back to group if serialized
    already_group = isinstance(header, group)
    if not already_group:
        header = group(map(maybe_subtask, header))

    # - eager applies the group inline
    if eager:
        return header.apply(args=partial_args, task_id=group_id)

    results = []
    for member in header.tasks:
        results.append(AsyncResult(prepare_member(member, body, group_id)))

    # - fallback implementations schedules the chord_unlock task here
    app.backend.on_chord_apply(
        group_id, body,
        interval=interval,
        max_retries=max_retries,
        propagate=propagate,
        result=results,
    )
    # - call the header group, returning the GroupResult.
    # XXX Python 2.5 doesn't allow kwargs after star-args.
    return header(*partial_args, **{'task_id': group_id})
def recover_cluster(self, recovery_params):
    """
    Recovers the cluster by re-scheduling deployments.

    Deployments are looked up in the store by state (defaulting to
    ``DEPLOYMENT_STATE_PROMOTED``), name, version and excluded names; a
    clone of each one is re-created in a chord header, followed by an
    asynchronous wait.

    :param recovery_params: Parameters for recovering cluster
    :type recovery_params: dict
    :return: result of ``chord(...).delay()``
    """
    logger.info('Begin Cluster recovery for: {}'.format(recovery_params))

    state = recovery_params.get('state', DEPLOYMENT_STATE_PROMOTED)
    deployments = get_store().filter_deployments(
        state=state,
        name=recovery_params.get('name'),
        version=recovery_params.get('version'),
        exclude_names=recovery_params.get('exclude-names'),
    )

    # header: re-create a clone of every matching deployment
    header = group(
        create.si(clone_deployment(deployment))
        for deployment in deployments
    )
    # body: wait for the re-created deployments
    body = async_wait.s(
        default_retry_delay=TASK_SETTINGS['DEPLOYMENT_WAIT_RETRY_DELAY'],
        max_retries=TASK_SETTINGS['DEPLOYMENT_WAIT_RETRIES'],
    )
    return chord(header, body, options=DEFAULT_CHORD_OPTIONS).delay()
def run(self, header, body, partial_args=(), interval=None, countdown=1,
        max_retries=None, propagate=None, eager=False, **kwargs):
    """Apply the chord header through the backend's ``apply_chord``.

    :param header: a group, or an iterable of signatures (possibly
        serialized) that is rebuilt into a group of cloned signatures.
    :param body: passed to ``_prepare_member`` and to ``apply_chord``.
    :param partial_args: positional arguments applied to the header.
    :param propagate: falls back to ``default_propagate`` when None.
    :param eager: when true the header is applied inline and its result
        returned immediately.
    :returns: whatever ``header.apply`` or ``backend.apply_chord`` returns.
    """
    app = self.app
    if propagate is None:
        propagate = default_propagate
    group_id = uuid()
    AsyncResult = app.AsyncResult
    prepare_member = self._prepare_member

    # - convert back to group if serialized
    members = header.tasks if isinstance(header, group) else header
    header = group([maybe_signature(sig, app=app).clone() for sig in members])

    # - eager applies the group inline
    if eager:
        return header.apply(args=partial_args, task_id=group_id)

    results = []
    for member in header.tasks:
        results.append(AsyncResult(prepare_member(member, body, group_id)))

    return self.backend.apply_chord(
        header, partial_args, group_id, body,
        interval=interval,
        countdown=countdown,
        max_retries=max_retries,
        propagate=propagate,
        result=results,
    )
def _fleet_start_and_wait(deployment, search_params, next_task=None):
    """
    Starts the units for the deployment and schedules the deploy check.

    One start task is created per service type: every enabled template
    for unscheduled deployments, only ``timer`` for scheduled ones. The
    check task runs as the chord body.

    :param deployment: Deployment parameters.
    :type deployment: dict
    :param search_params: Search parameters
    :type search_params: dict
    :param next_task: passed through to the deploy check task
    :return: Result of invoking the chord
    """
    name = deployment['deployment']['name']
    version = deployment['deployment']['version']
    nodes = deployment['deployment']['nodes']

    if deployment['schedule']:
        service_types = {'timer'}
    else:
        service_types = {
            service_type
            for service_type, template in deployment['templates'].items()
            if template['enabled']
        }

    # the check defaults to requiring all nodes unless configured lower
    min_nodes = deployment['deployment'].get('check', {}).get(
        'min-nodes', nodes)
    templates = deployment['templates']

    header = group(
        _fleet_start.si(search_params, name, version, nodes, service_type,
                        templates[service_type])
        for service_type in service_types
    )
    body = _fleet_check_deploy.si(name, version, len(service_types),
                                  min_nodes, search_params,
                                  next_task=next_task)
    return chord(header, body, options=DEFAULT_CHORD_OPTIONS)()
def generate_tracks_import(tracks_list):
    """Scrape every track in parallel, then generate a report.

    One ``scrape_track`` signature is created per entry of *tracks_list*;
    the group is piped into ``generate_report`` and invoked.

    :param tracks_list: iterable of tracks to scrape.
    :returns: always ``True``.
    """
    # TODO Generate random folder
    folder = 'tmp'
    # Parenthesised so the line is valid on Python 3 as well; with a
    # single argument the output is unchanged on Python 2.
    print(tracks_list)
    # for every list in file_list create a scrape. Do a pipe?
    files_generated = group((scrape_track.s(track, folder)
                             for track in tracks_list))
    (files_generated | generate_report.s())()
    return True
def test_group_to_chord__protocol_2__or(self):
    """Piping a group into further tasks with ``|`` produces a chord."""
    piped = (
        group([self.add.s(i, i) for i in range(5)], app=self.app) |
        self.add.s(10) |
        self.add.s(20) |
        self.add.s(30)
    )
    assert isinstance(piped, chord)
def test_kwargs_apply(self):
    """kwargs given to apply() reach every task of the group."""
    x = group([self.add.s(), self.add.s()])
    applied = x.apply(kwargs=dict(x=1, y=1))
    res = applied.get()
    assert res == [2, 2]
def test_set_parent_id(self):
    """set_parent_id must not fail when chord.tasks is a plain list."""
    x = chord(group(self.add.s(2, 2)))
    # replace the header group with a bare list of signatures
    x.tasks = [self.add.s(2, 2)]
    x.set_parent_id('pid')
def test_reverse(self):
    """subtask() rebuilds a group from a group and from its dict form."""
    x = group([add.s(2, 2), add.s(4, 4)])
    from_group = subtask(x)
    self.assertIsInstance(from_group, group)
    from_dict = subtask(dict(x))
    self.assertIsInstance(from_dict, group)
def test_call_empty_group(self):
    """Calling a group with no tasks gives a result of length zero."""
    x = group()
    outcome = x()
    self.assertFalse(len(outcome))
def test_repr(self):
    """The repr of a group equals the repr of its task list."""
    x = group([self.add.s(2, 2), self.add.s(4, 4)])
    group_repr = repr(x)
    self.assertEqual(group_repr, repr(x.tasks))
def test_iter(self):
    """Iterating a group yields its tasks in order."""
    g = group([self.add.s(i, i) for i in range(10)])
    iterated = list(iter(g))
    self.assertListEqual(iterated, g.tasks)
def test_group_to_chord__protocol_2__or(self):
    """Piping a group into further tasks with ``|`` produces a chord."""
    piped = (
        group([self.add.s(i, i) for i in range(5)], app=self.app) |
        self.add.s(10) |
        self.add.s(20) |
        self.add.s(30)
    )
    assert isinstance(piped, chord)
def test_kwargs_apply(self):
    """kwargs given to apply() reach every task of the group."""
    x = group([self.add.s(), self.add.s()])
    applied = x.apply(kwargs={'x': 1, 'y': 1})
    res = applied.get()
    assert res == [2, 2]
def test_iter(self):
    """Iterating a group yields the same items as its keys()."""
    g = group([self.add.s(i, i) for i in range(10)])
    iterated = list(iter(g))
    assert iterated == list(g.keys())
def test_skew(self):
    """skew(1, 10, 1) gives the n-th task a countdown of n (1-based)."""
    g = group([self.add.s(i, i) for i in range(10)])
    g.skew(start=1, stop=10, step=1)
    for expected_countdown, task in enumerate(g.tasks, 1):
        assert task.options['countdown'] == expected_countdown
def test_call_empty_group(self):
    """An empty group can be called, delayed and applied without error."""
    x = group(app=self.app)
    outcome = x()
    assert not len(outcome)
    # none of the remaining entry points may raise on an empty group
    x.delay()
    x.apply_async()
    x()
def test_from_dict(self):
    """group.from_dict accepts a dict with tuple args and with None args."""
    x = group([self.add.s(2, 2), self.add.s(4, 4)])

    x['args'] = (2, 2)
    rebuilt = group.from_dict(dict(x))
    assert rebuilt

    x['args'] = None
    rebuilt = group.from_dict(dict(x))
    assert rebuilt
def test_apply_empty(self):
    """Applying an empty group gives a truthy result with no members."""
    x = group(app=self.app)
    x.apply()
    outcome = x.apply_async()
    assert outcome
    assert not outcome.results
def test_group_in_group(self):
    """A group nested between tasks of another group can be applied."""
    inner = group(self.add.s(2, 2), self.add.s(4, 4), app=self.app)
    outer = group(self.add.s(8, 8), inner, self.add.s(16, 16), app=self.app)
    outer.apply_async()
def test_repr(self):
    """The repr of a group is a non-empty string."""
    x = group([self.add.s(2, 2), self.add.s(4, 4)])
    group_repr = repr(x)
    assert group_repr
def test_kwargs_delay_partial(self):
    """delay() kwargs are merged with each task's partial arguments."""
    self.app.conf.task_always_eager = True
    # one task has x bound positionally, the other by keyword
    x = group([self.add.s(1), self.add.s(x=1)])
    group_result = x.delay(y=1)
    res = self.helper_test_get_delay(group_result)
    assert res == [2, 2]
def test_groups_in_chain_to_chord(self):
    """Piping one group into another produces a chord."""
    first = group([self.add.s(2, 2), self.add.s(4, 4)])
    second = group([self.add.s(3, 3), self.add.s(5, 5)])
    piped = first | second
    assert isinstance(piped, chord)
def test_argument_is_group(self):
    """A chord built from a group exposes non-empty ``tasks``."""
    header = group(self.add.s(2, 2), self.add.s(4, 4), app=self.app)
    x = chord(header)
    assert x.tasks
def test_call_empty_group(self):
    """An empty group can be called, delayed and applied without error."""
    x = group(app=self.app)
    outcome = x()
    self.assertFalse(len(outcome))
    # none of the remaining entry points may raise on an empty group
    x.delay()
    x.apply_async()
    x()
def test_prepare_with_dict(self):
    """A group may mix signature objects with their plain-dict form."""
    as_signature = self.add.s(4, 4)
    as_dict = dict(self.add.s(8, 8))
    x = group([as_signature, as_dict], app=self.app)
    x.apply_async()
def handle(self, *args, **options):
    """Identify parallel citations and save them as requested.

    The work happens in two phases. Phase one walks the selected
    opinions and runs, in batches of Celery tasks, the extraction of
    citations followed by the identification of parallel citations; the
    resulting citation groups are added to the network held in
    ``self.g``. Phase two hands every connected sub-graph of that
    network to ``self.handle_subgraph``, and finally ``self.do_solr`` is
    called.

    :raises CommandError: if neither ``doc_id`` nor ``all`` is given.
    """
    super(Command, self).handle(*args, **options)

    if not (options.get("doc_id") or options.get("all")):
        raise CommandError(
            "Please specify if you want all items or a specific item.")

    if not options["update_database"]:
        logger.info(
            "--update_database is not set. No changes will be made to the "
            "database.")

    # Update Citation object to consider similar objects equal.
    self.monkey_patch_citation()

    logger.info("## Entering phase one: Building a network object of "
                "all citations.\n")
    q = Opinion.objects.all()
    if options.get("doc_id"):
        q = q.filter(pk__in=options["doc_id"])
    count = q.count()
    opinions = queryset_generator(q, chunksize=10000)

    node_count = edge_count = completed = 0
    subtasks = []
    for o in opinions:
        # This will call the second function with the results from the
        # first.
        subtasks.append(
            get_document_citations.s(o) | identify_parallel_citations.s())

        last_item = (count == completed + 1)
        if (completed % 50 == 0) or last_item:
            # Run the collected batch and fold its results into the network.
            batch_results = group(subtasks).apply_async().join()
            for citation_groups in batch_results:
                self.add_groups_to_network(citation_groups)
            subtasks = []

        completed += 1
        if completed % 250 == 0 or last_item:
            # Only do this once in a while.
            node_count = len(self.g.nodes())
            edge_count = len(self.g.edges())
        sys.stdout.write("\r Completed %s of %s. (%s nodes, %s edges)" % (
            completed, count, node_count, edge_count))
        sys.stdout.flush()

    logger.info("\n\n## Entering phase two: Saving the best edges to "
                "the database.\n\n")
    for sub_graph in nx.connected_component_subgraphs(self.g):
        self.handle_subgraph(sub_graph, options)

    logger.info("\n\n## Done. Added %s new citations." % self.update_count)

    self.do_solr(options)
def test_apply_async(self):
    """apply_async on a two-task group runs without raising."""
    members = [self.add.s(4, 4), self.add.s(8, 8)]
    x = group(members)
    x.apply_async()
def test_apply(self):
    """apply() runs the group inline and returns each task's sum."""
    x = group([self.add.s(4, 4), self.add.s(8, 8)])
    res = x.apply()
    # Compare against the expected sums; the previous
    # ``assert res.get(), [8 == 16]`` only checked truthiness and used
    # the list as the assertion message.
    assert res.get() == [8, 16]
def test_group_with_group_argument(self):
    """Wrapping a group in a group shares the very same task list."""
    inner = group(self.add.s(2, 2), self.add.s(4, 4), app=self.app)
    wrapper = group(inner, app=self.app)
    assert wrapper.tasks is inner.tasks
def handle(self, *args, **options):
    """Publish documents, in bulk as chunked tasks or by explicit path.

    With ``all`` or ``locale`` set, the matching document ids are split
    into chunks and published through a group of asynchronous tasks
    (without CDN cache invalidation). Otherwise every entry of ``paths``
    is resolved to a document id and the documents are published
    synchronously, invalidating the CDN cache unless
    ``skip_cdn_invalidation`` is set.

    :raises CommandError: if ``locale`` and ``all`` are combined, or if
        neither a bulk option nor any path is given.
    """
    Logger = namedtuple('Logger', 'info, error')
    log = Logger(info=self.stdout.write, error=self.stderr.write)

    if not (options['all'] or options['locale']):
        paths = options['paths']
        if not paths:
            raise CommandError(
                'Need at least one document path to publish')

        doc_pks = []
        get_doc_pk = Document.objects.values_list('id', flat=True).get
        for path in paths:
            # expected shape: [/]<locale>/[docs/]<slug>
            if path.startswith('/'):
                path = path[1:]
            locale, _, slug = path.partition('/')
            head, _, tail = slug.partition('/')
            if head == 'docs':
                slug = tail
            try:
                doc_pk = get_doc_pk(locale=locale, slug=slug)
            except Document.DoesNotExist:
                msg = 'Document with locale={} and slug={} does not exist'
                log.error(msg.format(locale, slug))
            else:
                doc_pks.append(doc_pk)

        invalidate = not options['skip_cdn_invalidation']
        publish(doc_pks, log=log, invalidate_cdn_cache=invalidate)
        return

    if options['locale'] and options['all']:
        raise CommandError(
            'Specifying --locale with --all is the same as --all')

    filters = {}
    if options['locale']:
        locale = options['locale']
        log.info(
            'Publishing all documents in locale {}'.format(locale))
        filters.update(locale=locale)
    else:
        log.info('Publishing all documents')

    chunk_size = max(options['chunk_size'], 1)
    docs = Document.objects.filter(**filters)
    doc_pks = docs.values_list('id', flat=True)
    num_docs = len(doc_pks)
    num_tasks = int(ceil(num_docs / float(chunk_size)))
    log.info('...found {} documents.'.format(num_docs))

    # Let's publish the documents in a group of chunks, where the
    # tasks in the group can be run in parallel.
    tasks = []
    for position, chunk in enumerate(chunked(doc_pks, chunk_size), 1):
        message = 'Published chunk #{} of {}'.format(position, num_tasks)
        tasks.append(
            publish.si(chunk,
                       completion_message=message,
                       invalidate_cdn_cache=False))

    if num_tasks == 1:
        msg = ('Launching a single task handling '
               'all {} documents.'.format(num_docs))
    else:
        msg = ('Launching {} paralellizable tasks, each handling '
               'at most {} documents.'.format(num_tasks, chunk_size))
    log.info(msg)
    group(*tasks).apply_async()
def test_cannot_link_error_on_group(self):
    """Passing link_error to a group's apply_async raises TypeError."""
    x = group([self.add.s(2, 2), self.add.s(4, 4)])
    errback = self.add.s(2, 2)
    with pytest.raises(TypeError):
        x.apply_async(link_error=errback)
def test_kwargs_apply_async(self):
    """kwargs given to apply_async reach every task of the group."""
    self.app.conf.task_always_eager = True
    x = group([self.add.s(), self.add.s()])
    group_result = x.apply_async(kwargs=dict(x=1, y=1))
    res = self.helper_test_get_delay(group_result)
    assert res == [2, 2]
def test_reverse(self):
    """signature() rebuilds a group from a group and from its dict form."""
    x = group([self.add.s(2, 2), self.add.s(4, 4)])
    from_group = signature(x)
    assert isinstance(from_group, group)
    from_dict = signature(dict(x))
    assert isinstance(from_dict, group)
def test_set_immutable(self):
    """set_immutable on a group is forwarded to every member task."""
    members = (Mock(name='t1'), Mock(name='t2'))
    g1 = group(*members, app=self.app)
    g1.set_immutable(True)
    for member in g1.tasks:
        member.set_immutable.assert_called_with(True)