def test_uniqueTaskNamesGeneratedForMultipleContexts(self):
    """Each context's task name starts with taskName and ends with its index."""
    startStateMachine(self.machineName, [{'a': '1'}, {'b': '2'}],
                      _currentConfig=self.currentConfig, taskName='foo')
    for index in (0, 1):
        queuedName = self.getTask(index).name
        self.assertTrue(queuedName.startswith('foo'))
        self.assertTrue(queuedName.endswith(str(index)))
def test_correctUrlInTask(self):
    """The enqueued task's URL is built from machine name and init states."""
    startStateMachine(self.machineName, {'a': '1'},
                      _currentConfig=self.currentConfig, method='POST')
    # assertEqual: the assertEquals alias is deprecated in unittest
    self.assertEqual(
        self.getTask(0).url,
        '/fantasm/fsm/%s/%s/%s/%s/' % (self.machineName, FSM.PSEUDO_INIT,
                                       FSM.PSEUDO_INIT, self.initialState.name))
def test_tasksQueuedForStartStateMachineWithNoTaskName(self):
    """Without an explicit taskName, each context still gets a distinct name."""
    startStateMachine(self.machineName, [{'a': '1'}, {'b': '2'}],
                      _currentConfig=self.currentConfig)
    # assertEqual/assertNotEqual: the -s aliases are deprecated in unittest
    self.assertEqual(len(self.mockQueue.tasks), 2)
    self.assertNotEqual(self.getTask(0).name, self.getTask(1).name)
def test_startStateMachineIsIdempotent(self):
    """Starting twice with the same taskName enqueues only one task."""
    startStateMachine(self.machineName, {'a': '1'},
                      _currentConfig=self.currentConfig, taskName='foo')
    # assertEqual: the assertEquals alias is deprecated in unittest
    self.assertEqual(len(self.mockQueue.tasks), 1)
    # A second start with the identical taskName must be a no-op.
    startStateMachine(self.machineName, {'a': '1'},
                      _currentConfig=self.currentConfig, taskName='foo')
    self.assertEqual(len(self.mockQueue.tasks), 1)
def test_countdownIncludedInTask(self):
    """countdown=N places the task ETA roughly N seconds in the future."""
    # Having trouble mocking Task, so dip into a private attribute on the task.
    import time
    startStateMachine(self.machineName, {'a': '1'}, countdown=20,
                      _currentConfig=self.currentConfig)
    eta = getattr(self.mockQueue.tasks[0][0], '_Task__eta_posix')
    # Bug fix: the original one-sided comparison (now + 20 - eta < 0.01)
    # passed for ANY eta later than now+20, so a wildly wrong countdown
    # could slip through. abs() bounds the error on both sides.
    self.assertTrue(abs(time.time() + 20 - eta) < 0.01)
def test_tasksEnqueuedToStartMultipleMachines(self):
    """One task is enqueued per context in the list."""
    startStateMachine(self.machineName, [{'a': '1'}, {'b': '2'}, {'c': '3'}],
                      _currentConfig=self.currentConfig)
    # assertEqual: the assertEquals alias is deprecated in unittest
    self.assertEqual(len(self.mockQueue.tasks), 3)
def test_contextsAddedToTasks(self):
    """With method='GET' each context is encoded into its task's query string."""
    startStateMachine(self.machineName, [{'a': '1'}, {'b': '2'}],
                      _currentConfig=self.currentConfig, method='GET')
    # assertIn gives a far clearer failure message than assertTrue(x in y)
    self.assertIn('a=1', self.getTask(0).url)
    self.assertIn('b=2', self.getTask(1).url)
def test_uniqueTaskNamesGeneratedForMultipleContexts(self):
    """Task names for a multi-context start share a prefix but differ by index."""
    startStateMachine(self.machineName, [{'a': '1'}, {'b': '2'}],
                      _currentConfig=self.currentConfig, taskName='foo')
    firstName = self.getTask(0).name
    secondName = self.getTask(1).name
    self.assertTrue(firstName.startswith('foo'))
    self.assertTrue(secondName.startswith('foo'))
    self.assertTrue(firstName.endswith('0'))
    self.assertTrue(secondName.endswith('1'))
def test_taskEnqueuedToStartSingleMachine(self):
    """A single (non-list) context enqueues exactly one task."""
    startStateMachine(self.machineName, {'a': '1'},
                      _currentConfig=self.currentConfig)
    # assertEqual: the assertEquals alias is deprecated in unittest
    self.assertEqual(len(self.mockQueue.tasks), 1)
def test_taskNameIsUsedWhenQueuingTasks(self):
    """An explicit taskName becomes the prefix of the enqueued task's name."""
    startStateMachine(self.machineName, {'a': '1'},
                      _currentConfig=self.currentConfig, taskName='foo')
    queuedName = self.getTask(0).name
    self.assertTrue(queuedName.startswith('foo'))
def test_countdownIncludedInTask(self):
    """countdown=N places the task ETA roughly N seconds in the future."""
    # Having trouble mocking Task, so dip into a private attribute on the task.
    import time
    startStateMachine(self.machineName, {'a': '1'}, countdown=20,
                      _currentConfig=self.currentConfig)
    eta = getattr(self.mockQueue.tasks[0][0], '_Task__eta_posix')
    # Bug fix: the original one-sided comparison (now + 20 - eta < 0.01)
    # passed for ANY eta later than now+20, so a wildly wrong countdown
    # could slip through. abs() bounds the error on both sides.
    self.assertTrue(abs(time.time() + 20 - eta) < 0.01)
def test_correctUrlInTask(self):
    """The enqueued task's URL is built from machine name and init states."""
    startStateMachine(self.machineName, {'a': '1'},
                      _currentConfig=self.currentConfig, method='POST')
    # assertEqual: the assertEquals alias is deprecated in unittest
    self.assertEqual(
        self.getTask(0).url,
        '/fantasm/fsm/%s/%s/%s/%s/' % (self.machineName, FSM.PSEUDO_INIT,
                                       FSM.PSEUDO_INIT, self.initialState.name))
def test_correctMethodUsedToEnqueueTask(self):
    """The method kwarg is propagated to the enqueued task."""
    startStateMachine(self.machineName, {'a': '1'},
                      _currentConfig=self.currentConfig, method='GET')
    # assertEqual: the assertEquals alias is deprecated in unittest
    self.assertEqual(self.getTask(0).method, 'GET')
def test_contextsAddedToTasks(self):
    """With method='GET' each context is encoded into its task's query string."""
    startStateMachine(self.machineName, [{'a': '1'}, {'b': '2'}],
                      _currentConfig=self.currentConfig, method='GET')
    # assertIn gives a far clearer failure message than assertTrue(x in y)
    self.assertIn('a=1', self.getTask(0).url)
    self.assertIn('b=2', self.getTask(1).url)
def post(self):
    """Kick off a grep benchmark run on the selected engine.

    Handles two form actions:
      * fsm_cleanup -- recompute fsm run times, then redirect back to /grep.
      * compute     -- create a Record for the run and start the chosen
                       engine: "fsm", "pipeline", or "mr".
    """
    if self.request.get("fsm_cleanup"):
        if fsm_calculate_run_time():
            self.redirect('/grep')
        else:
            self.response.out.write("Error calculating fsm/grep")
        return
    if self.request.get("compute"):
        engine = self.request.get("engine")
        dataset = self.request.get("dataset")
        user = self.request.get('user')
        needle = self.request.get('needle')
        data = GrepDataSet.get_by_key_name(dataset)
        record = Record(engine_type=engine,
                        dataset=dataset,
                        benchmark="grep",
                        num_entities=data.num_entries,
                        entries_per_pipe=data.entries_per_pipe,
                        user=user,
                        char_per_word=data.char_per_word,
                        state="Running")
        if engine == "fsm":
            record.put()
            context = {}
            context['user'] = str(user)
            context['num_entries'] = int(data.num_entries)
            context['needle'] = needle
            fsm.startStateMachine('Grep', [context])
            self.redirect('/grep')
        elif engine == "pipeline":
            mypipeline = GrepPipelineLoop(data.num_entries, needle)
            mypipeline.start()
            record.pipeline_id = mypipeline.pipeline_id
            record.put()
            self.redirect('/grep')
            return
        elif engine == "mr":
            # Aim for roughly 1000 entities per shard, capped at 256 shards.
            # NOTE(review): whether ~1k per shard is ideal is unverified here.
            if data.num_entries > 1000:
                # Floor division: shard_count must be an int (true division
                # would produce a float under Python 3).
                shards = data.num_entries // 1000
                shards = min(256, shards)
            else:
                shards = 1
            kind = getKindString(data.num_entries)
            mapreduce_id = control.start_map(
                name="Grep",
                handler_spec="grep.mr.grep_mapper",
                reader_spec="mapreduce.input_readers.DatastoreInputReader",
                mapper_parameters={
                    "entity_kind": "data.grep." + kind,
                    "processing_rate": 500,
                    "needle": needle,
                },
                mapreduce_parameters={model.MapreduceSpec.PARAM_DONE_CALLBACK:
                                      '/grep/mr/callback'},
                shard_count=shards,
                queue_name="default",
            )
            record.mr_id = mapreduce_id
            record.put()
            self.redirect('/grep')
def test_tasksQueuedForStartStateMachineWithNoTaskName(self):
    """Without an explicit taskName, each context still gets a distinct name."""
    startStateMachine(self.machineName, [{'a': '1'}, {'b': '2'}],
                      _currentConfig=self.currentConfig)
    # assertEqual/assertNotEqual: the -s aliases are deprecated in unittest
    self.assertEqual(len(self.mockQueue.tasks), 2)
    self.assertNotEqual(self.getTask(0).name, self.getTask(1).name)
def test_tasksEnqueuedToStartMultipleMachines(self):
    """One task is enqueued per context in the list."""
    startStateMachine(self.machineName, [{'a': '1'}, {'b': '2'}, {'c': '3'}],
                      _currentConfig=self.currentConfig)
    # assertEqual: the assertEquals alias is deprecated in unittest
    self.assertEqual(len(self.mockQueue.tasks), 3)
def post(self):
    """Handle subset benchmark form actions.

    Supports cleanup (fsm_cleanup), counter resets (reset_*_count), and
    starting a run (compute) on one of "fsm", "pipeline", or "mr".
    """
    if self.request.get("fsm_cleanup"):
        if fsm_calculate_run_time():
            self.redirect("/subset")
        else:
            self.response.out.write("Error calculating run time of FSM/subset")
        # Bug fix: return so a cleanup request does not fall through into
        # the reset/compute branches below (matches the grep handler).
        return
    if self.request.get("reset_fsm_count"):
        for c in SSFSMSimpleCounterShard.all():
            c.delete()
        self.redirect('/subset')
        return
    if self.request.get("reset_mr_count"):
        for c in SSMRSimpleCounterShard.all():
            c.delete()
        self.redirect('/subset')
        return
    if self.request.get("reset_pl_count"):
        for c in SSPLSimpleCounterShard.all():
            c.delete()
        self.redirect('/subset')
        return
    if self.request.get("compute"):
        engine = self.request.get("engine")
        dataset = self.request.get("dataset")
        user = self.request.get('user')
        data = SubSetDataSet.get_by_key_name(dataset)
        record = Record(engine_type=engine,
                        dataset=dataset,
                        benchmark="subset",
                        num_entities=data.num_entries,
                        entries_per_pipe=data.entries_per_pipe,
                        user=user,
                        state="Running")
        if engine == "fsm":
            record.put()
            # Reset the fsm counter shards before starting a fresh run.
            for c in SSFSMSimpleCounterShard.all():
                c.delete()
            context = {}
            context['user'] = str(user)
            context['num_entries'] = int(data.num_entries)
            fsm.startStateMachine('SubSet', [context])
            self.redirect('/subset')
        elif engine == "pipeline":
            for c in SSPLSimpleCounterShard.all():
                c.delete()
            mypipeline = SubSetPipeline(data.num_entries)
            mypipeline.start()
            record.pipeline_id = mypipeline.pipeline_id
            record.put()
            self.redirect('/subset')
        elif engine == "mr":
            for c in SSMRSimpleCounterShard.all():
                c.delete()
            # Aim for roughly 1000 entities per shard.
            # NOTE(review): whether ~1k per shard is ideal is unverified here.
            if data.num_entries > 1000:
                # Floor division: shard_count must be an int (true division
                # would produce a float under Python 3).
                shards = data.num_entries // 1000
            else:
                shards = 1
            kind = get_class(data.num_entries)
            mapreduce_id = control.start_map(
                name="Wordcount with just mappers",
                handler_spec="subset.mr.subset_mapper",
                reader_spec="mapreduce.input_readers.DatastoreInputReader",
                mapper_parameters={
                    "entity_kind": "data.subset." + kind,
                    "processing_rate": 500
                },
                mapreduce_parameters={model.MapreduceSpec.PARAM_DONE_CALLBACK:
                                      '/subset/mr/callback'},
                shard_count=shards,
                queue_name="default",
            )
            record.mr_id = mapreduce_id
            record.put()
            self.redirect('/subset')