def get(self):
    """Render the word-count page for the signed-in user.

    When no user is signed in, the request is redirected to the login URL
    (created with ``dest_url="/"``) and nothing is rendered.

    Otherwise ``templates/wordcount.html`` is rendered with:

    * up to 1000 ``WCDataSet`` entities ordered by ``-start``, and
    * up to 1000 ``Record`` entities whose ``benchmark`` equals
      "wordcount", ordered by ``-start``,

    together with their counts and the user's e-mail address.
    """
    current_user = users.get_current_user()
    if not current_user:
        login_url = users.create_login_url(dest_url="/")
        self.redirect(login_url)
        return

    # Data sets, ordered by '-start'.
    dataset_query = WCDataSet.all()
    dataset_query.order('-start')
    datasets = list(dataset_query.fetch(1000))

    # Word-count benchmark runs, ordered by '-start'.
    record_query = Record.all()
    record_query.filter('benchmark =', "wordcount")
    record_query.order('-start')
    records = list(record_query.fetch(1000))

    template_values = {
        "user": current_user.email(),
        "datasets_len": len(datasets),
        "datasets": datasets,
        "records": records,
        "records_len": len(records),
    }
    page = template.render("templates/wordcount.html", template_values)
    self.response.out.write(page)
def post(self):
    """Handle the word-count page's form submissions.

    Two actions are recognised, checked in this order:

    * ``fsm_cleanup`` -- calls ``fsm_calculate_run_time()``; redirects to
      ``/wc`` when it returns a truthy value, otherwise writes an error
      message to the response.
    * ``compute`` -- looks up the ``WCDataSet`` named by the ``dataset``
      parameter and starts a word-count run on the engine named by the
      ``engine`` parameter ("fsm", "pipeline" or "mr"). A ``Record`` in
      state "Running" is stored for the run and the request is redirected
      to ``/wc``.

    A ``compute`` request whose data set cannot be found is answered with
    a 404 and a short message. An unrecognised engine is logged and
    otherwise ignored (no record is stored, no redirect is issued). A
    request carrying neither action parameter produces no output.
    """
    if self.request.get("fsm_cleanup"):
        if fsm_calculate_run_time():
            self.redirect('/wc')
        else:
            self.response.out.write("Error calculating fsm/wordcount")
        return

    if not self.request.get("compute"):
        return

    engine = self.request.get("engine")
    dataset = self.request.get("dataset")
    user = self.request.get('user')

    # An empty or unknown key name yields no entity; without this guard the
    # attribute reads below failed with an unhandled AttributeError.
    data = WCDataSet.get_by_key_name(dataset) if dataset else None
    if data is None:
        self.error(404)
        self.response.out.write("Unknown wordcount data set")
        return

    record = Record(engine_type=engine,
                    dataset=dataset,
                    benchmark="wordcount",
                    num_entities=data.num_entries,
                    entries_per_pipe=data.entries_per_pipe,
                    user=user,
                    char_per_word=data.char_per_word,
                    state="Running")

    if engine == "fsm":
        record.put()
        context = {
            'user': str(user),
            'num_entries': int(data.num_entries),
        }
        fsm.startStateMachine('WordCount', [context])
        self.redirect('/wc')
    elif engine == "pipeline":
        mypipeline = WordCountPipelineLoop(data.num_entries)
        mypipeline.start()
        record.pipeline_id = mypipeline.pipeline_id
        record.put()
        # The pipeline's own status page lives at
        # mypipeline.base_path + "/status?root=" + mypipeline.pipeline_id
        self.redirect('/wc')
        logging.info("wordcount job running")
    elif engine == "mr":
        # TODO(review): roughly 1k entities per shard (minimum of 10 shards
        # for small data sets) -- is this ideal?
        shards = 10
        if data.num_entries > 1000:
            # Floor division keeps shard_count an integer even where "/"
            # means true division.
            shards = data.num_entries // 1000
        kind = getKindString(data.num_entries)
        mapreduce_id = control.start_map(
            name="Wordcount with just mappers",
            handler_spec="wordcount.mr.wordcount_mapper",
            reader_spec="mapreduce.input_readers.DatastoreInputReader",
            mapper_parameters={
                "entity_kind": "data.wordcount." + kind,
                "processing_rate": 500,
            },
            mapreduce_parameters={
                model.MapreduceSpec.PARAM_DONE_CALLBACK: '/wc/mr/callback',
            },
            shard_count=shards,
            queue_name="default",
        )
        record.mr_id = mapreduce_id
        record.put()
        self.redirect('/wc')
    else:
        # Previously ignored without a trace; the response is unchanged but
        # the event is now visible in the logs.
        logging.warning("wordcount: unrecognised engine %r", engine)