Example #1
def add():
    form = RecordForm()
    if form.validate_on_submit():
        record = Record()
        record.id = uuid.uuid4()
        record.value = request.form['value']
        record.timestamp = datetime.now()
        RECORDS.append(record)
        return redirect(url_for('home'))
    return render_template('add.html', form=form)
Example #2
  def post(self):
    name = self.request.headers["mapreduce-id"]
    if not name:
      name = "NAME NOT FOUND"
    logging.info("MR CALLBACK " + name)
    q = Record.all()
    q.filter('mr_id =', name)
    results = q.fetch(1)
    if results:
      for ii in results:
        t = memcache.get('fsm_mapper_cleanup')
        if not t:
          logging.error("Unable to get datetime from memcache")
          return False
        dt, msec = t.split(".")
        dt = datetime.datetime.strptime(dt, '%Y-%m-%d %H:%M:%S')
        msec = datetime.timedelta(microseconds = int(msec))
        fullDatetime = dt + msec
        ii.end = fullDatetime

        delta = (ii.end - ii.start)
        ii.total = float(delta.days * 86400 + delta.seconds) + float(delta.microseconds)/1000000
        ii.state = "Done"
        ii.put()

        logging.info("updated: record for MR job id %s"%name)
    else:
      logging.info("Unable to find record for MR job id %s"%name)
Example #3
def recordsHandler():
    if not g.user:
        return redirect('/')

    if request.method == "POST":
        app.logger.info(request.method)
        req = request.form
        # Logout
        if 'logoutBtn' in req:
            app.logger.info("Logout user: "******"No medical records found.")
        return redirect('/medic')

    resList = []
    for res in result:
        print(res)
        resList.append(Record(res[0], res[1], res[2], res[3]))

    return render_template('records.html',
                           list=resList,
                           client_name=resList[0].name,
                           client_tel=resList[0].tel)
Example #4
 def get(self):
   user = users.get_current_user()
   if not user:
     self.redirect(users.create_login_url(dest_url="/"))
     return
   q = SubSetDataSet.all()
   q.order('-start')
   results = q.fetch(1000)
   datasets = [result for result in results]
   datasets_len = len(datasets)
   q = Record.all()
   q.filter('benchmark =', "subset")
   q.order('-start')
   results = q.fetch(1000) 
   records = [result for result in results]
   records_len = len(records)
   fsm_count = get_fsm_count()
   pl_count = get_pl_count()
   mr_count = get_mr_count()
   self.response.out.write(template.render("templates/subset.html",
                                           {"user": user.email(),
                                            "datasets_len" : datasets_len,
                                            "datasets" : datasets,
                                            "fsm_count" : fsm_count,
                                            "pl_count" : pl_count,
                                            "mr_count" : mr_count,
                                            "records": records,
                                            "records_len" : records_len}))
Example #5
def encode_decode(headers, data):
    from records import Record, print_records, dump_records
    from io import StringIO, BytesIO

    if not data:
        return headers, data

    #print headers
    if 'X-WCF-Encode' in headers:
        from xml2records import Parser
        p = Parser()
        print data
        print '##################################'
        p.feed(data)
        data = dump_records(p.records)
        print data.encode('hex')
        del headers['X-WCF-Encode']
        headers['Content-Type'] = 'application/soap+msbin1'
    else:
        if 'Content-Type' not in headers or headers[
                'Content-Type'] != 'application/soap+msbin1':
            return headers, data
        fp = BytesIO(data)
        data = Record.parse(fp)
        fp.close()
        fp = StringIO()
        print_records(data, fp=fp)
        data = fp.getvalue()
        fp.close()
        headers['X-WCF-Encode'] = '1'
        headers['Content-Type'] = 'text/soap+xml'
    return headers, data
Example #7
    def query(self, sql, columns=None, **kwargs):

        headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Language": "zh-CN,zh;q=0.8",
            "Connection": "keep-alive",
            "Host": "192.168.0.159:8007",
            "Referer": "http://192.168.0.159:8007/clustering",
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36"
        }

        self.params.update({"q": sql})

        rep = requests.get(self.db_url, params=self.params, headers=headers)
        content = rep.text.split('\n')

        # Parse each JSON line once and build a Record from its keys/values.
        rows_gen = (Record(list(row_dict.keys()), list(row_dict.values()))
                    for row_dict in (json.loads(row) for row in content if row.strip()))

        results = RecordCollection(rows_gen)

        return results
Example #8
 def get(self):
   results = []
   for jt in JOB_TYPES:
     for e in ENGINES:
       for js in JOB_SIZE:
         q = Record.all()
         q.filter('benchmark =', jt)          
         q.filter('engine_type =', e)
         q.filter('num_entities =', js)
          # hope you didn't run 1k+ jobs
         ents = q.fetch(1000)
         if len(ents) == 0:
           continue
         n = 0
         sum_x = 0
         mean = 0
         stdev = 0
         maximum = 0
         minimum = 99999999
         points = []
         for ii in ents:
           if ii.total: # some jobs may have failed
             n += 1
             points.append(ii.total)
             if ii.total > maximum:
               maximum = ii.total
             if ii.total < minimum:
               minimum = ii.total
         if n != 0:
           sum_x = getTotal(points)
           mean = getAverage(points, sum_x)
           stdev = getStDev(points, mean)
           results.append(Job(n, sum_x, mean, maximum, minimum, stdev, jt, e, js))
   self.response.out.write(template.render('templates/jobs.html', {'jobs':results, 'jobs_len':len(results)}))
   return
Example #9
def export_all(format, path, datas):
    """
    将所有结果数据导出到一个文件

    :param str format: 导出文件格式
    :param str path: 导出文件路径
    :param list datas: 待导出的结果数据
    """
    format = check_format(format, len(datas))
    timestamp = get_timestamp()
    name = f'all_subdomain_result_{timestamp}'
    path = check_path(path, name, format)
    logger.log('INFOR', f'Subdomain results for all main domains: {path}')
    row_list = list()
    for row in datas:
        row.pop('header')
        row.pop('response')
        row.pop('module')
        row.pop('source')
        row.pop('elapsed')
        row.pop('count')
        keys = row.keys()
        values = row.values()
        if format in {'xls', 'xlsx'}:
            values = check_value(values)
        row_list.append(Record(keys, values))
    rows = RecordCollection(iter(row_list))
    content = rows.export(format)
    save_data(path, content)
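A minimal usage sketch for export_all above, assuming the OneForAll-style helpers it calls (check_format, check_path, save_data, logger) are importable; the result dict and its field values below are purely illustrative:

results = [{
    'subdomain': 'www.example.com', 'ip': '93.184.216.34',  # illustrative result fields
    'header': '...', 'response': '...', 'module': 'brute',  # bookkeeping fields that
    'source': 'wordlist', 'elapsed': 1.2, 'count': 1,       # export_all() pops before export
}]
export_all('csv', './results', results)  # final path handling is delegated to check_path()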
Example #10
    def query(self, sql, columns=None, **kwargs):

        rows = self.conn.execute(sql)

        row_gen = (Record(columns, row) for row in rows)

        # Convert psycopg2 results to RecordCollection.
        results = RecordCollection(row_gen)
        # # # Fetch all results if desired.
        # if fetchall:
        #     results.all()

        return results
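For context, a minimal sketch of how a query() method like the one above is typically consumed; the wrapper class name, connection string, and table are assumptions, not part of the example:

# Hypothetical wrapper exposing the query() method shown above.
db = Database('postgresql://localhost/mydb')
rows = db.query('SELECT id, name FROM users', columns=['id', 'name'])
for record in rows:
    print(record.id, record.name)  # Record fields are accessible as attributes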
Example #11
 def finalized(self):
   plid = self.pipeline_id
   q = Record.all()
   q.filter('pipeline_id =',plid) 
   items = q.fetch(1)
   for ii in items:
     ii.end = datetime.datetime.now() 
     delta = (ii.end - ii.start)
     ii.total = float(delta.days * 86400 + delta.seconds) + float(delta.microseconds)/1000000
     ii.state = "Done"
     ii.put() 
     logging.info("Updated aggregate pipeline record")
   logging.info("Done with aggregate pipeline")
Example #12
def iquery(self, query, batches=100):
    cursor = self._conn.execute(text(query))

    columns = cursor.keys()
    history = []
    for i, row in enumerate(cursor, start=1):
        history.extend(
            list(RecordCollection(
                (Record(columns, _row) for _row in (row, )))))
        if i % batches == 0:
            yield history
            history.clear()
    if history:
        yield history
Example #13
 def checkDuplicates(self, line):
     # Returns true for unique records, stores duplicates
     ret = True
     cancer = False
     s = line.strip().split(self.d)
     if self.col.Patient and s[self.col.Patient] in self.reps.ids:
         if self.col.Code and "8" in s[self.col.Code]:
             cancer = True
         # Sort duplicates and store for later
         rec = Record(s[self.col.Sex], s[self.col.Age], s[self.col.Patient],
                      s[self.col.Species], cancer, s[self.col.ID])
         self.reps.sortReplicates(rec)
         self.dups[s[self.col.ID]] = line
         ret = False
     return ret
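A sketch of how checkDuplicates above might be driven, assuming the surrounding class reads a delimited file line by line; the filterFile method name and file handling are hypothetical:

 def filterFile(self, infile, outfile):
     # Hypothetical driver: keep only the lines checkDuplicates() reports as unique.
     with open(infile) as f_in, open(outfile, "w") as f_out:
         for line in f_in:
             if self.checkDuplicates(line):  # True -> unique record
                 f_out.write(line)
             # duplicates are kept in self.dups / self.reps for later review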
Example #14
 def query(self, sql, columns=None, **kwargs):
     try:
         dsl = json.loads(sql)
         index_name = kwargs.pop("index_name", None)
         type_name = kwargs.pop("type_name", None)
         data_gen = (Record(line['_source'].keys(),
                            line['_source'].values())
                     for line in self.db.search(body=dsl,
                                                index=index_name,
                                                doc_type=type_name,
                                                _source_include=columns)
                     ['hits']['hits'])
         result = RecordCollection(data_gen)
         return result
     except Exception as e:
         print(e)
Example #15
def export_all_results(path, name, format, datas):
    path = check_path(path, name, format)
    logger.log('ALERT', f'The subdomain result for all main domains: {path}')
    row_list = list()
    for row in datas:
        if 'header' in row:
            row.pop('header')
        if 'response' in row:
            row.pop('response')
        keys = row.keys()
        values = row.values()
        if format in {'xls', 'xlsx'}:
            values = check_value(values)
        row_list.append(Record(keys, values))
    rows = RecordCollection(iter(row_list))
    content = rows.export(format)
    save_data(path, content)
Example #16
 def post(self):
   name = self.request.headers["mapreduce-id"]
   if not name:
     name = "NAME NOT FOUND"
   logging.info("MR CALLBACK " + name)
   q = Record.all()
   q.filter('mr_id =', name)
   results = q.fetch(1)
   if results:
     for ii in results:
       ii.end = datetime.datetime.now()
       delta = (ii.end - ii.start)
       ii.total = float(delta.days * 86400 + delta.seconds) + float(delta.microseconds)/1000000
       ii.state = "Done"
       ii.put()
       logging.info("updated: record for MR job id %s"%name)
   else:
     logging.info("Unable to find record for MR job id %s"%name)
Example #17
def fsm_calculate_run_time():
  """ Fantasm does not give call backs when its done. Must figure it out
      with another job using the last modified date on output entities
  """
  # Get the last job which was run for subset /fsm
  q = Record.all()
  q.filter('engine_type =','fsm')
  q.filter('benchmark =','subset')
  q.order('-start')
  results = q.fetch(1)
  if len(results) == 0:
    logging.error("Unable to find a record for fsm/subset")
    return False

  q = None
  record = None
  for ii in results:
    if ii.state == "Done":
      logging.error("Last FSM end time has already been calculated")
    logging.info(str(ii.num_entities))
    q = SSFSMSimpleCounterShard.all()
    if not q:
      logging.error("No query returned for SubSet results")
      return False
    record = ii

  max_date = None
  while True:
    results = q.fetch(1000)
    for ii in results:
      date = ii.modified
      if max_date is None or max_date < date:
        max_date = date
    if len(results) < 1000:
      break
  if not max_date:
    logging.error("Unable to calculate the max date for FSM/subset")
    return False
  record.state = "Done"
  record.end = max_date
  delta = (record.end - record.start)
  record.total = float(delta.days * 86400 + delta.seconds) + float(delta.microseconds)/1000000
  record.put()
  return True
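The duration arithmetic used throughout these examples (combining days, seconds, and microseconds by hand) is equivalent to timedelta.total_seconds(), available since Python 2.7; a quick sketch with illustrative timestamps:

import datetime

start = datetime.datetime(2023, 1, 1, 12, 0, 0)
end = datetime.datetime(2023, 1, 1, 12, 0, 1, 500000)
delta = end - start

# Hand-rolled total, as written in the records above ...
total = float(delta.days * 86400 + delta.seconds) + float(delta.microseconds) / 1000000
# ... matches the stdlib helper:
assert total == delta.total_seconds() == 1.5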
Example #18
def process_docs(docs_dataset):
    invert_index = {}
    for row in docs_dataset:
        record = Record(keys=docs_dataset.headers, values=row)
        logging.info("处理文档: %s" % record.id)
        if not record.doc.strip():
            logging.warning("文档内容为空")
            continue
        # 分词并获取词性
        words_pos = word_segment(record.doc)
        # 清洗单词
        words = clean_words(words_pos)
        word_frequency = get_word_frequency(words)
        logging.info("文档词频统计结果: %s" % word_frequency)
        for word, frequency in word_frequency.items():
            if word in invert_index:
                invert_index[word].append((record.id, frequency))
            else:
                invert_index[word] = [(record.id, frequency)]
    return invert_index
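A small sketch of querying the inverted index returned above; docs_dataset and the search term are hypothetical, and each posting is a (doc_id, frequency) pair:

index = process_docs(docs_dataset)  # docs_dataset: a tablib-style Dataset with headers
postings = index.get("record", [])  # postings for one (hypothetical) term
for doc_id, freq in sorted(postings, key=lambda p: p[1], reverse=True):
    print(doc_id, freq)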
Example #19
    def giveInsulin(self, amount: float):
        print("Trying to deliver {} units of insulin.".format(amount))
        from time import sleep  # used below; imported here so the snippet is self-contained
        from records import Record
        r = Record()
        scrollRate = r.getScrollRate()
        lastDC = r.getDutyCycle()
        ratio = .03  # Not a set ratio, I have to design the gearbox first.

        try:
            import RPi.GPIO as GPIO
            GPIO.setmode(GPIO.BCM)
            GPIO.setup(17, GPIO.OUT)
            servo = GPIO.PWM(17, 50)
            servo.start(lastDC)

            for i in range(int(amount / scrollRate)):  # range() requires an int
                dutycycle = lastDC + (i * ratio)
                servo.ChangeDutyCycle(dutycycle)
                print(
                    "Servo dutycycle is now {}.\n{} units out of {} of insulin delivered as of now."
                    .format(dutycycle, i * scrollRate, amount))
                r.setDutyCycle(dutycycle)
                sleep(.5)

            servo.stop()
            GPIO.cleanup()

        except (ImportError, ModuleNotFoundError):
            print(
                "This is likely not running on a Raspberry Pi.\nIf it is, make sure RPi is installed for Python 3.\n\nRunning print loop now instead of sending servo commands."
            )
            print(amount / scrollRate)
            for i in range(int(amount / scrollRate)):
                dutycycle = lastDC + (i * ratio)
                print(
                    "Servo dutycycle is now {}.\n{} units out of {} of insulin delivered as of now."
                    .format(dutycycle, i * scrollRate, amount))
                r.setDutyCycle(dutycycle)
                sleep(.5)
Example #20
def visit(elem, depth=0):
    content = elem[CONTENT_INDEX]
    # Parse the name
    name = content.text
    if name is None or name.strip() == "":
        name = get_name()

    # If it is an <a> tag, extract the URL and creation date
    if content.tag == "a":
        url = content.get("href")
        created = content.get("add_date")
        record = Record(keys=["url", "created"], values=[url, created])
    else:
        record = None
    elem_obj = Element(name=name, data=record)
    for child in elem.findall(CHILD_XPATH):
        elem_obj.add_child(visit(child, depth + 1))
    return elem_obj
Example #21
def export_all(format, datas):
    format = check_format(format, len(datas))
    dpath = check_dpath()
    timestamp = get_timestamp()
    fpath = dpath.joinpath(f'all_subdomain_{timestamp}.{format}')
    row_list = list()
    for row in datas:
        row.pop('header')
        row.pop('response')
        row.pop('module')
        row.pop('source')
        row.pop('elapsed')
        row.pop('count')
        keys = row.keys()
        values = row.values()
        if format in {'xls', 'xlsx'}:
            values = check_value(values)
        row_list.append(Record(keys, values))
    rows = RecordCollection(iter(row_list))
    content = rows.export(format)
    save_data(fpath, content)
Example #22
def fsm_calculate_run_time():
  """ Fantasm does not give call backs when its done. Must figure it out
      with another job using the last modified date on output entities
  """
  # Get the last job which was run for grep/fsm
  q = Record.all() 
  q.filter('engine_type =','fsm')
  q.filter('benchmark =','grep')
  q.order('-start')
  results = q.fetch(1)
  if len(results) == 0:
    logging.error("Unable to find a record for fsm/grep")
    return False

  for ii in results:
    ii.state = "Calculating time"
    ii.put()
    shards = ii.num_entities/1000
    if shards < 1:
      shards = 1
    if shards > 256:
      shards = 256 # max amount of shards allowed

    kind  = "GrepResults" #get_output_class(ii.num_entities)
    mapreduce_id = control.start_map(
            name="FSM Grep cleanup",
            handler_spec="grep.fsm_mapper",
            reader_spec="mapreduce.input_readers.DatastoreInputReader",
            mapper_parameters={
                "entity_kind": "data.grep."+kind,
                "processing_rate": 500
            },
            mapreduce_parameters={model.MapreduceSpec.PARAM_DONE_CALLBACK:
                       '/grep/fsm/callback'},
            shard_count=shards,
            queue_name="default",
          )
    ii.mr_id = mapreduce_id
    ii.put()
  return True
Example #23
 def get(self):
   user = users.get_current_user()
   if not user:
     self.redirect(users.create_login_url(dest_url="/"))
     return
   q = GrepDataSet.all()
   q.order('-start')
   results = q.fetch(1000)
   datasets = [result for result in results]
   datasets_len = len(datasets)
   q = Record.all()
   q.filter('benchmark =', "grep")
   q.order('-start')
   results = q.fetch(1000) 
   records = [result for result in results]
   records_len = len(records)
   self.response.out.write(template.render("templates/grep.html",
                                           {"user": user.email(),
                                            "datasets_len" : datasets_len,
                                            "datasets" : datasets,
                                            "records": records,
                                            "records_len" : records_len}))
Example #24
    def parse(self,file):
        delivery = None
        section = None
        
        for line in file:
            rec = Record(line)
            
            if rec.is_type(Record.NOT_IMPLEMENTED):
                raise PBSParseException("Unimplemented record: "+line)

            if rec.is_type(Record.DELIVERY_START):
                delivery = rec
                continue

            if (rec.is_type(Record.DELIVERY_END)):
                delivery.end(rec)
                self._deliveries.append(delivery)
                delivery = None
                continue
        
            if delivery is None:
                raise PBSParseException("File contains content outside of delivery start/end")

            if (rec.is_type(Record.SECTION_START)):
                section = rec
                continue

            if (rec.is_type(Record.SECTION_END)):
                section.end(rec)
                delivery.append(section)
                section = None
                continue

            if section is None:
                raise PBSParseException("File contains content outside of section start/end")

            if (rec.is_type(Record.PAYLOAD)):
                section.append(rec)
                continue

            raise PBSParseException("File contains unrecognized content")
Example #25
  def post(self):
    if self.request.get("fsm_cleanup"):
      if fsm_calculate_run_time():
        self.redirect("/subset") 
      else:
        self.response.out.write("Error calculating run time of FSM/subset") 

    if self.request.get("reset_fsm_count"):
      for c in SSFSMSimpleCounterShard.all():
        c.delete()
      self.redirect('/subset')
      return
    if self.request.get("reset_mr_count"):
      for c in SSMRSimpleCounterShard.all():
        c.delete()
      self.redirect('/subset')
      return

    if self.request.get("reset_pl_count"):
      for c in SSPLSimpleCounterShard.all():
        c.delete()
      self.redirect('/subset')
      return

    if self.request.get("compute"):
      engine = self.request.get("engine")
      dataset = self.request.get("dataset")
      user = self.request.get('user')
      data = SubSetDataSet.get_by_key_name(dataset)
      
      record = Record(engine_type=engine, 
                      dataset=dataset,
                      benchmark="subset",
                      num_entities=data.num_entries,
                      entries_per_pipe=data.entries_per_pipe,
                      user=user,
                      state="Running")
      if engine == "fsm":
        record.put()
        # reset count
        for c in SSFSMSimpleCounterShard.all():
          c.delete()

        context = {}
        context['user'] = str(user)
        context['num_entries'] = int(data.num_entries)
        fsm.startStateMachine('SubSet', [context])
        self.redirect('/subset')
      elif engine == "pipeline":
        for c in SSPLSimpleCounterShard.all():
          c.delete()
        mypipeline = SubSetPipeline(data.num_entries)
        mypipeline.start()
        record.pipeline_id = mypipeline.pipeline_id
        record.put()
        self.redirect('/subset') 
        #self.redirect(mypipeline.base_path + "/status?root=" + mypipeline.pipeline_id)
      elif engine == "mr":
        for c in SSMRSimpleCounterShard.all():
          c.delete()
        # Why 1k each per shard or less? is this ideal?
        if data.num_entries > 1000: shards = data.num_entries/1000
        else: shards = 1

        kind = get_class(data.num_entries)
        mapreduce_id = control.start_map(
          name="Wordcount with just mappers",
          handler_spec="subset.mr.subset_mapper",
          reader_spec="mapreduce.input_readers.DatastoreInputReader",
          mapper_parameters={
              "entity_kind": "data.subset."+kind,
              "processing_rate": 500
          },
          mapreduce_parameters={model.MapreduceSpec.PARAM_DONE_CALLBACK:
                     '/subset/mr/callback'},
          shard_count=shards,
          queue_name="default",
        )

        record.mr_id = mapreduce_id
        record.put()
        self.redirect('/subset')
Example #26
def recordsPage(current_page):
    current_page.frame.pack_forget()
    current_page = Record(window, patientInfoPage)
Example #27
def fsm_calculate_run_time():
  """ Fantasm does not give call backs when its done. Must figure it out
      with another job using the last modified date on output entities
  """
  # Get the last job which was run for aggregate /fsm
  q = Record.all()
  q.filter('engine_type =','fsm')
  q.filter('benchmark =','aggregate')
  q.order('-start')
  results = q.fetch(1)

  # There is a second type of fsm job that has a fan in state
  q2 = Record.all()
  q2.filter('engine_type =','fsm_fan_in')
  q2.filter('benchmark =','aggregate')
  q2.order('-start')
  results2 = q2.fetch(1)
   
  if len(results) == 0 and len(results2) == 0:
    logging.error("Unable to find a record for fsm/aggregate")
    return False

  # Take only the one which ran last
  if len(results) == 0:
    results = results2 #fsm with fan in ran last
  elif len(results2) == 0:
    pass
  elif results[0].start > results2[0].start:
    pass
  else:
    results = results2 #fsm with fan in ran last

  q = None
  record = None
  # There should only be one result
  for ii in results:
    if ii.state == "Done":
      logging.error("Last FSM end time has already been calculated")
    logging.info(str(ii.num_entities))
    q = FSMSimpleCounterShard.all()
    if not q:
      logging.error("No query returned for Aggregate results")
      return False
    record = ii

  max_date = None
  while True:
    results = q.fetch(1000)
    for ii in results:
      date = ii.modified
      if max_date is None or max_date < date:
        max_date = date
    if len(results) < 1000:
      break
  if not max_date:
    logging.error("Unable to calculate the max date for FSM/aggregate")
    return False
  record.state = "Done"
  record.end = max_date
  delta = (record.end - record.start)
  record.total = float(delta.days * 86400 + delta.seconds) + float(delta.microseconds)/1000000
  record.put()
  return True
Example #28
  def post(self):
    """ Generate data sets here """
    if self.request.get("fsm_cleanup"):
      if fsm_calculate_run_time():
        self.redirect('/grep')
      else:
        self.response.out.write("Error calculating fsm/grep")
      return 
    if self.request.get("compute"):
      engine = self.request.get("engine")
      dataset = self.request.get("dataset")
      user = self.request.get('user')
      needle = self.request.get('needle')    
      data = GrepDataSet.get_by_key_name(dataset)
      record = Record(engine_type=engine, 
                      dataset=dataset,
                      benchmark="grep",
                      num_entities=data.num_entries,
                      #shard_count=data.num_pipelines,
                      entries_per_pipe=data.entries_per_pipe,
                      user=user,
                      char_per_word=data.char_per_word,
                      state="Running")
      if engine == "fsm":
        record.put()
        context = {}
        context['user'] = str(user)
        context['num_entries'] = int(data.num_entries)
        context['needle'] = needle
        fsm.startStateMachine('Grep', [context])
        self.redirect('/grep')
      elif engine == "pipeline":
        mypipeline = GrepPipelineLoop(data.num_entries, needle)
        mypipeline.start()
        record.pipeline_id = mypipeline.pipeline_id
        record.put()
        self.redirect('/grep')
        #self.redirect(mypipeline.base_path + "/status?root=" + mypipeline.pipeline_id)
        return
      elif engine == "mr":
        # Why 1k each per shard or less? is this ideal?
        if data.num_entries > 1000:
          shards = data.num_entries/1000
          shards = min(256, shards) 
        else: shards = 1

        kind = getKindString(data.num_entries)
        mapreduce_id = control.start_map(
            name="Grep",
            handler_spec="grep.mr.grep_mapper",
            reader_spec="mapreduce.input_readers.DatastoreInputReader",
            mapper_parameters={
                "entity_kind": "data.grep."+kind,
                "processing_rate": 500,
                "needle":needle,
            },
            mapreduce_parameters={model.MapreduceSpec.PARAM_DONE_CALLBACK:
                       '/grep/mr/callback'},
            shard_count=shards,
            queue_name="default",
          )

        record.mr_id = mapreduce_id
        record.put()
        self.redirect('/grep')
Example #29
import json
from types import SimpleNamespace

import numpy as np
from sklearn.cluster import DBSCAN
# Record and jsondata.json come from the surrounding project

dist = 50
limit = 100

color_list = ("b.", "r.", "g.", "m.", "c.", "y.")

with open('jsondata.json') as f:
    x = json.load(f, object_hook=lambda d: SimpleNamespace(**d))

print(x.features[2].properties.filename)
X = np.zeros((1, 2))
list_rec=[]
weight_list = []

for i in range(len(x.features)):
    list_rec.append(Record(i, x.features[i].properties))
    X = np.vstack((X, [list_rec[i].lat, list_rec[i].lon]))
    list_rec[i].calc_weight()
    weight_list.append(list_rec[i].weight) 


X = X[1:-1]
weight_list = weight_list[0:-1]

#print(np.unique(cat_list,return_index=True))
#print(x.features[6].properties)

cluster = DBSCAN(eps=0.0001, min_samples=100).fit(X, y=None, sample_weight=weight_list)

dumps = []
cluster_count = np.unique(cluster.labels_)