def add():
    form = RecordForm()
    if form.validate_on_submit():
        record = Record()
        record.id = uuid.uuid4()
        record.value = request.form['value']
        record.timestamp = datetime.now()
        RECORDS.append(record)
        return redirect(url_for('home'))
    return render_template('add.html', form=form)
def post(self):
    name = self.request.headers["mapreduce-id"]
    if not name:
        name = "NAME NOT FOUND"
    logging.info("MR CALLBACK " + name)
    q = Record.all()
    q.filter('mr_id =', name)
    results = q.fetch(1)  # fetch() returns a list of at most one Record
    if results:
        for ii in results:
            t = memcache.get('fsm_mapper_cleanup')
            if not t:
                logging.error("Unable to get datetime from memcache")
                return False
            dt, msec = t.split(".")
            dt = datetime.datetime.strptime(dt, '%Y-%m-%d %H:%M:%S')
            msec = datetime.timedelta(microseconds=int(msec))
            fullDatetime = dt + msec
            ii.end = fullDatetime
            delta = ii.end - ii.start
            ii.total = float(delta.days * 86400 + delta.seconds) + float(delta.microseconds) / 1000000
            ii.state = "Done"
            ii.put()
            logging.info("updated: record for MR job id %s" % name)
    else:
        logging.info("Unable to find record for MR job id %s" % name)
def recordsHandler():
    if not g.user:
        return redirect('/')
    if request.method == "POST":
        app.logger.info(request.method)
        req = request.form
        # Logout
        if 'logoutBtn' in req:
            app.logger.info("Logout user: "******"No medical records found.")
            return redirect('/medic')
    resList = []
    for res in result:
        print(res)
        resList.append(Record(res[0], res[1], res[2], res[3]))
    return render_template('records.html', list=resList,
                           client_name=resList[0].name, client_tel=resList[0].tel)
def get(self):
    user = users.get_current_user()
    if not user:
        self.redirect(users.create_login_url(dest_url="/"))
        return
    q = SubSetDataSet.all()
    q.order('-start')
    results = q.fetch(1000)
    datasets = [result for result in results]
    datasets_len = len(datasets)
    q = Record.all()
    q.filter('benchmark =', "subset")
    q.order('-start')
    results = q.fetch(1000)
    records = [result for result in results]
    records_len = len(records)
    fsm_count = get_fsm_count()
    pl_count = get_pl_count()
    mr_count = get_mr_count()
    self.response.out.write(template.render("templates/subset.html",
                                            {"user": user.email(),
                                             "datasets_len": datasets_len,
                                             "datasets": datasets,
                                             "fsm_count": fsm_count,
                                             "pl_count": pl_count,
                                             "mr_count": mr_count,
                                             "records": records,
                                             "records_len": records_len}))
def encode_decode(headers, data):
    from records import Record, print_records, dump_records
    from io import StringIO, BytesIO

    if not data:
        return headers, data
    # print(headers)
    if 'X-WCF-Encode' in headers:
        from xml2records import Parser
        p = Parser()
        print(data)
        print('##################################')
        p.feed(data)
        data = dump_records(p.records)
        print(data.hex())  # Python 3 equivalent of the old data.encode('hex')
        del headers['X-WCF-Encode']
        headers['Content-Type'] = 'application/soap+msbin1'
    else:
        if 'Content-Type' not in headers or headers['Content-Type'] != 'application/soap+msbin1':
            return headers, data
        fp = BytesIO(data)
        data = Record.parse(fp)
        fp.close()
        fp = StringIO()
        print_records(data, fp=fp)
        data = fp.getvalue()
        fp.close()
        headers['X-WCF-Encode'] = '1'
        headers['Content-Type'] = 'text/soap+xml'
    return headers, data
def query(self, sql, columns=None, **kwargs):
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "Connection": "keep-alive",
        "Host": "192.168.0.159:8007",
        "Referer": "http://192.168.0.159:8007/clustering",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36"
    }
    self.params.update({"q": sql})
    rep = requests.get(self.db_url, params=self.params, headers=headers)
    content = rep.text.split('\n')
    rows_gen = (Record(json.loads(row).keys(), json.loads(row).values())
                for row in content if row.strip())
    results = RecordCollection(rows_gen)
    return results
def get(self):
    results = []
    for jt in JOB_TYPES:
        for e in ENGINES:
            for js in JOB_SIZE:
                q = Record.all()
                q.filter('benchmark =', jt)
                q.filter('engine_type =', e)
                q.filter('num_entities =', js)
                # hope you didnt run 1k+ jobs
                ents = q.fetch(1000)
                if len(ents) == 0:
                    continue
                n = 0
                sum_x = 0
                mean = 0
                stdev = 0
                maximum = 0
                minimum = 99999999
                points = []
                for ii in ents:
                    if ii.total:  # some jobs may have failed
                        n += 1
                        points.append(ii.total)
                        if ii.total > maximum:
                            maximum = ii.total
                        if ii.total < minimum:
                            minimum = ii.total
                if n != 0:
                    sum_x = getTotal(points)
                    mean = getAverage(points, sum_x)
                    stdev = getStDev(points, mean)
                results.append(Job(n, sum_x, mean, maximum, minimum, stdev, jt, e, js))
    self.response.out.write(template.render('templates/jobs.html',
                                            {'jobs': results, 'jobs_len': len(results)}))
    return
def export_all(format, path, datas):
    """
    Export all result data to a single file

    :param str format: export file format
    :param str path: export file path
    :param list datas: result data to export
    """
    format = check_format(format, len(datas))
    timestamp = get_timestamp()
    name = f'all_subdomain_result_{timestamp}'
    path = check_path(path, name, format)
    logger.log('INFOR', f'Subdomain results for all main domains: {path}')
    row_list = list()
    for row in datas:
        row.pop('header')
        row.pop('response')
        row.pop('module')
        row.pop('source')
        row.pop('elapsed')
        row.pop('count')
        keys = row.keys()
        values = row.values()
        if format in {'xls', 'xlsx'}:
            values = check_value(values)
        row_list.append(Record(keys, values))
    rows = RecordCollection(iter(row_list))
    content = rows.export(format)
    save_data(path, content)
def query(self, sql, columns=None, **kwargs):
    rows = self.conn.execute(sql)
    row_gen = (Record(columns, row) for row in rows)
    # Convert psycopg2 results to RecordCollection.
    results = RecordCollection(row_gen)
    # # Fetch all results if desired.
    # if fetchall:
    #     results.all()
    return results
def finalized(self):
    plid = self.pipeline_id
    q = Record.all()
    q.filter('pipeline_id =', plid)
    items = q.fetch(1)
    for ii in items:
        ii.end = datetime.datetime.now()
        delta = ii.end - ii.start
        ii.total = float(delta.days * 86400 + delta.seconds) + float(delta.microseconds) / 1000000
        ii.state = "Done"
        ii.put()
        logging.info("Updated aggregate pipeline record")
    logging.info("Done with aggregate pipeline")
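# The duration math in finalized() (and in the callbacks above) reimplements
# timedelta.total_seconds(); a minimal stdlib-only sketch showing the two forms
# agree (the timestamps here are illustrative only).
import datetime

start = datetime.datetime(2024, 1, 1, 12, 0, 0)
end = start + datetime.timedelta(days=1, seconds=5, microseconds=250000)
delta = end - start
manual = float(delta.days * 86400 + delta.seconds) + float(delta.microseconds) / 1000000
assert manual == delta.total_seconds()  # both give 86405.25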
def iquery(self, query, batches=100):
    cursor = self._conn.execute(text(query))
    columns = cursor.keys()
    history = []
    for i, row in enumerate(cursor, start=1):
        history.extend(
            list(RecordCollection(
                (Record(columns, _row) for _row in (row,)))))
        if i % batches == 0:
            yield history
            history = []  # start a fresh list so the batch already yielded is not mutated
    if history:
        yield history
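# The batching pattern iquery() uses, shown standalone with a plain iterable in
# place of the database cursor (a sketch; `batches` mirrors the parameter above).
def batched(rows, batches=100):
    history = []
    for i, row in enumerate(rows, start=1):
        history.append(row)
        if i % batches == 0:
            yield history
            history = []  # rebind so previously yielded batches stay intact
    if history:
        yield history

chunks = list(batched(range(10), batches=4))  # [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]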
def checkDuplicates(self, line):
    # Returns True for unique records, stores duplicates
    ret = True
    cancer = False
    s = line.strip().split(self.d)
    if self.col.Patient and s[self.col.Patient] in self.reps.ids:
        if self.col.Code and "8" in s[self.col.Code]:
            cancer = True
        # Sort duplicates and store for later
        rec = Record(s[self.col.Sex], s[self.col.Age], s[self.col.Patient],
                     s[self.col.Species], cancer, s[self.col.ID])
        self.reps.sortReplicates(rec)
        self.dups[s[self.col.ID]] = line
        ret = False
    return ret
def query(self, sql, columns=None, **kwargs):
    try:
        dsl = json.loads(sql)
        index_name = kwargs.pop("index_name", None)
        type_name = kwargs.pop("type_name", None)
        data_gen = (Record(line['_source'].keys(), line['_source'].values())
                    for line in self.db.search(body=dsl,
                                               index=index_name,
                                               doc_type=type_name,
                                               _source_include=columns)['hits']['hits'])
        result = RecordCollection(data_gen)
        return result
    except Exception as e:
        print(e)
def export_all_results(path, name, format, datas):
    path = check_path(path, name, format)
    logger.log('ALERT', f'The subdomain result for all main domains: {path}')
    row_list = list()
    for row in datas:
        if 'header' in row:
            row.pop('header')
        if 'response' in row:
            row.pop('response')
        keys = row.keys()
        values = row.values()
        if format in {'xls', 'xlsx'}:
            values = check_value(values)
        row_list.append(Record(keys, values))
    rows = RecordCollection(iter(row_list))
    content = rows.export(format)
    save_data(path, content)
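# A minimal sketch of the Record/RecordCollection export round trip used by the
# exporters above, assuming the kennethreitz/records package (and its tablib
# dependency) is installed; the column names and values are illustrative only.
from records import Record, RecordCollection

sample_rows = [Record(['subdomain', 'ip'], ['a.example.com', '1.2.3.4']),
               Record(['subdomain', 'ip'], ['b.example.com', '5.6.7.8'])]
collection = RecordCollection(iter(sample_rows))
print(collection.export('csv'))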
def post(self):
    name = self.request.headers["mapreduce-id"]
    if not name:
        name = "NAME NOT FOUND"
    logging.info("MR CALLBACK " + name)
    q = Record.all()
    q.filter('mr_id =', name)
    results = q.fetch(1)  # fetch() returns a list of at most one Record
    if results:
        for ii in results:
            ii.end = datetime.datetime.now()
            delta = ii.end - ii.start
            ii.total = float(delta.days * 86400 + delta.seconds) + float(delta.microseconds) / 1000000
            ii.state = "Done"
            ii.put()
            logging.info("updated: record for MR job id %s" % name)
    else:
        logging.info("Unable to find record for MR job id %s" % name)
def fsm_calculate_run_time():
    """
    Fantasm does not give callbacks when it is done. The run time must be
    worked out by another job, using the last modified date on the output
    entities.
    """
    # Get the last job which was run for subset/fsm
    q = Record.all()
    q.filter('engine_type =', 'fsm')
    q.filter('benchmark =', 'subset')
    q.order('-start')
    results = q.fetch(1)
    if len(results) == 0:
        logging.error("Unable to find a record for fsm/subset")
        return False
    q = None
    record = None
    for ii in results:
        if ii.state == "Done":
            logging.error("Last FSM end time has already been calculated")
        logging.info(str(ii.num_entities))
        q = SSFSMSimpleCounterShard.all()
        if not q:
            logging.error("No query returned for SubSet results")
            return False
        record = ii
    max_date = None
    while True:
        results = q.fetch(1000)
        for ii in results:
            date = ii.modified
            if max_date is None or max_date < date:
                max_date = date
        if len(results) < 1000:
            break
    if not max_date:
        logging.error("Unable to calculate the max date for FSM/subset")
        return False
    record.state = "Done"
    record.end = max_date
    delta = record.end - record.start
    record.total = float(delta.days * 86400 + delta.seconds) + float(delta.microseconds) / 1000000
    record.put()
    return True
def process_docs(docs_dataset):
    invert_index = {}
    for row in docs_dataset:
        record = Record(keys=docs_dataset.headers, values=row)
        logging.info("Processing document: %s" % record.id)
        if not record.doc.strip():
            logging.warning("Document content is empty")
            continue
        # Segment the text and get part-of-speech tags
        words_pos = word_segment(record.doc)
        # Clean the words
        words = clean_words(words_pos)
        word_frequency = get_word_frequency(words)
        logging.info("Word frequency for this document: %s" % word_frequency)
        for word, frequency in word_frequency.items():
            if word in invert_index:
                invert_index[word].append((record.id, frequency))
            else:
                invert_index[word] = [(record.id, frequency)]
    return invert_index
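# A standalone sketch of the inverted-index update step in process_docs(),
# using plain dicts in place of the project's Record/word_segment helpers
# (the document ids and token counts here are made up for illustration).
invert_index = {}
fake_docs = {1: {"record": 2, "index": 1}, 2: {"index": 3}}
for doc_id, word_frequency in fake_docs.items():
    for word, frequency in word_frequency.items():
        invert_index.setdefault(word, []).append((doc_id, frequency))
# invert_index == {'record': [(1, 2)], 'index': [(1, 1), (2, 3)]}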
def giveInsulin(self, amount: float):
    print("Trying to deliver {} units of insulin.".format(amount))
    from records import Record
    r = Record()
    scrollRate = r.getScrollRate()
    lastDC = r.getDutyCycle()
    ratio = .03  # Not a set ratio, I have to design the gearbox first.
    try:
        import RPi.GPIO as GPIO
        GPIO.setmode(GPIO.BCM)
        GPIO.setup(17, GPIO.OUT)
        servo = GPIO.PWM(17, 50)
        servo.start(lastDC)
        for i in range(int(amount / scrollRate)):  # range() needs an int
            dutycycle = lastDC + (i * ratio)
            servo.ChangeDutyCycle(dutycycle)
            print("Servo dutycycle is now {}.\n{} units out of {} of insulin delivered as of now."
                  .format(dutycycle, i * scrollRate, amount))
            r.setDutyCycle(dutycycle)
            sleep(.5)
        servo.stop()
        GPIO.cleanup()
    except ImportError:  # ModuleNotFoundError is a subclass of ImportError
        print("This is likely not running on a Raspberry Pi.\n"
              "If it is, make sure RPi is installed for Python 3.\n\n"
              "Running print loop now instead of sending servo commands.")
        print(amount / scrollRate)
        for i in range(int(amount / scrollRate)):
            dutycycle = lastDC + (i * ratio)
            print("Servo dutycycle is now {}.\n{} units out of {} of insulin delivered as of now."
                  .format(dutycycle, i * scrollRate, amount))
            r.setDutyCycle(dutycycle)
            sleep(.5)
def visit(elem, depth=0):
    content = elem[CONTENT_INDEX]
    # Parse the name
    name = content.text
    if name is None or name.strip() == "":
        name = get_name()
    # If this is an <a> tag, extract the URL
    if content.tag == "a":
        url = content.get("href")
        created = content.get("add_date")
        record = Record(keys=["url", "created"], values=[url, created])
    else:
        record = None
    elem_obj = Element(name=name, data=record)
    for child in elem.findall(CHILD_XPATH):
        elem_obj.add_child(visit(child, depth + 1))
    return elem_obj
def export_all(format, datas):
    format = check_format(format, len(datas))
    dpath = check_dpath()
    timestamp = get_timestamp()
    fpath = dpath.joinpath(f'all_subdomain_{timestamp}.{format}')
    row_list = list()
    for row in datas:
        row.pop('header')
        row.pop('response')
        row.pop('module')
        row.pop('source')
        row.pop('elapsed')
        row.pop('count')
        keys = row.keys()
        values = row.values()
        if format in {'xls', 'xlsx'}:
            values = check_value(values)
        row_list.append(Record(keys, values))
    rows = RecordCollection(iter(row_list))
    content = rows.export(format)
    save_data(fpath, content)
def fsm_calculate_run_time():
    """
    Fantasm does not give callbacks when it is done. The run time must be
    worked out by another job, using the last modified date on the output
    entities.
    """
    # Get the last job which was run for grep/fsm
    q = Record.all()
    q.filter('engine_type =', 'fsm')
    q.filter('benchmark =', 'grep')
    q.order('-start')
    results = q.fetch(1)
    if len(results) == 0:
        logging.error("Unable to find a record for fsm/grep")
        return False
    for ii in results:
        ii.state = "Calculating time"
        ii.put()
        shards = ii.num_entities / 1000
        if shards < 1:
            shards = 1
        if shards > 256:
            shards = 256  # max number of shards allowed
        kind = "GrepResults"  # get_output_class(ii.num_entities)
        mapreduce_id = control.start_map(
            name="FSM Grep cleanup",
            handler_spec="grep.fsm_mapper",
            reader_spec="mapreduce.input_readers.DatastoreInputReader",
            mapper_parameters={
                "entity_kind": "data.grep." + kind,
                "processing_rate": 500
            },
            mapreduce_parameters={model.MapreduceSpec.PARAM_DONE_CALLBACK: '/grep/fsm/callback'},
            shard_count=shards,
            queue_name="default",
        )
        ii.mr_id = mapreduce_id
        ii.put()
    return True
def get(self):
    user = users.get_current_user()
    if not user:
        self.redirect(users.create_login_url(dest_url="/"))
        return
    q = GrepDataSet.all()
    q.order('-start')
    results = q.fetch(1000)
    datasets = [result for result in results]
    datasets_len = len(datasets)
    q = Record.all()
    q.filter('benchmark =', "grep")
    q.order('-start')
    results = q.fetch(1000)
    records = [result for result in results]
    records_len = len(records)
    self.response.out.write(template.render("templates/grep.html",
                                            {"user": user.email(),
                                             "datasets_len": datasets_len,
                                             "datasets": datasets,
                                             "records": records,
                                             "records_len": records_len}))
def parse(self, file):
    delivery = None
    section = None
    for line in file:
        rec = Record(line)
        if rec.is_type(Record.NOT_IMPLEMENTED):
            raise PBSParseException("Unimplemented record: " + line)
        if rec.is_type(Record.DELIVERY_START):
            delivery = rec
            continue
        if rec.is_type(Record.DELIVERY_END):
            delivery.end(rec)
            self._deliveries.append(delivery)
            delivery = None
            continue
        if delivery is None:
            raise PBSParseException("File contains content outside of delivery start/end")
        if rec.is_type(Record.SECTION_START):
            section = rec
            continue
        if rec.is_type(Record.SECTION_END):
            section.end(rec)
            delivery.append(section)
            section = None
            continue
        if section is None:
            raise PBSParseException("File contains content outside of section start/end")
        if rec.is_type(Record.PAYLOAD):
            section.append(rec)
            continue
        raise PBSParseException("File contains unrecognized content")
def post(self):
    if self.request.get("fsm_cleanup"):
        if fsm_calculate_run_time():
            self.redirect("/subset")
        else:
            self.response.out.write("Error calculating run time of FSM/subset")
    if self.request.get("reset_fsm_count"):
        for c in SSFSMSimpleCounterShard.all():
            c.delete()
        self.redirect('/subset')
        return
    if self.request.get("reset_mr_count"):
        for c in SSMRSimpleCounterShard.all():
            c.delete()
        self.redirect('/subset')
        return
    if self.request.get("reset_pl_count"):
        for c in SSPLSimpleCounterShard.all():
            c.delete()
        self.redirect('/subset')
        return
    if self.request.get("compute"):
        engine = self.request.get("engine")
        dataset = self.request.get("dataset")
        user = self.request.get('user')
        data = SubSetDataSet.get_by_key_name(dataset)
        record = Record(engine_type=engine,
                        dataset=dataset,
                        benchmark="subset",
                        num_entities=data.num_entries,
                        entries_per_pipe=data.entries_per_pipe,
                        user=user,
                        state="Running")
        if engine == "fsm":
            record.put()
            # reset count
            for c in SSFSMSimpleCounterShard.all():
                c.delete()
            context = {}
            context['user'] = str(user)
            context['num_entries'] = int(data.num_entries)
            fsm.startStateMachine('SubSet', [context])
            self.redirect('/subset')
        elif engine == "pipeline":
            for c in SSPLSimpleCounterShard.all():
                c.delete()
            mypipeline = SubSetPipeline(data.num_entries)
            mypipeline.start()
            record.pipeline_id = mypipeline.pipeline_id
            record.put()
            self.redirect('/subset')
            # self.redirect(mypipeline.base_path + "/status?root=" + mypipeline.pipeline_id)
        elif engine == "mr":
            for c in SSMRSimpleCounterShard.all():
                c.delete()
            # Why 1k each per shard or less? is this ideal?
            if data.num_entries > 1000:
                shards = data.num_entries / 1000
            else:
                shards = 1
            kind = get_class(data.num_entries)
            mapreduce_id = control.start_map(
                name="Wordcount with just mappers",
                handler_spec="subset.mr.subset_mapper",
                reader_spec="mapreduce.input_readers.DatastoreInputReader",
                mapper_parameters={
                    "entity_kind": "data.subset." + kind,
                    "processing_rate": 500
                },
                mapreduce_parameters={model.MapreduceSpec.PARAM_DONE_CALLBACK: '/subset/mr/callback'},
                shard_count=shards,
                queue_name="default",
            )
            record.mr_id = mapreduce_id
            record.put()
            self.redirect('/subset')
def recordsPage(current_page):
    current_page.frame.pack_forget()
    current_page = Record(window, patientInfoPage)
def fsm_calculate_run_time():
    """
    Fantasm does not give callbacks when it is done. The run time must be
    worked out by another job, using the last modified date on the output
    entities.
    """
    # Get the last job which was run for aggregate/fsm
    q = Record.all()
    q.filter('engine_type =', 'fsm')
    q.filter('benchmark =', 'aggregate')
    q.order('-start')
    results = q.fetch(1)
    # There is a second type of fsm job that has a fan-in state
    q2 = Record.all()
    q2.filter('engine_type =', 'fsm_fan_in')
    q2.filter('benchmark =', 'aggregate')
    q2.order('-start')
    results2 = q2.fetch(1)
    if len(results) == 0 and len(results2) == 0:
        logging.error("Unable to find a record for fsm/aggregate")
        return False
    # Take only the one which ran last
    if len(results) == 0:
        results = results2  # fsm with fan-in ran last
    elif len(results2) == 0:
        pass
    elif results[0].start > results2[0].start:
        pass
    else:
        results = results2  # fsm with fan-in ran last
    q = None
    record = None
    # There should only be one result
    for ii in results:
        if ii.state == "Done":
            logging.error("Last FSM end time has already been calculated")
        logging.info(str(ii.num_entities))
        q = FSMSimpleCounterShard.all()
        if not q:
            logging.error("No query returned for Aggregate results")
            return False
        record = ii
    max_date = None
    while True:
        results = q.fetch(1000)
        for ii in results:
            date = ii.modified
            if max_date is None or max_date < date:
                max_date = date
        if len(results) < 1000:
            break
    if not max_date:
        logging.error("Unable to calculate the max date for FSM/aggregate")
        return False
    record.state = "Done"
    record.end = max_date
    delta = record.end - record.start
    record.total = float(delta.days * 86400 + delta.seconds) + float(delta.microseconds) / 1000000
    record.put()
    return True
def post(self):
    """ Generate data sets here """
    if self.request.get("fsm_cleanup"):
        if fsm_calculate_run_time():
            self.redirect('/grep')
        else:
            self.response.out.write("Error calculating fsm/grep")
        return
    if self.request.get("compute"):
        engine = self.request.get("engine")
        dataset = self.request.get("dataset")
        user = self.request.get('user')
        needle = self.request.get('needle')
        data = GrepDataSet.get_by_key_name(dataset)
        record = Record(engine_type=engine,
                        dataset=dataset,
                        benchmark="grep",
                        num_entities=data.num_entries,
                        # shard_count=data.num_pipelines,
                        entries_per_pipe=data.entries_per_pipe,
                        user=user,
                        char_per_word=data.char_per_word,
                        state="Running")
        if engine == "fsm":
            record.put()
            context = {}
            context['user'] = str(user)
            context['num_entries'] = int(data.num_entries)
            context['needle'] = needle
            fsm.startStateMachine('Grep', [context])
            self.redirect('/grep')
        elif engine == "pipeline":
            mypipeline = GrepPipelineLoop(data.num_entries, needle)
            mypipeline.start()
            record.pipeline_id = mypipeline.pipeline_id
            record.put()
            self.redirect('/grep')
            # self.redirect(mypipeline.base_path + "/status?root=" + mypipeline.pipeline_id)
            return
        elif engine == "mr":
            # Why 1k each per shard or less? is this ideal?
            if data.num_entries > 1000:
                shards = data.num_entries / 1000
                shards = min(256, shards)
            else:
                shards = 1
            kind = getKindString(data.num_entries)
            mapreduce_id = control.start_map(
                name="Grep",
                handler_spec="grep.mr.grep_mapper",
                reader_spec="mapreduce.input_readers.DatastoreInputReader",
                mapper_parameters={
                    "entity_kind": "data.grep." + kind,
                    "processing_rate": 500,
                    "needle": needle,
                },
                mapreduce_parameters={model.MapreduceSpec.PARAM_DONE_CALLBACK: '/grep/mr/callback'},
                shard_count=shards,
                queue_name="default",
            )
            record.mr_id = mapreduce_id
            record.put()
            self.redirect('/grep')
dist = 50
limit = 100
color_list = ("b.", "r.", "g.", "m.", "c.", "y.")

with open('jsondata.json') as f:
    x = json.load(f, object_hook=lambda d: SimpleNamespace(**d))
print(x.features[2].properties.filename)

X = np.zeros((1, 2))
list_rec = []
weight_list = []
for i in range(len(x.features)):
    list_rec.append(Record(i, x.features[i].properties))
    X = np.vstack((X, [list_rec[i].lat, list_rec[i].lon]))
    list_rec[i].calc_weight()
    weight_list.append(list_rec[i].weight)

X = X[1:-1]
weight_list = weight_list[0:-1]
# print(np.unique(cat_list, return_index=True))
# print(x.features[6].properties)

cluster = sklearn.DBSCAN(eps=0.0001, min_samples=100).fit(X, y=None, sample_weight=weight_list)
dumps = []
cluster_count = np.unique(cluster.labels_)
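# A sketch of one way the labels from the DBSCAN fit above could be grouped
# into per-cluster index lists (for example, to fill `dumps`); it assumes the
# `cluster` object from the script above, and relies on scikit-learn marking
# noise points with the label -1.
clusters_by_label = {}
for idx, label in enumerate(cluster.labels_):
    clusters_by_label.setdefault(int(label), []).append(idx)
noise_points = clusters_by_label.pop(-1, [])  # indices DBSCAN treated as noise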