def getPeriodInfo(self, bid, start, period, sample, c=None, total=None):
    """Return aggregated statistics for one sample over a time window.

    Runs the query returned by ``self._get_period_info_query()`` with
    ``bid``, ``start`` and ``start + period`` as parameters.  Unless
    ``sample`` is ``'all'`` (case-insensitive) an ``AND lb = ?`` filter on
    the sample label is appended.

    :param bid: benchmark identifier, first query parameter.
    :param start: start of the window, second query parameter.
    :param period: window length; ``start + period`` is the third query
        parameter and ``period`` divides the count to compute ``tput``.
    :param sample: sample label, or ``'all'`` to skip the label filter.
    :param c: optional cursor to reuse.  When omitted, a cursor is opened
        on ``self.db`` and closed by this method, even if the query fails.
    :param total: optional overall total used to compute ``percent``;
        when ``None`` or falsy, ``percent`` is 100.
    :returns: dict with the keys ``name``, ``count``, ``avgt``, ``maxt``,
        ``mint``, ``stddevt``, ``medt``, ``p10t``, ``p90t``, ``p95t``,
        ``p98t``, ``total``, ``success``, ``tput``, ``filename``,
        ``title``, ``error``, ``success_rate`` and ``percent``.
    """
    own_cursor = c is None
    if own_cursor:
        c = self.db.cursor()
    try:
        query = self._get_period_info_query()
        t = [bid, start, start + period]
        if sample.lower() != 'all':
            t.append(sample)
            query += " AND lb = ?"
        logging.debug(query + str(t))
        c.execute(query, t)
        row = c.fetchone()
    finally:
        # Only close a cursor this method opened; a caller-supplied cursor
        # stays under the caller's control.
        if own_cursor:
            c.close()

    ret = {
        'name': sample,
        'count': row[0],
        'avgt': row[1],
        'maxt': row[2],
        'mint': row[3],
        'stddevt': row[4],
        'medt': row[5],
        'p10t': row[6],
        'p90t': row[7],
        'p95t': row[8],
        'p98t': row[9],
        'total': row[10],
        'success': row[11],
        'tput': row[0] / float(period),
        'filename': str2id(sample),
        # NOTE(review): ``sample | truncate(20)`` looks like template filter
        # syntax; kept unchanged because ``truncate`` is defined elsewhere.
        # Confirm it is the intended expression.
        'title': str2id(sample | truncate(20)),
    }
    ret['error'] = int(ret['count'] - ret['success'])
    ret['success_rate'] = 100.
    if ret['count'] > 0:
        ret['success_rate'] = (100. * ret['success']) / ret['count']
    if not total:
        ret['percent'] = 100.
    else:
        ret['percent'] = ret['total'] * 100. / total
    return ret
def incorporate_row(row, graph):
    """Merge one candidate registration row into ``graph``.

    Looks up the division whose ``dgeqDivisionId`` equals ``int(row[6])``,
    records the candidate's gender, finds the single candidate run matching
    the general election, division and candidate, then attaches the run id
    (``row[1]``), registration date (``row[11]``), party name (``row[3]``)
    and, when ``row[5]`` splits into exactly two parts on ``', '``, the
    registration agent.

    :param row: indexable record; fields 1, 3, 5, 6, 7, 9, 10 and 11 are read.
    :param graph: graph that is queried and extended in place.
    :raises AssertionError: if the lookup query does not return exactly one
        result.
    """
    # change after the next elections
    general_election_id = ns_election['72']

    division_number = int(row[6])
    division_id = next(
        graph.subjects(ns_property.dgeqDivisionId, Literal(division_number))
    )

    lastname = row[9]
    firstname = row[10]
    if row[7] == 'F':
        gender = 'female'
    else:
        gender = 'male'
    candidate_id = ns_candidate[str2id(lastname + '_' + firstname)]
    graph.add((candidate_id, FOAF.gender, Literal(gender)))

    query = (
        "select distinct ?run ?party where { "
        "?elecnode rdf:type type:ProvincialElection . "
        "?elecnode prop:generalElection <%s> . "
        "?elecnode prop:division <%s> . "
        "?run prop:election ?elecnode . "
        "?run prop:runningCandidate <%s> . "
        "?run prop:runningFor ?party . "
        "} "
    ) % (general_election_id, division_id, candidate_id)
    namespaces = {'rdf': RDF, 'prop': ns_property, 'type': ns_type}
    qres = graph.query(query, initNs=namespaces)
    assert len(qres) == 1
    matches = list(qres)
    run_id, party_id = matches[0]

    graph.add((run_id, ns_property.dgeqRunId, Literal(int(row[1]))))
    graph.add((run_id, ns_property.dateRegistered, Literal(row[11])))
    graph.add((party_id, RDF.type, FOAF.Organization))
    graph.add((party_id, FOAF.name, Literal(row[3])))

    agent_name = row[5]
    try:
        agent_last, agent_first = agent_name.split(', ')
        agent_id = ns_agent[str2id(agent_last + '_' + agent_first)]
        save_person(graph, agent_id, agent_first, agent_last)
        graph.add((run_id, ns_property.registrationAgent, agent_id))
    except ValueError:
        # Agent names that do not fit "Last, First" are deliberately skipped.
        pass
def get_graph_from_soup(soup, dgeqid):
    """Build a graph describing one general election from a parsed page.

    The year comes from the first four-digit run in the first
    ``<h2 id="e">`` heading.  Rows of the first ``table.tableau`` are then
    walked in order: a division-name row opens a new per-division election
    node, a summary row (``colspan="4"``) adds that election's vote counts,
    and any other row adds a candidate and a candidate run.  The first
    candidate row after each division row is marked as the winner.

    :param soup: parsed document, callable as ``soup(tag, **attrs)``.
    :param dgeqid: key into ``ns_election`` for the general election node.
    :returns: the populated ``Graph``.
    """
    graph = Graph()

    # extract election year
    heading_text = soup('h2', id='e')[0].get_text()
    year_digits = re.findall(r"\d{4}", heading_text)[0]
    election_year = str2int(year_digits)
    print(election_year)

    results_table = soup('table', class_='tableau')[0]
    rows = results_table('tr')
    current_division = None
    general_election_id = ns_election[dgeqid]
    graph.add((general_election_id, RDF.type, ns_type.ProvincialGeneralElection))
    graph.add((general_election_id, ns_property.year, Literal(election_year)))

    for row in rows:
        lead_cell = row.td
        if not lead_cell:
            # header row, skip
            continue

        if 'circonscription-precedante' in lead_cell.attrs.get('class', ()):
            # division name row
            name = lead_cell.get_text().strip()
            current_division = ns_division[str2id(name)]
            graph.add((current_division, RDF.type, ns_type.ProvincialDivision))
            graph.add((current_division, DC.description, Literal(name)))
            first = True
            election_id = BNode()
            graph.add((election_id, RDF.type, ns_type.ProvincialElection))
            graph.add((election_id, ns_property.generalElection, general_election_id))
            graph.add((election_id, ns_property.division, current_division))
            continue

        if lead_cell.attrs.get('colspan') == '4':
            # summary row: the numbers following each colon, in page order
            summary = lead_cell.get_text().strip()
            matches = re.findall(r"(?<=:)[\s\d]+", summary)
            count_properties = (
                ns_property.validVoteCount,
                ns_property.invalidVoteCount,
                ns_property.totalVoteCount,
                ns_property.admissibleVoterCount,
            )
            for position, count_property in enumerate(count_properties):
                count = str2int(matches[position])
                graph.add((election_id, count_property, Literal(count)))
            continue

        # normal row: "Lastname, Firstname (Party)" followed by a vote count
        cells = row.find_all('td')
        name = cells[0].get_text().strip()
        m = re.match(r"(.+?), (.+) \((.+)\)", name)
        lastname, firstname, party_label = m.group(1, 2, 3)
        party_id = str2party(party_label)
        candidate_id = ns_candidate[str2id(lastname + '_' + firstname)]
        votes = str2int(cells[1].get_text())
        graph.add((candidate_id, RDF.type, ns_type.ProvincialCandidate))
        save_person(graph, candidate_id, firstname, lastname)

        run_id = BNode()
        graph.add((run_id, RDF.type, ns_type.ProvincialCandidateRun))
        graph.add((run_id, ns_property.runningCandidate, candidate_id))
        graph.add((run_id, ns_property.election, election_id))
        graph.add((run_id, ns_property.runningFor, party_id))
        graph.add((run_id, ns_property.voteCount, Literal(votes)))
        if first:
            graph.add((run_id, ns_property.won, Literal(True)))
            first = False

    return graph
def data_generator(self, X_train, Y_train, batch_size):
    """Yield padded training batches forever.

    Pairs from ``X_train`` and ``Y_train`` are converted to id sequences
    with ``str2id`` and accumulated.  Whenever ``batch_size`` pairs are
    collected, ``([X, Y], None)`` is yielded, where ``X`` and ``Y`` are
    ``self.padding`` outputs wrapped in numpy arrays.  An incomplete batch
    at the end of one pass is carried over into the next pass.

    :param X_train: iterable of input strings.
    :param Y_train: iterable of target strings, paired with ``X_train``.
    :param batch_size: number of pairs per yielded batch.
    """
    sources, targets = [], []
    while True:
        for source_text, target_text in zip(X_train, Y_train):
            sources.append(str2id(source_text, self.char2id))
            # Only the title (target) gets start and end markers.
            targets.append(str2id(target_text, self.char2id, start_end=True))
            if len(sources) != batch_size:
                continue
            batch_x = np.array(self.padding(sources))
            batch_y = np.array(self.padding(targets))
            yield [batch_x, batch_y], None
            sources, targets = [], []
def main():
    """Download the division list and serialize it to ``division.rdf``.

    Each line of the downloaded text is split on ``;`` into a numeric
    division id and a division name.  Every division is added to the graph
    with its type, description and ``dgeqDivisionId``.
    """
    graph = Graph()
    source_url = 'http://www.electionsquebec.qc.ca/documents/donnees-ouvertes/Liste_circonscriptions.txt'
    with urlopen(source_url) as response:
        text = response.read().decode('utf-8')

    for record in text.splitlines():
        div_id, div_name = record.split(';')
        division = ns_division[str2id(div_name)]
        graph.add((division, RDF.type, ns_type.ProvincialDivision))
        graph.add((division, DC.description, Literal(div_name)))
        graph.add((division, ns_property.dgeqDivisionId, Literal(int(div_id))))

    graph.serialize('division.rdf')
def predict(self, s, model_weights, topk=3, maxlen=64):
    """Decode an output string for ``s`` using beam search.

    Builds the network, loads ``model_weights`` and expands ``topk`` beams
    one token per step.  Decoding stops as soon as any beam ends with id 3
    (treated as the end marker), returning the best-scoring such beam.
    If that never happens within ``maxlen`` steps, the best-scoring beam
    is returned.

    :param s: input string.
    :param model_weights: argument passed to ``model.load_weights``.
    :param topk: beam width; 1 gives greedy search.
    :param maxlen: maximum number of decoding steps.
    :returns: the result of ``id2str`` on the selected id sequence.
    """
    model = self.network()
    model.load_weights(model_weights)

    # Beam search: only the topk best candidates survive each step.
    xid = np.array([str2id(s, self.char2id)] * topk)  # input as ids
    yid = np.array([[2]] * topk)  # every beam starts with <start>, id 2 here
    scores = [0] * topk  # running score of each beam

    for step in range(maxlen):  # output is forced to at most maxlen tokens
        # Ids 0-2 (<padding>, <unk>, <start>) are dropped from the distribution.
        proba = model.predict([xid, yid])[:, step, 3:]
        log_proba = np.log(proba + 1e-6)  # log scores are summed along a beam
        arg_topk = log_proba.argsort(axis=1)[:, -topk:]  # topk per beam

        # Each candidate is (beam index, row of log_proba/arg_topk, token).
        if step == 0:
            # All beams are identical on the first step, so only row 0 is
            # expanded, giving one candidate per beam.
            expansions = [(j, 0, arg_topk[0][j]) for j in range(topk)]
        else:
            # Every beam is combined with each of its own topk tokens.
            expansions = [
                (j, j, arg_topk[j][k])
                for j in range(topk)
                for k in range(topk)
            ]
        cand_ids = [
            list(yid[beam]) + [token + 3] for beam, _, token in expansions
        ]
        cand_scores = [
            scores[beam] + log_proba[source][token]
            for beam, source, token in expansions
        ]

        # Keep the new topk candidates.
        keep = np.argsort(cand_scores)[-topk:]
        yid = np.array([cand_ids[idx] for idx in keep])
        scores = np.array([cand_scores[idx] for idx in keep])

        ends = np.where(yid[:, -1] == 3)[0]
        if len(ends) > 0:
            winner = ends[scores[ends].argmax()]
            return id2str(yid[winner], self.id2char)

    # No <end> was produced within maxlen steps: return the best beam as is.
    return id2str(yid[np.argmax(scores)], self.id2char)
def getId(self):
    """Return ``str2id`` applied to "<name> <build_number>"."""
    label = "%s %s" % (self.name, self.build_number)
    return str2id(label)
def fetch_build_from_file(self, url):
    """Return the saved body for ``url`` from the ``from_file`` directory.

    The file name is ``str2id`` of the last two non-empty ``/``-separated
    parts of ``url`` joined by a space, plus ``.txt``.  The file is looked
    up in ``self.options.from_file``.

    :param url: build URL whose last two path segments identify the build.
    :returns: the file's lines joined with ``'\\n'``.
    """
    dir_path = self.options.from_file
    segments = [i for i in url.split('/') if i]
    build_id = str2id(' '.join(segments[-2:]))
    file_path = os.path.join(dir_path, build_id + '.txt')
    # The context manager closes the handle even if reading fails.
    with open(file_path) as f:
        lines = f.readlines()
    # NOTE(review): readlines() keeps line endings, so this join doubles
    # every newline.  Kept unchanged because callers may rely on the
    # current text; confirm whether that is intended.
    return '\n'.join(lines)
def buildReport(self, bid):
    """Generate the gnuplot charts and the HTML report for benchmark ``bid``.

    For the aggregated sample and each individual sample, a ``.data`` file
    and a ``.gplot`` script are written to ``self.options.output`` and the
    script is run through ``gnuplot``.  One script is then rendered and run
    per sar host.  Finally ``report.mako`` is rendered to ``index.rst`` and
    converted to ``index.html``.

    :param bid: benchmark identifier passed to the bencher and sar lookups.
    """
    output_dir = self.options.output
    if not os.access(output_dir, os.W_OK):
        os.mkdir(output_dir, 0o775)
    bencher = Bencher.getBencherForBid(self.db, self.options, bid)
    info = bencher.getInfo(bid)

    period = self.period
    def_period = int(ceil(info["duration"] / float(self.width))) * 30
    bars = 4
    plot_type = "linespoints"
    plot_type_avg = "lines"
    if period is None:
        if info["generator"].lower() == "funkload":
            cycle_duration = float(info["extra"]["duration"])
            # 5 plots by cycle:
            period = int(ceil(cycle_duration / 4.0))
            plot_type = "impulses"
            plot_type_avg = "points"
        else:
            period = def_period
    if 2 * period < def_period:
        bars = 2

    params = {
        "dbpath": self.options.database,
        "output_dir": output_dir,
        "start": info["start"][11:19],
        "end": info["end"],
        "bid": bid,
        "ravg": self.options.runningavg,
        "width": self.width,
        "height": self.height,
        "period": period,
        "duration": info["duration"],
        "bars": bars,
        "plot_type": plot_type,
        "plot_type_avg": plot_type_avg,
    }

    for sample in [info["all_samples"]] + info["samples"]:
        name = sample["name"]
        sample_id = str2id(name)
        data = bencher.getIntervalInfo(bid, info["start_stamp"], period, name)
        data_path = os.path.join(output_dir, sample_id + ".data")
        with open(data_path, "w") as f:
            for row in data:
                # Falsy cells (None, 0, "") are written as "0".
                cells = [str(i) if i else "0" for i in row]
                f.write(" ".join(cells) + "\n")
        params["data"] = os.path.basename(data_path)
        params["filter"] = " AND lb = '%s' " % name
        params["title"] = "Sample: " + sample["title"]
        params["filename"] = sample_id
        if name.lower() == "all":
            params["filter"] = ""
            params["title"] = "All"
        script = render_template("sample-gplot.mako", **params)
        script_path = os.path.join(output_dir, sample_id + ".gplot")
        with open(script_path, "w") as f:
            f.write(script)
        gnuplot(script_path)

    sar = Sar(self.db, self.options)
    info.update(sar.getInfo(bid))
    for host in info["sar"]:
        params["host"] = host
        params["filter"] = " AND host = '%s'" % host
        script = render_template("sar-gplot.mako", **params)
        # str.replace returns a new string, so the replacement has to be
        # used when the name is built.  Only the host part is changed, so
        # spaces in output_dir are left alone.
        script_name = "sar-%s.gplot" % host.replace(" ", "-")
        script_path = os.path.join(output_dir, script_name)
        with open(script_path, "w") as f:
            f.write(script)
        gnuplot(script_path)

    report = render_template("report.mako", **info)
    rst_path = os.path.join(output_dir, "index.rst")
    with open(rst_path, "w") as f:
        f.write(report.encode("ascii", "ignore"))
    html_path = os.path.join(output_dir, "index.html")
    generate_html(rst_path, html_path, output_dir)
    logging.info("Report generated: " + html_path)
def buildReport(self, bid):
    """Generate the gnuplot charts and the HTML report for benchmark ``bid``.

    For the aggregated sample and each individual sample, a ``.data`` file
    and a ``.gplot`` script are written to ``self.options.output`` and the
    script is run through ``gnuplot``.  One script is then rendered and run
    per sar host.  Finally ``report.mako`` is rendered to ``index.rst`` and
    converted to ``index.html``.

    :param bid: benchmark identifier passed to the bencher and sar lookups.
    """
    output_dir = self.options.output
    if not os.access(output_dir, os.W_OK):
        os.mkdir(output_dir, 0o775)
    bencher = Bencher.getBencherForBid(self.db, self.options, bid)
    info = bencher.getInfo(bid)

    period = self.period
    def_period = int(ceil(info['duration'] / float(self.width))) * 30
    bars = 4
    plot_type = 'linespoints'
    plot_type_avg = 'lines'
    if period is None:
        if info['generator'].lower() == 'funkload':
            cycle_duration = float(info['extra']['duration'])
            # 5 plots by cycle:
            period = int(ceil(cycle_duration / 4.))
            plot_type = 'impulses'
            plot_type_avg = 'points'
        else:
            period = def_period
    if 2 * period < def_period:
        bars = 2

    params = {
        'dbpath': self.options.database,
        'output_dir': output_dir,
        'start': info['start'][11:19],
        'end': info['end'],
        'bid': bid,
        'ravg': self.options.runningavg,
        'width': self.width,
        'height': self.height,
        'period': period,
        'duration': info['duration'],
        'bars': bars,
        'plot_type': plot_type,
        'plot_type_avg': plot_type_avg,
    }

    for sample in [info['all_samples']] + info['samples']:
        name = sample['name']
        sample_id = str2id(name)
        data = bencher.getIntervalInfo(bid, info['start_stamp'], period, name)
        data_path = os.path.join(output_dir, sample_id + ".data")
        with open(data_path, 'w') as f:
            for row in data:
                # Falsy cells (None, 0, '') are written as '0'.
                cells = [str(i) if i else '0' for i in row]
                f.write(' '.join(cells) + '\n')
        params['data'] = os.path.basename(data_path)
        params['filter'] = " AND lb = '%s' " % name
        params['title'] = "Sample: " + sample['title']
        params['filename'] = sample_id
        if name.lower() == 'all':
            params['filter'] = ''
            params['title'] = "All"
        script = render_template('sample-gplot.mako', **params)
        script_path = os.path.join(output_dir, sample_id + ".gplot")
        with open(script_path, 'w') as f:
            f.write(script)
        gnuplot(script_path)

    sar = Sar(self.db, self.options)
    info.update(sar.getInfo(bid))
    for host in info['sar']:
        params['host'] = host
        params['filter'] = " AND host = '%s'" % host
        script = render_template('sar-gplot.mako', **params)
        # str.replace returns a new string, so the replacement has to be
        # used when the name is built.  Only the host part is changed, so
        # spaces in output_dir are left alone.
        script_name = "sar-%s.gplot" % host.replace(' ', '-')
        script_path = os.path.join(output_dir, script_name)
        with open(script_path, 'w') as f:
            f.write(script)
        gnuplot(script_path)

    report = render_template('report.mako', **info)
    rst_path = os.path.join(output_dir, "index.rst")
    with open(rst_path, 'w') as f:
        f.write(report.encode('ascii', 'ignore'))
    html_path = os.path.join(output_dir, "index.html")
    generate_html(rst_path, html_path, output_dir)
    logging.info('Report generated: ' + html_path)