Example #1
0
 def getPeriodInfo(self, bid, start, period, sample, c=None, total=None):
     """Return aggregated statistics for one sample over a time window.

     Runs the query returned by ``self._get_period_info_query()`` bound to
     ``bid``, ``start`` and ``start + period``.  Unless ``sample`` is
     ``'all'`` (case-insensitive), the query is extended with
     ``AND lb = ?`` bound to ``sample``.

     Args:
         bid: first bound query parameter (presumably the bench id --
             confirm against the query text).
         start: second bound parameter, the beginning of the window.
         period: window length; ``start + period`` is the third bound
             parameter and ``period`` is the divisor for ``tput``.
         sample: sample name, or ``'all'`` to skip the ``lb`` filter.
         c: optional cursor to reuse.  When omitted, a cursor is created
             from ``self.db`` and closed before returning.
         total: optional grand total used for ``percent``; when missing
             or falsy, ``percent`` is 100.

     Returns:
         dict with the keys ``name``, ``count``, ``avgt``, ``maxt``,
         ``mint``, ``stddevt``, ``medt``, ``p10t``, ``p90t``, ``p95t``,
         ``p98t``, ``total``, ``success``, ``tput``, ``filename``,
         ``title``, ``error``, ``success_rate`` and ``percent``.
     """
     owns_cursor = c is None
     if owns_cursor:
         c = self.db.cursor()
     try:
         query = self._get_period_info_query()
         t = [bid, start, start + period]
         if sample.lower() != 'all':
             t.append(sample)
             query += " AND lb = ?"
         logging.debug(query + str(t))
         c.execute(query, t)
         row = c.fetchone()
         ret = {'name': sample, 'count': row[0],
                'avgt': row[1], 'maxt': row[2], 'mint': row[3],
                'stddevt': row[4], 'medt': row[5], 'p10t': row[6],
                'p90t': row[7], 'p95t': row[8], 'p98t': row[9],
                'total': row[10], 'success': row[11],
                'tput': row[0] / float(period),
                'filename': str2id(sample),
                # NOTE(review): relies on ``truncate(20)`` supporting the
                # ``|`` operator with a string on the left -- confirm where
                # ``truncate`` is defined.
                'title': str2id(sample | truncate(20))}
         ret['error'] = int(ret['count'] - ret['success'])
         # With no recorded request there is nothing that failed.
         ret['success_rate'] = 100.
         if ret['count'] > 0:
             ret['success_rate'] = (100. * ret['success']) / ret['count']
         if not total:
             ret['percent'] = 100.
         else:
             ret['percent'] = ret['total'] * 100. / total
     finally:
         # Only close a cursor created here, and do so even when the query
         # or the row processing raises, so it is never leaked.
         if owns_cursor:
             c.close()
     return ret
Example #2
0
def incorporate_row(row, graph):
    """Merge one registration record into ``graph``.

    Reads the division number (``row[6]``), the candidate's gender and names
    (``row[7]``, ``row[9]``, ``row[10]``), the run id (``row[1]``), the party
    name (``row[3]``), the agent name (``row[5]``) and the registration date
    (``row[11]``).  The candidate's run for the hard-coded general election
    is looked up with a SPARQL query and must match exactly one result.

    An agent name that does not split into exactly two parts on ``', '`` is
    silently skipped.
    """
    # Hard-coded general election; change after the next elections.
    election_node = ns_election['72']
    division_number = int(row[6])
    division_node = next(
        graph.subjects(ns_property.dgeqDivisionId, Literal(division_number)))
    cand_last = row[9]
    cand_first = row[10]
    if row[7] == 'F':
        gender_label = 'female'
    else:
        gender_label = 'male'
    candidate_node = ns_candidate[str2id(cand_last + '_' + cand_first)]
    graph.add((candidate_node, FOAF.gender, Literal(gender_label)))

    sparql_template = """select distinct ?run ?party where {
        ?elecnode rdf:type type:ProvincialElection .
        ?elecnode prop:generalElection <%s> .
        ?elecnode prop:division <%s> .
        ?run prop:election ?elecnode .
        ?run prop:runningCandidate <%s> .
        ?run prop:runningFor ?party .
    }
    """
    sparql = sparql_template % (election_node, division_node, candidate_node)
    namespaces = {'rdf': RDF, 'prop': ns_property, 'type': ns_type}
    matches = graph.query(sparql, initNs=namespaces)
    assert len(matches) == 1
    run_node, party_node = list(matches)[0]

    graph.add((run_node, ns_property.dgeqRunId, Literal(int(row[1]))))
    graph.add((run_node, ns_property.dateRegistered, Literal(row[11])))
    graph.add((party_node, RDF.type, FOAF.Organization))
    graph.add((party_node, FOAF.name, Literal(row[3])))

    agent_fullname = row[5]
    try:
        agent_last, agent_first = agent_fullname.split(', ')
        agent_node = ns_agent[str2id(agent_last + '_' + agent_first)]
        save_person(graph, agent_node, agent_first, agent_last)
        graph.add((run_node, ns_property.registrationAgent, agent_node))
    except ValueError:
        # Malformed agent names are deliberately ignored.
        pass
Example #3
0
 def getPeriodInfo(self, bid, start, period, sample, c=None, total=None):
     """Return aggregated statistics for one sample over a time window.

     Runs the query returned by ``self._get_period_info_query()`` bound to
     ``bid``, ``start`` and ``start + period``.  Unless ``sample`` is
     ``'all'`` (case-insensitive), the query is extended with
     ``AND lb = ?`` bound to ``sample``.

     Args:
         bid: first bound query parameter (presumably the bench id --
             confirm against the query text).
         start: second bound parameter, the beginning of the window.
         period: window length; ``start + period`` is the third bound
             parameter and ``period`` is the divisor for ``tput``.
         sample: sample name, or ``'all'`` to skip the ``lb`` filter.
         c: optional cursor to reuse.  When omitted, a cursor is created
             from ``self.db`` and closed before returning.
         total: optional grand total used for ``percent``; when missing
             or falsy, ``percent`` is 100.

     Returns:
         dict with the keys ``name``, ``count``, ``avgt``, ``maxt``,
         ``mint``, ``stddevt``, ``medt``, ``p10t``, ``p90t``, ``p95t``,
         ``p98t``, ``total``, ``success``, ``tput``, ``filename``,
         ``title``, ``error``, ``success_rate`` and ``percent``.
     """
     owns_cursor = c is None
     if owns_cursor:
         c = self.db.cursor()
     try:
         query = self._get_period_info_query()
         t = [bid, start, start + period]
         if sample.lower() != 'all':
             t.append(sample)
             query += " AND lb = ?"
         logging.debug(query + str(t))
         c.execute(query, t)
         row = c.fetchone()
         ret = {'name': sample, 'count': row[0],
                'avgt': row[1], 'maxt': row[2], 'mint': row[3],
                'stddevt': row[4], 'medt': row[5], 'p10t': row[6],
                'p90t': row[7], 'p95t': row[8], 'p98t': row[9],
                'total': row[10], 'success': row[11],
                'tput': row[0] / float(period),
                'filename': str2id(sample),
                # NOTE(review): relies on ``truncate(20)`` supporting the
                # ``|`` operator with a string on the left -- confirm where
                # ``truncate`` is defined.
                'title': str2id(sample | truncate(20))}
         ret['error'] = int(ret['count'] - ret['success'])
         # With no recorded request there is nothing that failed.
         ret['success_rate'] = 100.
         if ret['count'] > 0:
             ret['success_rate'] = (100. * ret['success']) / ret['count']
         if not total:
             ret['percent'] = 100.
         else:
             ret['percent'] = ret['total'] * 100. / total
     finally:
         # Only close a cursor created here, and do so even when the query
         # or the row processing raises, so it is never leaked.
         if owns_cursor:
             c.close()
     return ret
Example #4
0
def get_graph_from_soup(soup, dgeqid):
    """Build an RDF graph from a parsed election results page.

    The election year is the first four-digit number in the ``<h2 id="e">``
    heading.  Rows of the first ``table.tableau`` are then classified as:

    * rows without a ``<td>``: skipped (header rows);
    * rows whose first cell has class ``circonscription-precedante``: start
      a new division and a new election node for it;
    * rows whose first cell has ``colspan="4"``: vote-count summary for the
      current election node;
    * any other row: one candidate run, parsed from
      ``"Lastname, Firstname (Party)"`` plus a vote count.

    The first candidate row after each division row is marked as the winner.

    NOTE: candidate and summary rows rely on a division row having been seen
    earlier in the table.
    """
    graph = Graph()
    # extract election year
    heading_text = soup('h2', id='e')[0].get_text()
    election_year = str2int(re.findall(r"\d{4}", heading_text)[0])
    print(election_year)
    results_table = soup('table', class_='tableau')[0]
    table_rows = results_table('tr')
    current_division = None
    general_election_id = ns_election[dgeqid]
    graph.add((general_election_id, RDF.type, ns_type.ProvincialGeneralElection))
    graph.add((general_election_id, ns_property.year, Literal(election_year)))

    for tr in table_rows:
        lead_cell = tr.td
        if not lead_cell:
            # header row, skip
            continue

        if 'circonscription-precedante' in lead_cell.attrs.get('class', ()):
            # division name row
            division_name = lead_cell.get_text().strip()
            current_division = ns_division[str2id(division_name)]
            graph.add((current_division, RDF.type, ns_type.ProvincialDivision))
            graph.add((current_division, DC.description, Literal(division_name)))
            is_first_candidate = True
            election_id = BNode()
            graph.add((election_id, RDF.type, ns_type.ProvincialElection))
            graph.add((election_id, ns_property.generalElection, general_election_id))
            graph.add((election_id, ns_property.division, current_division))
            continue

        if lead_cell.attrs.get('colspan') == '4':
            # summary row: the numbers following each ':' appear in this order
            summary_text = lead_cell.get_text().strip()
            figures = re.findall(r"(?<=:)[\s\d]+", summary_text)
            summary_predicates = (
                ns_property.validVoteCount,
                ns_property.invalidVoteCount,
                ns_property.totalVoteCount,
                ns_property.admissibleVoterCount,
            )
            for position, predicate in enumerate(summary_predicates):
                graph.add((election_id, predicate,
                           Literal(str2int(figures[position]))))
            continue

        # normal row: one candidate and the votes received
        cells = tr.find_all('td')
        label = cells[0].get_text().strip()
        parsed = re.match(r"(.+?), (.+) \((.+)\)", label)
        lastname = parsed.group(1)
        firstname = parsed.group(2)
        party_id = str2party(parsed.group(3))
        candidate_id = ns_candidate[str2id(lastname + '_' + firstname)]
        vote_count = str2int(cells[1].get_text())
        graph.add((candidate_id, RDF.type, ns_type.ProvincialCandidate))
        save_person(graph, candidate_id, firstname, lastname)
        run_id = BNode()
        graph.add((run_id, RDF.type, ns_type.ProvincialCandidateRun))
        graph.add((run_id, ns_property.runningCandidate, candidate_id))
        graph.add((run_id, ns_property.election, election_id))
        graph.add((run_id, ns_property.runningFor, party_id))
        graph.add((run_id, ns_property.voteCount, Literal(vote_count)))
        if is_first_candidate:
            graph.add((run_id, ns_property.won, Literal(True)))
            is_first_candidate = False
    return graph
Example #5
0
 def data_generator(self, X_train, Y_train, batch_size):
     """Endlessly yield padded ``([X, Y], None)`` training batches.

     Each pair from ``zip(X_train, Y_train)`` is converted with ``str2id``
     using ``self.char2id``; only the target side is encoded with
     ``start_end=True``.  Once ``batch_size`` pairs are collected, both
     lists go through ``self.padding`` and are yielded as numpy arrays.
     A partially filled batch at the end of a pass is carried over into
     the next pass over the data.
     """
     inputs, targets = [], []
     while True:
         for source_text, target_text in zip(X_train, Y_train):
             inputs.append(str2id(source_text, self.char2id))
             # Only the title (target) needs start and end markers.
             targets.append(
                 str2id(target_text, self.char2id, start_end=True))
             if len(inputs) != batch_size:
                 continue
             batch_x = np.array(self.padding(inputs))
             batch_y = np.array(self.padding(targets))
             yield [batch_x, batch_y], None
             inputs, targets = [], []
Example #6
0
def main():
    """Download the division list and serialize it to ``division.rdf``.

    Each line of the downloaded text is split on ``';'`` into a division id
    and a division name.  Every division is added to the graph with its
    type, its name as description and its integer id.
    """
    graph = Graph()
    source_url = 'http://www.electionsquebec.qc.ca/documents/donnees-ouvertes/Liste_circonscriptions.txt'
    with urlopen(source_url) as response:
        text = response.read().decode('utf-8')
    for record in text.splitlines():
        div_id, div_name = record.split(';')
        division_node = ns_division[str2id(div_name)]
        graph.add((division_node, RDF.type, ns_type.ProvincialDivision))
        graph.add((division_node, DC.description, Literal(div_name)))
        graph.add((division_node, ns_property.dgeqDivisionId,
                   Literal(int(div_id))))
    graph.serialize('division.rdf')
Example #7
0
    def predict(self, s, model_weights, topk=3, maxlen=64):
        """Decode an output string for ``s`` with beam search.

        Builds the model with ``self.network()``, loads ``model_weights``
        and decodes at most ``maxlen`` steps, keeping ``topk`` candidates
        per step (``topk=1`` is greedy search).

        Token ids 0-2 are never produced: the probability slice starts at
        id 3, which is why 3 is added back to every selected index.
        Decoding stops as soon as any candidate ends with id 3, which the
        code treats as the end marker.

        Returns:
            ``id2str`` of the best-scoring finished candidate, or of the
            best-scoring candidate overall if none finished in ``maxlen``
            steps.
        """
        model = self.network()
        model.load_weights(model_weights)
        # Beam-search decoding: keep only the topk best candidates at each step; topk=1 is greedy search.

        xid = np.array([str2id(s, self.char2id)] * topk)  # input converted to ids, repeated once per beam
        yid = np.array([[2]] * topk)  # every decoded sequence starts with <start>, whose id is 2 here
        scores = [0] * topk  # candidate scores
        for i in range(maxlen):  # the output is forced to at most maxlen tokens
            proba = model.predict([xid,
                                   yid])[:, i,
                                         3:]  # ignore <padding>, <unk> and <start> outright
            log_proba = np.log(proba + 1e-6)  # work with logs; the 1e-6 avoids log(0)
            arg_topk = log_proba.argsort(axis=1)[:, -topk:]  # topk choices for each beam
            _yid = []  # temporary candidate target sequences
            _scores = []  # temporary candidate scores
            if i == 0:
                # All beams start identical, so only the first row's topk is expanded.
                for j in range(topk):
                    _yid.append(list(yid[j]) + [arg_topk[0][j] + 3])
                    _scores.append(scores[j] + log_proba[0][arg_topk[0][j]])
            else:
                for j in range(topk):
                    for k in range(topk):  # walk the topk*topk combinations
                        _yid.append(list(yid[j]) + [arg_topk[j][k] + 3])
                        _scores.append(scores[j] +
                                       log_proba[j][arg_topk[j][k]])
                _arg_topk = np.argsort(_scores)[-topk:]  # select the new topk among them
                _yid = [_yid[k] for k in _arg_topk]
                _scores = [_scores[k] for k in _arg_topk]
            yid = np.array(_yid)
            scores = np.array(_scores)
            # Candidates whose last token is id 3 are finished.
            ends = np.where(yid[:, -1] == 3)[0]
            if len(ends) > 0:
                k = ends[scores[ends].argmax()]
                return id2str(yid[k], self.id2char)
        # No <end> found within maxlen tokens: return the best candidate as is.
        return id2str(yid[np.argmax(scores)], self.id2char)
Example #8
0
 def getId(self):
     """Return the ``str2id`` identifier of ``"<name> <build_number>"``."""
     label = "%s %s" % (self.name, self.build_number)
     return str2id(label)
Example #9
0
 def fetch_build_from_file(self, url):
     """Return the saved body for ``url`` from the ``from_file`` directory.

     The file name is ``str2id`` of the last two non-empty ``/``-separated
     segments of ``url`` joined by a space, plus ``.txt``; it is looked up
     under ``self.options.from_file``.

     Raises:
         IOError/OSError: if the file cannot be opened.
     """
     dir_path = self.options.from_file
     segments = [part for part in url.split('/') if part]
     build_id = str2id(' '.join(segments[-2:]))
     file_path = os.path.join(dir_path, build_id + '.txt')
     # Use a context manager so the handle is closed instead of leaked.
     with open(file_path) as build_file:
         # readlines() keeps each line terminator, so joining with '\n'
         # doubles the newlines; kept as is because callers may rely on it.
         body = '\n'.join(build_file.readlines())
     return body
Example #10
0
    def buildReport(self, bid):
        """Generate the gnuplot charts and the HTML report for bench ``bid``.

        For the aggregated "all" sample and every individual sample, a
        ``.data`` file and a ``.gplot`` script are written to
        ``self.options.output`` and gnuplot is run on the script.  The same
        is done with the ``sar-gplot.mako`` template for every host found in
        the sar info.  Finally ``report.mako`` is rendered to ``index.rst``
        and converted to ``index.html``.

        Args:
            bid: identifier of the bench to report on.
        """
        output_dir = self.options.output
        if not os.access(output_dir, os.W_OK):
            # 0o775 is the same mode as the old ``0775`` literal, spelled so
            # that both Python 2.6+ and Python 3 can parse it.
            os.mkdir(output_dir, 0o775)
        bencher = Bencher.getBencherForBid(self.db, self.options, bid)
        info = bencher.getInfo(bid)
        period = self.period
        def_period = int(ceil(info["duration"] / float(self.width))) * 30
        bars = 4
        plot_type = "linespoints"
        plot_type_avg = "lines"
        if period is None:
            if info["generator"].lower() == "funkload":
                cycle_duration = float(info["extra"]["duration"])
                # 5 plots by cycle:
                period = int(ceil(cycle_duration / 4.0))
                plot_type = "impulses"
                plot_type_avg = "points"
            else:
                period = def_period
        if 2 * period < def_period:
            bars = 2
        params = {
            "dbpath": self.options.database,
            "output_dir": output_dir,
            # presumably the HH:MM:SS part of the start timestamp -- confirm
            "start": info["start"][11:19],
            "end": info["end"],
            "bid": bid,
            "ravg": self.options.runningavg,
            "width": self.width,
            "height": self.height,
            "period": period,
            "duration": info["duration"],
            "bars": bars,
            "plot_type": plot_type,
            "plot_type_avg": plot_type_avg,
        }

        for sample in [info["all_samples"]] + info["samples"]:
            name = sample["name"]
            data = bencher.getIntervalInfo(bid, info["start_stamp"], period, name)
            data_path = os.path.join(output_dir, str2id(name) + ".data")
            with open(data_path, "w") as f:
                for row in data:
                    # Falsy cells (None, 0, '') are written as "0".
                    fields = [str(i) if i else "0" for i in row]
                    f.write(" ".join(fields) + "\n")
            params["data"] = os.path.basename(data_path)
            params["filter"] = " AND lb = '%s' " % name
            params["title"] = "Sample: " + sample["title"]
            params["filename"] = str2id(name)
            if name.lower() == "all":
                params["filter"] = ""
                params["title"] = "All"
            script = render_template("sample-gplot.mako", **params)
            script_path = os.path.join(output_dir, str2id(name) + ".gplot")
            with open(script_path, "w") as f:
                f.write(script)
            gnuplot(script_path)

        sar = Sar(self.db, self.options)
        info.update(sar.getInfo(bid))
        for host in info["sar"].keys():
            params["host"] = host
            params["filter"] = " AND host = '%s'" % host
            script = render_template("sar-gplot.mako", **params)
            # str.replace returns a new string; the original discarded it, so
            # spaces were never replaced.  Only the file name is rewritten so
            # an output directory containing spaces stays valid.
            script_name = ("sar-%s.gplot" % host).replace(" ", "-")
            script_path = os.path.join(output_dir, script_name)
            with open(script_path, "w") as f:
                f.write(script)
            gnuplot(script_path)

        report = render_template("report.mako", **info)
        rst_path = os.path.join(output_dir, "index.rst")
        with open(rst_path, "w") as f:
            # Non-ASCII characters are dropped from the report.
            f.write(report.encode("ascii", "ignore"))
        html_path = os.path.join(output_dir, "index.html")
        generate_html(rst_path, html_path, output_dir)
        logging.info("Report generated: " + html_path)
Example #11
0
    def buildReport(self, bid):
        """Generate the gnuplot charts and the HTML report for bench ``bid``.

        For the aggregated "all" sample and every individual sample, a
        ``.data`` file and a ``.gplot`` script are written to
        ``self.options.output`` and gnuplot is run on the script.  The same
        is done with the ``sar-gplot.mako`` template for every host found in
        the sar info.  Finally ``report.mako`` is rendered to ``index.rst``
        and converted to ``index.html``.

        Args:
            bid: identifier of the bench to report on.
        """
        output_dir = self.options.output
        if not os.access(output_dir, os.W_OK):
            # 0o775 is the same mode as the old ``0775`` literal, spelled so
            # that both Python 2.6+ and Python 3 can parse it.
            os.mkdir(output_dir, 0o775)
        bencher = Bencher.getBencherForBid(self.db, self.options, bid)
        info = bencher.getInfo(bid)
        period = self.period
        def_period = int(ceil(info['duration'] / float(self.width))) * 30
        bars = 4
        plot_type = 'linespoints'
        plot_type_avg = 'lines'
        if period is None:
            if info['generator'].lower() == 'funkload':
                cycle_duration = float(info['extra']['duration'])
                # 5 plots by cycle:
                period = int(ceil(cycle_duration / 4.))
                plot_type = 'impulses'
                plot_type_avg = 'points'
            else:
                period = def_period
        if 2 * period < def_period:
            bars = 2
        params = {
            'dbpath': self.options.database,
            'output_dir': output_dir,
            # presumably the HH:MM:SS part of the start timestamp -- confirm
            'start': info['start'][11:19],
            'end': info['end'],
            'bid': bid,
            'ravg': self.options.runningavg,
            'width': self.width,
            'height': self.height,
            'period': period,
            'duration': info['duration'],
            'bars': bars,
            'plot_type': plot_type,
            'plot_type_avg': plot_type_avg,
        }

        for sample in [info['all_samples']] + info['samples']:
            name = sample['name']
            data = bencher.getIntervalInfo(bid, info['start_stamp'], period,
                                           name)
            data_path = os.path.join(output_dir, str2id(name) + ".data")
            with open(data_path, 'w') as f:
                for row in data:
                    # Falsy cells (None, 0, '') are written as '0'.
                    fields = [str(i) if i else '0' for i in row]
                    f.write(' '.join(fields) + '\n')
            params['data'] = os.path.basename(data_path)
            params['filter'] = " AND lb = '%s' " % name
            params['title'] = "Sample: " + sample['title']
            params['filename'] = str2id(name)
            if name.lower() == 'all':
                params['filter'] = ''
                params['title'] = "All"
            script = render_template('sample-gplot.mako', **params)
            script_path = os.path.join(output_dir, str2id(name) + ".gplot")
            with open(script_path, 'w') as f:
                f.write(script)
            gnuplot(script_path)

        sar = Sar(self.db, self.options)
        info.update(sar.getInfo(bid))
        for host in info['sar'].keys():
            params['host'] = host
            params['filter'] = " AND host = '%s'" % host
            script = render_template('sar-gplot.mako', **params)
            # str.replace returns a new string; the original discarded it, so
            # spaces were never replaced.  Only the file name is rewritten so
            # an output directory containing spaces stays valid.
            script_name = ("sar-%s.gplot" % host).replace(' ', '-')
            script_path = os.path.join(output_dir, script_name)
            with open(script_path, 'w') as f:
                f.write(script)
            gnuplot(script_path)

        report = render_template('report.mako', **info)
        rst_path = os.path.join(output_dir, "index.rst")
        with open(rst_path, 'w') as f:
            # Non-ASCII characters are dropped from the report.
            f.write(report.encode('ascii', 'ignore'))
        html_path = os.path.join(output_dir, "index.html")
        generate_html(rst_path, html_path, output_dir)
        logging.info('Report generated: ' + html_path)