Example #1
    def display_access(self, message: Dict):
        """
        Parses an access token and model ID from a message and displays them to the user.

        Args:
            message: The message to parse

        Raises:
            KeyError: If the message doesn't contain the "token" and "id" keys

        """

        try:
            self.access_token = message["token"]
            self.model_id = message["id"]

            log.info("Successfully authenticated with the Sybl system")
            # log.info(f"\tACCESS TOKEN: {self.access_token}")
            # log.info(f"\tMODEL ID: {self.model_id}")

            log.info(
                "Please go to https://sybl.tech/models to unlock your new model"
            )
        except KeyError:
            log.error(f"Expected 'token' and 'id' keys but got data={message}")
        finally:

            self.stream.close()
Example #2
    def authenticate_challenge(self, message: Dict[Any, Any]):
        """
        Authenticates a challenge message and responds to the requestor.

        Args:
            message: The challenge message itself

        Raises:
            KeyError: If the user does not have their private key in their
            environment

        """

        challenge = message["challenge"]
        log.info("Authenticating a challenge from the server")

        try:
            signed_challenge = sign_challenge(base64.b64decode(challenge),
                                              self.private_key)

            message = {
                "ChallengeResponse": {
                    "email": self.email,
                    "model_name": self.model_name,
                    "response":
                    base64.b64encode(signed_challenge).decode("utf-8"),
                }
            }

            self._send_message(message)
        except KeyError:
            log.error("Failed to find the private key in the environment")
def get_dates(leg, sess):
    entries = []
    try:
        browser.visit(URL_FORMATTER % (leg, sess))
    except splinter.request_handler.status_code.HttpResponseError:
        return entries

    soup = BeautifulSoup(browser.html, 'html.parser')
    rows = soup.find('div', id="painelNumeros").find_all('tr')
    for row in rows:
        cols = row.find_all('td')
        if not cols:
            continue
        entry = OrderedDict()
        entry['leg'] = leg
        entry['sess'] = sess
        try:
            num = entry['num'] = int(cols[0].text.strip().split(" ")[-1])
        except ValueError:
            num = entry['num'] = cols[0].text.strip().split(" ")[-1]
        entry['date'] = cols[1].text.strip()
        entry['pages'] = int(cols[2].text.strip())
        entry['democratica_url'] = "http://demo.cratica.org/sessoes/%d/%d/%s/" % (leg, sess, str(num))
        entry['debates_url'] = "http://debates.parlamento.pt" + cols[0].find("a")['href']
        entries.append(entry)
    log.info("Parsed %d entries!" % len(entries))
    return entries
def get_dates(leg, sess):
    browser.visit(BASE_URL)
    browser.select('ctl00$ContentPlaceHolder1$IndexDiaries1$rpSearch$ctl00$ddlLegislature', 'l%02d' % leg)
    browser.select('ctl00$ContentPlaceHolder1$IndexDiaries1$rpSearch$ctl00$ddlSession', 'sl%d' % sess)
    browser.find_by_name("ctl00$ContentPlaceHolder1$IndexDiaries1$bttSearch").first.click()
    entries = []
    if browser.is_text_present(u"Não foram encontrados diários", wait_time=7):
        # this legislature/session combination does not exist
        return entries

    soup = BeautifulSoup(browser.html)
    rows = soup.find_all('tr', attrs={"class": ["resultseven", "resultsodd"]})
    for row in rows:
        cols = row.find_all('td')
        entry = OrderedDict()
        entry['leg'] = leg
        entry['sess'] = sess
        try:
            entry['num'] = int(cols[0].find("a").text)
        except ValueError:
            entry['num'] = cols[0].find("a").text
        entry['date'] = cols[2].text
        entry['pub_date'] = cols[1].text
        entry['page_start'] = int(cols[3].text.split('-')[0])
        entry['page_end'] = int(cols[3].text.split('-')[1])
        entries.append(entry)
    log.info("Parsed %d entries!" % len(entries))
    return entries
Example #5
def get_dates(leg, sess):
    browser.visit(BASE_URL)
    browser.select(
        'ctl00$ContentPlaceHolder1$IndexDiaries1$rpSearch$ctl00$ddlLegislature',
        'l%02d' % leg)
    browser.select(
        'ctl00$ContentPlaceHolder1$IndexDiaries1$rpSearch$ctl00$ddlSession',
        'sl%d' % sess)
    browser.find_by_name(
        "ctl00$ContentPlaceHolder1$IndexDiaries1$bttSearch").first.click()
    entries = []
    if browser.is_text_present(u"Não foram encontrados diários", wait_time=7):
        # this legislature/session combination does not exist
        return entries

    soup = BeautifulSoup(browser.html)
    rows = soup.find_all('tr', attrs={"class": ["resultseven", "resultsodd"]})
    for row in rows:
        cols = row.find_all('td')
        entry = OrderedDict()
        entry['leg'] = leg
        entry['sess'] = sess
        try:
            entry['num'] = int(cols[0].find("a").text)
        except ValueError:
            entry['num'] = cols[0].find("a").text
        entry['date'] = cols[2].text
        entry['pub_date'] = cols[1].text
        entry['page_start'] = int(cols[3].text.split('-')[0])
        entry['page_end'] = int(cols[3].text.split('-')[1])
        entries.append(entry)
    log.info("Parsed %d entries!" % len(entries))
    return entries
Example #6
def main():
    parser = argparse.ArgumentParser(description='Cert generation tool', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--working', help='certificates working directory (all files will be created inside)', default='cert')
    parser.add_argument('--package', help='certificates package directory (all required server files will be copied inside)')
    parser.add_argument('--package-mode', help='contents of the package', choices=('all', 'node'), default='all')
    parser.add_argument('--ca', help='path to key PEM file that will be used as a CA key', default='ca.key.pem')
    parser.add_argument('--name-ca', help='use provided name as CA CN (common name) - suggested: account name')
    parser.add_argument('--name-node', help='use provided name as node CN (common name) - suggested: node name, host or ip')
    parser.add_argument('--network', help='network to use when using autogenerated ca names', default='mainnet')
    parser.add_argument('--force', help='overwrite output directory if it already exists', action='store_true')
    args = parser.parse_args()

    check_openssl_version()

    # obtain full paths prior to switching directory
    ca_path = Path(args.ca).absolute()

    if args.package:
        package_path = Path(args.package).absolute()
        prepare_directory(args.package, args.force)

    os.chdir(prepare_directory(args.working, args.force))

    log.info('preparing configuration files')
    if args.name_ca:
        ca_cn = args.name_ca
    elif '.pem' == ca_path.suffix:
        main_private_key = PrivateKeyStorage(ca_path.parent).load(ca_path.stem)
        main_public_key = KeyPair(main_private_key).public_key

        network = NetworkLocator.find_by_name(Network.NETWORKS, args.network)
        main_address = network.public_key_to_address(main_public_key)
        ca_cn = str(main_address)
    else:
        ca_cn = get_common_name(args.name_ca, 'CA common name')

    node_cn = get_common_name(args.name_node, 'node common name')

    log.info(f' *   CA common name: {ca_cn}')
    log.info(f' * Node common name: {node_cn}')

    prepare_ca_config(ca_path, ca_cn)
    prepare_node_config(node_cn)

    openssl_prepare_keys(ca_path)
    openssl_prepare_certs(ca_path)

    log.info(f'certificates generated in {args.working} directory')

    if args.package:
        package_filenames = ['node.crt.pem', 'node.key.pem']
        if 'all' == args.package_mode:
            package_filenames += ['ca.pubkey.pem', 'ca.crt.pem']

        for filename in package_filenames:
            destination_path = package_path / filename
            shutil.copyfile(filename, destination_path)
            os.chmod(destination_path, 0o400)

        log.info(f'certificates packaged in {args.package} directory')
def get_dates(leg, sess):
    entries = []
    try:
        browser.visit(URL_FORMATTER % (leg, sess))
    except splinter.request_handler.status_code.HttpResponseError:
        return entries

    soup = BeautifulSoup(browser.html)
    rows = soup.find('div', id="painelNumeros").find_all('tr')
    for row in rows:
        cols = row.find_all('td')
        if not cols:
            continue
        entry = OrderedDict()
        entry['leg'] = leg
        entry['sess'] = sess
        try:
            num = entry['num'] = int(cols[0].text.strip().split(" ")[-1])
        except ValueError:
            num = entry['num'] = cols[0].text.strip().split(" ")[-1]
        entry['date'] = cols[1].text.strip()
        entry['pages'] = int(cols[2].text.strip())
        entry['democratica_url'] = "http://demo.cratica.org/sessoes/%d/%d/%s/" % (leg, sess, str(num))
        entry['debates_url'] = "http://debates.parlamento.pt" + cols[0].find("a")['href']
        entries.append(entry)
    log.info("Parsed %d entries!" % len(entries))
    return entries
Example #8
def main(format, start, end, verbose, outfile, separate, indent, clear_cache, processes):
    if not outfile and format == "csv":
        outfile = "iniciativas.csv"
    elif not outfile and format == "json":
        outfile = "iniciativas.json"
    if clear_cache:
        log.info("Clearing old cache...")
        shutil.rmtree("cache/")

    scrape(format, start, end, verbose, outfile, separate, indent, processes)
    def get_db(view):
        db = getattr(g, '_database', None)
        if db is None:
            log.error("creating for the first time")
            couch = couchdb.Server(app.config['COUCHDB_SERVER'])
            couch.resource.credentials = (app.config['COUCHDB_USER'], app.config['COUCHDB_PASSWORD'])
            db = g._database = couch[app.config['COUCHDB_DATABASE']]
        else:
            log.info('nothing to do here, flies away')
        return db
def main(format, start, end, verbose, outfile, separate, indent, clear_cache, processes):
    if not outfile and format == "csv":
        outfile = "iniciativas.csv"
    elif not outfile and format == "json":
        outfile = "iniciativas.json"
    if clear_cache:
        log.info("Clearing old cache...")
        shutil.rmtree("cache/")

    scrape(format, start, end, verbose, outfile, separate, indent, processes)
    def check_warning_lights_on(self):
        warning_light = self.topics.get("*/track/0/warning_light/0")

        if warning_light.payload == "1":
            log.info("Track sequence OK. Warning lights are on.")
            return True
        else:
            log.error(
                "Track Sequence ERROR! Payload not matching expected state: Warning lights are not on!"
            )
            return False
Example #12
    def save_values_in_file(self, data_list, min_value, min_names, boltz_list,
                            boltz_final):

        time = datetime.datetime.now()
        file_name = ('min_energy_table-{}.out'.format(
            time.strftime("%d-%m-%y_%H:%M")))
        x = PrettyTable()
        x.field_names = ['Molecule Name', 'Energy', '% in equilibrium']
        energy_list = [molecule['energy'] for molecule in data_list]
        name_list = [molecule['name'] for molecule in data_list]

        # Only leaves one energy minimum in energy_list
        no1_count = 0
        for count, number in enumerate(energy_list):
            if number == 0 and no1_count > 0:
                del boltz_list[count]
                no1_count += 1
                deleted_index = count
            elif number == 0:
                no1_count += 1

        result_dict_list = []
        for name in name_list:
            molecule_dict = {}
            molecule_dict['name'] = name
            result_dict_list.append(molecule_dict)

        count_min = 0
        for molecule in result_dict_list:
            for energy in energy_list:
                molecule['energy'] = energy
                energy_list.remove(energy)
                count_min += 1
                break

        boltz_final.insert(deleted_index, 0.0)
        for molecule in result_dict_list:
            for percent in boltz_final:
                molecule['percent'] = percent
                boltz_final.remove(percent)
                break

        for molecule in result_dict_list:
            x.add_row(
                [molecule['name'], molecule['energy'], molecule['percent']])

        table_title = ('Relative energies in kcal/mol')
        print(x.get_string(title=table_title))
        with open(file_name, 'w+') as f:
            f.write('Minimum energy: {}\n'.format(min_value))
            f.write(str(x.get_string(title=table_title)))
        log.info('Data correctly saved as \'{}\' in \'{}\''.format(
            file_name, os.getcwd()))
        return str(x)
Example #13
def test_output():
    # All of these just need to output without errors.
    from zenlog import log
    log.debug("A quirky message only developers care about")
    log.info("Curious users might want to know this")
    log.warn("Something is wrong and any user should be informed")
    log.warning("Something is wrong and any user should be informed")
    log.error("Serious stuff, this is red for a reason")
    log.critical("OH NO everything is on fire")
    log.c("OH NO everything is on fire")
    log.crit("OH NO everything is on fire")
Example #14
def test_output():
    # All of these just need to output without errors.
    from zenlog import log
    log.debug("A quirky message only developers care about")
    log.info("Curious users might want to know this")
    log.warn("Something is wrong and any user should be informed")
    log.warning("Something is wrong and any user should be informed")
    log.error("Serious stuff, this is red for a reason")
    log.critical("OH NO everything is on fire")
    log.c("OH NO everything is on fire")
    log.crit("OH NO everything is on fire")
Example #15
    def _connect_to_sock(self):
        """
        Connects to the DCL for communications.
        """
        try:
            self._sock.connect(self._address)
        except ConnectionRefusedError:
            log.error(f"Could not connect to address: {self._address[0]}")
            sys.exit(1)

        log.info(f"Successfully connected to {self._address}")
def get_dates(leg, sess):
    browser.visit(BASE_URL)

    leg_box = browser.find_by_id('ctl00_ctl43_g_322eea22_ecb3_49d3_aa7c_3a66576bec2e_ctl00_ddlLegislatura').first
    options = leg_box.find_by_tag('option')
    idx = LAST_LEG - leg
    browser.select("ctl00$ctl43$g_322eea22_ecb3_49d3_aa7c_3a66576bec2e$ctl00$ddlLegislatura", options[idx].value)
    print("selected")
    sleep(2)
    if browser.is_text_not_present("A carregar", wait_time=10):
        print("Loaded!")
        pass
    else:
        assert False

    sess_box = browser.find_by_id('ctl00_ctl43_g_322eea22_ecb3_49d3_aa7c_3a66576bec2e_ctl00_ddlSessaoLegislativa').first
    options = sess_box.find_by_tag('option')
    idx = len(options) - sess
    browser.select('ctl00$ctl43$g_322eea22_ecb3_49d3_aa7c_3a66576bec2e$ctl00$ddlSessaoLegislativa', options[idx].value)
    print("selected")
    sleep(2)
    if browser.is_text_not_present("A carregar", wait_time=10):
        print("Loaded!")
        pass
    else:
        assert False

    entries = []
    if browser.is_text_present(u"Não foram encontrados diários", wait_time=7):
        # this legislature/session combination does not exist
        return entries

    soup = BeautifulSoup(browser.html, 'html.parser')
    rows = soup.find_all('tr', attrs={"class": ["ARTabResultadosLinhaImpar", "ARTabResultadosLinhaPar"]})
    rows.reverse()
    for row in rows:
        cols = row.find_all('td')
        entry = OrderedDict()
        entry['leg'] = leg
        entry['sess'] = sess
        try:
            entry['num'] = int(cols[0].find("a").text.split(" ")[-1])
        except ValueError:
            entry['num'] = cols[0].find("a").text.split(" ")[-1]
        if entry['num'] in ("Z", "Sumários"):
            # skip summary entries ("Sumários")
            continue
        entry['date'] = cols[1].text.strip()
        entry['pub_date'] = cols[1].text.strip()
        entry['published_date'] = cols[2].text.strip()
        entry['notes'] = cols[3].text.strip()
        entries.append(entry)
    log.info("Parsed %d entries!" % len(entries))
    return entries
def create_api(packages):
    '''Generates a static API containing all the datapackage.json of the containing datasets.
    Accepts a list of pkg_info dicts, which are generated with the
    process_datapackage function.'''
    all_metadata = []
    for pkg_info in packages:
        pkg_dir = os.path.join(repo_dir, pkg_info['name'])
        all_metadata.append(json.loads(open(os.path.join(pkg_dir, "datapackage.json")).read()))
    with open(os.path.join(output_dir, 'api.json'), 'w') as api_file:
        json.dump(all_metadata, api_file)
    log.info("Created api.json.")
Example #18
    def __set__(self, instance, epoint):
        logger.info('Setting {} = {}'.format(instance, epoint))
        entrypoint = pkg.EntryPoint.parse("{}={}".format(
                self.name, epoint
        ))

        try:
            setattr(instance, self.name, entrypoint.resolve())            
        except ImportError as e:
            raise AttributeError(
                "EntryPoint %s(%s) does not exist" % (self.name, epoint)
            )
    def check_train_passed(self):
        train_light_east = self.topics.get("*/track/0/train_light/0")
        train_light_west = self.topics.get("*/track/0/train_light/1")

        if train_light_east.payload == "0" and train_light_west.payload == "0":
            log.info("Track sequence OK. Train lights are red.")
            return True
        else:
            log.error(
                "Track Sequence ERROR! Payload not matching expected state: Train lights are still green!"
            )
            return False
Example #20
    def feed_topic(self):
        for topic in self.topics:
            log.info("Writing to topic: " + str(topic) + " with value: " +
                     str(self.enum_index.value))
            self.client.publish(topic, self.enum_index.value)

        next_exec = self.determine_interval()
        threading.Timer(next_exec, self.feed_topic).start()
        self.set_next_value()

        log.info("Setting traffic light to " + str(self.enum_index) + " in " +
                 str(next_exec) + " seconds")
def index():
    db = CouchdbUtils().get_db()
    map_fun = '''function(doc) {
        if(doc.type=="post"){
            emit(doc.type, doc);
        }
    }'''
    results = db.query(map_fun)
    docs = []
    for body in results:
        docs.append(body.value)
    log.info("length:"+len(docs).__str__())
    return render_template('index.html', documents=docs)
def save_json_cache(data: Any,
                    file_name: Union[Path, str],
                    base_path: Union[None, Path, str] = None):
    if base_path is None:
        base_path = Path(__file__).parent.joinpath('data', 'cache')
    else:
        base_path = Path(base_path)
    base_path.mkdir(parents=True, exist_ok=True)

    path_to_write = base_path.joinpath(file_name)
    with path_to_write.open(mode='w+') as fp:
        log.info(f'Writing file {path_to_write}')
        json.dump(data, fp, sort_keys=True, separators=(',', ':'))
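A brief usage sketch (file names and payloads are made up for illustration):

# Writes compact JSON under <module dir>/data/cache/ by default...
save_json_cache({'answer': 42}, 'example.json')
# ...or under an explicit directory.
save_json_cache([1, 2, 3], 'numbers.json', base_path='/tmp/json-cache')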
Example #23
def create_index_page(packages):
    '''Generates the index page with the list of available packages.
    Accepts a list of pkg_info dicts, which are generated with the
    process_datapackage function.'''
    template = env.get_template("list.html")
    target = "index.html"
    datapackages = [p['name'] for p in packages]
    welcome_text = markdown.markdown(codecs.open("content/welcome_text.md", 'r', 'utf-8').read(), output_format="html5", encoding="UTF-8")
    contents = template.render(datapackages=packages, welcome_text=welcome_text)
    f = codecs.open(os.path.join(output_dir, target), 'w', 'utf-8')
    f.write(contents)
    f.close()
    log.info("Created index.html.")
def getpage(url):
    if not os.path.exists('cache'):
        log.info('Creating new cache/ folder.')
        os.mkdir('cache')
    url_hash = str(hash(url))  # hash() returns an int; str() lets it join the cache path
    cache_file = 'cache/' + url_hash

    if os.path.exists(cache_file):
        # log.debug("Cache hit for %s" % url)
        page = file_get_contents(cache_file)
    else:
        # log.debug("Cache miss for %s" % url)
        page = urllib.urlopen(url).read()
        file_put_contents(cache_file, page, utf8=True)
    return page
Example #25
def getpage(url):
    if not os.path.exists('cache'):
        log.info('Creating new cache/ folder.')
        os.mkdir('cache')
    url_hash = str(hash(url))  # hash() returns an int; str() lets it join the cache path
    cache_file = 'cache/' + url_hash

    if os.path.exists(cache_file):
        log.debug("Cache hit for %s" % url)
        page = file_get_contents(cache_file)
    else:
        log.debug("Cache miss for %s" % url)
        page = urllib.urlopen(url).read()
        file_put_contents(cache_file, page, utf8=True)
    return page
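Note that the built-in hash() is salted per interpreter run on Python 3, so it makes an unstable on-disk cache key. A variant sketch using hashlib (assuming the same file_get_contents/file_put_contents helpers) could be:

import hashlib
import os
import urllib

def getpage_hashed(url):
    # Same caching idea, but with a stable hexadecimal MD5 key instead of hash().
    if not os.path.exists('cache'):
        os.mkdir('cache')
    cache_file = os.path.join('cache', hashlib.md5(url.encode('utf-8')).hexdigest())
    if os.path.exists(cache_file):
        return file_get_contents(cache_file)
    page = urllib.urlopen(url).read()  # urllib.request.urlopen on Python 3
    file_put_contents(cache_file, page, utf8=True)
    return page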
Example #26
def trace_vars(vars):
    if isinstance(vars, dict):
        for k, v in vars.iteritems():
            logger.info("Trace argument key: %s" % k)
            trace_vars(v)
    elif isinstance(vars, list) or isinstance(vars, tuple):
        for v in vars:
            logger.info("Trace argument")
            trace_vars(v)
    elif isinstance(vars, basestring):
        logger.info("Value: %s" % vars)

    else:
        logger.info("Unknown type")
        logger.info("Value: {}".format(vars))
Example #27
def impute_and_scale(x):
    if np.count_nonzero(np.isnan(x)) > 0:
        log.info('Imputing NaN values')
        si = SimpleImputer(missing_values=np.nan, strategy='mean')
        x = si.fit_transform(x)

    if np.count_nonzero(np.isnan(x)) > 0:
        si = SimpleImputer(missing_values=np.nan,
                           strategy='constant',
                           fill_value=0)
        x = si.fit_transform(x)

    sc = StandardScaler().fit(x)
    x = sc.transform(x)
    return x, sc
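A quick usage sketch with a toy matrix (values are illustrative; assumes the numpy and sklearn imports used by the snippet):

import numpy as np

# One NaN in the first column; after the call every column has mean 0 and unit variance.
x = np.array([[1.0, 2.0], [np.nan, 4.0], [3.0, 6.0]])
x_scaled, scaler = impute_and_scale(x)
print(x_scaled.mean(axis=0))  # approximately [0. 0.]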
Example #28
    def __call__(self, *args, **kwargs):
        logger.info('Starting timing for %s' % self.id)
        self.timer = 0
        self.start_time = self.timer
        try:
            self.result = self.run(*args, **kwargs)

            return self.result
        except Exception as e:
            logger.critical('Critical ({})'.format(e))
            raise
        finally:
            self.end_time = self.timer
            logger.info('Stopping timer. Performance = %d' %
                        ((self.end_time - self.start_time) * 1.000))
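As written, self.timer never advances, so the logged performance is always zero. A sketch of the same wrapper using time.perf_counter() (the host class name is hypothetical; logger, self.id and self.run are assumptions carried over from the snippet) could be:

import time

class TimedTask:  # hypothetical host class; only the timing pattern matters
    def __call__(self, *args, **kwargs):
        logger.info('Starting timing for %s' % self.id)
        start_time = time.perf_counter()
        try:
            self.result = self.run(*args, **kwargs)
            return self.result
        except Exception as e:
            logger.critical('Critical ({})'.format(e))
            raise
        finally:
            elapsed = time.perf_counter() - start_time
            logger.info('Stopping timer. Elapsed = %.3f seconds' % elapsed)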
Example #29
def process_dep(i):
    log.debug("Trying ID %d..." % i)

    url = FORMATTER_URL_IL % i
    soup = BeautifulSoup(getpage(url), "lxml")
    title = soup.find('span', id=RE_TITLE)
    if title:
        summary = soup.find('span', id=RE_SUMMARY)
        doc_url = soup.find('a', id=RE_DOCLINK)
        pdf_url = soup.find('a', id=RE_PDFLINK)
        eventdates = soup.findAll('span', id=RE_EVENTDATE)
        eventtypes = soup.findAll('span', id=RE_EVENTTYPE)
        eventinfos = soup.findAll('div', id=RE_EVENTINFO)
        dist_date = soup.find('span', id=RE_DISTDATE)
        authors = soup.findAll('a', id=RE_AUTHOR)
        parlgroup = soup.find('span', id=RE_PARLGROUP)

        row = {'title': title.text,
               'summary': summary.text,
               'id': i,
               'url': url,
               'authors': [a.text for a in authors]}

        if doc_url:
            row['doc_url'] = doc_url['href']
        if pdf_url:
            row['pdf_url'] = pdf_url['href']
        if dist_date:
            row['dist_date'] = dist_date.text
        if parlgroup:
            row['parlgroup'] = parlgroup.text

        for index, eventdate in enumerate(eventdates):
            event = {'date': eventdate.text}
            event['type'] = eventtypes[index].text.strip()
            info = eventinfos[index]
            if info.text:
                # TODO: process this information
                event = parse_event_info(event, info)
            if not row.get('events'):
                row['events'] = []
            row['events'].append(event)

        log.info("Scraped initiative: %s" % title.text)

        return row
    else:
        return None
def process_dep(i):
    log.debug("Trying ID %d..." % i)

    url = FORMATTER_URL_IL % i
    soup = BeautifulSoup(getpage(url), "lxml")
    title = soup.find('span', id=RE_TITLE)
    if title:
        summary = soup.find('span', id=RE_SUMMARY)
        doc_url = soup.find('a', id=RE_DOCLINK)
        pdf_url = soup.find('a', id=RE_PDFLINK)
        eventdates = soup.findAll('span', id=RE_EVENTDATE)
        eventtypes = soup.findAll('span', id=RE_EVENTTYPE)
        eventinfos = soup.findAll('div', id=RE_EVENTINFO)
        dist_date = soup.find('span', id=RE_DISTDATE)
        authors = soup.findAll('a', id=RE_AUTHOR)
        parlgroup = soup.find('span', id=RE_PARLGROUP)

        row = {'title': title.text,
               'summary': summary.text,
               'id': i,
               'url': url,
               'authors': [a.text for a in authors]}

        if doc_url:
            row['doc_url'] = doc_url['href']
        if pdf_url:
            row['pdf_url'] = pdf_url['href']
        if dist_date:
            row['dist_date'] = dist_date.text
        if parlgroup:
            row['parlgroup'] = parlgroup.text

        for index, eventdate in enumerate(eventdates):
            event = {'date': eventdate.text}
            event['type'] = eventtypes[index].text.strip()
            info = eventinfos[index]
            if info.text:
                # TODO: process this information
                event = parse_event_info(event, info)
            if not row.get('events'):
                row['events'] = []
            row['events'].append(event)

        log.info("Scraped initiative: %s" % title.text)

        return row
    else:
        return None
Example #31
def generate_voting_key_file(filepath, start_epoch, epoch_range):
    key_pair = KeyPair(PrivateKey.random())
    voting_keys_generator = VotingKeysGenerator(key_pair)
    end_epoch = start_epoch + epoch_range
    voting_key_buffer = voting_keys_generator.generate(start_epoch, end_epoch)
    log.info(f'voting key start epoch: {start_epoch}, end epoch: {end_epoch}')
    log.info(f'voting key root public key: {key_pair.public_key}')

    # create the file
    with open(filepath, 'wb') as output_file:
        pass

    os.chmod(filepath, stat.S_IRUSR + stat.S_IWUSR)

    with open(filepath, 'w+b') as output_file:
        output_file.write(voting_key_buffer)
Example #32
def openssl_prepare_keys(ca_path):
    log.info('creating ca.pubkey.pem')
    run_openssl([
        'pkey',
        '-in', ca_path,
        '-out', 'ca.pubkey.pem',
        '-pubout'
    ])

    log.info('creating random node.key.pem')
    run_openssl([
        'genpkey',
        '-out', 'node.key.pem',
        '-outform', 'PEM',
        '-algorithm', 'ed25519'
    ])
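run_openssl is not shown in the snippet; a minimal guess at such a helper, built on subprocess, might be:

import subprocess

def run_openssl(args):
    # Invoke the openssl CLI with the given arguments, raising if it exits non-zero.
    subprocess.run(['openssl'] + [str(arg) for arg in args], check=True)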
Example #33
    def _update_all(self):
        default_args = self.get_default_args()
        for manga in self.db.get_all():  # type Manga
            self.log() and log.info('Update %s', manga.url)
            _args = default_args.copy()
            data = json.loads(manga.data)
            data_args = data.get('args', {})
            del data_args['rewrite_exists_archives']
            del data_args['user_agent']
            del data_args['url']

            if not fs.is_dir(
                    fs.path_join(data_args['destination'], data_args['name'])):
                self.log() and log.warn('Destination does not exist. Skipping.')
                continue

            _args.update({  # re-init args
                'url': manga.url,
                **data_args,
            })
            provider = self._get_provider(_args)
            if provider:
                provider = provider()  # type: Provider
                provider.before_provider(_args)
                provider.http.cookies = data.get('cookies')
                provider.http.ua = data.get('browser')
                provider.run(_args)
                provider.after_provider()
                provider.update_db()
                self.global_info.add_info(info)
    def move_voting_key_file(self):
        destination_directory = self.dir / 'votingkeys'
        if not destination_directory.is_dir():
            os.makedirs(destination_directory)

        matching_names = {}
        for filepath in Path('.').glob('private_key_tree*.dat'):
            match = re.match(r'private_key_tree(.*)\.dat', filepath.name)
            if match:
                matching_names[int(match.group(1))] = filepath

        for _, filepath in sorted(matching_names.items()):
            free_id = self.find_next_free_id()
            destination_filename = f'private_key_tree{free_id}.dat'
            destination_filepath = destination_directory / destination_filename
            shutil.move(filepath, destination_filepath)
            log.info(f'moving {filepath} -> {destination_filepath}')
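find_next_free_id is assumed by the snippet; a hypothetical implementation that scans the votingkeys directory for the lowest unused index might look like:

    def find_next_free_id(self):
        # Hypothetical helper: lowest integer suffix not yet used under <dir>/votingkeys.
        destination_directory = self.dir / 'votingkeys'
        used_ids = set()
        for filepath in destination_directory.glob('private_key_tree*.dat'):
            match = re.match(r'private_key_tree(\d+)\.dat', filepath.name)
            if match:
                used_ids.add(int(match.group(1)))
        free_id = 1
        while free_id in used_ids:
            free_id += 1
        return free_id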
Example #35
    def _message_control(self) -> None:

        response: Dict = self._read_message()
        log.debug("HEARTBEAT")

        if "Alive" in response.keys():
            # Write it back
            self._state = State.HEARTBEAT
            self._send_message(response)
        elif "JobConfig" in response.keys():
            log.info("RECIEVED JOB CONFIG")
            self._state = State.READ_JOB
            self._message_stack.append(response)
        elif "Dataset" in response.keys():
            log.info("RECIEVED DATASET")
            self._state = State.PROCESSING
            self._message_stack.append(response)
Example #36
    def __init__(self, id, sql, database, user, depends=[], options={}):
        super(OracleSQL, self).__init__(id, options)
        logger.info("Setting up Oracle component <%s>" % id)
        self.sql = sql
        self.depends = depends
        self.database = database
        self.user = user

        if options.get('cached'):
            verbose = 1 if options.get('verbose_caching') else 0

            self.mem = Memory(cachedir='/tmp/', verbose=verbose)
            self.run = self.mem.cache(self.run)

        pwd = "RqDERHMz3H2lxn0K7uqb"
        connstring = 'oracle+cx_oracle://{user}:{pwd}@{database}'.format(
            user=user, pwd=pwd, database=database)
        self.engine = sqlalchemy.create_engine(connstring)
    def check_train_passing(self):
        self.check_warning_lights_on()

        train_light_east = self.topics.get("*/track/0/train_light/0")
        train_light_west = self.topics.get("*/track/0/train_light/1")

        if train_light_east.payload == "0" and train_light_west.payload == "0":
            log.error(
                "Track sequence ERROR! Payload not matching expected state: Both train lights are red!"
            )
            return

        if train_light_east.payload == "1" and train_light_west.payload == "1":
            log.error(
                "Track sequence ERROR! Payload not matching expected state: Both train lights are green!"
            )
            return

        log.info("Track sequence OK. Train light is green")
def load_json_data(file_name: Union[Path, str],
                   base_path: Union[None, Path, str] = None):
    if base_path is None:
        base_path = Path(__file__).parent.joinpath('data')
    else:
        base_path = Path(base_path)
    base_path.mkdir(parents=True, exist_ok=True)

    try:
        path_to_read = base_path.joinpath(file_name)
        with path_to_read.open(mode='r') as fp:
            log.info(f'Reading file {path_to_read}')
            res = json.load(fp)
    except FileNotFoundError:
        log.warning(
            f'File to read not found, simulating empty one for {path_to_read}.'
        )
        return {}
    return res
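A round-trip usage sketch together with save_json_cache from the earlier example (file names are illustrative):

# Write to an explicit cache directory, then read it back.
save_json_cache({'epoch': 1}, 'state.json', base_path='data/cache')
state = load_json_data('state.json', base_path='data/cache')
missing = load_json_data('does_not_exist.json')  # logs a warning and returns {}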
Example #39
def main():
    path = usage()
    yaml_obj: dict = read_and_validate_yaml_file(path)

    services: dict = yaml_obj["services"] if "services" in yaml_obj else {}
    topics: dict = yaml_obj["topics"] if "topics" in yaml_obj else {}
    actions: dict = yaml_obj["actions"] if "actions" in yaml_obj else {}

    for (name, value) in ask_for_components(services=services,
                                            topics=topics,
                                            actions=actions):
        is_service = (name, value) in services.items()
        is_topic = (name, value) in topics.items()
        is_action = (name, value) in actions.items()

        if is_service:
            destination_path = generate_service_template(
                source=value["source"],
                ros_type_str=value["type"],
                headers_file=value["headers_file"],
            )
            logging.info(f"{name}: created fuzzer for the service")
            logging.info(f"└── {destination_path}")

        elif is_topic:
            destination_path = generate_topic_template(
                source=value["source"],
                ros_type_str=value["type"],
                headers_file=value["headers_file"],
            )
            logging.info(f"{name}: created fuzzer for the topic")
            logging.info(f"└── {destination_path}")

        elif is_action:
            # TODO
            pass

    logging.info("Fuzzer(s) generated successfully")
    logging.warning("Please link the fuzzers to their CMakeLists.txt files,")
    logging.warning(
        "recompile the projects with instrumentalization and start the fuzzers."
    )
def create_dataset_page(pkg_info):
    '''Generate a single dataset page.'''
    template = env.get_template("dataset.html")
    name = pkg_info["name"]
    target = os.path.join("datasets/", name + ".html")

    context = {"title": pkg_info["title"],
               "description": pkg_info["description"],
               "sources": pkg_info.get("sources"),
               "readme": pkg_info["readme"],
               "datafiles": pkg_info["datafiles"],
               "last_updated": pkg_info["last_updated"],
               }
    context['welcome_text'] = markdown.markdown(codecs.open("content/welcome_text.md", 'r', 'utf-8').read(), output_format="html5", encoding="UTF-8")
    contents = template.render(**context)

    f = codecs.open(os.path.join(output_dir, target), 'w', 'utf-8')
    f.write(contents)
    f.close()
    log.info("Created %s." % target)
Example #41
    def _process_job(self) -> None:
        log.info("PROCCESSING JOB")

        # Get message from message stack
        data: Dict = self._message_stack.pop()

        # Make sure the dataset is actually there
        assert "Dataset" in data

        # Get training and prediction datasets
        train = decode_and_decompress(data["Dataset"]["train"])
        predict = decode_and_decompress(data["Dataset"]["predict"])

        train_pd = pd.read_csv(io.StringIO(train))
        predict_pd = pd.read_csv(io.StringIO(predict))

        # Prepare the datasets for callback
        train_pd, predict_pd, predict_rids = prepare_datasets(
            train_pd, predict_pd)

        # Check the user has specified a callback here to satisfy mypy
        assert self.callback is not None

        predictions = self.callback(train_pd, predict_pd, self.recv_job_config)

        log.debug("Predictions: %s", predictions.head())

        # Attach record IDs onto predictions
        predictions["record_id"] = predict_rids
        cols = predictions.columns.tolist()
        cols.insert(0, cols.pop())
        predictions = predictions[cols]

        assert len(predictions.index) == len(predict_pd.index)

        compressed_predictions: str = compress_and_encode(
            predictions.to_csv(index=False))

        message = {"Predictions": compressed_predictions}
        self._send_message(message)
        self._state = State.HEARTBEAT
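compress_and_encode and decode_and_decompress are project helpers that are not shown; one plausible pairing (an assumption, not the project's actual implementation) is gzip plus base64:

import base64
import gzip

def compress_and_encode(text: str) -> str:
    # gzip the UTF-8 bytes, then base64-encode them into an ASCII-safe string.
    return base64.b64encode(gzip.compress(text.encode('utf-8'))).decode('ascii')

def decode_and_decompress(payload: str) -> str:
    # Reverse of the above: base64-decode, gunzip, decode back to text.
    return gzip.decompress(base64.b64decode(payload)).decode('utf-8')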
Example #42
    def __init__(self, URL):
        self.url = URL
        self.is_playing = False
        self.process = None

        log.debug("player: url => {}".format(self.url))

        self.process = Popen(
            ["ffplay", "-nodisp", "-nostats", "-loglevel", "error", self.url])

        log.debug("player: ffplay => PID {} initiated".format(
            self.process.pid))

        sleep(3)  # sleep for 3 seconds, waiting for ffplay to start properly

        if self.is_active():
            self.is_playing = True
            log.info("Radio started successfully")
        else:
            log.error("Radio could not be stared, may be a dead station")
            sys.exit(0)
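is_active() is referenced but not included in the snippet; a minimal sketch based on Popen.poll() (an assumption about its implementation) could be:

    def is_active(self):
        # ffplay is still running as long as poll() has not produced a return code.
        return self.process is not None and self.process.poll() is None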
def scrape(scrape_all, extend_file):
    entries = []
    if scrape_all:
        for leg in range(1, LAST_LEG + 1):
            for sess in range(1, 5):
                print leg, sess
                entries.extend(get_dates(leg, sess))
    else:
        entries.extend(get_dates(LAST_LEG, LAST_SESS))

    with open(OUTFILE, 'wb') as f:
        dw = csv.DictWriter(f, fieldnames=FIELDNAMES)
        dw.writeheader()
        for entry in entries:
            dw.writerow(entry)

    if extend_file:
        rows = open(extend_file, 'r').readlines()
        newrows = open(OUTFILE, 'r').readlines()
        rows_to_save = []
        # check if each new row already exists on the file, append otherwise
        for newrow in newrows:
            leg, sess, num = newrow.split(',')[:3]
            exists = False
            for row in rows:
                if row.startswith("%s,%s,%s" % (leg, sess, num)):
                    exists = True
                    break
            if not exists:
                log.info("New entry for %s,%s,%s, appending..." % (leg, sess, num))
                rows_to_save.append(newrow)

        if rows_to_save:
            with open(extend_file, 'a') as f:
                f.writelines(rows_to_save)
        os.remove(OUTFILE)

    browser.quit()
def main():
    if not os.path.exists(dest):
        os.mkdir(dest)
        log.info("Directory 'imgs/' created.")

    mp_json = json.loads(open(mp_file, 'r').read())
    for mp_id in mp_json:
        url = pic_url_formatter % mp_id
        filename = '%s.jpg' % os.path.join(dest, mp_id)
        if os.path.exists(filename):
            log.debug("File for id %s already exists, skipping." % mp_id)
            continue
        log.info('Retrieving picture with id: %s' % mp_id)
        try:
            urlretrieve(url, filename)
        except IOError:
            log.error('Socket error! :(')

    log.info('Done. Now do find ./imgs/ -size -722c -exec rm {} \;')
    log.info('to clean up things.')
def scrape(format, start=1, end=None, verbose=False, outfile='', separate=False, indent=1, processes=2):
    deprows = {}
    if processes > 1:
        pool = multiprocessing.Pool(processes=processes)
        max = end

        try:
            processed_items = (d for d in pool.map(process_dep, range(start, max), chunksize=4) if d)
        except KeyboardInterrupt:
            pool.terminate()
    else:
        processed_items = []
        for x in range(start, end):
            processed_items.append(process_dep(x))

    for item in processed_items:
        if not item:
            continue
        deprows[item['title']] = item

    if not separate:
        log.info("Saving to file %s..." % outfile)
        fp = codecs.open(outfile, 'w+', 'utf-8')
        fp.write(json.dumps(deprows, encoding='utf-8', ensure_ascii=False, indent=indent, sort_keys=True))
        fp.close()
        log.info("Done.")
    else:
        for n, item in deprows.items():
            # output dir
            d = "output"
            if not os.path.exists(d):
                os.mkdir(d)
            filename = item['doc_url'].replace(".doc&Inline=true", '').split('fich=')[-1] + ".json"
            fp = codecs.open(os.path.join(d, filename), 'w+', 'utf-8')
            fp.write(json.dumps(item, encoding='utf-8', ensure_ascii=False, indent=indent, sort_keys=True))
            fp.close()
        log.info("Done.")
def generate(offline, fetch_only):
    '''Main function that takes care of the whole process.'''
    # set up the output directory
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    # set up the dir for storing repositories
    if not os.path.exists(repo_dir):
        log.info("Directory %s doesn't exist, creating it." % repo_dir)
        os.mkdir(repo_dir)
    # create dir for dataset pages
    if not os.path.exists(os.path.join(output_dir, datasets_dir)):
        os.mkdir(os.path.join(output_dir, datasets_dir))
    # create download dir for zip and csv/json/* dataset files
    if not os.path.exists(os.path.join(output_dir, files_dir)):
        os.mkdir(os.path.join(output_dir, files_dir))
    # create static dirs
    # TODO: only update changed files -- right now we regenerate the whole static dir
    css_dir = os.path.join(output_dir, "css")
    js_dir = os.path.join(output_dir, "js")
    img_dir = os.path.join(output_dir, "img")
    fonts_dir = os.path.join(output_dir, "fonts")
    if os.path.exists(css_dir):
        shutil.rmtree(css_dir)
    shutil.copytree("static/css", css_dir)
    if os.path.exists(js_dir):
        shutil.rmtree(js_dir)
    shutil.copytree("static/js", js_dir)
    if os.path.exists(img_dir):
        shutil.rmtree(img_dir)
    shutil.copytree("static/img", img_dir)
    if os.path.exists(fonts_dir):
        shutil.rmtree(fonts_dir)
    shutil.copytree("static/fonts", fonts_dir)

    # read the config file to get the datasets we want to publish
    parser = SafeConfigParser()
    parser.read(config_file)
    packages = []

    if not parser.items('repositories'):
        log.critical('No repository data in settings.conf (does it even exist?). Cannot proceed :(')
        sys.exit()
    # go through each specified dataset
    for r in parser.items('repositories'):
        name, url = r
        dir_name = os.path.join(repo_dir, name)

        # do we have a local copy?
        if os.path.isdir(dir_name):
            if not offline:
                log.info("Checking for changes in repo '%s'..." % name)
                repo = git.Repo(dir_name)
                origin = repo.remotes.origin
                try:
                    origin.fetch()
                except AssertionError:
                    # usually this fails on the first run, try again
                    origin.fetch()
                except git.exc.GitCommandError:
                    log.critical("Fetch error connecting to repository, this dataset will be ignored and not listed in the index!")
                    continue
                # connection errors can also happen if fetch succeeds but pull fails
                try:
                    result = origin.pull()[0]
                except git.exc.GitCommandError:
                    log.critical("Pull error connecting to repository, this dataset will be ignored and not listed in the index!")
                    continue
                # we get specific flags for the results Git gave us
                # and we set the "updated" var in order to signal whether to
                # copy over the new files to the download dir or not
                if result.flags & result.HEAD_UPTODATE:
                    log.info("No new changes in repo '%s'." % name)
                    updated = False
                elif result.flags & result.ERROR:
                    log.error("Error pulling from repo '%s'!" % name)
                    updated = False
                else:
                    # TODO: figure out other git-python flags and return more
                    # informative log output
                    log.info("Repo changed, updating. (returned flags: %d)" % result.flags)
                    updated = True
            else:
                log.info("Offline mode, using cached version of package %s..." % name)
                # we set updated to True in order to re-generate everything
                # FIXME: See checksum of CSV files to make sure they're new before
                # marking updated as true
                updated = True
                repo = git.Repo(dir_name)
            if fetch_only:
                # if the --fetch-only flag was set, skip to the next dataset
                continue
        else:
            if offline:
                log.warn("Package %s specified in settings but no local cache, skipping..." % name)
                continue
            else:
                log.info("We don't have repo '%s', cloning..." % name)
                repo = git.Repo.clone_from(url, dir_name)
                updated = True

        # get datapackage metadata
        pkg_info = process_datapackage(name)
        # set last updated time based on last commit, comes in Unix timestamp format so we convert
        import datetime
        d = repo.head.commit.committed_date
        last_updated = datetime.datetime.fromtimestamp(int("1284101485")).strftime('%Y-%m-%d %H:%M:%S')
        log.debug(last_updated)
        pkg_info['last_updated'] = last_updated
        # add it to the packages list for index page generation after the loop ends
        packages.append(pkg_info)
        # re-generate the dataset HTML pages
        create_dataset_page(pkg_info)
        # if repo was updated, copy over CSV/JSON/* and ZIP files to the download dir
        # (we always generate them if offline)
        if updated or offline:
            create_dataset_page(pkg_info)
            datafiles = pkg_info['datafiles']
            zipf = zipfile.ZipFile(os.path.join(output_dir, files_dir, name + '.zip'), 'w')
            for d in datafiles:
                # copy CSV file
                target = os.path.join(output_dir, files_dir, os.path.basename(d['path']))
                shutil.copyfile(os.path.join(dir_name, d['path']), target)
                # generate JSON version
                csv2json(target, target.replace(".csv", ".json"))
                # make zip file
                zipf.write(os.path.join(dir_name, d['path']), d['basename'], compress_type=zipfile.ZIP_DEFLATED)
            try:
                zipf.write(pkg_info['readme_path'], 'README.md')
            except OSError:
                pass
            zipf.close()

    # generate the HTML index with the list of available packages
    create_index_page(packages)
    # generate the static JSON API of the data packages
    create_api(packages)
Example #47
def generate(offline=False,
             fetch_only=False,
             output_dir=OUTPUT_DIR,
             theme_dir=os.path.join(THEMES_DIR, 'centraldedados'),
             repo_dir=REPO_DIR,
             config_file=CONFIG_FILE):
    '''Main function that takes care of the whole process.'''
    global env, packages
    # Read the config file
    parser = SafeConfigParser()
    parser.read(config_file)
    # Load the theme and set up Jinja
    theme_name = parser.get('ui', 'theme')
    theme_dir = os.path.join(THEMES_DIR, theme_name)
    template_dir = os.path.join(theme_dir, "templates")
    env = jinja2.Environment(loader=jinja2.FileSystemLoader([template_dir]))

    # Set up the output directory
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    # Set up the dir for storing repositories
    if not os.path.exists(repo_dir):
        log.debug("Directory %s doesn't exist, creating it." % repo_dir)
        os.mkdir(repo_dir)
    # Copy htaccess file
    shutil.copyfile(os.path.join(theme_dir, 'static/htaccess'), os.path.join(output_dir, ".htaccess"))

    # Static CSS files
    css_dir = os.path.join(output_dir, "css")
    if os.path.exists(css_dir):
        shutil.rmtree(css_dir)
    shutil.copytree(os.path.join(theme_dir, "static/css"), css_dir)
    # Static JavaScript files
    js_dir = os.path.join(output_dir, "js")
    if os.path.exists(js_dir):
        shutil.rmtree(js_dir)
    shutil.copytree(os.path.join(theme_dir, "static/js"), js_dir)
    # Theme images
    img_dir = os.path.join(output_dir, "img")
    if os.path.exists(img_dir):
        shutil.rmtree(img_dir)
    shutil.copytree(os.path.join(theme_dir, "static/img"), img_dir)
    # Fonts
    fonts_dir = os.path.join(output_dir, "fonts")
    if os.path.exists(fonts_dir):
        shutil.rmtree(fonts_dir)
    shutil.copytree(os.path.join(theme_dir, "static/fonts"), fonts_dir)

    if not parser.items('repositories'):
        log.critical('No repository data in settings.conf (does it even exist?). Cannot proceed :(')
        sys.exit()
    # go through each specified dataset
    for r in parser.items('repositories'):
        name, url = r
        dir_name = os.path.join(repo_dir, name)
        repo = None

        # do we have a local copy?
        if os.path.isdir(dir_name):
            if not os.path.isdir(os.path.join(dir_name, '.git')):
                if url.endswith(".json"):
                    log.info("%s: Data package, refreshing" % name)
                    updated = fetch_data_package(url, dir_name)
                else:
                    log.info('%s: Unsupported repo, skipping update' % name)
                    continue

            elif not offline:
                repo = git.Repo(dir_name)
                origin = repo.remotes.origin
                try:
                    origin.fetch()
                except AssertionError:
                    # usually this fails on the first run, try again
                    origin.fetch()
                except git.exc.GitCommandError:
                    log.critical("%s: Fetch error, this dataset will be left out." % name)
                    continue
                # see if we have updates
                if not local_and_remote_are_at_same_commit(repo, origin):
                    log.debug("%s: Repo has new commits, updating local copy." % name)
                    updated = True
                    # connection errors can also happen if fetch succeeds but pull fails
                    try:
                        result = origin.pull()[0]
                    except git.exc.GitCommandError:
                        log.critical("%s: Pull error, this dataset will be left out." % name)
                        continue
                    if result.flags & result.ERROR:
                        log.error("%s: Pull error, but going ahead." % name)
                        updated = False
                else:
                    log.info("%s: No changes." % name)
                    updated = False
            else:
                log.debug("%s: Offline mode, using cached version." % name)
                # we set updated to True in order to re-generate everything
                updated = True
                repo = git.Repo(dir_name)
            if fetch_only:
                # if the --fetch-only flag was set, skip to the next dataset
                continue
        else:
            if offline:
                log.warn("%s: No local cache, skipping." % name)
                continue
            else:
                if url.endswith(".git"):
                    # Handle GIT Repository URL
                    log.info("%s: New repo, cloning." % name)
                    try:
                        repo = git.Repo.clone_from(url, dir_name)
                        # For faster checkouts, one file at a time:
                        # repo = git.Repo.clone_from(url, dir_name, n=True, depth=1)
                        # repo.git.checkout("HEAD", "datapackage.json")
                    except git.exc.GitCommandError as inst:
                        log.warn("%s: skipping %s" % (inst, name))
                        continue
                    updated = True

                elif url.endswith(".json"):
                    # Handle Data Package URL
                    log.info("%s: New data package, fetching." % name)
                    updated = fetch_data_package(url, dir_name)
                else:
                    log.warn("Unsupported repository: %s" % url)

        # get datapackage metadata
        try:
            pkg_info = process_datapackage(name, repo_dir, url)
        except ParseException as inst:
            log.warn("%s: skipping %s" % (inst, name))
            continue

        # set last updated time based on last commit, comes in Unix timestamp format so we convert
        import datetime
        if repo is not None:
            d = repo.head.commit.committed_date
        else:
            d = int(time.mktime(time.localtime()))
        last_updated = datetime.datetime.fromtimestamp(int(d)).strftime('%Y-%m-%d %H:%M:%S')
        pkg_info['last_updated'] = last_updated
        # add it to the packages list for index page generation after the loop ends
        packages.append(pkg_info)
        # re-generate the dataset HTML pages
        create_dataset_page(pkg_info, output_dir)
        # if repo was updated, copy over CSV/JSON/* and ZIP files to the download dir
        # (we always generate them if offline)
        if updated or offline:
            create_dataset_page(pkg_info, output_dir)
            datafiles = pkg_info['datafiles']
            zipf = zipfile.ZipFile(os.path.join(output_dir, name + '.zip'), 'w')
            for d in datafiles:
                log.info("Copying %s" % d['path'])
                # copy file
                target = os.path.join(output_dir, os.path.basename(d['path']))
                shutil.copyfile(os.path.join(dir_name, d['path']), target)
                # generate JSON version of CSV
                if target.endswith('.csv'):
                    csv2json(target, target.replace(".csv", ".json"))
                # make zip file
                zipf.write(os.path.join(dir_name, d['path']), d['basename'], compress_type=zipfile.ZIP_DEFLATED)
            if 'readme_path' in pkg_info:
                try:
                    zipf.write(pkg_info['readme_path'], 'README.md')
                except OSError:
                    pass
            zipf.close()

    # HTML index with the list of available packages
    create_index_page(packages, output_dir)
    # Static JSON API of the data packages
    create_api(packages, output_dir, repo_dir)
    # Static pages
    create_static_pages(output_dir)
    # Contact page
    create_contact_page(output_dir, parser.get('credentials', 'contact_email'))

    log.info("All static content is ready inside '%s'." % OUTPUT_DIR)
Example #48
def generate_site(fast_run):
    # flush output
    create_dir(OUTPUT_DIR)
    create_dir(os.path.join(OUTPUT_DIR, TRANSCRIPTS_PATH))
    create_dir(os.path.join(OUTPUT_DIR, MPS_PATH))
    create_dir(os.path.join(OUTPUT_DIR, MEDIA_PATH))

    # init Jinja
    env = jinja2.Environment(loader=jinja2.FileSystemLoader([TEMPLATE_DIR]),
                             extensions=['jinja2htmlcompress.SelectiveHTMLCompress'],
                             trim_blocks=True, lstrip_blocks=True)
    env.filters['date'] = format_date

    # generate pages
    log.info("Copying static files...")
    copy_tree(MEDIA_SOURCE_DIR, os.path.join(OUTPUT_DIR, MEDIA_PATH))

    log.info("Generating index...")
    render_template_into_file(env, 'index.html', 'index.html')

    log.info("Generating MP index...")
    mps = generate_mp_list()
    context = {"mps": mps}
    render_template_into_file(env, 'mp_list.html', "deputados/index.html", context)

    gov_data = get_gov_dataset()
    govpost_data = list(get_govpost_dataset())
    gov_mp_ids = [int(row[2]) for row in govpost_data if row[2]]
    date_data = get_date_dataset()

    log.info("Generating MP pages...")
    for mp in mps:
        id = int(mp['id'])
        mp['photo_url'] = PHOTO_URL_BASE + str(id) + ".jpg"
        # determine government posts
        if id in gov_mp_ids:
            mp['govposts'] = []
            govpost_rows = [row for row in govpost_data if row[2].strip() and int(row[2]) == id]
            for row in govpost_rows:
                gov_number = int(row[0])
                gov = None
                for r in gov_data:
                    if int(r[0]) == gov_number:
                        gov = {'number': r[0], 'start_date': dateparser.parse(r[1]), 'end_date': dateparser.parse(r[2])}
                        break
                if not gov:
                    log.critical("Gov not found!")
                mp['govposts'].append({
                    'post': row[3],
                    'start_date': dateparser.parse(row[4]),
                    'end_date': dateparser.parse(row[5]),
                    'gov': gov,
                })
        # parse dates
        for m in mp['mandates']:
            m['start_date'] = dateparser.parse(m['start_date'])
            m['end_date'] = dateparser.parse(m['end_date'])
            # nice effect: if no end date, set to today

        context = {'mp': mp, 'l': None}
        filename = os.path.join(MPS_PATH, mp['slug'], 'index.html')
        render_template_into_file(env, 'mp_detail.html', filename, context)

    log.info("Generating session index...")
    datedict = generate_datedict()
    all_years = [y for y in datedict]
    for year_number in datedict:
        year = datedict[year_number]
        context = {'year': year,
                   'year_number': year_number,
                   'all_years': all_years,
                   'datedict': datedict,
                   }
        target_dir = os.path.join(TRANSCRIPTS_PATH + "%s/" % year_number)
        filename = target_dir + "index.html"
        # print filename
        render_template_into_file(env, 'day_list.html', filename, context)

    # get most recent year and make the session index
    y = all_years[-1]
    year = datedict[y]
    context = {'year': year,
               'year_number': year_number,
               'all_years': all_years,
               'datedict': datedict,
               }
    render_template_into_file(env, 'day_list.html', TRANSCRIPTS_PATH + 'index.html', context)

    log.info("Generating HTML session pages...")
    if fast_run:
        COUNTER = 0
    date_data.reverse()
    for leg, sess, num, d, dpub, page_start, page_end in date_data:
        dateobj = dateparser.parse(d)
        context = {'session_date': dateobj,
                   'year_number': year_number,
                   'text': get_session_text(leg, sess, num),
                   'monthnames': MESES,
                   'pdf_url': 'xpto',
                   }
        target_dir = "%s%d/%02d/%02d" % (TRANSCRIPTS_PATH, dateobj.year, dateobj.month, dateobj.day)
        if not os.path.exists(os.path.join(OUTPUT_DIR, target_dir)):
            create_dir(os.path.join(OUTPUT_DIR, target_dir))
        filename = "%s/index.html" % target_dir
        render_template_into_file(env, 'day_detail.html', filename, context)
        log.debug(d)
        if fast_run:
            COUNTER += 1
            if COUNTER > 20:
                break
    def __init__(self):
        log.info('couchutils created')