Example #1
0
    def connect(self):
        """
        Connects to the Sybl service

            Parameters:
                None

            Returns:
                None

            Raises:
                AttributeError: raised when the access token or model ID has not been loaded
                PermissionError: raised when the access token cannot be authorized

        """
        # Check the user has specified a callback here
        if self.callback is None:
            raise AttributeError("Callback has not been registered")

        if not self._access_token or not self._model_id:
            log.error("Model has not been loaded")
            raise AttributeError(
                "Model access token and ID have not been loaded")

        self._connect_to_sock()
        log.info("Connected")

        if not self._is_authenticated():
            raise PermissionError(
                "Model access token has not been authenticated")

        self._state = State.HEARTBEAT
        self._begin_state_machine()
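# Usage sketch for the state machine above (assumptions: the surrounding
# class is the Sybl client, and `register_callback` / `load_model` are
# hypothetical setup calls that populate self.callback, self._access_token
# and self._model_id):
#
#     client = Sybl()
#     client.register_callback(train_and_predict)   # else AttributeError
#     client.load_model("user@example.com", "my_model")
#     client.connect()   # PermissionError if the token is rejected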
Example #2
0
def load_access_token(email, model_name) -> Tuple[str, str]:
    """
    Loads the access token from XDG_DATA_HOME using email and model name

        Parameters:
            email (str): The email the model is registered with
            model_name (str): The name of the model to be loaded

        Returns:
            Tuple[str, str]: Tuple of access token and model id
        Raises:
            ValueError: raised if the model name and email pair is not found
                in XDG_DATA_HOME
            FileNotFoundError: raised if XDG_DATA_HOME/sybl.json is not found
                meaning no access token has been stored
    """
    model_key: str = f"{email}.{model_name}"

    path = xdg_data_home() / "sybl.json"
    with path.open("r") as f:  # pylint: disable=invalid-name

        file_contents: str = f.read()
        models_dict: Dict = json.loads(file_contents)

        try:
            model_data = models_dict[model_key]

            return model_data["access_token"], model_data["model_id"]
        except KeyError as e:  # pylint: disable=invalid-name
            log.error("Model not registered")
            raise ValueError(
                f"Model {model_name} not registered to {email}") from e
Example #3
0
def ask_for_components(services: dict, topics: dict, actions: dict):
    services_choices = component_options(services)
    topics_choices = component_options(topics)
    actions_choices = component_options(actions)

    all_choices = services_choices + topics_choices + actions_choices

    there_are_valid_choices = any("disabled" not in choice for choice in all_choices)
    if not there_are_valid_choices:
        logging.error(
            "There are no available components to fuzz!\n"
            "Check for TODOs in the fuzz.yaml file."
        )
        exit(-1)

    choices = [
        Separator("Topics"),
        *topics_choices,
        Separator("Services"),
        *services_choices,
        Separator("Actions"),
        *actions_choices,
    ]

    questions = [
        {
            "type": "checkbox",
            "message": "What do you want to fuzz?",
            "name": "to_fuzz_components",
            "choices": choices,
        }
    ]

    return prompt(questions)["to_fuzz_components"]
Example #4
0
    def parse(type_name: str, prepath: str = "") -> ROSType:
        # Base case
        if type_name in TypeParser.PRIMITIVES:
            return ROSType(type_name=type_name, is_primitive=True)

        # Recursive case
        full_topic_path = prepath + type_name
        try:
            ros2_process_result = subprocess.check_output(
                ["ros2", "interface", "show", full_topic_path],
                stderr=subprocess.STDOUT,
            )
        except (subprocess.CalledProcessError, FileNotFoundError):
            logging.error(
                f"Couldn't call `ros2 interface show {full_topic_path}`\n"
                "Have you sourced install/setup.bash?"
            )
            exit(-1)

        ros2_process_result = ros2_process_result.decode("utf-8")

        # If it is a *.srv file content (request --- response),
        # isolate and process only the request part
        if "---" in ros2_process_result:
            ros2_process_result = ros2_process_result.split("---")[0]
        ros2_process_result = ros2_process_result.splitlines()
        fields = [TypeParser.line2field(line) for line in ros2_process_result]
        fields = [field for field in fields if field is not None]
        return ROSType(type_name=type_name, fields=fields)
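# For reference, a .srv definition separates request and response fields
# with `---`, which is why parse() keeps only the part before it. The
# standard ROS 2 tutorial interface AddThreeInts.srv, for example:
#
#     int64 a
#     int64 b
#     int64 c
#     ---
#     int64 sum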
Example #5
0
    def _process_job_config(self) -> None:
        if self._message_stack:
            job_config = self._message_stack.pop()
        else:
            log.error("Empty Message Stack!\n RETURNING TO HEARTBEAT")
            self._state = State.HEARTBEAT
            return

        assert self.config is not None
        if "JobConfig" not in job_config:
            log.warning("Invalid Job Config Message")
            self._state = State.HEARTBEAT
            return

        accept_job: bool = self.config.compare(job_config["JobConfig"])

        if not accept_job:
            self._send_message({"ConfigResponse": {"accept": False}})
            log.info("REJECTING JOB")
        else:
            self._send_message({"ConfigResponse": {"accept": True}})
            self.recv_job_config = job_config["JobConfig"]
            log.info("ACCEPTING JOB")

        self._state = State.HEARTBEAT
Example #6
0
    def _read_message(self) -> Dict:
        size_bytes = self._sock.recv(4)

        if size_bytes == b"":
            log.error("Empty Message: Closing")
            self._sock.close()
            sys.exit(1)

        size = struct.unpack(">I", size_bytes)[0]
        log.debug("Message size: %d", size)

        if size > 4096:
            remaining_size = size
            buf: List[int] = []

            while remaining_size > 0:
                chunk = self._sock.recv(min(remaining_size, 4096))
                if not chunk:
                    break
                buf.extend(chunk)
                remaining_size -= len(chunk)

            return json.loads(bytes(buf))

        message: Dict = json.loads(self._sock.recv(size))
        # Error handle
        if "Server" in message.keys():
            # There has been an error in communication
            if "text" in message["Server"].keys():
                payload: Dict = json.loads(message["Server"]["text"])
                code = message["Server"]["code"]
                self._handle_server_error(code, payload)

        log.info(message)
        return message
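# The reader above assumes length-prefixed framing: a 4-byte big-endian
# size followed by that many bytes of JSON. A minimal matching writer (a
# sketch of what _send_message presumably does; it is not shown in the
# source):

import json
import struct

def write_message(sock, message: dict) -> None:
    payload = json.dumps(message).encode("utf-8")
    # Prefix the JSON payload with its length, matching struct.unpack(">I", ...)
    sock.sendall(struct.pack(">I", len(payload)) + payload)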
Example #7
0
async def play_game(websocket, gametype: str):
    # Wait until the game begins
    global bot
    while True:
        message = await parse_game_message(websocket)

        if message is None:
            log.error("Failed to get a valid message from the socket.")
            continue

        if isinstance(message, BidRequest):
            args = pickle.loads(bytes.fromhex(message.arguments))
            value = None

            if gametype == "value":
                value = str(bot.get_bid_game_type_value(**args))
            else:
                value = str(bot.get_bid_game_type_collection(**args))

            await websocket.send(value)
        elif isinstance(message, (AuctionEnd, MultiAuctionEnd)):
            log.info(f"Auction winners: {message.winners}")
            return
        elif isinstance(message, ResetBot):
            log.info("Resetting bot")
            bot = Bot()
Example #8
0
def text_to_mp3(text, outfile, lang):
    url = "http://translate.google.com/translate_tts?ie=UTF-8&q=%s&tl=%s&prev=input" % (urllib.quote(saythis), lang)
    try:
        urllib.urlretrieve(url, outfile)
    except IOError:
        log.error('Got IOError -- are you online?')
        raise
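# Note: urllib.quote and urllib.urlretrieve are Python 2 APIs. On Python 3
# the equivalents live in submodules:
#
#     from urllib.parse import quote
#     from urllib.request import urlretrieve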
Example #9
0
def table_to_list(table: Tag, section: str,
                  id_base: int) -> Tuple[List[Mod], int]:
    """Convert an HTML table into a list of Mods."""
    mods: List[Mod] = []
    modid = id_base
    row: Tag
    for row in table.find_all('tr'):
        try:
            tag_column_1: Tag = row.find('td', 'col1')
            tag_column_2: Tag = row.find('td', 'col2')
            tag_column_3: Tag = row.find('td', 'col3')
            if tag_column_1 and tag_column_2 and tag_column_3:
                webpages = get_webpages_from_tag(tag_column_2)
                requirements = get_requirements_from_tag(tag_column_3)

                mod = Mod(modid, get_name_from_tag(tag_column_2),
                          get_description_from_tag(tag_column_3),
                          get_notes_from_tag(tag_column_3), section,
                          get_media_from_tag(tag_column_1),
                          Oldrim(True, webpages, requirements),
                          Sse(False, Webpages(None, None, None, []), []), [],
                          get_deprecated_status_from_tag(tag_column_3), False)

                mods.append(mod)

                modid += 1
        except BadEntryException as exc:
            log.error(str(exc))

    return (mods, modid)
Example #10
0
    def display_access(self, message: Dict):
        """
        Parses an access token from a message and displays them to the user.

        Args:
            message: The message to parse

        Raises:
            KeyError: If the message doesn't contain the "token" and "id" keys

        """

        try:
            self.access_token = message["token"]
            self.model_id = message["id"]

            log.info("Successfully authenticated with the Sybl system")
            # log.info(f"\tACCESS TOKEN: {self.access_token}")
            # log.info(f"\tMODEL ID: {self.model_id}")

            log.info(
                "Please go to https://sybl.tech/models to unlock your new model"
            )
        except KeyError:
            log.error(f"Expected 'token' and 'id' keys but got data={message}")
        finally:
            self.stream.close()
Example #11
0
    def authenticate_challenge(self, message: Dict[Any, Any]):
        """
        Authenticates a challenge message and responds to the requestor.

        Args:
            message: The message containing the challenge

        Raises:
            KeyError: If the user does not have their private key in their
            environment

        """

        challenge = message["challenge"]
        log.info("Authenticating a challenge from the server")

        try:
            signed_challenge = sign_challenge(base64.b64decode(challenge),
                                              self.private_key)

            message = {
                "ChallengeResponse": {
                    "email": self.email,
                    "model_name": self.model_name,
                    "response":
                    base64.b64encode(signed_challenge).decode("utf-8"),
                }
            }

            self._send_message(message)
        except KeyError:
            log.error("Failed to find the private key in the environment")
Example #12
0
def generate_cpp_file(fuzz_target: FuzzTarget, source_file: str,
                      template_name: str):
    __location__ = os.path.realpath(
        os.path.join(os.getcwd(), os.path.dirname(__file__)))
    plain_source_file_name = Path(source_file).name
    without_extension = os.path.splitext(plain_source_file_name)[0]

    # Read template
    env = Environment(loader=FileSystemLoader(__location__))
    template = env.get_template(template_name)
    logging.debug("Template read")

    # Populate template
    template_arguments = fuzz_target.get_mapping()
    template_arguments["FILE_NAME"] = plain_source_file_name
    fuzzing_path = os.path.join(os.path.dirname(__file__), "fuzzing_api.hpp")
    with open(fuzzing_path) as fuzzing_api:
        template_arguments["FUZZING_API"] = fuzzing_api.read()
    template = template.render(template_arguments)
    logging.debug("Template populated")

    # Write the populated file
    full_path = os.path.join(os.path.dirname(source_file),
                             without_extension + "_generated.cpp")
    try:
        with open(full_path, "w") as fd:
            fd.write(template)
            logging.debug(
                f"Template written with {fuzz_target.client_name} client")
    except Exception:
        logging.error("Couldn't write generated file", exc_info=True)

    return full_path
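# Sketch of the template side (an assumption -- the actual template is not
# shown here): Jinja2 placeholders named after the mapping keys populated
# above, along the lines of:
#
#     // Generated from {{ FILE_NAME }}
#     {{ FUZZING_API }}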
Example #13
0
    def upload_archive(self, content, description):
        try:
            archive = self.vault.upload_archive(archiveDescription=description,
                                                body=content)

            return archive
        except ClientError as e:
            log.error(e)
Example #14
0
def validate(fontpkg):
    """Checks if the font package is valid."""
    # check if JSON passes
    try:
        with open(os.path.join(fontpkg, "fontpackage.json")) as f:
            json.load(f)
    except ValueError:
        log.error("Invalid JSON in fontpackage.json file.")
        raise
Example #15
0
def load_priv_key():

    try:
        priv_key = os.environ["PRIVATE_KEY"]
    except KeyError:
        log.error("PRIVATE_KEY not found in environment. Exiting...")
        sys.exit(1)

    return priv_key
Example #16
0
def get_db(view):
    db = getattr(g, '_database', None)
    if db is None:
        log.error("creating for the first time")
        couch = couchdb.Server(app.config['COUCHDB_SERVER'])
        couch.resource.credentials = (app.config['COUCHDB_USER'], app.config['COUCHDB_PASSWORD'])
        db = g._database = couch[app.config['COUCHDB_DATABASE']]
    else:
        log.info('nothing to do here, flies away')
    return db
Example #17
0
def parse_event_info(event, info):
    # event is a dict, info is a BeautifulSoup object
    if event['type'] == u"Baixa comissão para discussão":
        com_name = info.find('span', id=RE_COMNAME)
        if com_name:
            event['comission_name'] = com_name.text.strip()
    elif event['type'] == u"Publicação":
        url_nodes = info.findAll('a')
        urls = [{'url': node['href'], 'title': node.text.strip('[]')} for node in url_nodes]
        event['references'] = urls
    elif event['type'] in (u"Votação na generalidade", u"Votação Deliberação", u"Votação final global"):
        vote_info = info.find('span', id=RE_VOTEINFO)

        # funky parse loop for understanding how each party voted
        results = {'for': [], 'against': [], 'abstain': []}
        current_vote = None
        for c in vote_info.contents:
            if type(c) == bs4.element.Tag:
                if c.name == "br":
                    continue
                elif c.name == "i":
                    results[current_vote].append(c.text)
                else:
                    log.error("Unrecognized vote tag: %s" % c)
            elif type(c) == bs4.element.NavigableString:
                c = c.strip()
                if c == ",":
                    continue
                if c.startswith(u'Contra:'):
                    current_vote = "against"
                    if not c == u'Contra:':
                        # cases with one voter, in one line
                        # ex. "Abstenção: Isabel Oneto (PS)"
                        c = c.replace(u'Contra: ', '')
                        results[current_vote].append(c)
                elif c.startswith(u"A Favor:"):
                    current_vote = "for"
                    if not c == u'A Favor:':
                        c = c.replace(u'A Favor: ', '')
                        results[current_vote].append(c)
                elif c.startswith(u"Abstenção:"):
                    current_vote = "abstain"
                    if not c == u'Abstenção:':
                        c = c.replace(u'Abstenção: ', '')
                        results[current_vote].append(c)
                else:
                    log.error("Unrecognized vote string: %s" % c)

        event['vote_info'] = results
    else:
        if info.text.strip():
            event['raw_info'] = info.text.strip()
    return event
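# Resulting shape of the parsed votes (illustrative party names):
#
#     event['vote_info'] == {'for': ['PS'], 'against': ['PSD', 'CDS-PP'],
#                            'abstain': ['PCP']}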
Example #18
0
def test_output():
    # All of these just need to output without errors.
    from zenlog import log
    log.debug("A quirky message only developers care about")
    log.info("Curious users might want to know this")
    log.warn("Something is wrong and any user should be informed")
    log.warning("Something is wrong and any user should be informed")
    log.error("Serious stuff, this is red for a reason")
    log.critical("OH NO everything is on fire")
    log.c("OH NO everything is on fire")
    log.crit("OH NO everything is on fire")
Example #19
0
    def check_warning_lights_on(self):
        warning_light = self.topics.get("*/track/0/warning_light/0")

        if warning_light.payload == "1":
            log.info("Track sequence OK. Warning lights are on.")
            return True
        else:
            log.error(
                "Track Sequence ERROR! Payload not matching expected state: Warning lights are not on!"
            )
            return False
Example #20
0
    def _connect_to_sock(self):
        """
        Connects to the DCL for communications.
        """
        try:
            self._sock.connect(self._address)
        except ConnectionRefusedError:
            log.error(f"Could not connect to address: {self._address[0]}")
            sys.exit(1)

        log.info(f"Successfully connected to {self._address}")
Example #21
0
def test_output():
    # All of these just need to output without errors.
    from zenlog import log
    log.debug("A quirky message only developers care about")
    log.info("Curious users might want to know this")
    log.warn("Something is wrong and any user should be informed")
    log.warning("Something is wrong and any user should be informed")
    log.error("Serious stuff, this is red for a reason")
    log.critical("OH NO everything is on fire")
    log.c("OH NO everything is on fire")
    log.crit("OH NO everything is on fire")
Example #22
0
    def parse_type(type_name: str) -> ROSType:
        if "/" not in type_name:
            logging.error(
                f"The type name `{type_name}` does not contain any slash (/).\n"
                f'Please write the whole path (i.e. "tutorial_interfaces/srv/AddThreeInts")'
            )
            exit(-1)

        prepath, type_name = type_name.rsplit("/", 1)
        prepath += "/"

        return TypeParser.parse(type_name=type_name, prepath=prepath)
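# Usage sketch: a fully qualified name is required; it is split into the
# prepath "tutorial_interfaces/srv/" and the type name "AddThreeInts"
# before delegating to TypeParser.parse above.
#
#     ros_type = TypeParser.parse_type("tutorial_interfaces/srv/AddThreeInts")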
Example #23
0
    def check_train_passed(self):
        train_light_east = self.topics.get("*/track/0/train_light/0")
        train_light_west = self.topics.get("*/track/0/train_light/1")

        if train_light_east.payload == "0" and train_light_west.payload == "0":
            log.info("Track sequence OK. Train lights are red.")
            return True
        else:
            log.error(
                "Track Sequence ERROR! Payload not matching expected state: Train lights are still green!"
            )
            return False
Example #24
0
async def parse(url, session, collected):
    try:
        data = await fetch_html(url, session)
        collected[url] = data
    except (aiohttp.ClientError,
            aiohttp.http_exceptions.HttpProcessingError) as exc:
        status = getattr(exc, 'status', None)
        message = getattr(exc, 'message', None)
        log.error(f'aiohttp exception for {url} [{status}]: {message}')
    except Exception as exc:  # pylint: disable=broad-except
        exception_attributes = getattr(exc, '__dict__', {})
        log.error(f'Non-aiohttp exception occurred: {exception_attributes}')
Example #25
0
    def _update_all(self):
        default_args = self.get_default_args()
        for manga in self.db.get_all():  # type: Manga
            self.show_log() and log.info('Update %s', manga.url)
            _args = default_args.copy()
            data = json.loads(manga.data)
            data_args = data.get('args', {})
            del data_args['rewrite_exists_archives']
            del data_args['user_agent']
            del data_args['url']

            if not fs.is_dir(
                    fs.path_join(data_args['destination'], data_args['name'])):
                self.show_log() and log.warn('Destination not exists. Skip')
                continue

            _args.update({  # re-init args
                'url': manga.url,
                **data_args,
            })
            provider = self._get_provider(_args)  # type: Provider
            if provider:
                provider.before_provider(_args)
                provider.http.cookies = data.get('cookies')
                provider.http.ua = data.get('browser')
                provider.run(_args)
                provider.after_provider()
                provider.update_db()
                self.global_info.add_info(info)
            else:
                self.show_log() and log.error('Provider not exists')
Example #26
0
    def get_file_stats(self):
        file_stats = {}
        try:
            stats = self.file_path.stat()
            file_stats['st_mode'] = stats.st_mode
            file_stats['st_ino'] = stats.st_ino
            file_stats['st_dev'] = stats.st_dev
            file_stats['st_nlink'] = stats.st_nlink
            file_stats['st_uid'] = stats.st_uid
            file_stats['st_gid'] = stats.st_gid
            file_stats['st_size'] = stats.st_size
            file_stats['st_atime'] = stats.st_atime
            file_stats['st_mtime'] = stats.st_mtime
        except Exception as e:
            log.error(e)

        return file_stats
Example #27
0
    def play_by_station_uuid(self, _uuid):
        print(_uuid)
        # Pyradios by default doesn't let you search by UUID;
        # a trick is to call click_counter(uuid) directly to get the station info
        is_ok = "false"
        try:
            self.target_station = self.API.click_counter(_uuid)
            log.debug(self.target_station)
            is_ok = self.target_station["ok"]
        except Exception as e:
            log.error("Could not find a station by the UUID")
            sys.exit(0)

        self.API.search(name=self.target_station["name"], name_exact=True)
        # again, register a valid click
        if is_ok == "false":
            res = self.API.click_counter(self.target_station["stationuuid"])
            log.debug(res)
Example #28
0
    def check_train_passing(self):
        self.check_warning_lights_on()

        train_light_east = self.topics.get("*/track/0/train_light/0")
        train_light_west = self.topics.get("*/track/0/train_light/1")

        if train_light_east.payload == "0" and train_light_west.payload == "0":
            log.error(
                "Track sequence ERROR! Payload not matching expected state: Both train lights are red!"
            )
            return

        if train_light_east.payload == "1" and train_light_west.payload == "1":
            log.error(
                "Track sequence ERROR! Payload not matching expected state: Both train lights are green!"
            )
            return

        log.info("Track sequence OK. Train light is green")
Example #29
0
    def verify(self):
        """
        Creates a new model for the user and authenticates it with the
        challenge response method.

        Raises:
            IndexError: If an invalid message is encountered

        """

        # Connect to the socket
        self._connect()

        message = {
            "NewModel": {
                "email": self.email,
                "password": self.password,
                "model_name": self.model_name,
            }
        }

        self._send_message(message)

        while True:
            # Read some data
            data = self._read_message()
            log.debug(f"Received data={data}")

            try:
                variant, data = parse_message(data)

                if variant == "Challenge":
                    self.authenticate_challenge(data)
                elif variant == "AccessToken":
                    self.display_access(data)
                    self.save_access_tokens()
                    break
                else:
                    log.warn(
                        f"Encountered an unexpected message variant={variant}")

            except IndexError:
                log.error(f"Failed to parse a message from data={data}")
Example #30
0
    def _is_authenticated(self) -> bool:

        response = {
            "AccessToken": {
                "id": self._model_id,
                "token": self._access_token,
            }
        }

        self._send_message(response)
        message = self._read_message()
        try:
            if message["message"] == "Authentication successful":
                return True
        except KeyError:
            pass

        log.error("Authentication not successful")
        return False
Example #31
0
def main():
    if not os.path.exists(dest):
        os.mkdir(dest)
        log.info("Directory 'imgs/' created.")

    mp_json = json.loads(open(mp_file, 'r').read())
    for mp_id in mp_json:
        url = pic_url_formatter % mp_id
        filename = '%s.jpg' % os.path.join(dest, mp_id)
        if os.path.exists(filename):
            log.debug("File for id %s already exists, skipping." % mp_id)
            continue
        log.info('Retrieving picture with id: %s' % mp_id)
        try:
            urlretrieve(url, filename)
        except IOError:
            log.error('Socket error! :(')

    log.info(r'Done. Now do find ./imgs/ -size -722c -exec rm {} \;')
    log.info('to clean up things.')
Example #32
0
def main():
    if not os.path.exists(dest):
        os.mkdir(dest)
        log.info("Directory 'imgs/' created.")

    mp_json = json.loads(open(mp_file, 'r').read())
    for mp_id in mp_json:
        url = pic_url_formatter % mp_id
        filename = '%s.jpg' % os.path.join(dest, mp_id)
        if os.path.exists(filename):
            log.debug("File for id %s already exists, skipping." % mp_id)
            continue
        log.info('Retrieving picture with id: %s' % mp_id)
        try:
            urlretrieve(url, filename)
        except IOError:
            log.error('Socket error! :(')

    log.info(r'Done. Now do find ./imgs/ -size -722c -exec rm {} \;')
    log.info('to clean up things.')
Example #33
0
    def station_validator(self):
        if len(self.response) == 0:
            log.error("No stations found by the name")
            sys.exit(0)
        if len(self.response) > 1:
            log.info("Multiple stations found by the name")
            stations_name = ""
            for station in self.response:
                # stations_name = stations_name + "," + station["name"]
                log.info("name: {} | id: {} | country: {}".format(
                    station["name"], station["stationuuid"],
                    station["country"]))

            log.info(stations_name)
            sys.exit(0)
        if len(self.response) == 1:
            log.info("Station found: {}".format(self.response[0]["name"]))
            log.debug(self.response[0])
            self.target_station = self.response[0]
            self.API.click_counter(self.target_station["stationuuid"])
Example #34
0
def parse_file(f):
    bank = {}

    html = open(f, 'r').read()
    soup = BeautifulSoup(html)
    items_headings = soup.findAll("th")
    items = soup.findAll("td")
    item_pairs = zip(items_headings, items)

    # find duplicates
    country_needs_fixing = False
    urls_need_fixing = False
    dups = set([x.text for x in items_headings if items_headings.count(x) > 1])
    if dups:
        if "Country" in dups:
            country_needs_fixing = True
        elif "Related decisions" in dups and "Published accounts" in dups:
            pass
        else:
            log.error("Campo duplicado:")
            for d in dups:
                print '        ' + d

    for name, value in item_pairs:
        name = name.text.lower().replace(' ', '_').replace('-', '_')
        if country_needs_fixing and name == "country":
            country_needs_fixing = False
            name = "city"
        if value.find("a"):
            bank[name] = value.find('a')['href']
        else:
            bank[name] = value.text

    bank_filename = f.split('/')[-1]
    bank_id = bank_filename.split('.')[0]
    bank_url = 'https://www.bportugal.pt/en-US/Supervisao/Pages/CreditInstitution.aspx?IcID=' + bank_id
    bank['id'] = bank_id
    bank['url'] = bank_url

    return bank
Example #35
0
rows.append(headerline)

for item in jsondata:
    values = []
    for header in headers:
        if header not in item:
            if header == "institution_name":
                # open banks.json
                name = ""
                for f_item in fatherjsondata:
                    if f_item['url'] == item['url'].replace("https", "http"):
                        name = f_item['name'].replace('"', "'")
                        values.append('"%s"' % name)
                        break
                if not name:
                    log.error("Não encontrei o nome ;(")
                continue
            elif header == "institution_type":
                # open banks.json
                bank_type = ""
                for f_item in fatherjsondata:
                    if f_item['url'] == item['url'].replace("https", "http"):
                        bank_type = f_item['type'].replace('"', "'")
                        values.append('"%s"' % bank_type)
                        break
                if not bank_type:
                    log.error("Não encontrei o tipo ;(")
                continue
            else:
                value = ""
                values.append(value)
Example #36
0
def parse_event_info(event, info):
    # event is a dict, info is a BeautifulSoup object
    if event['type'] == u"Baixa comissão para discussão":
        com_name = info.find('span', id=RE_COMNAME)
        if com_name:
            event['comission_name'] = com_name.text.strip()
    elif event['type'] == u"Publicação":
        url_nodes = info.findAll('a')
        urls = [{'url': node['href'], 'title': node.text.strip('[]')} for node in url_nodes]
        event['references'] = urls
    elif event['type'] in (u"Votação na generalidade", u"Votação Deliberação", u"Votação final global"):
        vote_info = info.find('span', id=RE_VOTEINFO)

        # funky parse loop for understanding how each party voted
        # I really have to refactor this, please pester me if you need it -- rlafuente
        results = {'for': [], 'against': [], 'abstain': []}
        current_vote = None
        for c in vote_info.contents:
            if type(c) == bs4.element.Tag:
                if c.name == "br":
                    continue
                elif c.name == "i":
                    results[current_vote].append(c.text)
                else:
                    log.error("Unrecognized vote tag: %s" % c)
            elif type(c) == bs4.element.NavigableString:
                c = c.strip()
                if c == ",":
                    continue
                if c.startswith(u'Contra:'):
                    current_vote = "against"
                    if not c == u'Contra:':
                        # cases with voters in one line (individual MPs)
                        # ex. "Abstenção: Isabel Oneto (PS)"
                        c = c.replace(u'Contra: ', '').split(', ')
                        for mp in c:
                            if mp:
                                results[current_vote].append(mp.strip(','))
                elif c.startswith(u"A Favor:"):
                    current_vote = "for"
                    if not c == u'A Favor:':
                        c = c.replace(u'A Favor: ', '').split(', ')
                        for mp in c:
                            if mp:
                                results[current_vote].append(mp.strip(','))
                elif c.startswith(u"Abstenção:"):
                    current_vote = "abstain"
                    if not c == u'Abstenção:':
                        c = c.replace(u'Abstenção: ', '').split(', ')
                        for mp in c:
                            if mp:
                                results[current_vote].append(mp.strip(','))
                else:
                    log.warn("Orphan vote string: %s -- saving as voter" % c)
                    c = c.split(', ')
                    for mp in c:
                        if mp:
                            results[current_vote].append(mp)

        event['vote_info'] = results
    else:
        if info.text.strip():
            event['raw_info'] = info.text.strip()
    return event
Example #37
0
def format_date(value, format='medium'):
    if type(value) not in (datetime.date, datetime.datetime):
        log.error(type(value))
        value = dateparser.parse(value)
    return babel_format_date(value, format, locale="pt_PT")
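# Usage sketch (assumptions: `babel_format_date` is babel.dates.format_date
# and `dateparser` is the dateparser package): non-date values are parsed
# into dates first, then rendered for the pt_PT locale.
#
#     format_date(datetime.date(2013, 5, 2))      # medium format, pt_PT
#     format_date("2013-05-02", format='long')    # parsed, then formatted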
Example #38
0
bank_list = []

pagecount = 1
while True:
    log.debug("Novo loop, pagecount eh " + str(pagecount))
    wait = UI.WebDriverWait(driver, 10)
    
    links = driver.find_elements_by_css_selector(".AreaResultados td a")
    log.debug("Encontrei %d links..." % len(links))
    if len(links) == 0:
        from time import sleep
        sleep(3)
        links = driver.find_elements_by_css_selector(".AreaResultados td a")
        if len(links) == 0:
            log.error("Não há links, snif")
        else:
            log.debug("Iupi, %d links!" % len(links))
    rows = driver.find_elements_by_css_selector(".AreaResultados tbody tr")
    
    # skip first row
    first = True
    for row in rows:
        if first:
            first = False
            continue
        bank_name = row.find_element_by_css_selector("a")
        bank_type = row.find_element_by_css_selector("span")
        bank = { "name": bank_name.text, 
                 "url": bank_name.get_attribute("href"),
                 "type": bank_type.text
Example #39
0
def generate(offline, fetch_only):
    '''Main function that takes care of the whole process.'''
    # set up the output directory
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    # set up the dir for storing repositories
    if not os.path.exists(repo_dir):
        log.info("Directory %s doesn't exist, creating it." % repo_dir)
        os.mkdir(repo_dir)
    # create dir for dataset pages
    if not os.path.exists(os.path.join(output_dir, datasets_dir)):
        os.mkdir(os.path.join(output_dir, datasets_dir))
    # create download dir for zip and csv/json/* dataset files
    if not os.path.exists(os.path.join(output_dir, files_dir)):
        os.mkdir(os.path.join(output_dir, files_dir))
    # create static dirs
    # TODO: only update changed files -- right now we regenerate the whole static dir
    css_dir = os.path.join(output_dir, "css")
    js_dir = os.path.join(output_dir, "js")
    img_dir = os.path.join(output_dir, "img")
    fonts_dir = os.path.join(output_dir, "fonts")
    if os.path.exists(css_dir):
        shutil.rmtree(css_dir)
    shutil.copytree("static/css", css_dir)
    if os.path.exists(js_dir):
        shutil.rmtree(js_dir)
    shutil.copytree("static/js", js_dir)
    if os.path.exists(img_dir):
        shutil.rmtree(img_dir)
    shutil.copytree("static/img", img_dir)
    if os.path.exists(fonts_dir):
        shutil.rmtree(fonts_dir)
    shutil.copytree("static/fonts", fonts_dir)

    # read the config file to get the datasets we want to publish
    parser = SafeConfigParser()
    parser.read(config_file)
    packages = []

    if not parser.items('repositories'):
        log.critical('No repository data in settings.conf (does it even exist?). Cannot proceed :(')
        sys.exit()
    # go through each specified dataset
    for r in parser.items('repositories'):
        name, url = r
        dir_name = os.path.join(repo_dir, name)

        # do we have a local copy?
        if os.path.isdir(dir_name):
            if not offline:
                log.info("Checking for changes in repo '%s'..." % name)
                repo = git.Repo(dir_name)
                origin = repo.remotes.origin
                try:
                    origin.fetch()
                except AssertionError:
                    # usually this fails on the first run, try again
                    origin.fetch()
                except git.exc.GitCommandError:
                    log.critical("Fetch error connecting to repository, this dataset will be ignored and not listed in the index!")
                    continue
                # connection errors can also happen if fetch succeeds but pull fails
                try:
                    result = origin.pull()[0]
                except git.exc.GitCommandError:
                    log.critical("Pull error connecting to repository, this dataset will be ignored and not listed in the index!")
                    continue
                # we get specific flags for the results Git gave us
                # and we set the "updated" var in order to signal whether to
                # copy over the new files to the download dir or not
                if result.flags & result.HEAD_UPTODATE:
                    log.info("No new changes in repo '%s'." % name)
                    updated = False
                elif result.flags & result.ERROR:
                    log.error("Error pulling from repo '%s'!" % name)
                    updated = False
                else:
                    # TODO: figure out other git-python flags and return more
                    # informative log output
                    log.info("Repo changed, updating. (returned flags: %d)" % result.flags)
                    updated = True
            else:
                log.info("Offline mode, using cached version of package %s..." % name)
                # we set updated to True in order to re-generate everything
                # FIXME: See checksum of CSV files to make sure they're new before
                # marking updated as true
                updated = True
                repo = git.Repo(dir_name)
            if fetch_only:
                # if the --fetch-only flag was set, skip to the next dataset
                continue
        else:
            if offline:
                log.warn("Package %s specified in settings but no local cache, skipping..." % name)
                continue
            else:
                log.info("We don't have repo '%s', cloning..." % name)
                repo = git.Repo.clone_from(url, dir_name)
                updated = True

        # get datapackage metadata
        pkg_info = process_datapackage(name)
        # set last updated time based on last commit, comes in Unix timestamp format so we convert
        import datetime
        d = repo.head.commit.committed_date
        last_updated = datetime.datetime.fromtimestamp(int("1284101485")).strftime('%Y-%m-%d %H:%M:%S')
        log.debug(last_updated)
        pkg_info['last_updated'] = last_updated
        # add it to the packages list for index page generation after the loop ends
        packages.append(pkg_info)
        # re-generate the dataset HTML pages
        create_dataset_page(pkg_info)
        # if repo was updated, copy over CSV/JSON/* and ZIP files to the download dir
        # (we always generate them if offline)
        if updated or offline:
            create_dataset_page(pkg_info)
            datafiles = pkg_info['datafiles']
            zipf = zipfile.ZipFile(os.path.join(output_dir, files_dir, name + '.zip'), 'w')
            for d in datafiles:
                # copy CSV file
                target = os.path.join(output_dir, files_dir, os.path.basename(d['path']))
                shutil.copyfile(os.path.join(dir_name, d['path']), target)
                # generate JSON version
                csv2json(target, target.replace(".csv", ".json"))
                # make zip file
                zipf.write(os.path.join(dir_name, d['path']), d['basename'], compress_type=zipfile.ZIP_DEFLATED)
            try:
                zipf.write(pkg_info['readme_path'], 'README.md')
            except OSError:
                pass
            zipf.close()

    # generate the HTML index with the list of available packages
    create_index_page(packages)
    # generate the static JSON API of the data packages
    create_api(packages)
Example #40
0
def generate(offline=False,
             fetch_only=False,
             output_dir=OUTPUT_DIR,
             theme_dir=os.path.join(THEMES_DIR, 'centraldedados'),
             repo_dir=REPO_DIR,
             config_file=CONFIG_FILE):
    '''Main function that takes care of the whole process.'''
    global env, packages
    # Read the config file
    parser = SafeConfigParser()
    parser.read(config_file)
    # Load the theme and set up Jinja
    theme_name = parser.get('ui', 'theme')
    theme_dir = os.path.join(THEMES_DIR, theme_name)
    template_dir = os.path.join(theme_dir, "templates")
    env = jinja2.Environment(loader=jinja2.FileSystemLoader([template_dir]))

    # Set up the output directory
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    # Set up the dir for storing repositories
    if not os.path.exists(repo_dir):
        log.debug("Directory %s doesn't exist, creating it." % repo_dir)
        os.mkdir(repo_dir)
    # Copy htaccess file
    shutil.copyfile(os.path.join(theme_dir, 'static/htaccess'), os.path.join(output_dir, ".htaccess"))

    # Static CSS files
    css_dir = os.path.join(output_dir, "css")
    if os.path.exists(css_dir):
        shutil.rmtree(css_dir)
    shutil.copytree(os.path.join(theme_dir, "static/css"), css_dir)
    # Static JavaScript files
    js_dir = os.path.join(output_dir, "js")
    if os.path.exists(js_dir):
        shutil.rmtree(js_dir)
    shutil.copytree(os.path.join(theme_dir, "static/js"), js_dir)
    # Theme images
    img_dir = os.path.join(output_dir, "img")
    if os.path.exists(img_dir):
        shutil.rmtree(img_dir)
    shutil.copytree(os.path.join(theme_dir, "static/img"), img_dir)
    # Fonts
    fonts_dir = os.path.join(output_dir, "fonts")
    if os.path.exists(fonts_dir):
        shutil.rmtree(fonts_dir)
    shutil.copytree(os.path.join(theme_dir, "static/fonts"), fonts_dir)

    if not parser.items('repositories'):
        log.critical('No repository data in settings.conf (does it even exist?). Cannot proceed :(')
        sys.exit()
    # go through each specified dataset
    for r in parser.items('repositories'):
        name, url = r
        dir_name = os.path.join(repo_dir, name)
        repo = None

        # do we have a local copy?
        if os.path.isdir(dir_name):
            if not os.path.isdir(os.path.join(dir_name, '.git')):
                if url.endswith(".json"):
                    log.info("%s: Data package, refreshing" % name)
                    updated = fetch_data_package(url, dir_name)
                else:
                    log.info('%s: Unsupported repo, skipping update' % name)
                    continue

            elif not offline:
                repo = git.Repo(dir_name)
                origin = repo.remotes.origin
                try:
                    origin.fetch()
                except AssertionError:
                    # usually this fails on the first run, try again
                    origin.fetch()
                except git.exc.GitCommandError:
                    log.critical("%s: Fetch error, this dataset will be left out." % name)
                    continue
                # see if we have updates
                if not local_and_remote_are_at_same_commit(repo, origin):
                    log.debug("%s: Repo has new commits, updating local copy." % name)
                    updated = True
                    # connection errors can also happen if fetch succeeds but pull fails
                    try:
                        result = origin.pull()[0]
                    except git.exc.GitCommandError:
                        log.critical("%s: Pull error, this dataset will be left out." % name)
                        continue
                    if result.flags & result.ERROR:
                        log.error("%s: Pull error, but going ahead." % name)
                        updated = False
                else:
                    log.info("%s: No changes." % name)
                    updated = False
            else:
                log.debug("%s: Offline mode, using cached version." % name)
                # we set updated to True in order to re-generate everything
                updated = True
                repo = git.Repo(dir_name)
            if fetch_only:
                # if the --fetch-only flag was set, skip to the next dataset
                continue
        else:
            if offline:
                log.warn("%s: No local cache, skipping." % name)
                continue
            else:
                if url.endswith(".git"):
                    # Handle GIT Repository URL
                    log.info("%s: New repo, cloning." % name)
                    try:
                        repo = git.Repo.clone_from(url, dir_name)
                        # For faster checkouts, one file at a time:
                        # repo = git.Repo.clone_from(url, dir_name, n=True, depth=1)
                        # repo.git.checkout("HEAD", "datapackage.json")
                    except git.exc.GitCommandError as inst:
                        log.warn("%s: skipping %s" % (inst, name))
                        continue
                    updated = True

                elif url.endswith(".json"):
                    # Handle Data Package URL
                    log.info("%s: New data package, fetching." % name)
                    updated = fetch_data_package(url, dir_name)
                else:
                    log.warn("Unsupported repository: %s" % url)

        # get datapackage metadata
        try:
            pkg_info = process_datapackage(name, repo_dir, url)
        except ParseException as inst:
            log.warn("%s: skipping %s" % (inst, name))
            continue

        # set last updated time based on last commit, comes in Unix timestamp format so we convert
        import datetime
        if repo is not None:
            d = repo.head.commit.committed_date
        else:
            d = int(time.mktime(time.localtime()))
        last_updated = datetime.datetime.fromtimestamp(int(d)).strftime('%Y-%m-%d %H:%M:%S')
        pkg_info['last_updated'] = last_updated
        # add it to the packages list for index page generation after the loop ends
        packages.append(pkg_info)
        # re-generate the dataset HTML pages
        create_dataset_page(pkg_info, output_dir)
        # if repo was updated, copy over CSV/JSON/* and ZIP files to the download dir
        # (we always generate them if offline)
        if updated or offline:
            create_dataset_page(pkg_info, output_dir)
            datafiles = pkg_info['datafiles']
            zipf = zipfile.ZipFile(os.path.join(output_dir, name + '.zip'), 'w')
            for d in datafiles:
                log.info("Copying %s" % d['path'])
                # copy file
                target = os.path.join(output_dir, os.path.basename(d['path']))
                shutil.copyfile(os.path.join(dir_name, d['path']), target)
                # generate JSON version of CSV
                if target.endswith('.csv'):
                    csv2json(target, target.replace(".csv", ".json"))
                # make zip file
                zipf.write(os.path.join(dir_name, d['path']), d['basename'], compress_type=zipfile.ZIP_DEFLATED)
            if 'readme_path' in pkg_info:
                try:
                    zipf.write(pkg_info['readme_path'], 'README.md')
                except OSError:
                    pass
            zipf.close()

    # HTML index with the list of available packages
    create_index_page(packages, output_dir)
    # Static JSON API of the data packages
    create_api(packages, output_dir, repo_dir)
    # Static pages
    create_static_pages(output_dir)
    # Contact page
    create_contact_page(output_dir, parser.get('credentials', 'contact_email'))

    log.info("All static content is ready inside '%s'." % OUTPUT_DIR)