예제 #1
0
    def _parse_properties(self, htmltree):
        """Collect the page-level properties that have a configured selector.

        Walks ``self._selector_properties`` and, for every name that also
        appears in ``self.config['properties']``, builds a ``Selector`` from
        the configured value and evaluates it against ``htmltree``. Names
        listed in ``self._dates`` are built with ``is_date=True``. Names
        without a config entry are logged and left out of the result.

        :param htmltree: object handed unchanged to ``Selector.result``
            (presumably a parsed HTML tree -- confirm against the caller).
        :return: dict mapping each configured property name to whatever
            ``Selector.result`` returned for it.
        """
        found = {}
        selector_config = self.config['properties']

        for prop_name in self._selector_properties:
            if prop_name in selector_config:
                logging.info('Working on {}'.format(prop_name))

                # Only date properties receive the extra keyword; all other
                # properties are built with the selector value alone.
                spec = selector_config[prop_name]
                extra = {'is_date': True} if prop_name in self._dates else {}
                prop_selector = Selector(selector=spec, **extra)

                expression = prop_selector.selector
                type_name = prop_selector.SelectorType.__class__.__name__
                logging.info(
                    "Using selector \"{}\" with selector type {}".format(
                        expression, type_name))

                value = prop_selector.result(htmltree)
                logging.info('Found prop value {}'.format(value))
                found[prop_name] = value
            else:
                logging.info("No config found for {}".format(prop_name))

        return found
예제 #2
0
    def _parse_entries(self, htmltree):
        """Extract one dict of property values per entry found in ``htmltree``.

        A ``Selector`` built from ``self.config['entries']`` with
        ``multiple=True`` is evaluated against ``htmltree`` to obtain the
        entries. For each entry, every name in
        ``self._entry_selector_properties`` that is also a key of the entries
        config gets its own ``Selector`` (with ``is_date=True`` when the name
        is in ``self._dates``), which is evaluated against that entry. Names
        without a config entry are logged and skipped.

        :param htmltree: object handed unchanged to ``Selector.result``
            (presumably a parsed HTML tree -- confirm against the caller).
        :return: list with one dict per entry, in the order the entries
            selector yielded them; each dict maps property name to the
            corresponding ``Selector.result`` value.
        """
        collected = []
        logging.info("Working on entries")
        entries_config = self.config['entries']

        # The same config object drives the entry-level selector and is then
        # looked up by property name for the per-entry selectors below.
        entries_selector = Selector(entries_config, multiple=True)
        expression = entries_selector.selector
        type_name = entries_selector.SelectorType.__class__.__name__
        logging.info(
            "Using selector \"{}\" with selector type {}".format(
                expression, type_name))

        matched_entries = entries_selector.result(htmltree)
        logging.info('Got {} entries'.format(len(matched_entries)))

        for entry_node in matched_entries:
            logging.info('Working on entry {}'.format(entry_node))
            entry_values = {}

            for prop_name in self._entry_selector_properties:
                if prop_name in entries_config:
                    logging.info('Working on prop {}'.format(prop_name))

                    # Only date properties receive the extra keyword.
                    spec = entries_config[prop_name]
                    extra = (
                        {'is_date': True} if prop_name in self._dates else {}
                    )
                    prop_selector = Selector(selector=spec, **extra)

                    prop_expression = prop_selector.selector
                    prop_type_name = (
                        prop_selector.SelectorType.__class__.__name__
                    )
                    logging.info(
                        "Using selector \"{}\" with selector type {}".format(
                            prop_expression, prop_type_name))

                    entry_values[prop_name] = prop_selector.result(entry_node)
                else:
                    logging.info("No config found for {}".format(prop_name))

            collected.append(entry_values)

        return collected