Exemple #1
0
    def _load_input_calendars(self) -> List[tuple]:
        """Fetch the calendar rows that should be processed.

        Selects ``id``, ``url`` and ``html_file_path`` from the ``calendar``
        table. When ``self.args.domain`` is set, only rows whose ``url``
        equals the URL of that domain's base are returned. Unless
        ``self.args.parse_all`` is set, only rows with ``is_parsed`` equal
        to 0 are returned.

        Returns:
            The matching rows, as produced by ``cursor.fetchall()``.

        Raises:
            SystemExit: With status 1 when the domain is unknown or its base
                has no ``url`` entry.
        """
        self.logger.info("Loading input calendars...")

        conditions = []
        params = []

        if self.args.domain:
            website_base = utils.get_base_by_domain(self.args.domain)
            if website_base is None:
                self.logger.critical("Unknown domain '{}'!".format(
                    self.args.domain))
                # Non-zero status so calling scripts can detect the failure.
                sys.exit(1)
            calendar_url = website_base.get('url', None)
            if calendar_url is None:
                self.logger.critical(
                    "Specified domain '{}' is no longer active!".format(
                        self.args.domain))
                sys.exit(1)
            # Bind the URL instead of splicing it into the SQL text: a quote
            # inside the value can no longer break or alter the statement.
            conditions.append("url = ?")
            params.append(calendar_url)

        if not self.args.parse_all:
            conditions.append("is_parsed = 0")

        query = "SELECT id, url, html_file_path FROM calendar"
        if conditions:
            query += " WHERE " + " AND ".join(conditions)

        # NOTE(review): '?' is the qmark placeholder style used by sqlite3;
        # confirm self.connection is a sqlite3 connection.
        cursor = self.connection.execute(query, params)
        return cursor.fetchall()
Exemple #2
0
    def _load_input_events(self) -> List[tuple]:
        """Fetch the event URLs that should be processed.

        Each returned row holds the event URL id, the event URL and the URL
        of the calendar the event belongs to. Optional filters:

        * ``self.args.domain``: only events whose calendar URL equals the
          URL of that domain's base.
        * ``self.args.event_url``: only the event with exactly this URL.
        * unless ``self.args.redownload_file`` is set, only event URLs that
          have no matching row in ``event_html``.

        Returns:
            The matching rows, as produced by ``cursor.fetchall()``.

        Raises:
            SystemExit: With status 1 when the domain is unknown or its base
                has no ``url`` entry.
        """
        self.logger.info("Loading input events...")

        query = '''
                    SELECT eu.id, eu.url,
                           c.url
                    FROM event_url eu
                         LEFT OUTER JOIN event_html eh ON eu.id = eh.event_url_id
                         INNER JOIN calendar c ON eu.calendar_id = c.id
                    WHERE 1 = 1
                '''
        # Values are bound through placeholders rather than formatted into
        # the SQL text, so quotes in a URL cannot break or alter the query.
        params = []

        if self.args.domain:
            website_base = utils.get_base_by_domain(self.args.domain)
            if website_base is None:
                self.logger.critical("Unknown domain '{}'!".format(
                    self.args.domain))
                # Non-zero status so calling scripts can detect the failure.
                sys.exit(1)
            calendar_url = website_base.get('url', None)
            if calendar_url is None:
                self.logger.critical(
                    "Specified domain '{}' is no longer active!".format(
                        self.args.domain))
                sys.exit(1)
            query += " AND c.url = ?"
            params.append(calendar_url)

        if self.args.event_url:
            query += " AND eu.url = ?"
            params.append(self.args.event_url)

        if not self.args.redownload_file:
            # The LEFT OUTER JOIN leaves eh.* NULL for events without HTML.
            query += " AND eh.event_url_id IS NULL"

        # NOTE(review): '?' is the qmark placeholder style used by sqlite3;
        # confirm self.connection is a sqlite3 connection.
        cursor = self.connection.execute(query, params)
        return cursor.fetchall()
Exemple #3
0
    def _load_input_calendars(self) -> List[dict]:
        """Return the website bases whose calendars should be processed.

        When ``self.args.domain`` is set, the base returned by
        ``utils.get_base_by_domain`` for that domain is returned as a
        one-element list. Otherwise the result of
        ``utils.get_active_base()`` is returned unchanged.

        Raises:
            SystemExit: With status 1 when the domain is unknown or its base
                has no ``url`` entry.
        """
        self.logger.info("Loading input calendars...")

        # No domain filter: hand back every active base.
        if not self.args.domain:
            return utils.get_active_base()

        website_base = utils.get_base_by_domain(self.args.domain)
        if website_base is None:
            self.logger.critical("Unknown domain '{}'!".format(self.args.domain))
            # Non-zero status so calling scripts can detect the failure.
            sys.exit(1)
        if website_base.get('url', None) is None:
            self.logger.critical("Specified domain '{}' is no longer active!".format(self.args.domain))
            sys.exit(1)
        return [website_base]
Exemple #4
0
    def _load_input_events(self) -> List[tuple]:
        """Fetch the parsed events that should be processed.

        Each returned row holds the event data id, its ``datetime`` value,
        the event URL and the URL of the calendar it belongs to. Optional
        filters:

        * unless ``self.args.process_all`` is set, events whose id already
          appears in ``event_data_datetime.event_data_id`` are skipped.
        * ``self.args.domain``: only events whose calendar URL equals the
          URL of that domain's base.
        * ``self.args.event_url``: only events with exactly this URL.
        * ``self.args.events_ids``: only events whose id is in this
          collection.

        Returns:
            The matching rows, as produced by ``cursor.fetchall()``.

        Raises:
            SystemExit: With status 1 when the domain is unknown or its base
                has no ``url`` entry.
        """
        self.logger.info("Loading input events...")

        query = '''
                    SELECT ed.id, ed.datetime,
                           eu.url,
                           c.url
                    FROM event_data ed
                         INNER JOIN event_html eh ON ed.event_html_id = eh.id
                         INNER JOIN event_url eu ON eh.event_url_id = eu.id
                         INNER JOIN calendar c ON eu.calendar_id = c.id
                    WHERE 1 = 1
                '''
        # Values are bound through placeholders rather than formatted into
        # the SQL text, so quotes in a value cannot break or alter the query.
        params = []

        if not self.args.process_all:
            query += " AND ed.id NOT IN (SELECT DISTINCT event_data_id FROM event_data_datetime)"

        if self.args.domain:
            website_base = utils.get_base_by_domain(self.args.domain)
            if website_base is None:
                self.logger.critical("Unknown domain '{}'!".format(
                    self.args.domain))
                # Non-zero status so calling scripts can detect the failure.
                sys.exit(1)
            calendar_url = website_base.get('url', None)
            if calendar_url is None:
                self.logger.critical(
                    "Specified domain '{}' is no longer active!".format(
                        self.args.domain))
                sys.exit(1)
            query += " AND c.url = ?"
            params.append(calendar_url)

        if self.args.event_url:
            query += " AND eu.url = ?"
            params.append(self.args.event_url)

        if self.args.events_ids:
            event_ids = list(self.args.events_ids)
            # One placeholder per id; the ids themselves travel as parameters.
            placeholders = ",".join("?" for _ in event_ids)
            query += " AND ed.id IN ({})".format(placeholders)
            params.extend(event_ids)

        # NOTE(review): '?' is the qmark placeholder style used by sqlite3;
        # confirm self.connection is a sqlite3 connection.
        cursor = self.connection.execute(query, params)
        return cursor.fetchall()