def check_user_existence(self, name: str):
    """Check that the Reddit account ``name`` can be looked up.

    The redditor object is obtained from ``self.reddit_instance`` and its
    ``id`` attribute is read inside a ``try`` block so that lookup failures
    can be translated into downloader errors.

    Args:
        name: The user name handed to ``reddit_instance.redditor``.

    Raises:
        errors.BulkDownloaderException: If reading ``id`` raises
            ``prawcore.exceptions.NotFound``, or if it raises
            ``AttributeError`` while the object carries an ``is_suspended``
            attribute.
    """
    redditor = self.reddit_instance.redditor(name=name)
    try:
        if redditor.id:
            return None
    except prawcore.exceptions.NotFound:
        raise errors.BulkDownloaderException(f'Could not find user {name}')
    except AttributeError:
        # An AttributeError without the suspension marker is not treated
        # as an error.
        if not hasattr(redditor, 'is_suspended'):
            return None
        raise errors.BulkDownloaderException(f'User {name} is banned')
    return None
 def check_subreddit_status(subreddit: praw.models.Subreddit):
     """Raise a downloader error if ``subreddit`` cannot be scraped.

     Sources whose ``display_name`` is ``'all'`` or ``'friends'`` are
     accepted without any lookup. For every other source the ``id``
     attribute is read; the ``prawcore`` errors raised by that read are
     translated into ``BulkDownloaderException``.

     Args:
         subreddit: The subreddit object to probe.

     Raises:
         errors.BulkDownloaderException: If reading ``id`` raises
             ``prawcore.NotFound`` (source missing) or
             ``prawcore.Forbidden`` (source private).
     """
     if subreddit.display_name in ('all', 'friends'):
         return
     try:
         # A plain attribute read rather than `assert subreddit.id`:
         # assert statements are stripped under `python -O`, which would
         # silently skip the lookup and therefore the whole check.
         # NOTE(review): presumably this read triggers PRAW's lazy fetch.
         subreddit.id
     except prawcore.NotFound as err:
         raise errors.BulkDownloaderException(
             f'Source {subreddit.display_name} does not exist or cannot be found'
         ) from err
     except prawcore.Forbidden as err:
         raise errors.BulkDownloaderException(
             f'Source {subreddit.display_name} is private and cannot be scraped'
         ) from err
 def sanitise_subreddit_name(subreddit: str) -> str:
     """Reduce a subreddit reference to the bare subreddit name.

     An optional leading ``https://www.reddit.com/``, an optional ``r/``
     prefix and a single optional trailing slash are removed; whatever is
     left is returned unchanged.

     Args:
         subreddit: The user-supplied subreddit string.

     Returns:
         The captured name portion of ``subreddit``.

     Raises:
         errors.BulkDownloaderException: If the pattern does not match.
     """
     found = re.match(
         r'^(?:https://www\.reddit\.com/)?(?:r/)?(.*?)/?$', subreddit)
     if found:
         return found.group(1)
     raise errors.BulkDownloaderException(
         f'Could not find subreddit name in string {subreddit}')
    def create_file_logger(self):
        """Attach a rotating file handler to the root logger.

        The log file is ``log_output.txt`` inside ``self.config_directory``
        unless ``self.args.log`` names another location. The number of
        backups kept comes from the ``backup_log_count`` option of the
        ``DEFAULT`` config section (3 when absent). A log file left over
        from a previous run is rolled over so each run starts a fresh file.

        Raises:
            errors.BulkDownloaderException: If the parent directory of a
                user-supplied log path does not exist.
            PermissionError: Re-raised when the rollover of an existing log
                file is not permitted.
        """
        main_logger = logging.getLogger()
        if self.args.log is None:
            log_path = Path(self.config_directory, 'log_output.txt')
        else:
            # expanduser() has to run before resolve(): once the path has
            # been made absolute it no longer starts with '~', so the home
            # directory would never be substituted.
            log_path = Path(self.args.log).expanduser().resolve()
            if not log_path.parent.exists():
                raise errors.BulkDownloaderException(
                    'Designated location for logfile does not exist')
        backup_count = self.cfg_parser.getint('DEFAULT',
                                              'backup_log_count',
                                              fallback=3)
        # Must be recorded before the handler is built: the handler opens
        # the file in append mode immediately, which creates it, so a later
        # exists() check would always be true and an empty backup would be
        # produced on the very first run.
        had_previous_log = log_path.exists()
        file_handler = logging.handlers.RotatingFileHandler(
            log_path,
            mode='a',
            backupCount=backup_count,
        )
        if had_previous_log:
            try:
                file_handler.doRollover()
            except PermissionError:
                logger.critical(
                    'Cannot rollover logfile, make sure this is the only '
                    'BDFR process or specify alternate logfile location')
                raise
        formatter = logging.Formatter(
            '[%(asctime)s - %(name)s - %(levelname)s] - %(message)s')
        file_handler.setFormatter(formatter)
        # Level 0 (NOTSET) lets the handler accept every record it is given.
        file_handler.setLevel(0)

        main_logger.addHandler(file_handler)
class RedditConnector(metaclass=ABCMeta):
    """Common start-up logic for tools that connect to Reddit.

    Constructing an instance resolves the working directories, loads the
    configuration file, sets up logging, builds the filters and the PRAW
    client, and finally collects the listing generators for every requested
    source. Several of the methods called here are not defined in this
    part of the class.
    """

    def __init__(self, args: Configuration):
        """Store ``args`` and run the complete set-up sequence.

        Args:
            args: Parsed run options; several of its attributes are
                normalised in place during set-up.
        """
        self.args = args
        self.config_directories = appdirs.AppDirs('bdfr', 'BDFR')
        self.run_time = datetime.now().isoformat()
        self._setup_internal_objects()

        self.reddit_lists = self.retrieve_reddit_lists()

    def _setup_internal_objects(self):
        """Build every helper object the connector needs, in dependency order.

        Directories must exist before the config is loaded, and the config
        must be loaded before the file logger and the Reddit instance are
        created, so the order of the calls below matters.
        """
        self.determine_directories()
        self.load_config()
        self.create_file_logger()

        self.read_config()

        self.parse_disabled_modules()

        # Level 9 sits just below logging.DEBUG (10).
        self.download_filter = self.create_download_filter()
        logger.log(9, 'Created download filter')
        self.time_filter = self.create_time_filter()
        logger.log(9, 'Created time filter')
        self.sort_filter = self.create_sort_filter()
        logger.log(9, 'Created sort filter')
        self.file_name_formatter = self.create_file_name_formatter()
        logger.log(9, 'Create file name formatter')

        self.create_reddit_instance()
        # Drop every user for which resolve_user_name returned a falsy value.
        self.args.user = list(
            filter(None,
                   [self.resolve_user_name(user) for user in self.args.user]))

        self.excluded_submission_ids = set.union(
            self.read_id_files(self.args.exclude_id_file),
            set(self.args.exclude_id),
        )

        self.args.link = list(
            itertools.chain(self.args.link,
                            self.read_id_files(self.args.include_id_file)))

        self.master_hash_list = {}
        self.authenticator = self.create_authenticator()
        logger.log(9, 'Created site authenticator')

        self.args.skip_subreddit = self.split_args_input(
            self.args.skip_subreddit)
        self.args.skip_subreddit = {
            sub.lower() for sub in self.args.skip_subreddit
        }

    def read_config(self):
        """Fill unset run options from the config file, then rewrite the file.

        ``max_wait_time`` (default 120), ``time_format`` (default ``'ISO'``,
        also used when the configured value is only whitespace or quotes)
        and ``disable_module`` are taken from the ``DEFAULT`` section when
        the corresponding argument was not supplied.
        """
        if self.args.max_wait_time is None:
            self.args.max_wait_time = self.cfg_parser.getint('DEFAULT',
                                                             'max_wait_time',
                                                             fallback=120)
            logger.debug(
                f'Setting maximum download wait time to {self.args.max_wait_time} seconds'
            )
        if self.args.time_format is None:
            option = self.cfg_parser.get('DEFAULT',
                                         'time_format',
                                         fallback='ISO')
            if re.match(r'^[\s\'\"]*$', option):
                option = 'ISO'
            logger.debug(f'Setting datetime format string to {option}')
            self.args.time_format = option
        if not self.args.disable_module:
            # Kept as a one-element list; parse_disabled_modules splits it.
            self.args.disable_module = [
                self.cfg_parser.get('DEFAULT', 'disabled_modules', fallback='')
            ]
        # Update config on disk
        with open(self.config_location, 'w') as file:
            self.cfg_parser.write(file)

    def parse_disabled_modules(self):
        """Normalise ``args.disable_module`` into a set of lower-case names."""
        disabled_modules = self.split_args_input(self.args.disable_module)
        self.args.disable_module = {
            name.strip().lower() for name in disabled_modules
        }
        logger.debug(
            f'Disabling the following modules: {", ".join(self.args.disable_module)}'
        )

    def create_reddit_instance(self):
        """Create ``self.reddit_instance`` and set ``self.authenticated``.

        With ``args.authenticate`` set, a user token is requested through
        OAuth2 and written to the config file when the config has none yet,
        and the PRAW client is built with a token manager. Otherwise a
        client using only the application credentials is built.
        """
        if self.args.authenticate:
            logger.debug('Using authenticated Reddit instance')
            if not self.cfg_parser.has_option('DEFAULT', 'user_token'):
                logger.log(9, 'Commencing OAuth2 authentication')
                scopes = self.cfg_parser.get(
                    'DEFAULT',
                    'scopes',
                    fallback='identity, history, read, save')
                scopes = OAuth2Authenticator.split_scopes(scopes)
                oauth2_authenticator = OAuth2Authenticator(
                    scopes,
                    self.cfg_parser.get('DEFAULT', 'client_id'),
                    self.cfg_parser.get('DEFAULT', 'client_secret'),
                )
                token = oauth2_authenticator.retrieve_new_token()
                self.cfg_parser['DEFAULT']['user_token'] = token
                # Persist the new token straight away.
                with open(self.config_location, 'w') as file:
                    self.cfg_parser.write(file, True)
            token_manager = OAuth2TokenManager(self.cfg_parser,
                                               self.config_location)

            self.authenticated = True
            self.reddit_instance = praw.Reddit(
                client_id=self.cfg_parser.get('DEFAULT', 'client_id'),
                client_secret=self.cfg_parser.get('DEFAULT', 'client_secret'),
                user_agent=socket.gethostname(),
                token_manager=token_manager,
            )
        else:
            logger.debug('Using unauthenticated Reddit instance')
            self.authenticated = False
            self.reddit_instance = praw.Reddit(
                client_id=self.cfg_parser.get('DEFAULT', 'client_id'),
                client_secret=self.cfg_parser.get('DEFAULT', 'client_secret'),
                user_agent=socket.gethostname(),
            )

    def retrieve_reddit_lists(self) -> list[praw.models.ListingGenerator]:
        """Collect the results of every source getter into one list.

        Returns:
            Subreddit, multireddit, user and link results, in that order.
        """
        master_list = []
        master_list.extend(self.get_subreddits())
        logger.log(9, 'Retrieved subreddits')
        master_list.extend(self.get_multireddits())
        logger.log(9, 'Retrieved multireddits')
        master_list.extend(self.get_user_data())
        logger.log(9, 'Retrieved user data')
        master_list.extend(self.get_submissions_from_link())
        logger.log(9, 'Retrieved submissions for given links')
        return master_list

    def determine_directories(self):
        """Resolve the download and config directories and create both."""
        # expanduser() has to run before resolve(): once the path has been
        # made absolute it no longer starts with '~', so the home directory
        # would never be substituted.
        self.download_directory = Path(
            self.args.directory).expanduser().resolve()
        self.config_directory = Path(self.config_directories.user_config_dir)

        self.download_directory.mkdir(exist_ok=True, parents=True)
        self.config_directory.mkdir(exist_ok=True, parents=True)

    def load_config(self):
        """Locate a configuration file and load it into ``self.cfg_parser``.

        Search order: the file named by ``args.config`` (if it exists),
        ``config.cfg`` then ``default_config.cfg`` in the current directory,
        and the same two names in the user config directory. When none is
        found, the ``default_config.cfg`` bundled with the ``bdfr`` package
        is copied into the user config directory and that copy is used.

        ``self.config_location`` is set to the file that was loaded.
        """
        self.cfg_parser = configparser.ConfigParser()
        if self.args.config:
            if (cfg_path := Path(self.args.config)).exists():
                self.cfg_parser.read(cfg_path)
                self.config_location = cfg_path
                return
            # Do not ignore a user-supplied path silently.
            logger.warning(
                f'Could not find config file at {self.args.config}, searching default locations'
            )
        possible_paths = [
            Path('./config.cfg'),
            Path('./default_config.cfg'),
            Path(self.config_directory, 'config.cfg'),
            Path(self.config_directory, 'default_config.cfg'),
        ]
        self.config_location = None
        for path in possible_paths:
            if path.exists():
                self.config_location = path
                logger.debug(f'Loading configuration from {path}')
                break
        if not self.config_location:
            # The path yielded by importlib.resources.path may only be
            # valid inside the with block, and the config file is written
            # to later on, so work with the copy in the user config
            # directory rather than with the packaged resource itself.
            user_copy = Path(self.config_directory, 'default_config.cfg')
            with importlib.resources.path('bdfr',
                                          'default_config.cfg') as path:
                shutil.copy(path, user_copy)
            self.config_location = user_copy
        self.cfg_parser.read(self.config_location)
class RedditDownloader:
    """Downloader set-up: config, logging, filters, PRAW client and sources.

    Constructing an instance resolves the working directories, loads the
    configuration file, sets up logging, builds the filters and the PRAW
    client, and finally collects the listing generators for every requested
    source. Several of the methods called here are not defined in this
    part of the class.
    """

    def __init__(self, args: Configuration):
        """Store ``args`` and run the complete set-up sequence.

        Args:
            args: Parsed run options; some of its attributes are normalised
                in place during set-up.
        """
        self.args = args
        self.config_directories = appdirs.AppDirs('bdfr', 'BDFR')
        self.run_time = datetime.now().isoformat()
        self._setup_internal_objects()

        self.reddit_lists = self._retrieve_reddit_lists()

    def _setup_internal_objects(self):
        """Build every helper object the downloader needs, in dependency order.

        Directories must exist before the config is loaded, and the config
        must be loaded before the file logger and the Reddit instance are
        created, so the order of the calls below matters.
        """
        self._determine_directories()
        self._load_config()
        self._create_file_logger()

        self._read_config()

        # Level 9 sits just below logging.DEBUG (10).
        self.download_filter = self._create_download_filter()
        logger.log(9, 'Created download filter')
        self.time_filter = self._create_time_filter()
        logger.log(9, 'Created time filter')
        self.sort_filter = self._create_sort_filter()
        logger.log(9, 'Created sort filter')
        self.file_name_formatter = self._create_file_name_formatter()
        logger.log(9, 'Create file name formatter')

        self._create_reddit_instance()
        self._resolve_user_name()

        self.excluded_submission_ids = self._read_excluded_ids()

        if self.args.search_existing:
            self.master_hash_list = self.scan_existing_files(
                self.download_directory)
        else:
            self.master_hash_list = {}
        self.authenticator = self._create_authenticator()
        logger.log(9, 'Created site authenticator')

        self.args.skip_subreddit = self._split_args_input(
            self.args.skip_subreddit)
        self.args.skip_subreddit = {
            sub.lower() for sub in self.args.skip_subreddit
        }

    def _read_config(self):
        """Fill unset run options from the config file, then rewrite the file.

        When ``args.max_wait_time`` is unset it is read from the ``DEFAULT``
        section; a value of 120 is stored in the config first if the option
        is missing there.
        """
        if self.args.max_wait_time is None:
            if not self.cfg_parser.has_option('DEFAULT', 'max_wait_time'):
                self.cfg_parser.set('DEFAULT', 'max_wait_time', '120')
                logger.log(
                    9,
                    'Wrote default download wait time download to config file')
            self.args.max_wait_time = self.cfg_parser.getint(
                'DEFAULT', 'max_wait_time')
            logger.debug(
                f'Setting maximum download wait time to {self.args.max_wait_time} seconds'
            )
        # Update config on disk
        with open(self.config_location, 'w') as file:
            self.cfg_parser.write(file)

    def _create_reddit_instance(self):
        """Create ``self.reddit_instance`` and set ``self.authenticated``.

        With ``args.authenticate`` set, a user token is requested through
        OAuth2 and written to the config file when the config has none yet,
        and the PRAW client is built with a token manager. Otherwise a
        client using only the application credentials is built.
        """
        if self.args.authenticate:
            logger.debug('Using authenticated Reddit instance')
            if not self.cfg_parser.has_option('DEFAULT', 'user_token'):
                logger.log(9, 'Commencing OAuth2 authentication')
                # Same default as RedditConnector, so a config without a
                # `scopes` entry does not abort authentication.
                scopes = self.cfg_parser.get(
                    'DEFAULT',
                    'scopes',
                    fallback='identity, history, read, save')
                scopes = OAuth2Authenticator.split_scopes(scopes)
                oauth2_authenticator = OAuth2Authenticator(
                    scopes,
                    self.cfg_parser.get('DEFAULT', 'client_id'),
                    self.cfg_parser.get('DEFAULT', 'client_secret'),
                )
                token = oauth2_authenticator.retrieve_new_token()
                self.cfg_parser['DEFAULT']['user_token'] = token
                # Persist the new token straight away.
                with open(self.config_location, 'w') as file:
                    self.cfg_parser.write(file, True)
            token_manager = OAuth2TokenManager(self.cfg_parser,
                                               self.config_location)

            self.authenticated = True
            self.reddit_instance = praw.Reddit(
                client_id=self.cfg_parser.get('DEFAULT', 'client_id'),
                client_secret=self.cfg_parser.get('DEFAULT', 'client_secret'),
                user_agent=socket.gethostname(),
                token_manager=token_manager,
            )
        else:
            logger.debug('Using unauthenticated Reddit instance')
            self.authenticated = False
            self.reddit_instance = praw.Reddit(
                client_id=self.cfg_parser.get('DEFAULT', 'client_id'),
                client_secret=self.cfg_parser.get('DEFAULT', 'client_secret'),
                user_agent=socket.gethostname(),
            )

    def _retrieve_reddit_lists(self) -> list[praw.models.ListingGenerator]:
        """Collect the results of every source getter into one list.

        Returns:
            Subreddit, multireddit, user and link results, in that order.
        """
        master_list = []
        master_list.extend(self._get_subreddits())
        logger.log(9, 'Retrieved subreddits')
        master_list.extend(self._get_multireddits())
        logger.log(9, 'Retrieved multireddits')
        master_list.extend(self._get_user_data())
        logger.log(9, 'Retrieved user data')
        master_list.extend(self._get_submissions_from_link())
        logger.log(9, 'Retrieved submissions for given links')
        return master_list

    def _determine_directories(self):
        """Resolve the download and config directories and create both."""
        # expanduser() has to run before resolve(): once the path has been
        # made absolute it no longer starts with '~', so the home directory
        # would never be substituted.
        self.download_directory = Path(
            self.args.directory).expanduser().resolve()
        self.config_directory = Path(self.config_directories.user_config_dir)

        self.download_directory.mkdir(exist_ok=True, parents=True)
        self.config_directory.mkdir(exist_ok=True, parents=True)

    def _load_config(self):
        """Locate a configuration file and load it into ``self.cfg_parser``.

        Search order: the file named by ``args.config`` (if it exists),
        ``config.cfg`` then ``default_config.cfg`` in the current directory,
        and the same two names in the user config directory. When none is
        found, the ``default_config.cfg`` bundled with the ``bdfr`` package
        is copied into the user config directory and that copy is used.

        ``self.config_location`` is set to the file that was loaded.
        """
        self.cfg_parser = configparser.ConfigParser()
        if self.args.config:
            if (cfg_path := Path(self.args.config)).exists():
                self.cfg_parser.read(cfg_path)
                self.config_location = cfg_path
                return
            # Do not ignore a user-supplied path silently.
            logger.warning(
                f'Could not find config file at {self.args.config}, searching default locations'
            )
        possible_paths = [
            Path('./config.cfg'),
            Path('./default_config.cfg'),
            Path(self.config_directory, 'config.cfg'),
            Path(self.config_directory, 'default_config.cfg'),
        ]
        self.config_location = None
        for path in possible_paths:
            if path.exists():
                self.config_location = path
                logger.debug(f'Loading configuration from {path}')
                break
        if not self.config_location:
            # Use the documented context-manager protocol rather than the
            # private `.gen` attribute: draining that generator runs its
            # clean-up before the copy is made. The path may only be valid
            # inside the with block and the config file is written to
            # later on, so work with the copy in the user config directory.
            user_copy = Path(self.config_directory, 'default_config.cfg')
            with importlib.resources.path('bdfr',
                                          'default_config.cfg') as path:
                shutil.copy(path, user_copy)
            self.config_location = user_copy
        self.cfg_parser.read(self.config_location)