Exemple #1
0
    def _limited_call(self, func, *args, **kwargs):
        """Rate limit calls to a function.
        """

        # Check seconds that have passed
        now = datetime.datetime.now()
        diff = (now - self._rate_limit_start).total_seconds()

        if diff >= 60:
            # If greater than a minute, reset the rate limit
            self._rate_limit_count = 0
            self._rate_limit_start = now
        else:
            # Check if the per-minute limit has been exceeded
            if self._rate_limit_count >= constants.FA_PAGE_REQUESTS_PER_MINUTE:
                # Wait until next minute, then reset the count/time
                wait_time = 60 - diff
                logger.debug("Hit rate limit, waiting %d seconds" % wait_time)
                time.sleep(wait_time)
                self._rate_limit_count = 0
                self._rate_limit_start = datetime.datetime.now()

        self._rate_limit_count += 1

        return func(*args, **kwargs)
Exemple #2
0
    def _limited_call(self, func, *args, **kwargs):
        """Rate limit calls to a function.
        """

        # Check seconds that have passed
        now = datetime.datetime.now()
        diff = (now - self._rate_limit_start).total_seconds()

        if diff >= 60:
            # If greater than a minute, reset the rate limit
            self._rate_limit_count = 0
            self._rate_limit_start = now
        else:
            # Check if the per-minute limit has been exceeded
            if self._rate_limit_count >= constants.FA_PAGE_REQUESTS_PER_MINUTE:
                # Wait until next minute, then reset the count/time
                wait_time = 60 - diff
                logger.debug("Hit rate limit, waiting %d seconds" % wait_time)
                time.sleep(wait_time)
                self._rate_limit_count = 0
                self._rate_limit_start = datetime.datetime.now()

        self._rate_limit_count += 1

        return func(*args, **kwargs)
Exemple #3
0
    def _load_folders(self):
        """Refresh the folder tree from the user's profile view endpoint.

        Requests ``/api/users/<username>/view`` below
        ``constants.WZL_ROOT`` and walks the ``"folders"`` list of the JSON
        reply.  Each entry creates or updates the ``Folder`` cached in
        ``self._folders`` under its ``"folder_id"`` and is appended to
        ``self._root_folders``.  Entries under an optional ``"subfolders"``
        key are treated the same way and attached to the parent's
        ``children``.  Titles are overwritten and ``children`` lists are
        restarted on every call.
        """
        logger.debug("Loading folders")

        self._root_folders = []

        def materialize(struct):
            # Reuse the cached Folder for this id if there is one, so that
            # existing objects are updated rather than replaced.
            entry = self._folders.get(struct["folder_id"])
            if entry is None:
                entry = Folder()
                entry._session = self
                entry.id = struct["folder_id"]
                self._folders[entry.id] = entry

            entry.title = struct["title"]
            entry.children = []
            return entry

        endpoint = constants.WZL_ROOT + "/api/users/%s/view" % self.username
        response = self._requests.get(endpoint)

        for top_struct in response.json()["folders"]:
            top_folder = materialize(top_struct)
            self._root_folders.append(top_folder)

            # Entries without a "subfolders" key have nothing to nest.
            if "subfolders" not in top_struct:
                continue

            for child_struct in top_struct["subfolders"]:
                child_folder = materialize(child_struct)
                top_folder.children.append(child_folder)
Exemple #4
0
    def _scan_folder(self, folder):
        """Fill ``folder.submissions`` from the folder's gallery pages.

        Args:
            folder: Object whose ``id`` is formatted into the gallery URL
                with ``%d`` and whose ``submissions`` attribute is replaced
                by a new list of the scanned submissions.
        """
        logger.debug("Scanning folder %r" % folder)

        # "%%d" comes out of this formatting pass as a literal "%d", so the
        # URL handed to _scan_submission_page still has one open slot.
        url_format = constants.FA_ROOT + (
            "/gallery/%s/folder/%d/-/%%d/" % (self.username, folder.id))
        found = self._scan_submission_page(url_format)

        folder.submissions = list(found)
Exemple #5
0
    def _scan_folder(self, folder):
        """Replace ``folder.submissions`` with a scan of the folder's pages.

        Args:
            folder: Object providing ``id`` (formatted into the URL with
                ``%d``); its ``submissions`` attribute is overwritten with
                a new list.
        """
        logger.debug("Scanning folder %r" % folder)

        # The doubled "%%d" is left behind as "%d" after this formatting
        # step, keeping one slot open in the URL passed on below.
        page_url_format = constants.FA_ROOT + (
            "/gallery/%s/folder/%d/-/%%d/" % (self.username, folder.id))
        scanned = self._scan_submission_page(page_url_format)

        folder.submissions = list(scanned)
Exemple #6
0
    def _scan_gallery(self, folder_id=None):
        """Collect submissions from the user's gallery endpoint.

        The endpoint is requested repeatedly; each reply's ``"nextid"`` is
        sent back as the ``nextid`` parameter of the following request
        until a reply carries ``None`` there.

        Args:
            folder_id: When not None, sent as the ``folderid`` query
                parameter.  When None, the result is also stored in
                ``self._gallery_submissions``.

        Returns:
            A list with the result of ``self._load_submission_from_struct``
            for every entry of every reply's ``"submissions"``, in order.
        """
        cursor = None
        endpoint = constants.WZL_ROOT + "/api/users/%s/gallery" % self.username
        collected = []

        logger.debug("Scanning gallery folder %r" % folder_id)

        while True:
            query = {}
            if cursor is not None:
                query["nextid"] = cursor
            if folder_id is not None:
                query["folderid"] = folder_id

            payload = self._requests.get(endpoint, params=query).json()
            cursor = payload["nextid"]

            for entry in payload["submissions"]:
                collected.append(self._load_submission_from_struct(entry))

            if cursor is None:
                break

            # Only reached when another page follows.
            logger.debug("Found %d submissions" % len(payload["submissions"]))

        if folder_id is None:
            self._gallery_submissions = collected

        return collected
Exemple #7
0
    def _scan_gallery(self, folder_id=None):
        """Page through the user's gallery endpoint and gather submissions.

        Every reply's ``"nextid"`` value is passed as the ``nextid``
        parameter of the next request; a ``None`` value ends the scan.

        Args:
            folder_id: Sent as the ``folderid`` query parameter when it is
                not None.  With None, the gathered list is additionally
                stored in ``self._gallery_submissions``.

        Returns:
            The list of objects produced by
            ``self._load_submission_from_struct`` for each entry of each
            reply's ``"submissions"``, in the order received.
        """
        continuation = None
        gallery_url = (
            constants.WZL_ROOT + "/api/users/%s/gallery" % self.username)
        gathered = []

        logger.debug("Scanning gallery folder %r" % folder_id)

        while True:
            request_params = {}
            if continuation is not None:
                request_params["nextid"] = continuation
            if folder_id is not None:
                request_params["folderid"] = folder_id

            reply = self._requests.get(gallery_url, params=request_params)
            body = reply.json()
            continuation = body["nextid"]

            for struct in body["submissions"]:
                gathered.append(self._load_submission_from_struct(struct))

            if continuation is None:
                break

            # Logged only when there is a further page to request.
            logger.debug("Found %d submissions" % len(body["submissions"]))

        if folder_id is None:
            self._gallery_submissions = gathered

        return gathered
Exemple #8
0
    def _load_folders(self):
        """Reload the user's folders from the profile view endpoint.

        The JSON reply of ``/api/users/<username>/view`` (below
        ``constants.WZL_ROOT``) supplies a ``"folders"`` list.  Each entry
        creates or updates the ``Folder`` cached in ``self._folders`` under
        its ``"folder_id"`` and is added to ``self._root_folders``.  The
        entries of an optional ``"subfolders"`` key are processed the same
        way and appended to the parent's ``children``.  Every call resets
        titles and ``children`` lists.
        """
        logger.debug("Loading folders")

        self._root_folders = []

        def upsert(struct):
            # Look the folder up by id first so an object created by an
            # earlier call is updated in place instead of duplicated.
            cached = self._folders.get(struct["folder_id"])
            if cached is None:
                cached = Folder()
                cached._session = self
                cached.id = struct["folder_id"]
                self._folders[cached.id] = cached

            cached.title = struct["title"]
            cached.children = []
            return cached

        view_url = constants.WZL_ROOT + "/api/users/%s/view" % self.username
        reply = self._requests.get(view_url)

        for root_struct in reply.json()["folders"]:
            root_folder = upsert(root_struct)
            self._root_folders.append(root_folder)

            # No "subfolders" key means this entry has no children.
            if "subfolders" not in root_struct:
                continue

            for nested_struct in root_struct["subfolders"]:
                nested_folder = upsert(nested_struct)
                root_folder.children.append(nested_folder)
Exemple #9
0
 def _scan_scraps(self):
     """Return the submissions found on the user's scraps pages."""
     logger.debug("Scanning scraps")
     # "%%d" stays in the URL as a literal "%d" after this formatting pass.
     page_url_format = constants.FA_ROOT + "/scraps/%s/%%d/" % self.username
     return self._scan_submission_page(page_url_format)
Exemple #10
0
 def _scan_gallery(self):
     """Return the submissions found on the user's gallery pages."""
     logger.debug("Scanning gallery")
     # "%%d" stays in the URL as a literal "%d" after this formatting pass.
     page_url_format = constants.FA_ROOT + "/gallery/%s/%%d/" % self.username
     return self._scan_submission_page(page_url_format)
Exemple #11
0
    def _scan_submission_page(self, url_format):
        """Return submissions found in pages of a base url.

        Pages are fetched through the rate limiter, starting at page 1 and
        stopping at the first page that yields no submissions.  Each
        gallery entry updates (or creates) the ``Submission`` cached in
        ``self._submissions`` under its numeric id.

        Args:
            url_format (str): URL, with a %d that holds the page id

        Returns:
            A list of submission objects, in the order they were found.

        Raises:
            exceptions.ScraperError: If an entry has no ``id`` attribute or
                a non-numeric id, has no title ``span`` or no thumbnail
                ``img`` with a ``src``, or carries no known rating or type
                class.  Any other IndexError or ValueError raised during
                the scan is reported the same way.
        """

        submissions = []

        try:
            page = 1
            while True:
                url = url_format % page
                doc = self._limited_call(self._html_get, url)
                logger.debug("Scanning submissions from %s" % url)

                count = 0

                for el in doc.cssselect(".gallery > *"):
                    element_id = el.get("id")
                    if element_id is None:
                        # get() yields None for an absent attribute; slicing
                        # it below would raise TypeError, which the handler
                        # at the bottom does not translate.
                        raise exceptions.ScraperError()

                    # Presumably the empty-gallery placeholder; it is not a
                    # submission either way.
                    if element_id == "no-images":
                        continue

                    # Drop the 4-character prefix in front of the number.
                    id_str = element_id[4:]
                    if id_str == "":
                        continue

                    submission_id = int(id_str)

                    submission = self._submissions.get(submission_id)
                    if submission is None:
                        submission = Submission()
                        submission._session = self
                        submission.id = submission_id
                        self._submissions[submission_id] = submission

                    submission.title = str(
                        el.cssselect("span")[0].text_content())

                    if "r-adult" in el.classes:
                        submission.rating = "adult"
                    elif "r-mature" in el.classes:
                        submission.rating = "mature"
                    elif "r-general" in el.classes:
                        submission.rating = "general"
                    else:
                        raise exceptions.ScraperError()

                    if "t-image" in el.classes:
                        submission.type = "image"
                    elif "t-text" in el.classes:
                        submission.type = "text"
                    elif "t-audio" in el.classes:
                        submission.type = "audio"
                    elif "t-flash" in el.classes:
                        submission.type = "flash"
                    else:
                        raise exceptions.ScraperError()

                    thumbnail_src = el.cssselect("img")[0].get("src")
                    if thumbnail_src is None:
                        # Concatenating None would raise TypeError instead
                        # of the scraper's own error type.
                        raise exceptions.ScraperError()
                    submission.thumbnail_url = "https:" + thumbnail_src

                    submissions.append(submission)
                    count += 1

                # A page that contributes nothing ends the pagination.
                if count == 0:
                    break

                logger.debug("Found %d submissions" % count)

                page += 1

        except (IndexError, ValueError):
            raise exceptions.ScraperError()

        return submissions
Exemple #12
0
    def _load_folders(self):
        """Rebuild the folder tree from the folder-management page.

        Fetches ``/controls/folders/submissions/`` under
        ``constants.FA_ROOT`` through the rate limiter.  Every
        ``.group-row`` element becomes a root folder with empty
        ``children`` and ``submissions``.  Every ``.folder-row`` element
        becomes a folder that is attached to the entry of ``self._folders``
        named by the ``group-N`` marker in its class attribute, or to the
        root list when there is no such entry.  Objects already cached in
        ``self._folders`` are reused and updated.

        Raises:
            exceptions.ScraperError: If a row has no ``strong`` title
                element, or its class attribute lacks the expected
                ``group-N`` / ``folder-N`` marker.
        """
        logger.debug("Loading folders")

        self._root_folders = []

        url = constants.FA_ROOT + "/controls/folders/submissions/"
        doc = self._limited_call(self._html_get, url)

        # NOTE(review): groups and folders are cached in the same
        # self._folders mapping, keyed by number only; confirm the site
        # never uses the same number for a group and a folder.

        # Get groups
        for group_el in doc.cssselect(".group-row"):
            try:
                title = str(group_el.cssselect("strong")[0].text_content())

                id_match = re.search("group-([0-9]+)", group_el.get("class"))
                if id_match is None:
                    # re.search returns None on a miss; calling .group() on
                    # it would raise AttributeError, which the handler
                    # below does not translate.
                    raise exceptions.ScraperError()
                group_id = int(id_match.group(1))

                group = self._folders.get(group_id)
                if group is None:
                    group = Folder()
                    group._session = self
                    group.id = group_id
                    self._folders[group_id] = group

                group.title = title
                group.children = []
                group.submissions = []

                self._root_folders.append(group)

            except (IndexError, ValueError):
                raise exceptions.ScraperError()

        # Get folders
        for folder_el in doc.cssselect(".folder-row"):
            try:
                title = str(
                    folder_el.cssselect(".folder-name strong")
                    [0].text_content())
                row_classes = folder_el.get("class")
                id_match = re.search("folder-([0-9]+)", row_classes)
                group_match = re.search("group-([0-9]+)", row_classes)
                if id_match is None or group_match is None:
                    # Same reasoning as for groups: report a missing
                    # marker as a scraper failure, not AttributeError.
                    raise exceptions.ScraperError()

                folder_id = int(id_match.group(1))
                parent_id = int(group_match.group(1))

                folder = self._folders.get(folder_id)
                if folder is None:
                    folder = Folder()
                    folder._session = self
                    folder.id = folder_id
                    self._folders[folder_id] = folder

                folder.title = title
                folder.children = []

                # Folders whose group is not cached go to the top level.
                parent = self._folders.get(parent_id)
                if parent is None:
                    self._root_folders.append(folder)
                else:
                    parent.children.append(folder)

            except (IndexError, ValueError):
                raise exceptions.ScraperError()
Exemple #13
0
 def _scan_scraps(self):
     """Scan the user's scraps pages and return the submissions found."""
     logger.debug("Scanning scraps")
     # The doubled "%%d" is left as "%d" once the username is filled in.
     scraps_url_format = constants.FA_ROOT + "/scraps/%s/%%d/" % self.username
     return self._scan_submission_page(scraps_url_format)
Exemple #14
0
 def _scan_gallery(self):
     """Scan the user's gallery pages and return the submissions found."""
     logger.debug("Scanning gallery")
     # The doubled "%%d" is left as "%d" once the username is filled in.
     gallery_url_format = constants.FA_ROOT + "/gallery/%s/%%d/" % self.username
     return self._scan_submission_page(gallery_url_format)
Exemple #15
0
    def _scan_submission_page(self, url_format):
        """Return submissions found in pages of a base url.

        Starting at page 1, each page is fetched through the rate limiter
        and its ``.gallery`` children are read.  The scan ends at the first
        page that adds no submissions.  Each entry updates (or creates)
        the ``Submission`` cached in ``self._submissions`` under its
        numeric id.

        Args:
            url_format (str): URL, with a %d that holds the page id

        Returns:
            A list of submission objects, in the order they were found.

        Raises:
            exceptions.ScraperError: If an entry has no ``id`` attribute or
                a non-numeric id, has no title ``span`` or no thumbnail
                ``img`` with a ``src``, or carries no known rating or type
                class.  Any other IndexError or ValueError raised during
                the scan is reported the same way.
        """

        submissions = []

        try:
            page = 1
            while True:
                url = url_format % page
                doc = self._limited_call(self._html_get, url)
                logger.debug("Scanning submissions from %s" % url)

                count = 0

                for el in doc.cssselect(".gallery > *"):
                    element_id = el.get("id")
                    if element_id is None:
                        # A missing attribute is returned as None; slicing
                        # None would raise TypeError and slip past the
                        # except clause at the bottom.
                        raise exceptions.ScraperError()

                    # Presumably the empty-gallery placeholder; it is not a
                    # submission either way.
                    if element_id == "no-images":
                        continue

                    # Drop the 4-character prefix in front of the number.
                    id_str = element_id[4:]
                    if id_str == "":
                        continue

                    submission_id = int(id_str)

                    submission = self._submissions.get(submission_id)
                    if submission is None:
                        submission = Submission()
                        submission._session = self
                        submission.id = submission_id
                        self._submissions[submission_id] = submission

                    submission.title = str(
                        el.cssselect("span")[0].text_content())

                    if "r-adult" in el.classes:
                        submission.rating = "adult"
                    elif "r-mature" in el.classes:
                        submission.rating = "mature"
                    elif "r-general" in el.classes:
                        submission.rating = "general"
                    else:
                        raise exceptions.ScraperError()

                    if "t-image" in el.classes:
                        submission.type = "image"
                    elif "t-text" in el.classes:
                        submission.type = "text"
                    elif "t-audio" in el.classes:
                        submission.type = "audio"
                    elif "t-flash" in el.classes:
                        submission.type = "flash"
                    else:
                        raise exceptions.ScraperError()

                    thumbnail_src = el.cssselect("img")[0].get("src")
                    if thumbnail_src is None:
                        # "https:" + None would raise TypeError rather than
                        # the scraper's own error type.
                        raise exceptions.ScraperError()
                    submission.thumbnail_url = "https:" + thumbnail_src

                    submissions.append(submission)
                    count += 1

                # A page that contributes nothing ends the pagination.
                if count == 0:
                    break

                logger.debug("Found %d submissions" % count)

                page += 1

        except (IndexError, ValueError):
            raise exceptions.ScraperError()

        return submissions
Exemple #16
0
    def _load_folders(self):
        """Rebuild the folder tree from the folder-management page.

        The page at ``/controls/folders/submissions/`` under
        ``constants.FA_ROOT`` is fetched through the rate limiter.  Each
        ``.group-row`` element becomes a root folder with empty
        ``children`` and ``submissions``.  Each ``.folder-row`` element
        becomes a folder that is attached to the entry of ``self._folders``
        named by the ``group-N`` marker in its class attribute, or to the
        root list when there is no such entry.  Objects already cached in
        ``self._folders`` are reused and updated.

        Raises:
            exceptions.ScraperError: If a row has no ``strong`` title
                element, or its class attribute lacks the expected
                ``group-N`` / ``folder-N`` marker.
        """
        logger.debug("Loading folders")

        self._root_folders = []

        url = constants.FA_ROOT + "/controls/folders/submissions/"
        doc = self._limited_call(self._html_get, url)

        # NOTE(review): groups and folders are cached in the same
        # self._folders mapping, keyed by number only; confirm the site
        # never uses the same number for a group and a folder.

        # Get groups
        for group_el in doc.cssselect(".group-row"):
            try:
                title = str(group_el.cssselect("strong")[0].text_content())

                id_match = re.search("group-([0-9]+)", group_el.get("class"))
                if id_match is None:
                    # A failed search returns None, and None.group(1) would
                    # raise AttributeError past the except clause below.
                    raise exceptions.ScraperError()
                group_id = int(id_match.group(1))

                group = self._folders.get(group_id)
                if group is None:
                    group = Folder()
                    group._session = self
                    group.id = group_id
                    self._folders[group_id] = group

                group.title = title
                group.children = []
                group.submissions = []

                self._root_folders.append(group)

            except (IndexError, ValueError):
                raise exceptions.ScraperError()

        # Get folders
        for folder_el in doc.cssselect(".folder-row"):
            try:
                title = str(folder_el.cssselect(".folder-name strong")[
                                0].text_content())
                row_classes = folder_el.get("class")
                id_match = re.search("folder-([0-9]+)", row_classes)
                group_match = re.search("group-([0-9]+)", row_classes)
                if id_match is None or group_match is None:
                    # As with groups, a missing marker is reported as a
                    # scraper failure instead of an AttributeError.
                    raise exceptions.ScraperError()

                folder_id = int(id_match.group(1))
                parent_id = int(group_match.group(1))

                folder = self._folders.get(folder_id)
                if folder is None:
                    folder = Folder()
                    folder._session = self
                    folder.id = folder_id
                    self._folders[folder_id] = folder

                folder.title = title
                folder.children = []

                # Folders whose group is not cached go to the top level.
                parent = self._folders.get(parent_id)
                if parent is None:
                    self._root_folders.append(folder)
                else:
                    parent.children.append(folder)

            except (IndexError, ValueError):
                raise exceptions.ScraperError()