def GetImageList(self, url, gid):
    """Return the image-page links of a gallery, following pagination.

    Each page is scanned for ``class="col-thumbnail"`` markers; the value of
    the first ``href="..."`` attribute after every marker is resolved through
    ``self._getFullUrl`` and collected.  Further pages are fetched for as
    long as ``self._nextPage`` returns something other than ``None``.

    Args:
        url: address of the first gallery page.
        gid: gallery id; not used by this implementation.

    Returns:
        List of resolved links, in document order across all pages.
    """
    thumb_marker = 'class="col-thumbnail"'
    href_marker = 'href="'
    UrlList = []

    request = Request(self._config)
    PageContent = request.ReqUrl(url, 'utf-8')

    while True:
        # Scan the current page from the top.
        cursor = 0
        while True:
            marker_pos = PageContent.find(thumb_marker, cursor)
            if marker_pos == -1:
                # No more thumbnails on this page.
                break
            href_pos = PageContent.find(href_marker, marker_pos)
            if href_pos == -1:
                break
            link_start = href_pos + len(href_marker)

            link_end = PageContent.find('"', link_start)
            if link_end == -1:
                # Unterminated attribute: slicing with -1 would silently
                # produce a truncated, bogus link, so stop instead.
                break

            link = PageContent[link_start:link_end]
            if len(link) > 1:
                UrlList.append(self._getFullUrl(link))
            # Continue searching after the link that was just consumed.
            cursor = link_end

        np_url = self._nextPage(PageContent)
        if np_url is None:
            break
        url = np_url
        PageContent = request.ReqUrl(url, 'utf-8')
    return UrlList
    def GetImageList(self, url, gid):
        """Return the image links found on a single gallery page.

        The page is requested with encoding ``'CP1251'`` and
        ``proto="http://"``.  For every ``<div class="pic">`` block the
        anchor whose attribute value ends right before
        ``" target="_blank">`` is extracted and prefixed with the configured
        ``baseurl``.

        Args:
            url: address of the gallery page.
            gid: gallery id; not used by this implementation.

        Returns:
            List of ``baseurl + link`` strings in document order.
        """
        block_marker = '<div class="pic">'
        anchor_marker = '<a href="'
        link_tail = '" target="_blank">'
        UrlList = []

        request = Request(self._config)
        PageContent = request.ReqUrl(url, 'CP1251', proto="http://")

        cursor = 0
        while True:
            block_pos = PageContent.find(block_marker, cursor)
            if block_pos == -1:
                break
            anchor_pos = PageContent.find(anchor_marker, block_pos)
            if anchor_pos == -1:
                break

            tail_pos = PageContent.find(link_tail, anchor_pos)
            if tail_pos == -1:
                # Without the closing marker there is no link to extract;
                # scanning backwards from -1 would return garbage.
                break

            # The link starts right after the nearest quote preceding the
            # closing marker.
            quote_pos = PageContent.rfind('"', 0, tail_pos)
            pic = PageContent[quote_pos + 1:tail_pos]
            if len(pic) > 1:
                UrlList.append(self._config[self.configKey()]['baseurl'] + pic)
            cursor = tail_pos

        return UrlList
Exemple #3
0
 def GetImageUrl(self, UrlList, UrlNum):
     """Fetch one image page and extract the image source URL.

     The page at ``UrlList[UrlNum]`` is downloaded as UTF-8; the result is
     the text from the first ``https://`` following ``contentUrl`` up to
     (not including) the next double quote.  An empty string is returned
     when no ``https://`` is found.
     """
     fetcher = Request(self._config)
     html = fetcher.ReqUrl(UrlList[UrlNum], 'utf-8')

     marker_at = html.find('contentUrl')
     link_from = html.find('https://', marker_at)
     if link_from == -1:
         # Same outcome as slicing from -1: nothing usable on the page.
         return ''

     link_to = html.find('"', link_from)
     return html[link_from:link_to]
 def GetImageUrl(self, UrlList, UrlNum):
     """Fetch one image page and return the resolved image source URL.

     The page at ``UrlList[UrlNum]`` is downloaded as UTF-8.  The ``src``
     attribute of the first ``img`` tag after the ``theImage`` marker is
     extracted and resolved through ``self._getFullUrl``.

     Returns:
         The resolved URL, or an empty string when the marker, the
         ``img src="`` attribute or its closing quote cannot be found
         (previously a slice at arbitrary offsets was returned).
     """
     src_marker = 'img src="'

     request = Request(self._config)
     PageContent = request.ReqUrl(UrlList[UrlNum], 'utf-8')

     url_index = PageContent.find('theImage')
     if url_index == -1:
         return ''
     marker_pos = PageContent.find(src_marker, url_index)
     if marker_pos == -1:
         return ''

     url_start = marker_pos + len(src_marker)
     url_end = PageContent.find('"', url_start)
     if url_end == -1:
         return ''

     # NOTE(review): the first character of the attribute value is skipped
     # on purpose to keep the original behaviour -- presumably a leading
     # '/' or '.'; confirm against _getFullUrl.
     return self._getFullUrl(PageContent[url_start + 1:url_end])
 def GetImageUrl(self, UrlList, UrlNum):
     """Fetch one image page and build the absolute image source URL.

     The page at ``UrlList[UrlNum]`` is requested with encoding
     ``'CP1251'`` and ``proto='http://'``.  The ``src`` attribute following
     the first ``<img style="`` tag is extracted and prefixed with
     ``'http://'`` and the configured ``baseurl``.

     Returns:
         The assembled URL, or an empty string when the tag, its ``src``
         attribute or the closing quote cannot be found (previously a
         slice at arbitrary offsets was returned).
     """
     src_marker = ' src="'

     request = Request(self._config)
     PageContent = request.ReqUrl(UrlList[UrlNum],
                                  'CP1251',
                                  proto='http://')

     url_index = PageContent.find('<img style="')
     if url_index == -1:
         return ''
     marker_pos = PageContent.find(src_marker, url_index)
     if marker_pos == -1:
         return ''

     url_start = marker_pos + len(src_marker)
     url_end = PageContent.find('"', url_start)
     if url_end == -1:
         return ''

     return 'http://' + self._config[
         self.configKey()]['baseurl'] + PageContent[url_start:url_end]
Exemple #6
0
    def GetImageList(self, url, gid):
        """Return the image links of a gallery, following pagination.

        Every page is scanned for quoted attribute values containing
        ``idx=``; each value (from the opening to the closing double quote)
        is prefixed with the configured ``baseurl`` and collected.  The next
        page is obtained from ``self._NextPage(PageContent, gid, p)`` where
        ``p`` counts the pages already followed; a result equal to ``0``
        ends the iteration.

        Args:
            url: address of the first gallery page.
            gid: gallery id, forwarded to ``self._NextPage``.

        Returns:
            List of ``baseurl + link`` strings in document order.
        """
        UrlList = []

        request = Request(self._config)
        PageContent = request.ReqUrl(url, 'utf-8')

        p = 0

        while True:
            # Scan the current page from the top.
            index_end = 0
            while True:
                index_begin = PageContent.find('idx=', index_end)
                if index_begin == -1:
                    break

                index_end = PageContent.find('"', index_begin)
                if index_end == -1:
                    # Unterminated attribute value: nothing reliable left.
                    break

                # The link starts right after the nearest quote before
                # 'idx='; rfind cannot run off the front of the string the
                # way a manual backward scan does.
                quote_pos = PageContent.rfind('"', 0, index_begin)
                if quote_pos == -1:
                    # 'idx=' outside of a quoted value: skip this match.
                    continue

                pic = PageContent[quote_pos + 1:index_end]
                if len(pic) > 1:
                    UrlList.append(
                        str(self._config[self.configKey()]['baseurl']) + pic)

            np_url = self._NextPage(PageContent, gid, p)

            if np_url == 0:
                break
            url = np_url
            p += 1
            PageContent = request.ReqUrl(url, 'utf-8')
        return UrlList
    def _ListUserFolders(self, ProfileUrl):
        """Return the user name and gallery folders of a profile.

        The profile page is fetched to read the user name (via
        ``self._GetUsername``) and to locate the ``/usergallery.php`` link,
        which is then fetched in turn.  On that page every quoted value
        containing ``folderid=`` is treated as a folder link: the folder URL
        runs from the nearest preceding ``https:`` to the closing quote and
        the folder name is the text between the following ``>`` and ``<``.
        Entries with ``folderid=0`` are skipped; the entry with
        ``folderid=-1`` is recorded and ends the scan.

        Args:
            ProfileUrl: address of the user's profile page.

        Returns:
            Tuple ``(Username, Folders)`` where ``Folders`` is a list of
            ``[FolderName, FolderUrl]`` pairs.
        """
        request = Request(self._config)
        htmldata = request.ReqUrl(ProfileUrl, 'utf-8')
        Username = self._GetUsername(htmldata)

        galleries_searchstring = "/usergallery.php"
        start = htmldata.find(galleries_searchstring)
        end = htmldata.find('"', start)

        GalsUrl = self._config[
            self.configKey()]['baseurl'] + htmldata[start:end]
        htmldata = request.ReqUrl(GalsUrl, 'utf-8')

        Folders = []
        cursor = 0
        while True:
            id_start = htmldata.find("folderid=", cursor)
            if id_start == -1:
                # Page has no (further) folder links.  Without this check a
                # page lacking the 'folderid=-1' entry looped forever.
                break
            id_end = htmldata.find('"', id_start)
            if id_end == -1:
                break
            cursor = id_end
            folderid = htmldata[id_start:id_end]

            # Nearest 'https:' in front of the folder id marks the start of
            # the link; rfind terminates even when there is none.
            url_start = htmldata.rfind("https:", 0, id_start)

            if url_start != -1 and folderid != "folderid=0":
                FolderUrl = htmldata[url_start:id_end]
                name_start = htmldata.find(">", id_end)
                name_end = htmldata.find("<", name_start)
                FolderName = htmldata[name_start + 1:name_end]
                Folders.append([FolderName, FolderUrl])

            if folderid == "folderid=-1":
                # Sentinel entry: it is recorded above, then the scan ends.
                break

        return Username, Folders
Exemple #8
0
def DownloadImage(config, url, Dir, attempts=3):
    """Download ``url`` and store it in directory ``Dir``.

    The request is retried up to ``attempts`` times, pausing one second
    between tries.  The file name is the part of ``url`` after the last
    ``/``.  If a file with that name already exists, an increasing number is
    inserted before the extension (``name1.ext``, ``name2.ext``, ...) so
    that older files are never overwritten.

    Args:
        config: configuration object handed to ``Request``.
        url: address of the file to download.
        Dir: output directory; created if it does not exist.
        attempts: maximum number of download attempts.

    Returns:
        ``True`` when the file was written, ``False`` when no attempt
        produced data.
    """
    r = Request(config)
    pic = None
    for attempt in range(attempts):
        pic = r.ReqUrl(url)
        if pic is not None and pic != []:
            break
        # Only pause when another attempt will actually follow.
        if attempt < attempts - 1:
            sleep(1)
    else:
        # Loop ended without a successful response (or attempts <= 0).
        return False

    # Create the output directory if it doesn't exist.
    if not path.exists(Dir):
        makedirs(Dir)

    if not pic:
        # A response arrived but carried no data; nothing to write.
        return False

    fname = url[url.rfind('/') + 1:]
    stem, ext = path.splitext(fname)
    _path = Dir + '/' + fname

    # The counter lives outside the loop and every candidate is derived
    # from the original stem, so the numbers progress as 1, 2, 3, ...
    pic_num = 0
    while path.exists(_path):
        pic_num += 1
        _path = Dir + '/' + stem + str(pic_num) + ext

    with open(_path, 'wb') as f:
        f.write(pic)
    return True
Exemple #9
0
    def _ListFolderGalleries(self, FolderUrl):
        """Collect the distinct gallery links found on a folder page.

        The page is fetched as UTF-8.  Every occurrence of ``/gallery/`` up
        to the next double quote is prefixed with the configured ``baseurl``
        and kept once, in order of first appearance.

        Returns:
            Tuple ``(user name, folder name, gallery URLs)``; both names are
            read from the page via ``self._GetUsername`` /
            ``self._GetFolderName`` and passed through ``RemoveBlank``.
        """
        fetcher = Request(self._config)
        page = fetcher.ReqUrl(FolderUrl, 'utf-8')
        owner = self._GetUsername(page)
        folder_title = self._GetFolderName(page)

        found = []
        cursor = 0
        while True:
            link_start = page.find("/gallery/", cursor)
            if link_start == -1:
                break
            # The closing quote doubles as the start of the next search.
            cursor = page.find('"', link_start)
            base = self._config[self.configKey()]['baseurl']
            candidate = base + page[link_start:cursor]
            if candidate not in found:
                found.append(candidate)

        return RemoveBlank(owner), RemoveBlank(folder_title), found
Exemple #10
0
    def OpenGallery(self,
                    Gal_Url,
                    urltype,
                    encoding='utf-8',
                    proto='https://'):
        """Return the title and the image link list of a gallery.

        The gallery id is derived with ``self.GetGalleryId``; the page is
        fetched once with the given ``encoding`` and ``proto`` to read the
        title, and ``self.GetImageList`` is then called with the gallery
        address and id to build the list.

        Returns:
            Tuple ``(gallery title, list of image URLs)``.
        """
        gallery_id = self.GetGalleryId(Gal_Url, urltype)
        fetcher = Request(self._config)
        html = fetcher.ReqUrl(Gal_Url, encoding, proto)

        # Title comes from the fetched page, the images from GetImageList.
        title = self.GetGalleryTitle(html)
        images = self.GetImageList(Gal_Url, gallery_id)

        return title, images
Exemple #11
0
    def GetImageList(self, url, gid):
        """Return the absolute image links of a gallery page.

        The page is fetched as UTF-8.  Starting at the first
        ``class='image'`` marker, every ``href="..."`` value that begins
        with ``http`` is collected in document order.  ``gid`` is not used.
        """
        fetcher = Request(self._config)
        html = fetcher.ReqUrl(url, 'utf-8')

        links = []
        cursor = html.find('class=\'image\'')
        while True:
            href_at = html.find('href="', cursor)
            if href_at == -1:
                return links

            value_from = href_at + 6  # skip past 'href="'
            cursor = html.find('"', value_from)
            target = html[value_from:cursor]
            if len(target) > 1 and target.startswith('http'):
                links.append(target)
Exemple #12
0
    def GetImageList(self, url, gid):
        """Returns list of images in xhamster gallery.

        The page is fetched as UTF-8.  For every
        ``photo-container photo-thumb`` marker the value of the following
        ``href=`` attribute is collected.  ``gid`` is not used.

        Returns:
            List of link strings in document order.
        """
        thumb_marker = "photo-container photo-thumb"
        href_marker = "href="
        UrlList = []

        request = Request(self._config)
        PageContent = request.ReqUrl(url, 'utf-8')

        cursor = 0
        while True:
            # Find location of the next image page link.
            index_link = PageContent.find(thumb_marker, cursor)
            if index_link == -1:
                break

            href_pos = PageContent.find(href_marker, index_link)
            if href_pos == -1:
                # Adding the offset to -1 used to move the cursor back to
                # the start of the page and repeat the same match forever.
                break
            # Skip the attribute name plus the opening quote.
            imglink_begin = href_pos + len(href_marker) + 1

            imglink_end = PageContent.find("\"", imglink_begin)
            if imglink_end == -1:
                break

            pic = PageContent[imglink_begin:imglink_end]
            if len(pic) > 1:
                UrlList.append(pic)
            cursor = imglink_end
        return UrlList
    def enqueue(self, queue, url, urltype):
        """Enqueue every gallery linked from a listing page.

        The page is requested with encoding ``'CP1251'`` and
        ``proto="http://"``.  For each ``<div class="galleryThumb">`` block
        the anchor whose attribute value ends right before
        ``" onmouseout="`` is extracted, prefixed with the configured
        ``baseurl`` and handed to the parent class' ``enqueue`` with url
        type ``1``.

        Args:
            queue: queue object forwarded unchanged to the parent class.
            url: address of the listing page.
            urltype: not used by this implementation.
        """
        block_marker = '<div class="galleryThumb">'
        anchor_marker = '<a href="'
        link_tail = '" onmouseout="'

        request = Request(self._config)
        PageContent = request.ReqUrl(url, 'CP1251', proto="http://")

        cursor = 0
        while True:
            block_pos = PageContent.find(block_marker, cursor)
            if block_pos == -1:
                break
            anchor_pos = PageContent.find(anchor_marker, block_pos)
            if anchor_pos == -1:
                break

            tail_pos = PageContent.find(link_tail, anchor_pos)
            if tail_pos == -1:
                # Without the closing marker there is no link to extract;
                # scanning backwards from -1 would enqueue garbage.
                break

            # The link starts right after the nearest quote preceding the
            # closing marker.
            quote_pos = PageContent.rfind('"', 0, tail_pos)
            pic = PageContent[quote_pos + 1:tail_pos]
            if len(pic) > 1:
                super().enqueue(
                    queue, self._config[self.configKey()]['baseurl'] + pic, 1)
            cursor = tail_pos