def GetImageList(self, url, gid):
    """Collect image-page URLs from every page of a gallery.

    Starting at ``url``, each page is scanned for ``class="col-thumbnail"``
    markers.  The ``href`` value following each marker is passed through
    ``self._getFullUrl`` and collected.  Paging continues for as long as
    ``self._nextPage`` returns something other than ``None``.

    ``gid`` is accepted but not used by this implementation.

    Returns the list of collected URLs, in page order.
    """
    href_marker = 'href="'
    collected = []
    fetcher = Request(self._config)
    page = fetcher.ReqUrl(url, 'utf-8')
    while True:
        page_links = []
        cursor = 0
        while True:
            thumb_at = page.find('class="col-thumbnail"', cursor)
            # When no thumbnail marker remains, thumb_at is -1; searching
            # for the href marker from offset -1 cannot match, which ends
            # the scan of this page.
            link_at = page.find(href_marker, thumb_at)
            if link_at == -1:
                break
            link_at += len(href_marker)
            cursor = page.find('"', link_at)
            link = page[link_at:cursor]
            if len(link) > 1:
                page_links.append(self._getFullUrl(link))
        collected.extend(page_links)
        next_url = self._nextPage(page)
        if next_url is None:
            return collected
        page = fetcher.ReqUrl(next_url, 'utf-8')
def GetImageList(self, url, gid):
    """Return the list of image-page URLs found in a gallery page.

    The page at ``url`` is fetched (decoded as CP1251, over ``http://``) and
    scanned for ``<div class="pic">`` blocks.  Inside each block the text
    between the last double quote and the ``" target="_blank">`` marker is
    taken as the link and prefixed with the configured ``baseurl``.

    ``gid`` is accepted but not used by this implementation.

    Scanning stops at the first block that is incomplete (no anchor or no
    ``target`` marker) instead of appending a slice of unrelated text.
    """
    UrlList = []
    request = Request(self._config)
    PageContent = request.ReqUrl(url, 'CP1251', proto="http://")
    cursor = 0
    while True:
        block_at = PageContent.find('<div class="pic">', cursor)
        if block_at == -1:
            break
        anchor_at = PageContent.find('<a href="', block_at)
        if anchor_at == -1:
            break
        link_end = PageContent.find('" target="_blank">', anchor_at)
        if link_end == -1:
            # Truncated or unexpected markup: without the closing marker
            # there is no reliable end of the link, so stop here.
            break
        # The link starts right after the last quote preceding the marker.
        link_start = PageContent.rfind('"', 0, link_end) + 1
        pic = PageContent[link_start:link_end]
        if len(pic) > 1:
            UrlList.append(self._config[self.configKey()]['baseurl'] + pic)
        cursor = link_end
    return UrlList
def GetImageUrl(self, UrlList, UrlNum):
    """Extract the image source URL from an image page.

    Fetches ``UrlList[UrlNum]`` and returns the text that starts at the
    first ``https://`` following the ``contentUrl`` marker and runs up to
    (not including) the next double quote.
    """
    page = Request(self._config).ReqUrl(UrlList[UrlNum], 'utf-8')
    marker_at = page.find('contentUrl')
    first = page.find('https://', marker_at)
    last = page.find('"', first)
    return page[first:last]
def GetImageUrl(self, UrlList, UrlNum):
    """Extract the image source URL from an image page.

    Fetches ``UrlList[UrlNum]``, locates the first ``img src="`` attribute
    after the ``theImage`` marker and returns its value, minus the first
    character, passed through ``self._getFullUrl``.
    """
    src_marker = 'img src="'
    page = Request(self._config).ReqUrl(UrlList[UrlNum], 'utf-8')
    anchor = page.find('theImage')
    begin = page.find(src_marker, anchor) + len(src_marker)
    finish = page.find('"', begin)
    # NOTE(review): the extra +1 drops the first character of the attribute
    # value -- presumably a leading separator; confirm against real pages.
    return self._getFullUrl(page[begin + 1:finish])
def GetImageUrl(self, UrlList, UrlNum):
    """Build the absolute image source URL for an image page.

    Fetches ``UrlList[UrlNum]`` (decoded as CP1251, over ``http://``), takes
    the value of the first `` src="`` attribute found from the
    ``<img style="`` tag onward, and returns it prefixed with ``http://``
    and the configured ``baseurl``.
    """
    src_marker = ' src="'
    fetcher = Request(self._config)
    page = fetcher.ReqUrl(UrlList[UrlNum], 'CP1251', proto='http://')
    tag_at = page.find('<img style="')
    begin = page.find(src_marker, tag_at) + len(src_marker)
    finish = page.find('"', begin)
    host = self._config[self.configKey()]['baseurl']
    return 'http://' + host + page[begin:finish]
def GetImageList(self, url, gid):
    """Return the image-page URLs found on every page of a gallery.

    Each page is scanned for quoted attribute values containing ``idx=``.
    For every hit, the text between the enclosing double quotes is
    prefixed with the configured ``baseurl`` and collected.  After a page
    is scanned, ``self._NextPage(PageContent, gid, p)`` supplies the next
    page URL; a result equal to ``0`` ends the iteration.

    Changes from the previous version: the leftover debug ``print`` of the
    whole page on every match is gone, and malformed hits (no opening or
    closing quote) no longer produce garbage entries or an ``IndexError``.
    """
    UrlList = []
    request = Request(self._config)
    PageContent = request.ReqUrl(url, 'utf-8')
    p = 0  # zero-based index of the page currently being scanned
    while True:
        index_end = 0
        while True:
            index_begin = PageContent.find('idx=', index_end)
            if index_begin == -1:
                break
            index_end = PageContent.find('"', index_begin)
            if index_end == -1:
                # Unterminated attribute: nothing usable follows.
                break
            # The link starts right after the nearest quote before 'idx='.
            quote_at = PageContent.rfind('"', 0, index_begin)
            if quote_at == -1:
                # 'idx=' outside of a quoted value; skip this hit.
                continue
            pic = PageContent[quote_at + 1:index_end]
            if len(pic) > 1:
                UrlList.append(
                    str(self._config[self.configKey()]['baseurl']) + pic)
        np_url = self._NextPage(PageContent, gid, p)
        if np_url == 0:
            break
        p += 1
        PageContent = request.ReqUrl(np_url, 'utf-8')
    return UrlList
def _ListUserFolders(self, ProfileUrl):
    """Return the user name and gallery folders listed on a profile.

    The profile page is fetched to obtain the user name (via
    ``self._GetUsername``) and the ``/usergallery.php`` link.  That link,
    prefixed with the configured ``baseurl``, is fetched and scanned for
    ``folderid=`` links.

    Returns ``(Username, Folders)`` where ``Folders`` is a list of
    ``[FolderName, FolderUrl]`` pairs.  ``FolderUrl`` runs from the nearest
    preceding ``https:`` up to the closing quote of the link, and
    ``FolderName`` is the text between the following ``>`` and ``<``.
    Links with ``folderid=0`` are skipped.  The ``folderid=-1`` link is
    included and ends the scan.

    The scan also ends when no further ``folderid=`` link exists, so a page
    without the ``folderid=-1`` sentinel no longer loops forever.
    """
    request = Request(self._config)
    htmldata = request.ReqUrl(ProfileUrl, 'utf-8')
    Username = self._GetUsername(htmldata)

    start = htmldata.find("/usergallery.php")
    end = htmldata.find('"', start)
    GalsUrl = self._config[self.configKey()]['baseurl'] + htmldata[start:end]
    htmldata = request.ReqUrl(GalsUrl, 'utf-8')

    Folders = []
    k = 0
    while True:
        j = htmldata.find("folderid=", k)
        if j == -1:
            break
        k = htmldata.find('"', j)
        if k == -1:
            # Unterminated link: no reliable end of the URL.
            break
        folderid = htmldata[j:k]
        # The folder URL begins at the closest 'https:' before the id.
        url_start = htmldata.rfind("https:", 0, j)
        if url_start != -1 and folderid != "folderid=0":
            n = htmldata.find(">", k)
            m = htmldata.find("<", n)
            Folders.append([htmldata[n + 1:m], htmldata[url_start:k]])
        if folderid == "folderid=-1":
            break
    return Username, Folders
def DownloadImage(config, url, Dir, attempts=3):
    """Download ``url`` and store it in directory ``Dir``.

    The request is tried up to ``attempts`` times, sleeping one second
    after each failed try (a result of ``None`` or ``[]`` counts as
    failure).  On success ``Dir`` is created if missing and the payload is
    written to a file named after the last path segment of ``url``.

    If that file name is already taken, an increasing number (1, 2, 3, ...)
    is inserted before the first dot of the name, or appended when the name
    has no dot, until an unused name is found.  Existing files are never
    overwritten.

    Returns ``True`` when a file was written, ``False`` when every attempt
    failed or the payload was empty.
    """
    r = Request(config)
    for _ in range(attempts):
        pic = r.ReqUrl(url)
        if pic is not None and pic != []:
            break
        sleep(1)
    else:
        # Every attempt failed (or attempts was 0).
        return False

    # Create the output directory if it doesn't exist.
    if not path.exists(Dir):
        makedirs(Dir)
    if not pic:
        return False

    # File name is whatever follows the last '/' of the URL.
    fname = url.rsplit('/', 1)[-1]
    stem, dot, ext = fname.partition('.')
    _path = Dir + '/' + fname
    pic_num = 0
    while path.exists(_path):
        # Always derive the candidate from the original stem so the
        # numbers progress 1, 2, 3 instead of piling up as 1, 11, 111.
        pic_num += 1
        _path = Dir + '/' + stem + str(pic_num) + dot + ext

    with open(_path, 'wb') as f:
        f.write(pic)
    return True
def _ListFolderGalleries(self, FolderUrl):
    """Return user name, folder name and gallery URLs of a folder page.

    The page at ``FolderUrl`` is fetched and every ``/gallery/`` path (up
    to the next double quote) is prefixed with the configured ``baseurl``.
    Duplicates are dropped and first-seen order is kept.  The user and
    folder names come from ``self._GetUsername`` and
    ``self._GetFolderName`` and are passed through ``RemoveBlank``.
    """
    page = Request(self._config).ReqUrl(FolderUrl, 'utf-8')
    user = self._GetUsername(page)
    folder = self._GetFolderName(page)
    found = []
    stop = 0
    begin = page.find("/gallery/", stop)
    while begin != -1:
        stop = page.find('"', begin)
        gallery = self._config[self.configKey()]['baseurl'] + page[begin:stop]
        if gallery not in found:
            found.append(gallery)
        begin = page.find("/gallery/", stop)
    return RemoveBlank(user), RemoveBlank(folder), found
def OpenGallery(self, Gal_Url, urltype, encoding='utf-8', proto='https://'):
    """Return the gallery title and its list of image URLs.

    The gallery id is derived with ``self.GetGalleryId``; the page is
    fetched with the given ``encoding`` and ``proto`` to read the title via
    ``self.GetGalleryTitle``.  The image list comes from
    ``self.GetImageList``, which receives the URL and the gallery id (not
    the page fetched here).
    """
    gallery_id = self.GetGalleryId(Gal_Url, urltype)
    page = Request(self._config).ReqUrl(Gal_Url, encoding, proto)
    # Title is read from the fetched HTML; the images from the URL itself.
    title = self.GetGalleryTitle(page)
    images = self.GetImageList(Gal_Url, gallery_id)
    return title, images
def GetImageList(self, url, gid):
    """Return the absolute image links found in a gallery page.

    The page at ``url`` is fetched and, starting at the first
    ``class='image'`` marker, every ``href="..."`` value that begins with
    ``http`` is collected in document order.

    ``gid`` is accepted but not used by this implementation.
    """
    href_marker = 'href="'
    links = []
    page = Request(self._config).ReqUrl(url, 'utf-8')
    cursor = page.find("class='image'")
    while True:
        begin = page.find(href_marker, cursor)
        if begin == -1:
            return links
        begin += len(href_marker)
        cursor = page.find('"', begin)
        candidate = page[begin:cursor]
        # A value starting with 'http' is necessarily longer than one
        # character, so no separate length check is needed.
        if candidate.startswith('http'):
            links.append(candidate)
def GetImageList(self, url, gid):
    """Return the list of image-page links in an xhamster gallery.

    The page at ``url`` is fetched and scanned for
    ``photo-container photo-thumb`` markers.  For each marker the value of
    the next ``href=`` attribute (up to its closing double quote) is
    collected.

    ``gid`` is accepted but not used by this implementation.

    Scanning stops at the first marker without a following ``href=`` or
    without a closing quote.  Previously a missing ``href=`` reset the
    search position to the start of the page and the loop never ended.
    """
    UrlList = []
    request = Request(self._config)
    PageContent = request.ReqUrl(url, 'utf-8')
    imglink_end = 0
    while True:
        # Find the location of the next image page link.
        index_link = PageContent.find("photo-container photo-thumb",
                                      imglink_end)
        if index_link == -1:
            break
        href_at = PageContent.find("href=", index_link)
        if href_at == -1:
            break
        # Skip 'href=' plus the opening quote character.
        imglink_begin = href_at + 6
        imglink_end = PageContent.find("\"", imglink_begin)
        if imglink_end == -1:
            break
        pic = PageContent[imglink_begin:imglink_end]
        if len(pic) > 1:
            UrlList.append(pic)
    return UrlList
def enqueue(self, queue, url, urltype):
    """Enqueue every gallery linked from an overview page.

    The page at ``url`` is fetched (decoded as CP1251, over ``http://``)
    and scanned for ``<div class="galleryThumb">`` blocks.  Inside each
    block the text between the last double quote and the
    ``" onmouseout="`` marker is taken as the link, prefixed with the
    configured ``baseurl`` and handed to the parent class's ``enqueue``
    with url type ``1``.

    ``urltype`` is accepted but not used by this implementation.

    Scanning stops at the first incomplete block (no anchor or no
    ``onmouseout`` marker) instead of enqueueing a slice of unrelated text.
    """
    request = Request(self._config)
    PageContent = request.ReqUrl(url, 'CP1251', proto="http://")
    cursor = 0
    while True:
        block_at = PageContent.find('<div class="galleryThumb">', cursor)
        if block_at == -1:
            break
        anchor_at = PageContent.find('<a href="', block_at)
        if anchor_at == -1:
            break
        link_end = PageContent.find('" onmouseout="', anchor_at)
        if link_end == -1:
            # Truncated or unexpected markup: no reliable end of the link.
            break
        # The link starts right after the last quote preceding the marker.
        link_start = PageContent.rfind('"', 0, link_end) + 1
        pic = PageContent[link_start:link_end]
        if len(pic) > 1:
            super().enqueue(
                queue, self._config[self.configKey()]['baseurl'] + pic, 1)
        cursor = link_end