def get_app_info(self, uri):
    """Scrape the app page at ``uri`` and return it as an ``AppInfo``.

    The page is fetched through ``self.br`` and parsed with html5lib.  Name,
    developer, description markup, artwork URL and screenshot URLs are read
    from the page; category, version, size, updated, price and language are
    left as empty strings and ``os`` is recorded as "iOS".  ``info.debug()``
    is called before the record is returned.
    """
    page = BeautifulSoup(self.br.open(uri).get_data(), "html5lib")
    info = AppInfo()

    # Both the title (<h1>) and the developer line (<h2>) sit in this block.
    header = page.find(id="desktopContentBlockId")
    info.name = header.h1.text
    for blank_field in ("category", "version", "size", "updated", "price"):
        setattr(info, blank_field, "")
    info.os = "iOS"

    # When the <h2> text contains a colon, keep only the piece that follows
    # the first colon; otherwise use the text unchanged.
    byline = header.h2.text
    if ":" in byline:
        info.developer = byline.split(":")[1]
    else:
        info.developer = byline
    info.language = ""

    # The description is stored as pretty-printed markup, not plain text.
    review_box = page.find('div', attrs={"class": "product-review"})
    info.description = review_box.p.prettify()

    info.artwork = page.find(id="left-stack").div.img["src"]

    gallery = page.find('div', attrs={"class": "screenshots"})
    info.images = [shot["src"] for shot in gallery.find_all('img')]

    info.debug()
    return info
def get_app_info(self, uri):
    """Scrape the app page at ``uri`` and return it as an ``AppInfo``.

    The response body is parsed with html5lib, decoding it as UTF-8.  Name,
    developer, description markup, artwork URL and screenshot URLs come from
    the page; category, version, size, updated, price and language are left
    as empty strings and ``os`` is recorded as "Android".  ``info.debug()``
    is called before the record is returned.
    """
    markup = self.br.open(uri).get_data()
    page = BeautifulSoup(markup, "html5lib", from_encoding="utf-8")

    def first(tag, css_class):
        # First <tag> element whose class attribute matches ``css_class``.
        return page.find(tag, attrs={"class": css_class})

    # NOTE: selectors used here previously (kept for reference):
    #   name        h1  class "doc-banner-title"
    #   developer   a   class "doc-header-link"
    #   description id  "doc-original-text"
    #   artwork     div class "doc-banner-icon"
    #   screenshots div class "screenshot-carousel-content-container"
    info = AppInfo()
    info.name = first('div', "document-title").text
    for blank_field in ("category", "version", "size", "updated", "price"):
        setattr(info, blank_field, "")
    info.os = "Android"
    info.developer = first('a', "document-subtitle").text
    info.language = ""

    # The description is stored as pretty-printed markup, not plain text.
    info.description = first('div', "show-more-content").prettify()
    info.artwork = first('div', "cover-container").img["src"]

    thumbnails = first('div', "thumbnails")
    info.images = [thumb["src"] for thumb in thumbnails.find_all('img')]

    info.debug()
    return info
def get_app_info(self, uri):
    """Scrape the app page at ``uri`` and return it as an ``AppInfo``.

    The page is fetched through ``self.br`` and parsed with html5lib.  Name,
    description markup, artwork URL and image URLs are read from the page;
    category, version, size, updated, price, developer and language are left
    as empty strings and ``os`` is recorded as "iOS".  ``info.debug()`` is
    called before the record is returned.
    """
    response = self.br.open(uri)
    page = BeautifulSoup(response.get_data(), "html5lib")
    info = AppInfo()

    # renderContents() keeps the heading's inner markup rather than using
    # its ``.text``.
    info.name = page.find('div', attrs={"class": "viewbox"}).h2.renderContents()
    for blank_field in ("category", "version", "size", "updated", "price"):
        setattr(info, blank_field, "")
    info.os = "iOS"
    info.developer = ""
    info.language = ""

    # The same "content" div supplies both the description markup and,
    # further down, the images embedded in it.
    content_box = page.find('div', attrs={"class": "content"})
    info.description = content_box.prettify()

    info.artwork = page.find('div', attrs={"class": "picview"}).img["src"]

    info.images = [picture["src"] for picture in content_box.find_all('img')]

    info.debug()
    return info
def get_wp_app_info(self, uri):
    """Scrape the Windows Phone app page at ``uri`` into an ``AppInfo``.

    The page is fetched through ``self.br`` and parsed with html5lib.  Name,
    description markup, artwork URL and screenshot URLs are read from the
    page; category, version, size, updated, price, developer and language
    are left as empty strings.  Artwork and screenshot ``src`` values are
    passed through ``self.get_url(uri, ...)`` before being stored
    (presumably to resolve page-relative paths -- confirm against get_url).
    ``info.debug()`` is called before the record is returned.

    Raises AttributeError/TypeError if an expected element is missing from
    the page, since the lookups are not guarded.
    """
    res = self.br.open(uri)
    data = res.get_data()
    soup = BeautifulSoup(data, "html5lib")

    info = AppInfo()
    div_info = soup.find('div', attrs={"class": "info"})
    info.name = div_info.h1.text.strip()
    info.category = ""
    info.version = ""
    info.size = ""
    info.updated = ""
    info.price = ""
    # Fixed: this used to be "iOS" (copy-paste from the iOS scraper).  This
    # is the Windows Phone entry point, so report the matching platform.
    info.os = "Windows Phone"
    info.developer = ""
    info.language = ""

    # The element id really is spelled "discription" on the page -- it is
    # not a typo for "description" here.
    info.description = soup.find(id="discription").prettify()

    artwork = soup.find('div', attrs={"class": "deinfo"}).img["src"]
    info.artwork = self.get_url(uri, artwork)

    div_images = soup.find('div', attrs={"class": "screenshots-container"})
    info.images = [
        self.get_url(uri, img["src"]) for img in div_images.find_all('img')
    ]

    info.debug()
    return info
def get_android_app_info(self, uri):
    """Scrape the Android app page at ``uri`` and return an ``AppInfo``.

    The page is fetched through ``self.br`` and parsed with html5lib.  Name,
    description markup, artwork URL and screenshot URLs are read from the
    page; category, version, size, updated, price, developer and language
    are left as empty strings and ``os`` is recorded as "Android".
    ``info.debug()`` is called before the record is returned.
    """
    page = BeautifulSoup(self.br.open(uri).get_data(), "html5lib")

    def div_with_class(css_class):
        # First <div> whose class attribute matches ``css_class``.
        return page.find('div', attrs={"class": css_class})

    info = AppInfo()
    info.name = div_with_class("info_title").text
    for blank_field in ("category", "version", "size", "updated", "price"):
        setattr(info, blank_field, "")
    info.os = "Android"
    info.developer = ""
    info.language = ""

    # The description is stored as pretty-printed markup, not plain text.
    intro = div_with_class("rom_introductioncon yingyong_intro")
    info.description = intro.prettify()

    info.artwork = div_with_class("yingyong_img").img["src"]

    snapshots = div_with_class("snapshot_list").find_all('img')
    info.images = [snap["src"] for snap in snapshots]

    info.debug()
    return info
def get_ios_app_info(self, uri):
    """Scrape the iOS app page at ``uri`` and return an ``AppInfo``.

    The page is fetched through ``self.br`` and parsed with html5lib.  Name,
    description markup, artwork URL and screenshot URLs are read from the
    page; category, version, size, updated, price, developer and language
    are left as empty strings and ``os`` is recorded as "iOS".  Artwork and
    screenshot ``src`` values are passed through ``self.get_url(uri, ...)``
    before being stored.  ``info.debug()`` is called before the record is
    returned.
    """
    response = self.br.open(uri)
    page = BeautifulSoup(response.get_data(), "html5lib")

    info = AppInfo()
    title = page.find(id="appTitle")
    info.name = title.text.strip()
    for blank_field in ("category", "version", "size", "updated", "price"):
        setattr(info, blank_field, "")
    info.os = "iOS"
    info.developer = ""
    info.language = ""

    # The description is stored as pretty-printed markup, not plain text.
    info.description = page.find(id="tab2Content").prettify()

    icon = page.find('img', attrs={"class": "img175"})
    info.artwork = self.get_url(uri, icon["src"])

    gallery = page.find('div', attrs={"class": "yyxq_yyjt"})
    shots = gallery.find_all('img')
    info.images = [self.get_url(uri, shot["src"]) for shot in shots]

    info.debug()
    return info
def get_app_info(self, uri):
    """Scrape the app page at ``uri`` and return it as an ``AppInfo``.

    The page is fetched through ``self.br`` and parsed with html5lib.  Name,
    description markup, artwork URL and screenshot URLs are read from the
    page; category, version, size, updated, price, developer and language
    are left as empty strings and ``os`` is recorded as "Windows Phone".
    ``info.debug()`` is called before the record is returned.
    """
    page = BeautifulSoup(self.br.open(uri).get_data(), "html5lib")
    info = AppInfo()

    # renderContents() keeps the heading's inner markup rather than using
    # its ``.text``.
    info.name = page.find(id="application").h1.renderContents()
    for blank_field in ("category", "version", "size", "updated", "price"):
        setattr(info, blank_field, "")
    info.os = "Windows Phone"
    info.developer = ""
    info.language = ""

    # The description is the pretty-printed <pre> inside #appDetails.
    details = page.find(id="appDetails")
    info.description = details.pre.prettify()

    info.artwork = page.find(id="appSummary").img["src"]

    # Screenshot URLs are taken from the link targets (<a href>) under
    # #screenshots, not from <img src>.
    links = page.find(id="screenshots").find_all('a')
    info.images = [link["href"] for link in links]

    info.debug()
    return info
def new_app_info(self):
    """Return an ``AppInfo`` pre-filled with fixed sample values.

    Every field is a hard-coded literal; nothing is fetched.  ``images`` is
    not set and ``debug()`` is not called.
    """
    # Ordered (attribute, value) pairs, applied top to bottom.
    sample_fields = (
        ("name", "Motion Math Zoom"),
        ("category", "Education"),
        ("version", "2.1"),
        ("size", "19.1MB"),
        ("updated", "2011-12-30"),
        ("price", "Free"),
        ("developer", "Motion Math"),
        ("language", "English"),
        ("os", "iOS"),
        (
            "description",
            "<p>An animal adventure through the world of numbers! "
            "Give your child a chance to play with numbers - "
            "they'll have a blast zooming through the number line "
            "as they master place value.</p>",
        ),
        (
            "artwork",
            "https://lh3.ggpht.com/LDtlOWSOvxXBDhx4XqcBBGkANPlusko8WgB80n62bpnyTUgz34gl9OlOpmflckVNBA=h230",
        ),
    )

    info = AppInfo()
    for attribute, value in sample_fields:
        setattr(info, attribute, value)
    return info