def getAppCharts(country, type):
    """Scrape one app chart page and dump its entries to a dated file.

    The chart URL, output file stem, app-link prefix and page section class
    are all looked up from ``country`` and ``type`` via the module's helper
    functions.  The output file is ``DATA_DIR/<chartFile>_<YYYY-MM-DD>`` and
    is written as UTF-8 bytes, three lines per app: title, icon URL, app URL.

    Args:
        country: Country identifier passed through to the lookup helpers.
        type: Chart type identifier passed through to the lookup helpers.
            (Shadows the builtin, but the name is kept for callers.)

    Returns:
        None.  Each entry is also echoed to stdout.
    """
    chartUrl = getChartUrl(country, type)
    chartFile = getChartFile(country, type)
    prefix = getApptUrlPrefix(country)
    section = getChartPageSection(country)
    date = datetime.now().strftime('%Y-%m-%d')
    outPath = DATA_DIR + "/" + chartFile + "_" + date

    # The context manager guarantees the file is closed even when fetching
    # or parsing the page raises part-way through.
    with open(outPath, 'wb') as f:
        mainPage = common.getPageAsSoup(chartUrl)
        # NOTE(review): presumably `find` returns None when the section is
        # missing, in which case the next line raises AttributeError -- confirm.
        appGrid = mainPage.find('section', {'class': section})
        anchors = appGrid.findAll('a', href=re.compile('^' + prefix))

        for index, aDiv in enumerate(anchors):
            # Only the first anchor of every group of three is processed.
            # Presumably each app is linked three times and only the first
            # link wraps the icon <img> -- TODO confirm against the page.
            if index % 3 != 0:
                continue

            appUrl = aDiv.get('href')
            img = aDiv.find('img')
            title = img.get('alt')
            iconUrl = img.get('src')
            print(title, '\n', appUrl, '\n', iconUrl)

            str_out = title + '\n' + iconUrl + '\n' + appUrl + '\n'
            f.write(str_out.encode('utf-8'))
def getPopAppsInCategory(categoryUrl):
    """Print the link and text of every US App Store app anchor on a page.

    Args:
        categoryUrl: URL of the category page to fetch.

    Returns:
        None.  One line per matching anchor is printed: href, then the
        anchor's ``string`` value.
    """
    app_href = re.compile('^https://itunes.apple.com/us/app')
    soup = common.getPageAsSoup(categoryUrl)
    matches = soup.findAll('a', href=app_href)
    for anchor in matches:
        print(anchor.get('href'), anchor.string)
def getAppInCategoryWithLetter(categoryUrl, f):
    """Walk the numbered pages of a category listing and record app links.

    Pages are requested as ``categoryUrl + "&page=<n>"`` starting at 1.
    Every app link found is printed and written to ``f`` followed by a
    newline.  The walk stops as soon as a page yields exactly the same list
    of links as the page before it (an empty first page also stops it).

    Args:
        categoryUrl: Base listing URL; the page query is appended to it.
        f: Writable text file-like object receiving one link per line.

    Returns:
        None.
    """
    app_href = re.compile('^https://itunes.apple.com/us/app')
    last_links = []
    page_no = 1
    while True:
        soup = common.getPageAsSoup(categoryUrl + "&page=" + str(page_no))

        links = []
        for anchor in soup.findAll('a', href=app_href):
            links.append(anchor.get('href'))

        # A repeat of the previous page is the end-of-listing signal.
        if links == last_links:
            return

        for link in links:
            print(link)
            f.write(link + '\n')

        last_links = links
        page_no += 1
        # Pause between page requests.
        time.sleep(0.5)
def getAllCategories(dump):
    """Refresh the stored list of App Store genre URLs.

    Fetches the iOS genre index page, writes ``"<url>, <name>"`` lines for
    every genre link found in the three list columns (skipping "Games" and
    "Newsstand") to a temporary file, and copies that file over
    ``DATA_DIR/DATA_APP_CAT_FILE`` when the latter is missing or differs.
    The temporary file is removed afterwards.

    Args:
        dump: Unused by this function; kept so existing callers still work.

    Returns:
        None.  Progress and the update outcome are printed to stdout.
    """
    tmpCatFile = DATA_DIR + '/app_cat_tmp'
    catFile = DATA_DIR + '/' + DATA_APP_CAT_FILE
    genreUrl = 'https://itunes.apple.com/us/genre/ios/id36?mt=8'
    skipped = ("Games", "Newsstand")

    try:
        # `with` closes (and flushes) the temp file even if scraping fails.
        with open(tmpCatFile, 'w') as tmpf:
            mainPage = common.getPageAsSoup(genreUrl)
            genreHref = re.compile('^https://itunes.apple.com/us/genre')
            total = 0
            for column in ['list column first', 'list column', 'list column last']:
                columnDiv = mainPage.find('ul', {'class': column})
                for aDiv in columnDiv.findAll('a', href=genreHref):
                    catUrl = aDiv.get('href')
                    text = aDiv.string
                    print(catUrl, text)
                    if text not in skipped:
                        tmpf.write(catUrl + ', ' + text + '\n')
                        total += 1
            print("Total Categories: ", total)

        # Only overwrite the stored list when it is absent or has changed.
        if os.path.exists(catFile) and filecmp.cmp(tmpCatFile, catFile):
            print("No update for app_cat.")
        else:
            print("app_cat updated.")
            shutil.copyfile(tmpCatFile, catFile)
    finally:
        # Never leave the scratch file behind, whatever happened above.
        if os.path.exists(tmpCatFile):
            os.remove(tmpCatFile)