def parse_alexa_rank():
    # Backfill Alexa rank fields on every article that has a canonical link,
    # querying the Alexa lookup in batches of 1000 URLs.
    docs = db.articles.objects(canonical_link__nin=[None, '']).no_cache().timeout(False)  # .limit(3000)
    urls = []
    docs_pool = {}
    scrp = scraper.scraper()
    cnt = {'success': 0, 'fail': 0, 'skip': 0, 'total': len(urls)}
    for idx, r in enumerate(docs):
        if not scrp.validate_url(r.canonical_link):
            cnt['skip'] += 1
            continue
        # url = urllib.parse.quote(r.canonical_link.replace("'", ''))
        url = r.canonical_link
        urls.append(url)
        docs_pool[url] = r
        if len(urls) >= 1000:
            scores = scrp.batch_get_alexa_rank(urls)
            # print(scores)
            for s in scores:
                u = s['url']  # urllib.parse.quote(s['url'])
                doc = docs_pool.get(u)
                if not doc:
                    print("[Error] can't find doc url=" + u)
                    cnt['fail'] += 1
                    continue
                doc.update(
                    alexa_rank=s['rank'],
                    alexa_delta=s['delta'],
                    alexa_loc_name=s['loc_name'],
                    alexa_loc_rank=s['loc_rank'],
                )
                cnt['success'] += 1
            print(idx, cnt, datetime.now())
            urls = []
            docs_pool = {}

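# A caveat with the batching above: updates are flushed only once a full batch
# of 1000 URLs has accumulated, so the final partial batch is never written back
# when the cursor runs out. A minimal sketch of a tail flush, reusing only the
# calls already used above; the helper name `_flush_alexa_batch` is illustrative
# and not part of the original code:
def _flush_alexa_batch(scrp, urls, docs_pool, cnt):
    # Nothing left to flush.
    if not urls:
        return
    scores = scrp.batch_get_alexa_rank(urls)
    for s in scores:
        doc = docs_pool.get(s['url'])
        if not doc:
            cnt['fail'] += 1
            continue
        doc.update(
            alexa_rank=s['rank'],
            alexa_delta=s['delta'],
            alexa_loc_name=s['loc_name'],
            alexa_loc_rank=s['loc_rank'],
        )
        cnt['success'] += 1
# Calling _flush_alexa_batch(scrp, urls, docs_pool, cnt) once after the main
# loop would pick up the leftover documents.
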
def searchNew(self):
    # Prompt the user for a search term, store it in the search history
    # database, then scrape and list the results.
    control.busy()
    t = control.lang(32010).encode('utf-8')
    k = control.keyboard('', t)
    k.doModal()
    q = k.getText()
    if q is None or q == '':
        return
    try:
        from sqlite3 import dbapi2 as database
    except ImportError:
        from pysqlite2 import dbapi2 as database
    dbcon = database.connect(control.searchFile)
    dbcur = dbcon.cursor()
    dbcur.execute("INSERT INTO search VALUES (?,?)", (None, q))
    dbcon.commit()
    dbcur.close()
    items = cache.get(scraper().search, 24, q)
    self.list_builder(items)
    control.idle()
    self.createDirectory(sort=False)

def genre(self, page, subpage, genres, type):
    genre_ids = [58, 69, 57, 59, 84, 86, 60, 79, 77, 93, 89, 82, 71, 66, 95, 88, 75, 85, 83,
                 90, 63, 94, 72, 73, 67, 87, 78, 61, 70, 91, 92, 64, 96, 68, 62, 65, 76, 80,
                 74, 81, 98, 97]
    genre_titles = ['Action', 'Adventure', 'Cars', 'Comedy', 'Dementia', 'Demons', 'Drama',
                    'Ecchi', 'Fantasy', 'Game', 'Harem', 'Historical', 'Horror', 'Josei',
                    'Kids', 'Magic', 'Martial Arts', 'Mecha', 'Military', 'Music', 'Mystery',
                    'Parody', 'Police', 'Psychological', 'Romance', 'Samurai', 'School',
                    'Sci-Fi', 'Seinen', 'Shoujo', 'Shoujo Ai', 'Shounen', 'Shounen Ai',
                    'Slice of Life', 'Space', 'Sports', 'Super Power', 'Supernatural',
                    'Thriller', 'Vampire', 'Yaoi', 'Yuri']
    if genres is None:
        genres = xbmcgui.Dialog().multiselect("Genre", genre_titles)
        if genres is None:
            # user cancelled the genre dialog
            return
    else:
        genres = json.loads(genres)
    # map the selected title indices to the scraper's genre ids
    selected = [genre_ids[int(i)] for i in genres]
    items = cache.get(scraper().genreScrape, 24, selected, page, subpage, type)
    subpage, page = self.subpagination(subpage, page)
    self.list_builder(items)
    self.addDirectoryItem('Next', 'genreSearch', page=page, genres=genres, subpage=subpage, type=type)
    self.createDirectory(sort=False)

def searchOld(self, q):
    items = cache.get(scraper().search, 24, q)
    self.list_builder(items)
    self.createDirectory(sort=False)

def episodeList(self, url, slug):
    items = cache.get(scraper().episodeList, 24, url)
    for item in items:
        self.addDirectoryItem(item['meta']['title'], "playItem", url=item['url'], type=item['type'],
                              slug=slug, is_folder=False, playable=True, meta=item['meta'],
                              art=item['art'])
    self.createDirectory(sort=False)

def playItem(self, slug, url):
    # Scrape hoster links for the episode, resolve the first playable one
    # (highest quality first), and hand the resolved path back to Kodi.
    control.busy()
    resolve_dialog = xbmcgui.DialogProgress()
    link_list = cache.get(scraper().scrapeLinks, 24, slug, url)
    control.idle()
    if len(link_list) == 0:
        dialog = xbmcgui.Dialog()
        dialog.notification('Anime Incursion', 'No Links Available', xbmcgui.NOTIFICATION_INFO, 5000)
    else:
        resolve_dialog.create('Anime Incursion', '')
        resolve_dialog.update(0)
        link_list = sorted(link_list, key=lambda x: x['quality'], reverse=True)
        link_total = len(link_list)
        progress = 0
        path = ''
        for i in link_list:
            # if resolve_dialog.iscanceled():
            #     return
            progress += 1
            resolve_dialog.update(
                int((100 / float(link_total)) * progress),
                str(progress) + ' | [B]Host: ' + i['name'].upper() +
                "[/B] | [B]Resolution: " + str(i['quality']) + "p[/B]")
            try:
                if not i['direct']:
                    import resolveurl
                    path = resolveurl.resolve(i['url']).encode('utf-8')
                else:
                    path = i['url']
                break
            except Exception:
                # resolving failed, try the next host
                continue
        if path != '':
            play_item = xbmcgui.ListItem(path=path)
            print('INFO - ' + str(sys.argv[1]))
            xbmcplugin.setResolvedUrl(int(sys.argv[1]), True, listitem=play_item)
        else:
            dialog = xbmcgui.Dialog()
            dialog.notification('Anime Incursion', 'Unable to Resolve Links', xbmcgui.NOTIFICATION_INFO, 5000)

def showList(self, page, type, subpage, order='score_desc'):
    items = cache.get(scraper().filterScrape, 24, page, type, order, subpage)
    subpage, page = self.subpagination(subpage, page)
    self.list_builder(items)
    self.addDirectoryItem('Next', 'showList', page=page, type=type, order=order, subpage=subpage)
    self.createDirectory(sort=False)

def parse_social_cnts():
    # Backfill Facebook engagement counters (likes, shares, comments, clicks)
    # on every article with a canonical link, processed in batches of 1000 URLs.
    docs = db.articles.objects(canonical_link__nin=[None, '']).no_cache().timeout(False)  # .limit(3000)
    # print("[parse_social_cnts] %i to go~ @ %s" % (docs.count(), datetime.now()))
    urls = []
    docs_pool = {}
    scrp = scraper.scraper()
    cnt = {'success': 0, 'fail': 0, 'skip': 0, 'total': len(urls)}
    for idx, r in enumerate(docs):
        if not scrp.validate_url(r.canonical_link):
            cnt['skip'] += 1
            continue
        url = urllib.parse.quote(r.canonical_link.replace("'", ''))
        urls.append(url)
        docs_pool[url] = r
        if len(urls) >= 1000:
            scores = scrp.batch_get_fb_info(urls)
            # print(scores)
            if not scores:
                # empty response: back off and retry once
                time.sleep(60)
                scores = scrp.batch_get_fb_info(urls)
            for s in scores:
                u = urllib.parse.quote(s['url'])
                doc = docs_pool.get(u)
                if not doc:
                    print("[Error] can't find doc url=" + u)
                    cnt['fail'] += 1
                    continue
                doc.update(
                    fb_click_count=s['click_count'],
                    fb_comment_count=s['comment_count'],
                    fb_like_count=s['like_count'],
                    fb_share_count=s['share_count'],
                    fb_total_count=s['total_count'],
                )
                cnt['success'] += 1
            print(idx, cnt, datetime.now())
            urls = []
            docs_pool = {}

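# parse_alexa_rank() and parse_social_cnts() share the same accumulate-and-flush
# pattern (and the same partial-batch caveat noted after parse_alexa_rank). A
# hedged sketch of a shared helper both could delegate their flush step to:
# `fetch` would be scrp.batch_get_alexa_rank or scrp.batch_get_fb_info, and
# `field_map` maps document fields to keys of each returned score dict. The
# helper name and signature are illustrative, and the URL re-quoting that
# parse_social_cnts applies to returned URLs is omitted here:
def _apply_batch(fetch, urls, docs_pool, field_map, cnt):
    scores = fetch(urls) or []
    for s in scores:
        doc = docs_pool.get(s['url'])
        if not doc:
            cnt['fail'] += 1
            continue
        # copy the requested score fields onto the document in one update
        doc.update(**{field: s[key] for field, key in field_map.items()})
        cnt['success'] += 1

# Example field map for the Facebook counters used above:
# _apply_batch(scrp.batch_get_fb_info, urls, docs_pool,
#              {'fb_like_count': 'like_count', 'fb_share_count': 'share_count',
#               'fb_comment_count': 'comment_count', 'fb_click_count': 'click_count',
#               'fb_total_count': 'total_count'}, cnt)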