async def merge_auctions():
    # get data
    if not os.path.exists(utils.SUPER_EQUIP_FILE):
        _, s_data = await SuperScraper.parse()  # parse() returns (items, equips)
    else:
        s_data = utils.load_json_with_default(utils.SUPER_EQUIP_FILE, default=False)

    if not os.path.exists(utils.KEDAMA_EQUIP_FILE):
        _, k_data = KedamaScraper.parse()
    else:
        k_data = utils.load_json_with_default(utils.KEDAMA_EQUIP_FILE, default=False)

    # merge data
    merged_data = []
    for x in s_data:
        x['type'] = "super"
        merged_data.append(x)
    for x in k_data:
        x['type'] = "kedama"
        merged_data.append(x)

    # dump
    utils.dump_json(merged_data, utils.AUCTION_FILE)
    return merged_data
async def merge_items():
    # get data
    if not os.path.exists(utils.SUPER_ITEM_FILE):
        s_data, _ = await SuperScraper.parse()  # items are the first element of the pair
    else:
        s_data = utils.load_json_with_default(utils.SUPER_ITEM_FILE, default=False)

    if not os.path.exists(utils.KEDAMA_ITEM_FILE):
        k_data, _ = KedamaScraper.parse()
    else:
        k_data = utils.load_json_with_default(utils.KEDAMA_ITEM_FILE, default=False)

    if not os.path.exists(utils.MARKET_ITEM_FILE):
        _, h_data = await MarketScraper.scrape()
    else:
        h_data = utils.load_json_with_default(utils.MARKET_ITEM_FILE, default=False)

    # merge data
    merged_data = []
    for typ, data in [('super', s_data), ('kedama', k_data)]:
        for entry in data:
            entry['type'] = typ
            merged_data.append(entry)
    for key in h_data:  # market data is a dict keyed by listing index
        h_data[key]['type'] = "market"
        merged_data.append(h_data[key])

    # dump
    utils.dump_json(merged_data, utils.ITEM_FILE)
    return merged_data
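# A minimal usage sketch for the two merge helpers above; the entry point below
# is an assumption for illustration, not part of the original module.
async def rebuild_merged_files():
    await merge_auctions()
    await merge_items()

if __name__ == "__main__":
    import asyncio
    asyncio.run(rebuild_merged_files())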
async def parse(cls):
    # inits
    CACHE = utils.load_json_with_default(utils.SUPER_CACHE_FILE, default=cls.DEFAULT_CACHE)
    EQUIP_DATA = utils.load_json_with_default(utils.SUPER_EQUIP_FILE, default=[])
    ITEM_DATA = utils.load_json_with_default(utils.SUPER_ITEM_FILE, default=[])

    # JSON stores these as lists; use sets while working
    for x in ['files', 'equips', 'items']:
        CACHE['parsed'][x] = set(CACHE['parsed'][x])

    # scan auctions
    for file in glob.glob(utils.SUPER_HTML_DIR + "*.html"):
        auc_name = os.path.basename(file).replace(".html", "")

        # skip ones already cached
        if auc_name in CACHE['parsed']['files']:
            continue

        # get data
        with open(file, encoding='utf-8') as f:
            result = cls._parse_page(f.read())
        items = result['items']
        equips = result['equips']
        fails = [f"{auc_name} - {y}" for y in result['fails']]

        # add in dates / auction numbers
        if auc_name not in CACHE['seen']:
            await cls.scrape()  # cache probably got deleted
        for x in items + equips:
            tmp = auc_name.replace("itemlist", "")
            x['auction_number'] = CACHE['num_map'][tmp]
            x['time'] = CACHE['time_map'][tmp]
            x['thread'] = cls.THREAD_BASE_LINK + tmp
            x['id'] = f"{x['auction_number']}_{x['id']}"

        for x in items:
            if x['id'] not in CACHE['parsed']['items']:
                ITEM_DATA.append(x)
                CACHE['parsed']['items'].add(x['id'])
        for x in equips:
            if x['id'] not in CACHE['parsed']['equips']:
                EQUIP_DATA.append(x)
                CACHE['parsed']['equips'].add(x['id'])

        CACHE['fails'] += fails
        CACHE['parsed']['files'].add(auc_name)

    utils.dump_json(ITEM_DATA, utils.SUPER_ITEM_FILE)
    utils.dump_json(EQUIP_DATA, utils.SUPER_EQUIP_FILE)

    for x in ['files', 'equips', 'items']:
        CACHE['parsed'][x] = list(CACHE['parsed'][x])
    utils.dump_json(CACHE, utils.SUPER_CACHE_FILE)

    return ITEM_DATA, EQUIP_DATA
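# Shape of the Super cache, inferred from how parse() above and scrape() below
# read and write it; the real cls.DEFAULT_CACHE may differ. The 'parsed'
# collections live as lists in the JSON file and as sets in memory.
DEFAULT_CACHE_SKETCH = {
    "seen": [],      # auction page names ("itemlist123"), newest first
    "num_map": {},   # "123" -> zero-padded auction number
    "time_map": {},  # "123" -> auction date as epoch seconds
    "fails": [],     # "<auction> - <listing>" strings that failed to parse
    "parsed": {"files": [], "equips": [], "items": []},
}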
async def get_series_data(self, update, session=None):
    # inits
    name = update['series']
    link = update['series_link']
    DATA = utils.load_json_with_default(utils.SERIES_CACHE, {})
    CONFIG = utils.load_bot_config()

    # refresh data if new series or not updated in a while
    last_check = float('inf')
    if name in DATA:
        last_check = time.time() - DATA[name].get('last_checked', 0)

    if name not in DATA or last_check > CONFIG['series_refresh_rate'] * 24 * 60 * 60:
        # get data
        html = await get_html(link, session)
        soup = BeautifulSoup(html, 'html.parser')
        s_data = self.parse_series_page(soup, update)
        s_data['link'] = link

        # cache
        DATA[name] = s_data
        DATA[name]['last_checked'] = time.time()
        utils.dump_json(DATA, utils.SERIES_CACHE)

    return DATA[name]
async def purge_rr(self):
    # skip the first iteration (the loop fires once immediately on startup)
    if self.purge_rr.current_loop == 0:
        return

    print("Purging rr logs...")
    for log_file in glob.glob(utils.REACTION_ROLE_LOG_DIR + "*.json"):
        # load log
        log = utils.load_json_with_default(log_file, default=False)

        # mark entries for purging
        ch_dels, m_dels = [], []
        for ch_id in log:
            # check channel (JSON keys are strings; get_channel wants an int)
            channel = self.bot.get_channel(int(ch_id))
            if not channel:
                ch_dels.append(ch_id)
                continue

            # check messages
            for m_id in log[ch_id]:
                try:
                    await channel.fetch_message(int(m_id))
                except discord.NotFound:
                    m_dels.append((ch_id, m_id))

        # do purging
        for ch_id, m_id in m_dels:
            del log[ch_id][m_id]
        for ch in ch_dels:
            del log[ch]

        # save log
        utils.dump_json(log, log_file)
async def get_rr_message(query, ctx, bot):
    # inits
    spl = query.strip().split(maxsplit=1)

    # validity checks
    if not ctx.guild:
        raise TemplatedError("no_guild")
    if not spl:
        raise TemplatedError("no_rr_message_id")

    # get log
    message_id = spl[0]
    new_query = spl[1] if len(spl) > 1 else ""
    log_file = utils.REACTION_ROLE_LOG_DIR + str(ctx.guild.id) + ".json"
    log = utils.load_json_with_default(log_file)

    # get message channel
    for ch in log:
        if message_id in log[ch]:
            msg_ch = bot.get_channel(int(ch))
            break
    else:
        raise TemplatedError("bad_rr_message_id", string=message_id)

    # fetch message
    try:
        msg = await msg_ch.fetch_message(int(message_id))
        return msg, new_query
    except discord.NotFound:
        raise TemplatedError("deleted_rr_message", string=message_id)
    except ValueError:
        raise TemplatedError("rr_message_not_int", string=message_id)
def edit_rr_log(message, message_dict=None, roles=None, emotes=None):
    # load log
    log_file = utils.REACTION_ROLE_LOG_DIR + str(message.guild.id) + ".json"
    log = utils.load_json_with_default(log_file, default={})

    # add default values (includes channel id because we can't fetch a message without it)
    ch_id = str(message.channel.id)
    m_id = str(message.id)
    if ch_id not in log:
        log[ch_id] = {}
    if m_id not in log[ch_id]:
        log[ch_id][m_id] = dict(message={}, roles=[], emotes=[])

    # edit entry
    entry = log[ch_id][m_id]
    if message_dict is not None:
        entry['message'] = message_dict
    if roles is not None:
        entry['roles'] = [x.id for x in roles]
    if emotes is not None:
        entry['emotes'] = [str(x) for x in emotes]

    # save log
    utils.dump_json(log, log_file)
    return entry
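# Layout of a reaction-role log file as built by edit_rr_log() above and read by
# handle_rr() / get_rr_message() (illustrative sketch; the ids are fake):
RR_LOG_SKETCH = {
    "<channel_id>": {
        "<message_id>": {
            "message": {},           # serialized message content
            "roles": [1234567890],   # role ids, parallel to emotes
            "emotes": ["\N{THUMBS UP SIGN}"],
        }
    }
}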
async def searchLottoItems(item):
    import utils
    from utils.scraper_utils import get_session

    lst = []
    split = item.lower().split()  # lowercase so matching against eq.lower() works
    data = utils.load_json_with_default(utils.DATA_DIR + "lotto_data.json",
                                        default={"w": {}, "a": {}})

    numW = max([int(x) for x in data["w"].keys()]) if data['w'] else 0
    numA = max([int(x) for x in data["a"].keys()]) if data['a'] else 0
    if weaponLotteryOutdated(numW) or armorLotteryOutdated(numA):
        print("OUTDATED", numW, numA)
        await lotto_dl(get_session())
        await lotto_parse()
        data = utils.load_json_with_default(utils.DATA_DIR + "lotto_data.json",
                                            default={"w": {}, "a": {}})

    for typ in data:  # "w" (weapon) and "a" (armor)
        ks = sorted(data[typ].keys(), key=lambda x: -int(x))
        for num in ks:
            ent = data[typ][num]
            if all(x in ent["eq"].lower() for x in split):
                date = f"#{num} / {ent['date'][0]} {ent['date'][1]}{ent['date'][2]}"
                try:
                    winner = ent['winners'][0]
                except IndexError:
                    print(num, typ, ent)
                    winner = "?"
                tix = formatPrice(ent["tickets"])
                eq_name = ent["eq"]
                print(date, winner, tix)
                lst.append([eq_name, winner, tix, date])

    return lst
async def scrape(cls):
    # inits
    CACHE = utils.load_json_with_default(utils.SUPER_CACHE_FILE, default=cls.DEFAULT_CACHE)

    async with get_session() as session:
        # check for new auctions
        home_html = await get_html(cls.HOME_BASE_LINK, session)
        home_soup = BeautifulSoup(home_html, 'html.parser')

        rows = home_soup.find("tbody").find_all("tr")
        auc_names = [
            r.find("a", href=lambda x: x and "itemlist" in x)['href']
            for r in rows
        ]
        auc_nums = [r.find("td").get_text().zfill(3) for r in rows]
        auc_dates = [r.find_all("td")[1].get_text() for r in rows]
        auc_dates = [cls._to_epoch(x) for x in auc_dates]
        assert len(auc_names) == len(auc_nums) == len(auc_dates)

        # get uncached pages
        new_aucs = []
        for i in range(len(rows)):
            if auc_names[i] not in CACHE['seen']:
                new_aucs.append((auc_nums[i], auc_names[i], auc_dates[i]))

        # create folder for auction page html
        if not os.path.exists(utils.SUPER_HTML_DIR):
            os.makedirs(utils.SUPER_HTML_DIR)

        # pull uncached pages
        for num, name, date in new_aucs:
            out_path = utils.SUPER_HTML_DIR + name + ".html"
            if not os.path.exists(out_path):
                await asyncio.sleep(cls.SCRAPE_DELAY)
                auc_html = await get_html(cls.HOME_BASE_LINK + name, session)
                if "Auction ended" not in auc_html:
                    continue  # ignore ongoing auctions

                with open(out_path, "w", encoding='utf-8') as f:
                    f.write(auc_html)

            tmp = name.replace("itemlist", "")
            CACHE['seen'].append(name)
            CACHE['num_map'][tmp] = num
            CACHE['time_map'][tmp] = date

    # update cache
    if new_aucs:
        CACHE['seen'].sort(reverse=True)
        utils.dump_json(CACHE, utils.SUPER_CACHE_FILE)

    # true if new auctions found
    return bool(new_aucs)
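# A hedged sketch of the intended call order for the two SuperScraper steps:
# scrape() above downloads ended-auction HTML, and parse() shown earlier turns
# it into item/equip records. The driver function itself is an assumption.
async def refresh_super_data():
    if await SuperScraper.scrape():  # True when new ended auctions were saved
        await SuperScraper.parse()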
def iter_data():
    data_dir = utils.DS_ORIG_DIR + series + "/"
    lst = glob.glob(data_dir + "*")
    for fp in lst:
        data = utils.load_json_with_default(fp, default=False)
        ret = dict(data=data, file_path=fp)
        if get_src:
            ret['orig_path'] = get_src(data)
        yield ret
async def handle_rr(self, payload, typ):
    # inits
    ch_id = str(payload.channel_id)
    m_id = str(payload.message_id)

    # check for guild
    if not payload.guild_id:
        return

    # load log
    log_file = utils.REACTION_ROLE_LOG_DIR + str(payload.guild_id) + ".json"
    if not os.path.exists(log_file):
        return
    log = utils.load_json_with_default(log_file, default=False)

    # check for existence
    if ch_id not in log:
        return
    if m_id not in log[ch_id]:
        return

    # get role index
    try:
        ind = log[ch_id][m_id]['emotes'].index(str(payload.emoji))
    except ValueError:
        return

    # get role
    role_id = log[ch_id][m_id]['roles'][ind]
    guild = self.bot.get_guild(payload.guild_id)
    role = guild.get_role(role_id)
    member = guild.get_member(payload.user_id)

    # add / remove role
    try:
        if typ == "add":
            await member.add_roles(role, reason="reaction role")
        elif typ == "remove":
            if role in member.roles:
                await member.remove_roles(role, reason="reaction role")
        else:
            raise Exception(f"Bad typ passed to handle_rr (expected 'add' or 'remove' but got '{typ}')")
    except discord.Forbidden:
        CONFIG = utils.load_yaml(utils.REACTION_CONFIG)
        template = utils.load_yaml(utils.ERROR_STRING_FILE)[f'no_{typ}_perms_template']
        text = utils.render(template, dict(member=member, role=role))

        channel = self.bot.get_channel(int(ch_id))
        await channel.send(text, delete_after=CONFIG['error_deletion_delay'])
        return
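# A hedged sketch of how handle_rr() is presumably wired up: the listener names
# are the standard discord.py raw-reaction events, but the hookup itself is an
# assumption (requires `from discord.ext import commands`).
@commands.Cog.listener()
async def on_raw_reaction_add(self, payload):
    await self.handle_rr(payload, "add")

@commands.Cog.listener()
async def on_raw_reaction_remove(self, payload):
    await self.handle_rr(payload, "remove")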
async def get(self):
    bot_config = utils.load_bot_config()
    role_config = utils.load_yaml(utils.MENTIONS_FILE)
    roles_available = await IPC_CLIENT.request("get_available_roles")
    series_data = utils.load_json_with_default(utils.SERIES_CACHE, {})

    html = Template(filename=TEMPLATE_DIR + "index.html", lookup=LOOKUP).render(
        GENERAL=bot_config,
        ROLES=role_config,
        ROLES_AVAILABLE=roles_available,
        SERIES_DATA=series_data,
    )
    return Response(html)
def filter_updates(updates):
    # inits
    SEEN = utils.load_json_with_default(utils.SEEN_CACHE, [])

    for x in updates:
        # check if seen
        hash = x['title'].replace(" ", "-")
        if hash in SEEN:
            continue
        SEEN.append(hash)

        # return
        yield x

    # clean up
    SEEN = SEEN[-12345:]
    utils.dump_json(SEEN, utils.SEEN_CACHE)
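# Note for callers: the SEEN trim-and-dump above sits after the loop, so it only
# runs when the consumer exhausts the generator. A hedged usage sketch, where
# fetch_updates() and handle() are hypothetical stand-ins:
for update in filter_updates(fetch_updates()):
    handle(update)  # breaking out early would skip the SEEN_CACHE dump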
def check_update_log(check_name, min_time, exact_day=None):
    # inits
    time_check = False
    day_check = False
    log = utils.load_json_with_default(utils.UPDATE_LOG)
    log.setdefault(check_name, 0)

    # check if right day
    if exact_day is None or datetime.today().weekday() == exact_day:
        day_check = True

    # check if enough time has passed since last check
    if day_check:
        now = datetime.now().timestamp()
        if now - log[check_name] > min_time:
            time_check = True

    return day_check and time_check
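# A hedged usage sketch pairing check_update_log() with update_log() further
# below; "weekly_digest" and run_weekly_digest() are hypothetical. exact_day
# uses Python's weekday() convention (Monday == 0).
if check_update_log("weekly_digest", min_time=24 * 60 * 60, exact_day=0):
    run_weekly_digest()
    update_log("weekly_digest")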
async def filter_updates(self, updates, session=None):
    # inits
    SEEN = utils.load_json_with_default(utils.SEEN_CACHE, [])
    BLACKLIST = utils.load_yaml_with_default(utils.BLACKLIST_FILE, default=[])
    # updates.sort(key=lambda x: (x['series'], x['chapter_number']))

    async for x in updates:
        # inits
        series_data = await self.get_series_data(x, session)
        x['series_data'] = series_data

        # ignore already seen
        hash = x['series'] + "_" + str(x['chapter_number'])
        if hash in SEEN:
            if self.stop_on_old:
                return
            continue
        SEEN.append(hash)

        # ignore blacklisted
        flag = False
        for y in BLACKLIST:
            flag = utils.contains_all(to_search=series_data['display_name'], to_find=y)
            flag |= utils.contains_all(to_search=series_data['group'], to_find=y)
            if flag:
                break
        if flag:
            continue

        # return
        yield x

    # clean up
    SEEN = SEEN[-12345:]
    utils.dump_json(SEEN, utils.SEEN_CACHE)
async def get_all_rr_messsages(query, ctx, bot):
    messages = []

    # check for guild
    if not ctx.guild:
        raise TemplatedError("no_guild")

    # get log
    log_file = utils.REACTION_ROLE_LOG_DIR + str(ctx.guild.id) + ".json"
    log = utils.load_json_with_default(log_file)

    # return existing messages
    for ch in log:
        for m in log[ch]:
            # fetch message
            try:
                msg_ch = bot.get_channel(int(ch))
                msg = await msg_ch.fetch_message(int(m))
                messages.append(msg)
            except discord.NotFound:
                pass

    return messages
async def lotto_parse():
    import glob, json, os, utils
    from bs4 import BeautifulSoup

    data = utils.load_json_with_default(utils.DATA_DIR + "lotto_data.json",
                                        default={"w": {}, "a": {}})

    files = reversed(glob.glob(utils.CACHE_DIR + "lottery/*.html"))
    for file in files:
        baseName = os.path.basename(file).replace(".html", "")
        num = int(baseName[1:])  # e.g. "w123" -> 123
        typ = baseName[0]        # "w" (weapon) or "a" (armor)
        if str(num) in data[typ]:
            continue

        html = open(file, encoding="utf-8").read()
        soup = BeautifulSoup(html, 'html.parser')
        left = soup.find("div", {"id": "leftpane"})

        # tokens like "January" "21st," -> ["January", "21", "st"]
        date = [x.text for x in left.find_all("div") if "Grand Prize" in x.text][0].split()
        date = [date[3], date[4][:-1][:-2], date[4][:-1][-2:]]

        eq = left.find("div", {"id": "lottery_eqname"}).text

        prizePane = [
            x for x in left.find_all("div")
            if "font-family:verdana" in str(x) and "Prize:" in str(x)
        ]
        winners = [
            x.text.replace("Winner: ", "")
            for x in prizePane[0].find_all("div") if "Winner:" in x.text
        ]
        prizes = [
            x.text.split()[2:]
            for x in prizePane[0].find_all("div") if "Prize:" in x.text
        ]
        prizes = [[int(x[0]), " ".join(x[1:])] for x in prizes]

        right = soup.find("div", {"id": "rightpane"}).find_all("div")
        tix = [x.text for x in right if "You hold" in x.text]
        tix = tix[0].split()[4]

        print(num, date, eq, winners, prizes, tix)
        if winners:
            data[typ][num] = {
                "date": date,
                "eq": eq,
                "winners": winners,
                "prizes": prizes,
                "tickets": tix,
            }

    with open(utils.DATA_DIR + "lotto_data.json", "w") as file:
        file.write(json.dumps(data, indent=2))
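# Shape of lotto_data.json as written by lotto_parse() above and read by the
# search helpers (an illustrative sketch; the values shown are made up):
LOTTO_DATA_SKETCH = {
    "w": {  # weapon lotteries, keyed by lottery number
        "123": {
            "date": ["January", "21", "st"],
            "eq": "Peerless Ethereal Axe of Slaughter",
            "winners": ["GrandPrizeWinner", "SecondPlace"],
            "prizes": [[30, "Chaos Tokens"]],  # [count, prize name] per runner-up
            "tickets": "1000000",
        }
    },
    "a": {},  # armor lotteries, same layout
}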
def _search(item_name, keywords):
    DATA = utils.load_json_with_default(utils.ITEM_FILE, default=False)
inp = input("...? ")
# time.sleep(.1)  # prints won't show immediately unless you uncomment this for whatever reason

# print text from new process
proc = multiprocessing.Process(target=echo, args=(inp,), daemon=True)
proc.start()
"""

import glob, cv2, utils, numpy as np, os, random
import multiprocessing
import base64, requests, webbrowser, time, ctypes
from classes.dynamic_page import DynamicPage

series = "knight_run"
SEEN_DATA = utils.load_json_with_default("./verified.json")
ts = utils.Timestamp()
session = requests.session()

# leave None if the image that contours were created from is unavailable
get_src = None

def get_src(data):
    tmp = data['other']['source']
    return rf"C:\Programming\KR_Textify\data\chaps\{tmp['chap_num']}/naver/{tmp['page_num']}.png"

def iter_data():
    data_dir = utils.DS_ORIG_DIR + series + "/"
    lst = glob.glob(data_dir + "*")
def update_log(check_name):
    log = utils.load_json_with_default(utils.UPDATE_LOG)
    log[check_name] = datetime.now().timestamp()
    utils.dump_json(log, utils.UPDATE_LOG)
async def scrape(cls):
    # inits
    DATA = utils.load_json_with_default(utils.MARKET_ITEM_FILE)
    CACHE = utils.load_json_with_default(utils.MARKET_CACHE_FILE, default=cls.DEFAULT_CACHE)
    CACHE['invalid'] = set(CACHE['invalid'])

    target_page_number = 1
    target_index = None
    session = get_session()
    html = await get_html(cls.BASE_LINK, session)

    # Loop logic:
    #   1. add results for current page to data
    #   2. calculate target_index
    #   3. check if done
    #      (target_index >= num_results OR target_index >= a pending entry index)
    #   4. move to page containing target_index
    #      (the target index may shift off-page by the time we visit the page
    #       due to new purchases, but that doesn't matter, we'll get it eventually)
    #   5. go to step 1
    while True:
        # step 1
        result = cls.get_entries(html, target_page_number)
        DATA.update(result['entries'])
        total = result['total']
        CACHE['invalid'] |= result['invalid_indices']

        # step 2
        if target_index is None:
            target_index = 1  # one-indexed from oldest
        while str(target_index) in DATA or target_index in CACHE['invalid']:
            target_index += 1

        # step 3
        if result['pending_indices'] and target_index >= min(result['pending_indices']):
            break
        if target_index >= total:
            break

        # step 4
        target_page_number = cls.get_target_page(target_index, total)
        html = await get_html(cls.BASE_LINK + str(target_page_number), session)

        # be nice to lestion
        print(f"{len(DATA) + len(CACHE['invalid'])} / {total}...", end="", flush=True)
        await asyncio.sleep(cls.SCRAPE_DELAY)

        # intermediate save
        tmp = copy.deepcopy(CACHE)
        tmp['invalid'] = sorted(tmp['invalid'])
        utils.dump_json(tmp, utils.MARKET_CACHE_FILE)
        utils.dump_json(DATA, utils.MARKET_ITEM_FILE)

    # final save
    CACHE['invalid'] = sorted(CACHE['invalid'])
    utils.dump_json(CACHE, utils.MARKET_CACHE_FILE)
    utils.dump_json(DATA, utils.MARKET_ITEM_FILE)
    return CACHE, DATA
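# get_target_page() is not shown here; below is a sketch of one consistent
# implementation, assuming newest-first pages of a fixed size (the per-page
# count of 50 is an assumption) and the one-indexed-from-oldest target index
# used by the loop above.
def get_target_page_sketch(target_index: int, total: int, per_page: int = 50) -> int:
    # Listings run newest-first, so convert the from-oldest index into a
    # zero-based offset from the newest entry, then bucket it into a page.
    from_newest = total - target_index
    return from_newest // per_page + 1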
def __init__(self):
    self.RANGES = utils.load_json_with_default(utils.RANGES_FILE, default=False)
def reload_ranges(self):
    self.RANGES = utils.load_json_with_default(utils.RANGES_FILE, default=False)
async def searchLottoWinners(winner):
    import utils
    from utils.scraper_utils import get_session

    lst = []
    stats = {
        "Equips": [0, 0],
        "Chaos Token": [0, 0],
        "Chaos Tokens": [0, 0],
        "Golden Lottery Ticket": [0, 0],
        "Golden Lottery Tickets": [0, 0],
        "Caffeinated Candy": [0, 0],
        "Caffeinated Candies": [0, 0],
    }

    data = utils.load_json_with_default(utils.DATA_DIR + "lotto_data.json",
                                        default={"w": {}, "a": {}})

    numW = max([int(x) for x in data["w"].keys()]) if data['w'] else 0
    numA = max([int(x) for x in data["a"].keys()]) if data['a'] else 0
    if weaponLotteryOutdated(numW) or armorLotteryOutdated(numA):
        print("OUTDATED", numW, numA)
        await lotto_dl(get_session())
        await lotto_parse()
        data = utils.load_json_with_default(utils.DATA_DIR + "lotto_data.json",
                                            default={"w": {}, "a": {}})

    for typ in data:
        ks = sorted(data[typ].keys(), key=lambda x: -int(x))
        for num in ks:
            ent = data[typ][num]
            if winner.lower() in [x.lower() for x in ent["winners"]]:
                date = f"#{num} / {ent['date'][0]} {ent['date'][1]}{ent['date'][2]}"
                lower = [x.lower() for x in ent['winners']]
                place = lower.index(winner.lower())

                if place == 0:
                    # grand prize: the equip itself
                    prize = ent["eq"].replace("Peerless ", "")
                    stats["Equips"][0] += 1
                    stats["Equips"][1] += 1
                    gp = ""
                else:
                    n = ent["prizes"][place - 1][0]
                    nm = ent["prizes"][place - 1][1]
                    prize = f"{n} {nm}"
                    stats[nm][0] += n
                    stats[nm][1] += 1
                    gp = ent["eq"].replace("Peerless ", "")

                tix = formatPrice(ent["tickets"])
                print(date, prize, tix)
                lst.append([prize, gp, tix, date])

    # fold singular/plural prize names together
    merge = lambda x, y: [str(x[0] + y[0]), str(x[1] + y[1])]
    stats["Chaos Tokens"] = merge(stats["Chaos Tokens"], stats["Chaos Token"])
    del stats["Chaos Token"]
    stats["Golden Lottery Tickets"] = merge(stats["Golden Lottery Tickets"], stats["Golden Lottery Ticket"])
    del stats["Golden Lottery Ticket"]
    stats["Caffeinated Candies"] = merge(stats["Caffeinated Candies"], stats["Caffeinated Candy"])
    del stats["Caffeinated Candy"]
    stats["Equips"][0] = str(stats["Equips"][0])
    stats["Equips"][1] = str(stats["Equips"][1])

    return lst, stats