def _read_accessories_table(table: Tag):
    """Yield one ``{'id', 'name', 'price'}`` dict per accessory in ``table``.

    The first ``tbody > tr`` row is treated as the header and skipped. For
    every remaining row the name is read from the ``th`` cell with class
    ``b`` and the price from the second-to-last ``td`` cell. When the name
    cell carries a ``rowspan`` attribute, the continuation rows it covers are
    skipped so that only the first row of the span is reported.

    A table without rows, or a ``rowspan`` that reaches past the last row,
    simply ends the iteration.
    """
    rows = iter(table.select('tbody > tr'))
    # Skip header. The default matters: a bare next() on an exhausted iterator
    # raises StopIteration, which a generator converts into RuntimeError.
    next(rows, None)

    for row in rows:
        # Columns are: Name, Attack, Evasion, Element, Status, CT, Cmb, Mtl, Value, Licence
        name_cell = row.find(name='th', attrs={'class': 'b'})
        *_unused, price_cell, _licence = row.find_all(name='td')

        # The name can either be a string or a tag (usually <a>).
        first_name_part = name_cell.contents[0]
        if isinstance(first_name_part, str):
            name = first_name_part.strip()
        else:
            name = first_name_part.text.strip()

        price = get_price(price_cell.text.strip())

        # TODO I need to add the rest of the stats.
        yield {
            'id': make_id(name),
            'name': name,
            'price': price,
        }

        # Some cases span multiple rows, but we only want the first, so skip them.
        rowspan = int(name_cell.attrs.get('rowspan', '1'))
        for _ in range(rowspan - 1):
            next(rows, None)
def _read_items_table(table: Tag):
    """Yield one ``{'id', 'name', 'description', 'price'}`` dict per item row.

    The first ``tbody > tr`` row is treated as the header and skipped. For
    every remaining row the name comes from the ``th`` cell with class ``b``,
    and the description and price from the first two ``td`` cells (any
    further ``td`` cells are ignored). When the name cell carries a
    ``rowspan`` attribute, the continuation rows it covers are skipped so
    that only the first row of the span is reported.

    A table without rows, or a ``rowspan`` that reaches past the last row,
    simply ends the iteration.
    """
    rows = iter(table.select('tbody > tr'))
    # Skip header. The default matters: a bare next() on an exhausted iterator
    # raises StopIteration, which a generator converts into RuntimeError.
    next(rows, None)

    for row in rows:
        # Columns are: Name, Description, Value
        name_cell = row.find(name='th', attrs={'class': 'b'})
        description_cell, price_cell, *_ = row.find_all(name='td')

        name = name_cell.text.strip()
        description = description_cell.text.strip()
        price = get_price(price_cell.text.strip())

        yield {
            'id': make_id(name),
            'name': name,
            'description': description,
            'price': price,
        }

        # Some cases span multiple rows, but we only want the first, so skip them.
        rowspan = int(name_cell.attrs.get('rowspan', '1'))
        for _ in range(rowspan - 1):
            next(rows, None)
def _read_items_table(table: Tag):
    """Yield one dict per item from a table that uses two rows per item.

    The first ``tbody > tr`` row is treated as the header and skipped. The
    remaining rows are consumed in pairs:

    * 1st row: Name (``th`` with class ``b``), Action, Location
    * 2nd row: in-game description (its first ``td``)

    Each yielded dict has the keys ``id``, ``name``, ``action``, ``location``
    and ``description``. A table without rows yields nothing.

    Raises:
        RuntimeError: if the table ends after the first row of a pair.
    """
    rows = iter(table.select('tbody > tr'))
    # Skip header. The default matters: a bare next() on an exhausted iterator
    # raises StopIteration, which a generator converts into RuntimeError.
    next(rows, None)

    for row in rows:
        name_cell = row.find(name='th', attrs={'class': 'b'})
        action_cell, location_cell = row.find_all(name='td')

        # The description lives in the row that follows; pull it off the same
        # iterator so the outer loop resumes at the next item.
        description_row = next(rows, None)
        if description_row is None:
            raise RuntimeError(
                "Missing description row for item {!r}".format(name_cell.text.strip())
            )
        description_cell = description_row.find('td')

        name = name_cell.text.strip()
        action = action_cell.text.strip()
        location = location_cell.text.strip()
        description = description_cell.text.strip()

        yield {
            'id': make_id(name),
            'name': name,
            'action': action,
            'location': location,
            'description': description,
        }
def main(args):
    """Convert the recipe spreadsheet into ``recipes.json`` and ``items.json``.

    Reads the first sheet of the workbook at ``args.path_to_file`` and skips
    its first row as the header. Each remaining row is unpacked (via
    ``_get_row``) into name, result, item, quantity, price and difference.
    A row whose name is empty once ``*`` characters are removed is treated as
    a continuation: its results and items are appended to the previous
    recipe. A name ending in ``*`` marks the recipe as repeatable.

    Every non-``None`` entry found in a row's items or results is collected
    once, keyed by its ``_id``.

    Both output files are written as UTF-8 inside ``args.output_file``, which
    is joined with the file names and therefore used as a directory.
    """
    recipes = []
    items = {}

    with xlrd.open_workbook(args.path_to_file) as wb:
        sh = wb.sheet_by_index(0)
        rows = sh.get_rows()
        # Skip header; the default keeps an empty sheet from raising
        # StopIteration out of main().
        next(rows, None)

        for row in rows:
            # The difference column is read but not used.
            name, result, item, quantity, price, _difference = _get_row(row)

            item = parse_items(item)
            # The quantity column applies to the first parsed item.
            item[0]['quantity'] = int(quantity)

            name_without_star = name.replace('*', '')
            recipe = {
                '_id': make_id(name_without_star),
                'name': name_without_star or None,
                'result': parse_items(result),
                'repeatable': name.endswith('*'),
                'items': item,
                'cost': parse_price(price)
            }

            for i in recipe['items'] + recipe['result']:
                if i is None:
                    continue
                items.setdefault(i['item']['_id'], i['item'])

            if recipe['name'] is None:
                # 'Continuation' of previous recipe. Append items.
                recipes[-1]['result'].extend(recipe['result'])
                recipes[-1]['items'].extend(recipe['items'])
            else:
                # End of recipe.
                recipes.append(recipe)

    outputs = (
        ('recipes.json', recipes),
        ('items.json', list(items.values())),
    )
    for filename, cache in outputs:
        # ensure_ascii=False writes non-ASCII characters verbatim, so the
        # encoding is pinned instead of relying on the platform default.
        with open(path.join(args.output_file, filename), 'w', encoding='utf-8') as json_out:
            data = {'version': 1, '_cache': cache}
            json.dump(data, json_out, ensure_ascii=False, indent=4)
def _get_item(item_cell, amount_cell):
    """Build an ``{'item': {...}, 'amount': int}`` entry from two table cells.

    The stripped text of ``item_cell`` becomes the item's ``name`` and is
    passed to ``make_id`` for its ``id``; the stripped text of
    ``amount_cell`` is converted with ``int``.
    """
    label = item_cell.text.strip()
    entry = {
        'id': make_id(label),
        'name': label,
    }
    amount_text = amount_cell.text.strip()
    return {'item': entry, 'amount': int(amount_text)}
def to_item_and_amount(item_and_amount: List[str]):
    """Turn a ``[name]`` or ``[name, amount]`` list into a loot item entry.

    When the list has exactly two elements they are taken as name and amount;
    otherwise the first element is the name and the amount defaults to ``1``.

    Returns:
        ``None`` when the list is empty or the name is missing, empty or
        whitespace-only. Otherwise a dict of the form
        ``{'item': {'_id', 'name', 'type': 'loot'}, 'quantity': int}``.
    """
    if not item_and_amount:
        return None

    if len(item_and_amount) == 2:
        item, amount = item_and_amount
    else:
        item, amount = item_and_amount[0], '1'

    # Strip before testing so a blank cell such as '   ' is treated as
    # "no item" instead of producing an entry with an empty name.
    item_name = item.strip() if item else ''
    if not item_name:
        return None

    return {
        'item': {
            '_id': make_id(item_name),
            'name': item_name,
            'type': 'loot'
        },
        'quantity': int(amount.strip())
    }
async def get_bazaar():
    """Download the bazaar page at ``url`` and return its recipes.

    The recipe table is the first ``table`` with class
    ``full-width FFXII article-table`` after the ``span`` whose id is
    ``Zodiac_versions``. Its first row is skipped as the header. A recipe may
    span several rows: the name cell's ``rowspan`` says how many, and the
    extra rows only carry an item and an amount.

    Returns:
        A list of dicts with the keys ``id``, ``name``, ``result``, ``items``,
        ``price``, ``diff`` and ``repeatable``.

    Raises:
        RuntimeError: if the response status is not ``OK``, or if the heading
            or the table cannot be found in the page.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            if response.status != OK:
                raise RuntimeError("Could not get bazaar page.")
            text = await response.text()

    soup = BeautifulSoup(text, features="html.parser")

    # Find the heading with our desired name.
    heading = soup.find(name='span', attrs={'id': 'Zodiac_versions'})
    if heading is None:
        raise RuntimeError("Could not find the bazaar heading.")

    # The table follows it.
    table = heading.find_next('table', attrs={'class': 'full-width FFXII article-table'})
    if table is None:
        raise RuntimeError("Could not find the bazaar table.")

    rows = iter(table.select('tbody > tr'))
    # Skip header. A bare next() would raise StopIteration on an empty table,
    # which a coroutine converts into RuntimeError.
    next(rows, None)

    groups = []
    for row in rows:
        # First row may contain a rowspan. If that's the case,
        # the next n - 1 rows will contain the rest of the items.
        name_cell = row.find(name='th', attrs={'class': 'b'})
        rowspan = int(name_cell.attrs.get('rowspan', '1'))

        current_group = [row]
        while len(current_group) < rowspan:
            following = next(rows, None)
            if following is None:
                # The table ended before the span was filled.
                break
            current_group.append(following)
        groups.append(current_group)

    bazaar = []
    for group in groups:
        name_row, *item_rows = group

        # Columns are: Name (Spans), Result with amount (Spans), Item, Amount, Price (Spans), Diff (Spans)
        name_cell = name_row.find(name='th', attrs={'class': 'b'})
        result_cell, item_cell, amount_cell, price_cell, diff_cell = name_row.find_all(name='td')

        name = name_cell.text.strip()
        result_text = result_cell.text.strip()
        items = [_get_item(item_cell, amount_cell)]
        price = price_cell.text.strip()
        diff = diff_cell.text.strip()

        for item_row in item_rows:
            item_cell, amount_cell = item_row.find_all(name='td')
            items.append(_get_item(item_cell, amount_cell))

        # endswith() is safe on an empty name, unlike name[-1].
        repeatable = name.endswith('*')
        if repeatable:
            name = name.rstrip('*')

        result = process_result(result_text)

        # There's two 'Magick Shard' recipes. One for Holy Motes, and other for Scathe Motes.
        # Treat them differently here.
        if name == 'Magick Shard':
            if any(r['item']['id'] == 'holyMote' for r in result):
                name += ' [Holy Mote]'
            if any(r['item']['id'] == 'scatheMote' for r in result):
                name += ' [Scathe Mote]'

        bazaar.append({
            'id': make_id(name),
            'name': name,
            'result': result,
            'items': items,
            'price': get_price(price),
            'diff': get_price(diff),
            'repeatable': repeatable
        })

    return bazaar
async def get_loot():
    """Return the loot items, from the cache file if present, else from ``url``.

    When ``_cache_file`` exists its JSON content is returned as-is. Otherwise
    the page at ``url`` is downloaded and every table matching
    ``table.full-width.article-table.FFXII`` is parsed. In each table the
    first row is skipped as the header and the remaining rows are read in
    groups of three (stats, uses, description); a trailing group with fewer
    than three rows is ignored. A group whose stats row does not have exactly
    six ``td`` cells is reported with ``print`` and skipped.

    Names are popped from the mapping returned by ``_get_item_order()`` as
    they are found, and the popped value is stored under ``index``. Names
    left in that mapping at the end are logged as warnings.

    Returns:
        The cached JSON content, or a list of dicts with the keys ``id``,
        ``name``, ``price``, ``drop``, ``monograph``, ``steal``, ``poach``,
        ``reward``, ``description``, ``quest_item`` and, when an order is
        known, ``index``.

    Raises:
        RuntimeError: if the response status is not ``OK``.
    """
    if _cache_file.exists():
        log.debug("Reading from cache file=%r", _cache_file)
        with _cache_file.open('r') as cache_reader:
            return json.load(cache_reader)

    item_order = _get_item_order()
    quest_items = _get_quest_items()

    log.debug("Reading from url=%r", url)
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            if response.status != OK:
                raise RuntimeError("Could not get loot.")
            text = await response.text()

    soup = BeautifulSoup(text, features="html.parser")
    tables = soup.select('table.full-width.article-table.FFXII')

    items = []
    for table in tables:
        # Skip header row by slicing: unlike a bare next(), this cannot raise
        # StopIteration (RuntimeError inside a coroutine) on a rowless table.
        rows = table.select('tbody > tr')[1:]

        # Each group of 3 rows is an item. The range stops before any
        # trailing group that has fewer than 3 rows.
        for start in range(0, len(rows) - 2, 3):
            # First row contains the name plus 6 columns:
            #   Price, Drop, Monograph drop, Steal, Poach, Reward(s)
            # Second row is uses (we don't use it)
            # Third row is a description
            stats_row, _uses_row, description_row = rows[start:start + 3]

            name = stats_row.find(name='th', attrs={'class': 'b'}).text.strip()
            stats_columns = list(stats_row.find_all(name='td'))
            if len(stats_columns) != 6:
                print('Cannot process', stats_columns)
                continue

            price = get_price(stats_columns[0].text.strip())
            drop = _get_list(stats_columns[1])
            monograph = _get_list(stats_columns[2])
            steal = _get_list(stats_columns[3])
            poach = _get_list(stats_columns[4])
            reward = get_list(stats_columns[5].text.strip(), sep='\n')
            description = description_row.find('td', attrs={
                'colspan': '6'
            }).text.strip()

            index = None
            if name not in item_order:
                log.debug("%r has no order set", name)
            else:
                # Popping leaves only the never-seen names for the final report.
                index = item_order.pop(name)

            item = {
                'id': make_id(name),
                'name': name,
                'price': price,
                'drop': drop,
                'monograph': monograph,
                'steal': steal,
                'poach': poach,
                'reward': reward,
                'description': description,
                'quest_item': name in quest_items,
            }
            if index is not None:
                item['index'] = index
            items.append(item)

    for missing_name in item_order:
        log.warning("%r was not in the input list", missing_name)

    return items