def analyze_weapon_page(pq_obj, weapon_type_name):
    """Parse one weapon page into an ordered mapping of weapon records.

    All data is read from the ``table.t1`` elements of the page. Pages of the
    '狩猎笛' (hunting horn) type must contain four such tables and use tables
    0, 2 and 3; every other type must contain three and uses tables 0, 1, 2.

    Args:
        pq_obj: Callable page object queried with a CSS selector
            (presumably a PyQuery document -- confirm against the caller).
        weapon_type_name: Weapon type name; passed on to the base-info parser.

    Returns:
        An ``OrderedDict`` filled in by the three table parsers, or ``False``
        when the page does not contain the expected number of tables.
    """
    tables = pq_obj('table.t1')

    # Layout per weapon type: expected table count, message logged on a
    # mismatch, index of the upgrade-material table, index of the
    # acquisition (craft) table.
    if weapon_type_name == '狩猎笛':
        expected_count = 4
        mismatch_message = 'table.ti长度不为4,请检查'
        upgrade_pos, craft_pos = 2, 3
    else:
        expected_count = 3
        mismatch_message = 'table.ti长度不为3,请检查'
        upgrade_pos, craft_pos = 1, 2

    if len(tables) != expected_count:
        log(mismatch_message)
        return False

    temp_weapon_data = OrderedDict()
    # Table 0: per-level stats of the derivation (attack, slots, sharpness).
    analyze_weapon_page_base_info(tables.eq(0), temp_weapon_data,
                                  weapon_type_name)
    # Upgrade-material table.
    analyze_weapon_page_upgrade_info(tables.eq(upgrade_pos), temp_weapon_data)
    # Acquisition (craft) table.
    analyze_weapon_page_craft_info(tables.eq(craft_pos), temp_weapon_data)
    return temp_weapon_data
def generate_weapon_excel(weapon_all_data):
    """Write one Excel workbook per weapon type.

    For every weapon type a single sheet (named after the type) is built.
    The sheet starts with a fixed set of columns; any further key found in a
    weapon record is appended as a new column in first-seen order.

    Value conversion per cell:
      * lists are joined with ``;`` and their column gets a ``[]`` suffix;
      * ``int``/``float``/``bool``/``str`` values are kept as they are;
      * anything else is converted with ``str()``.

    Args:
        weapon_all_data: Mapping ``weapon type -> {weapon name -> record}``,
            where each record is a mapping of field name to value.

    Returns:
        None. The result is written via ``save_to_excel`` to
        ``武器_<weapon type>.xlsx``.
    """
    for weapon_type, weapon_type_data in weapon_all_data.items():
        column_names = [
            'name_jp', 'name_chs', 'derive', 'derive_index', 'pos'
        ]
        rows = OrderedDict()
        headers = OrderedDict([(weapon_type, column_names)])
        contents = OrderedDict([(weapon_type, rows)])

        for weapon_name, weapon_data in weapon_type_data.items():
            # Rebuild the record with spreadsheet-friendly cell values.
            row = OrderedDict()
            for data_key, data_value in weapon_data.items():
                column = data_key
                if isinstance(data_value, list):
                    column = '%s[]' % data_key
                    data_value = ';'.join(str(value) for value in data_value)
                elif not isinstance(data_value, (int, float, bool, str)):
                    data_value = str(data_value)
                row[column] = data_value
                if column not in column_names:
                    column_names.append(column)
            rows[weapon_name] = row

        save_filepath = '武器_%s.xlsx' % weapon_type
        save_to_excel(headers, contents, save_filepath)
        log('武器 %s 数据保存完毕' % weapon_type)
def run_job():
    """Load (or scrape) the weapon data, log in to the wiki, export Excel.

    The weapon data is loaded from ``WEAPON_OBJ_FILEPATH`` when that file
    exists; otherwise it is collected with ``get_weapon_data()``.

    Returns:
        ``False`` when the wiki login or the edit-token request fails;
        ``None`` after the Excel export has been triggered.
    """
    if os.path.exists(WEAPON_OBJ_FILEPATH):
        weapon_all_data = load_obj(WEAPON_OBJ_FILEPATH)
    else:
        weapon_all_data = get_weapon_data()

    wiki = HuijiWiki('mhw')
    # NOTE(review): the account name and password are hard-coded in source.
    # They should come from configuration or the environment, and the
    # password should be rotated since it has been committed.
    logged_in = wiki.login('Yuee bot', '123654abC')
    if not logged_in:
        log('登录失败,请检查。')
        return False

    has_token = wiki.get_edit_token()
    if not has_token:
        log('获取令牌失败,请检查。')
        return False

    # NOTE(review): the wiki session is currently unused because the upload
    # step is disabled; only the Excel export runs.
    # update_weapon_data(wiki, weapon_all_data)
    generate_weapon_excel(weapon_all_data)
def output_weapon_name_excel(weapon_all_data):
    """Export translation-lookup workbooks for weapon and material names.

    Two workbooks are written through ``save_to_excel``, each with a single
    sheet named '译名对照':

      * ``weapon_name.xlsx`` -- one row per weapon name (index, type,
        Japanese name, empty Chinese name);
      * ``item_name.xlsx`` -- one row per distinct material name found under
        the keys ``upgrade_m1..4`` and ``craft_m1..4`` of the weapon records.

    Args:
        weapon_all_data: Mapping ``weapon type -> {weapon name -> record}``.

    Returns:
        None.
    """
    sheet_name = '译名对照'

    # Weapon workbook.
    weapon_rows = OrderedDict()
    headers = OrderedDict()
    headers[sheet_name] = ['_index', '类型', '日文名', '中文名']
    contents = OrderedDict()
    contents[sheet_name] = weapon_rows

    # Material workbook.
    item_rows = OrderedDict()
    headers_item = OrderedDict()
    headers_item[sheet_name] = ['_index', '日文名', '中文名']
    contents_item = OrderedDict()
    contents_item[sheet_name] = item_rows

    # Record keys that hold material names, in lookup order.
    material_keys = [
        '%s_m%d' % (craft_type, slot)
        for craft_type in ['upgrade', 'craft']
        for slot in range(1, 5)
    ]

    seen_items = []
    weapon_count = 0
    for weapon_type, weapon_type_data in weapon_all_data.items():
        for weapon_name, weapon_data in weapon_type_data.items():
            weapon_count += 1
            weapon_rows[weapon_name] = {
                '_index': weapon_count,
                '类型': weapon_type,
                '日文名': weapon_name,
                '中文名': '',
            }
            for material_key in material_keys:
                if material_key not in weapon_data:
                    continue
                item_name = weapon_data[material_key]
                if item_name in seen_items:
                    continue
                seen_items.append(item_name)
                # The running item index always equals the number of
                # distinct names collected so far.
                item_index = len(seen_items)
                item_rows[item_index] = {
                    '_index': item_index,
                    '日文名': item_name,
                    '中文名': '',
                }

    save_to_excel(headers, contents, 'weapon_name.xlsx')
    save_to_excel(headers_item, contents_item, 'item_name.xlsx')
    log('译名表保存完毕。')
def config_loader(config_path, error_check_list=None, section_name=''):
    """Load an INI file into nested ordered dicts and optionally validate it.

    Args:
        config_path: Path of the INI file to read.
        error_check_list: Optional mapping ``section name -> list of checks``.
            Each check is a dict with the keys
              * ``'n'`` -- option name,
              * ``'t'`` -- check type: ``'check_value'``, ``'path_exists'``,
                ``'file_exists'`` or ``'path_create'``,
              * ``'d'`` -- default value (read only for ``'check_value'``
                when the option is missing).
            ``None`` (the default) means "no checks".
        section_name: When non-empty and present in the file, only that
            section is returned.

    Returns:
        ``False`` when the file is missing, a checked section is missing, or
        a checked option is missing and its type is not ``'check_value'``.
        Otherwise the requested section, or the whole configuration as
        ``OrderedDict`` of ``OrderedDict``.

    Note:
        ``'path_exists'`` / ``'file_exists'`` only log a message when the
        path is missing; they do not abort loading.
    """
    if error_check_list is None:
        # A fresh mapping per call instead of a shared mutable default.
        error_check_list = {}

    if not os.path.exists(config_path):
        log('配置文件《%s》未找到,请检查。' % os.path.split(config_path)[1])
        return False

    cfg = configparser.ConfigParser()
    cfg.read(config_path)

    config = OrderedDict()
    for cfg_section in cfg.sections():
        config[cfg_section] = OrderedDict(cfg[cfg_section].items())

    # Validation pass.
    for check_section_name, check_info_list in error_check_list.items():
        if check_section_name not in config:
            log('配置文件错误:没有找到“%s”段落,请检查。' % check_section_name)
            return False
        section = config[check_section_name]
        for check_info in check_info_list:
            option_name = check_info['n']
            check_type = check_info['t']
            if option_name not in section:
                if check_type == 'check_value':
                    # Missing optional value: fall back to its default.
                    section[option_name] = check_info['d']
                else:
                    log('配置文件错误:“%s”段落中没有参数“%s”,请检查。' %
                        (check_section_name, option_name))
                    return False
            value = section[option_name]
            if check_type == 'path_exists':
                if not os.path.exists(value):
                    log('文件夹未找到:“%s”,请检查。' % os.path.abspath(value))
            elif check_type == 'file_exists':
                if not os.path.exists(value):
                    log('文件未找到:“%s”,请检查。' % os.path.abspath(value))
            elif check_type == 'path_create':
                check_folder(value)

    if section_name != '' and section_name in config:
        return config[section_name]
    return config
def get_weapon_data():
    """Scrape weapon data from the site, grouped by weapon type.

    Walks the weapon-type links on the main page (selector ``#sc_2 a``),
    then every weapon link on each type page (selector ``.t1 a``), and
    parses each weapon page with ``analyze_weapon_page``.

    Returns:
        ``OrderedDict`` mapping weapon type name to an ``OrderedDict`` of the
        records returned by ``analyze_weapon_page``.
    """
    site = Site_MHWg_org()
    mainpage = site.mainpage()

    weapon_data = OrderedDict()
    type_links = mainpage('#sc_2 a')
    for type_index in range(len(type_links)):
        type_link = type_links.eq(type_index)
        type_page_url = type_link.attr('href')
        weapon_type_name = WEAPON_NAME_MAP[type_link.text()]
        # NOTE(review): only the '双剑' (dual blades) type is processed;
        # this looks like a leftover debugging filter -- confirm.
        if weapon_type_name != '双剑':
            continue

        type_records = OrderedDict()
        weapon_data[weapon_type_name] = type_records

        # Visit every weapon-derivation page linked from the type page.
        type_page = site.get_page(type_page_url)
        weapon_links = type_page('.t1 a')
        for link_index in range(len(weapon_links)):
            weapon_page_url = weapon_links.eq(link_index).attr('href')
            # Links containing '#' are skipped.
            if '#' in weapon_page_url:
                continue

            weapon_page = site.get_page(weapon_page_url)
            if not weapon_page:
                log('武器页未找到 %s' % weapon_page_url)
                continue

            log('开始解析 %s' % weapon_page_url)
            parsed = analyze_weapon_page(weapon_page, weapon_type_name)
            if parsed:
                type_records.update(parsed)

    return weapon_data
def analyze_weapon_page_upgrade_info(info_obj, temp_weapon_data):
    """Merge the upgrade table of a weapon page into existing records.

    Every table row after the first is read. Rows whose weapon name is not
    already a key of ``temp_weapon_data`` are ignored. For the others the
    record gains ``rare``, ``can_not_back``, ``upgrade_target`` and whatever
    ``get_material_info(..., 'upgrade')`` returns.

    Args:
        info_obj: Table object; called with ``'tr'`` to obtain its rows.
        temp_weapon_data: Mapping ``weapon name -> record``; updated in place.

    Returns:
        ``True`` when all rows were processed, ``False`` as soon as a matched
        weapon has ``atk == 0``.
    """
    rows = info_obj('tr')
    # Row 0 is skipped (presumably the header row).
    for row_index in range(1, len(rows)):
        cells = rows.eq(row_index)('td')

        # Name cell; a trailing "[巻き戻し不可]" line marks a weapon that
        # cannot be rolled back and is stripped from the name.
        weapon_name = cells.eq(1).text()
        rollback_match = re.findall(r'(.*?)\n\[巻き戻し不可\]', weapon_name)
        can_not_back = bool(rollback_match)
        if can_not_back:
            weapon_name = rollback_match[0]

        if weapon_name not in temp_weapon_data:
            continue

        weapon_info = temp_weapon_data[weapon_name]
        if weapon_info['atk'] == 0:
            # NOTE(review): the message says "skipped", but this aborts the
            # whole table rather than only this row -- confirm intent.
            log('武器没有攻击力,可能是未完成的数据,已跳过。武器名称:%s' % weapon_name)
            return False

        weapon_info['rare'] = int(cells.eq(0).text())
        weapon_info['can_not_back'] = can_not_back

        # Upgrade targets: one per line, empty list when the cell is empty.
        targets_text = cells.eq(3).text()
        if targets_text != '':
            weapon_info['upgrade_target'] = targets_text.split('\n')
        else:
            weapon_info['upgrade_target'] = []

        # Upgrade materials.
        weapon_info.update(get_material_info(cells.eq(2).text(), 'upgrade'))

    return True
def generate_data(self, is_new=False):
    """Build ``self._data`` from the rows in ``self._original_data['data']``.

    Each raw row is turned into an ``OrderedDict`` keyed by the names in
    ``self._header``. Rows are stored under the value of the ``self._key``
    column, or under their 0-based position when ``self._key`` is ``''``.

    Args:
        is_new: When true, ``self._data`` is just reset to an empty mapping.

    Returns:
        ``True`` on success; ``False`` when the ``'data'`` entry is missing
        or a row has a falsy value in the key column.
    """
    self._data = OrderedDict()
    if is_new:
        return True

    if 'data' not in self._original_data:
        log('[[%s]]数据有误:未找到内容数据,请检查。' % self._title)
        return False

    keyed_by_column = self._key != ''
    for row_number, raw_row in enumerate(self._original_data['data']):
        row_data = OrderedDict()
        # Indexed access on purpose: a row shorter than the header raises.
        for column, field_name in enumerate(self._header):
            row_data[field_name] = raw_row[column]

        if keyed_by_column:
            key = row_data[self._key]
            if not key:
                log('[[%s]]数据有误:第 %d 行当作KEY的“%s”值是None,请检查。' %
                    (self._title, row_number + 1, self._key))
                return False
        else:
            key = row_number
        self._data[key] = row_data

    return True
def read_all_sheets_from_xlsx(xls_path, mode=None):
    """Read every sheet of a workbook via ``_read_sheet_data``.

    Args:
        xls_path: Path of the workbook to open with ``xlrd``.
        mode: Passed through unchanged to ``_read_sheet_data``; ``None``
            (the default) is replaced by an empty list.

    Returns:
        ``False`` when the file does not exist or cannot be opened.
        Otherwise an ``OrderedDict`` mapping sheet name to the value returned
        by ``_read_sheet_data``. If reading a sheet raises, the error is
        logged and the sheets read so far are returned.
    """
    if mode is None:
        # Fresh list per call instead of a shared mutable default.
        mode = []

    result_data = OrderedDict()
    if not os.path.exists(xls_path):
        log('[ %s ] 文件不存在!' % xls_path)
        return False

    # Open the workbook.
    try:
        data_wb = xlrd.open_workbook(xls_path)
    except xlrd.biffh.XLRDError as e:
        # Format path and error together; the previous
        # ``'...' + str(e) % xls_path`` applied ``%`` to ``str(e)`` only and
        # raised TypeError inside this handler.
        log('[ %s ] 文件打开失败!错误:%s' % (xls_path, e))
        return False

    sheet_names = data_wb.sheet_names()

    # Read the data sheet by sheet.
    try:
        for sheet_name in sheet_names:
            data_ws = data_wb.sheet_by_name(sheet_name)
            result_data[sheet_name] = _read_sheet_data(data_ws, mode)
    except Exception as e:
        log('读取EXCEL文件发生错误,请检查工作薄中的格式是否符合规范。(%s)' % e)
    return result_data
def generate_header(self, is_new=False):
    """Fill ``self._header`` with the field names of the original schema.

    Names are taken from ``self._original_data['schema']['fields'][i]['name']``.

    Args:
        is_new: When true, ``self._header`` is just reset to an empty list.

    Returns:
        ``True`` on success; ``False`` when the schema or its field list is
        missing, or when a non-empty ``self._key`` is not among the names.
    """
    self._header = []
    if is_new:
        return True

    original = self._original_data
    # Both failure cases log the same message.
    if 'schema' not in original or 'fields' not in original['schema']:
        log('[[%s]]数据有误:未找到表头字段,请检查。' % self._title)
        return False

    self._header.extend(
        field['name'] for field in original['schema']['fields'])

    key_is_missing = self._key != '' and self._key not in self._header
    if key_is_missing:
        log('[[%s]]数据有误:表头字段中未找到当作KEY的“%s”,请检查。' %
            (self._title, self._key))
        return False
    return True
def read_sheet_from_xlsx(xls_path, sheet_name='', sheet_index=-1, mode=None):
    """Read a single sheet of a workbook via ``_read_sheet_data``.

    Sheet selection, in priority order:
      1. ``sheet_index`` when it is greater than -1;
      2. ``sheet_name`` when it is non-empty;
      3. the first sheet.

    Args:
        xls_path: Path of the workbook to open with ``xlrd``.
        sheet_name: Name of the sheet to read.
        sheet_index: 0-based index of the sheet to read.
        mode: A single mode string or a list of modes, passed on to
            ``_read_sheet_data``. ``None`` (the default) means an empty list.

    Returns:
        ``False`` when the file does not exist, cannot be opened, or the
        requested sheet cannot be found. Otherwise an ``OrderedDict`` with one
        entry, ``sheet name -> data``; it is empty when reading the sheet
        raised (the error is logged).
    """
    if mode is None:
        # Fresh list per call instead of a shared mutable default.
        mode = []
    elif isinstance(mode, str):
        mode = [mode]

    result_data = OrderedDict()
    if not os.path.exists(xls_path):
        log('[ %s ] 文件不存在!' % xls_path)
        return False

    # Open the workbook.
    try:
        data_wb = xlrd.open_workbook(xls_path)
    except xlrd.biffh.XLRDError as e:
        # Format path and error together; the previous
        # ``'...' + str(e) % xls_path`` applied ``%`` to ``str(e)`` only and
        # raised TypeError inside this handler.
        log('[ %s ] 文件打开失败!错误:%s' % (xls_path, e))
        return False

    # Open the sheet.
    try:
        if sheet_index > -1:
            data_ws = data_wb.sheet_by_index(sheet_index)
        elif sheet_name != '':
            data_ws = data_wb.sheet_by_name(sheet_name)
        else:
            data_ws = data_wb.sheet_by_index(0)
        # Key the result by the sheet's real name, whichever way it was
        # selected.
        sheet_name = data_ws.name
    except (xlrd.biffh.XLRDError, IndexError) as e:
        # An out-of-range index surfaces as IndexError, a bad name as
        # XLRDError; report both through the shared logger.
        log(e)
        return False

    # Read the data.
    try:
        result_data[sheet_name] = _read_sheet_data(data_ws, mode)
    except Exception as e:
        log('读取EXCEL文件发生错误,请检查工作薄中的格式是否符合规范。(%s)' % e)
    return result_data
def _log(self, text):
    """Forward *text* to the ``log`` callable that is in scope."""
    log(text)
def get_weapon_stat(td_obj):
    """Parse the stat cell of a weapon row into a dict of typed fields.

    The cell text is processed line by line; the first rule that matches a
    line consumes it. Lines matched by no rule are collected under
    ``'other_effect'``.

    Keys that may be produced: ``atk``, ``socket`` (list of slot levels),
    ``def``, ``cri``, ``elem`` / ``elem_v`` / ``elem_release``, ``gun_type``,
    ``bottle`` / ``bottle_v``, ``seal_dragon``, ``sound``, ``effect``,
    ``bure``, ``upgrade_slot`` and ``other_effect``.

    Args:
        td_obj: Cell object providing ``text()`` and ``children('span')``
            (presumably a PyQuery element -- confirm against the caller).

    Returns:
        ``OrderedDict`` whose first key is always ``'other_effect'``.
    """
    socket_levels = {'-': 0, '①': 1, '②': 2, '③': 3}
    result_info = OrderedDict([('other_effect', [])])

    for raw_line in td_obj.text().split('\n'):
        line = raw_line.strip()

        # Attack.
        find = re.findall(r'攻撃:(\d+)', line)
        if find:
            result_info['atk'] = int(find[0])
            continue

        # Decoration slots; an empty value leaves 'socket' unset.
        find = re.findall(r'スロット:(.*)', line)
        if find:
            if find[0] != '':
                # Unknown symbols are ignored, as before.
                result_info['socket'] = [
                    socket_levels[symbol]
                    for symbol in find[0].split(' ')
                    if symbol in socket_levels
                ]
            continue

        # Defense bonus.
        find = re.findall(r'防御(.+)', line)
        if find:
            result_info['def'] = int(find[0].replace('+', ''))
            continue

        # Affinity.
        find = re.findall(r'会心(.+)%', line)
        if find:
            result_info['cri'] = int(find[0].replace('+', ''))
            continue

        # Element attack ("<element><value>", optionally prefixed with
        # "属性解放:"), or gunlance shelling type. A match whose name is in
        # neither map falls through to the rules below.
        find = re.findall(r'(:?属性解放:)?(\D+)(\d+)', line)
        if find:
            prefix, elem_type, number = find[0]
            if elem_type in ELEMENT_MAP:
                if prefix == '属性解放:':
                    result_info['elem_release'] = True
                result_info['elem'] = ELEMENT_MAP[elem_type]
                result_info['elem_v'] = int(number)
                continue
            if elem_type in GUNLANCE_MAP:
                result_info['gun_type'] = '%s%s' % (GUNLANCE_MAP[elem_type],
                                                    number)
                continue

        # Phial of charge blade / switch axe.
        find = re.findall(r'ビン:(\D+)(\d*)', line)
        if find:
            elem_type, number = find[0]
            if elem_type in ELEMENT_MAP:
                result_info['bottle'] = ELEMENT_MAP[elem_type]
                result_info['bottle_v'] = int(number) if number != '' else 0
                continue

        # Elderseal.
        find = re.findall(r'龍封力\[(.+)]', line)
        if find:
            result_info['seal_dragon'] = find[0]
            continue

        # Hunting horn notes.
        if line == '■■■':
            result_info['sound'] = _collect_sound_colors(td_obj)
            continue

        # Insect glaive effect.
        if line in CLUB_EFFECT:
            result_info['effect'] = CLUB_EFFECT[line]
            continue

        # Bowgun expected value: dropped on purpose.
        if re.findall(r'\(期待値:(\d+)\)', line):
            continue

        # Bowgun deviation.
        find = re.findall(r'ブレ:(.+)', line)
        if find:
            result_info['bure'] = BURE_MAP[find[0]]
            continue

        # Upgrade part slots.
        find = re.findall(r'強化パーツ:(\d+)', line)
        if find:
            result_info['upgrade_slot'] = int(find[0])
            continue

        # Anything else.
        result_info['other_effect'].append(line)

    return result_info


def _collect_sound_colors(td_obj):
    """Return the mapped note colors of the '■■■' spans directly under td_obj."""
    sounds = []
    span_group = td_obj.children('span')
    for span_index in range(len(span_group)):
        span_obj = span_group.eq(span_index)
        if span_obj.text() != '■■■':
            continue
        color_spans = span_obj.eq(0).find('span')
        for color_index in range(len(color_spans)):
            # A span without a style attribute yields no color.
            style = color_spans.eq(color_index).attr('style') or ''
            find_color = re.findall(r'color:(.*?);', style)
            if not find_color:
                continue
            if find_color[0] not in SOUND_COLOR_MAP:
                # Report and skip; indexing the map here used to raise
                # KeyError right after the message was logged.
                log('音色颜色未匹配到:%s' % find_color[0])
                continue
            sounds.append(SOUND_COLOR_MAP[find_color[0]])
    return sounds