def get_laowa_lens_list(scraping: IScrapingService) -> DataFrame:
    """Scrape the LAOWA Japan site and return one normalized row per lens.

    The product index page is fetched (uncached) to collect lens names and
    URLs, each product page's ``div.productTable`` is read into a raw record,
    and the raw Japanese columns are converted into the normalized columns
    assigned below.  Only rows whose mount text mentions Micro Four Thirds
    or "Leica L" are kept; focal lengths and magnifications of Micro Four
    Thirds rows are doubled.

    Args:
        scraping: Service used to fetch and parse the pages.

    Returns:
        DataFrame with (at least) ``maker``, ``name``, ``url``, ``mount``,
        wide/telephoto focal length, f-number and minimum focus distance,
        ``max_photographing_magnification``, ``filter_diameter``,
        ``is_drip_proof``, ``has_image_stabilization``, ``is_inner_zoom``,
        ``overall_diameter``, ``overall_length``, ``weight`` and ``price``.
        ``-1`` marks a filter diameter or weight that could not be parsed.
    """
    # Collect the (name, URL) of every lens on the index page.
    lens_list: List[Tuple[str, str]] = []
    page = scraping.get_page('https://www.laowa.jp/cat1/', cache=False)
    for div_element in page.find_all('div.product3'):
        h3_element = div_element.find('h3')
        if h3_element is None:
            continue
        a_element = div_element.find('a')
        if a_element is None:
            continue
        lens_name = h3_element.text
        lens_url = a_element.attrs['href']
        if 'LAOWA' in lens_name and 'mm' in lens_name:
            lens_list.append((lens_name, lens_url))

    # Read the spec table of every lens page into a raw dict.
    lens_raw_data_list: List[Dict[str, str]] = []
    for lens_name, lens_url in lens_list:
        page = scraping.get_page(lens_url)
        temp: Dict[str, str] = {'レンズ名': lens_name, 'URL': lens_url}
        section_element = page.find('div.productTable')
        if section_element is not None:
            for tr_element in section_element.find_all('tr'):
                td_elements = tr_element.find_all('td')
                if len(td_elements) < 2:
                    continue
                key = td_elements[0].full_text
                value = td_elements[1].full_text
                if key is None or value is None:
                    continue
                temp[key] = value

        # Special case: on this page the mount and weight cells are swapped.
        # ``.get`` keeps a page without those rows from raising KeyError.
        if temp['レンズ名'] == 'LAOWA 15mm F4 WIDE ANGLE MACRO':
            if 'Nikon' in temp.get('質量', '') and 'マウント' in temp:
                temp['マウント'], temp['質量'] = temp['質量'], temp['マウント']
        lens_raw_data_list.append(temp)
    df = DataFrame.from_records(lens_raw_data_list)

    # Unify column names and drop the columns that are not needed.
    df['maker'] = 'LAOWA'
    df = convert_columns(df, {
        'レンズ名': 'name',
        'URL': 'url',
        'フォーマット': '対応フォーマット',
        '対応マウント': 'マウント',
        '寸法(鏡筒直径×長さ)': 'サイズ',
        '最小フォーカシングディスタンス': '最短撮影距離',
        '最大倍率比': '最大撮影倍率',
        '最大倍率': '最大撮影倍率',
    }, [
        '開放F値',
        '画角',
        'レンズ構成',
        'シフト機能',
        '最大イメージサークル',
        '絞り羽根枚数',
        'フォーカス',
        'JAN',
        '発売日',
        '絞り羽枚数',
        'フォーカシング',
        'フィルタースレッド',
        'ワーキングディスタンス',
        '最大口径比',
        '絞り羽根枚数(F)',
        '絞り羽根枚数(T)',
        'シフト量',
        '最小ワーキングディスタンス',
        '対応フォーマット',
    ])
    for junk_column in ('対応フォーマット', None, ''):
        if junk_column in df:
            del df[junk_column]

    # mount: keep only Micro Four Thirds / Leica L rows.
    mount_list: List[str] = []
    for mount_temp in df['マウント']:
        if 'マイクロフォーサーズ' in mount_temp:
            mount_list.append('マイクロフォーサーズ')
        elif 'Leica L' in mount_temp:
            mount_list.append('ライカL')
        else:
            mount_list.append('')
    df['mount'] = mount_list
    # ``.copy()`` makes the filtered frame independent, so the column
    # assignments below do not raise SettingWithCopyWarning.
    df = df[df['mount'] != ''].copy()
    del df['マウント']

    # focal length (doubled for Micro Four Thirds).
    w_list, t_list = extract_numbers(
        df['焦点距離'], [r'(\d+\.?\d*)-(\d+\.?\d*)mm'], [r'(\d+\.?\d*)mm'])
    w_list2: List[int] = []
    t_list2: List[int] = []
    for w, t, mount in zip(w_list, t_list, list(df['mount'])):
        if mount == 'マイクロフォーサーズ':
            w_list2.append(int((Decimal(w) * 2).quantize(Decimal('1'))))
            t_list2.append(int((Decimal(t) * 2).quantize(Decimal('1'))))
        elif mount == 'ライカL':
            w_list2.append(int(w))
            t_list2.append(int(t))
    df['wide_focal_length'] = w_list2
    df['telephoto_focal_length'] = t_list2
    del df['焦点距離']

    # f-number, taken from the lens name.
    w, t = extract_numbers(
        df['name'], [r'F(\d+\.?\d*)-(\d+\.?\d*)'], [r'F(\d+\.?\d*)'])
    df['wide_f_number'] = [float(x) for x in w]
    df['telephoto_f_number'] = [float(x) for x in t]

    # minimum focus distance in millimetres (0 when nothing matches).
    w_fd_list: List[int] = []
    t_fd_list: List[int] = []
    for fd in df['最短撮影距離'].values:
        result = regex(fd, r'(\d+.?\d*)mm~(\d+.?\d*)mm')
        if len(result) > 0:
            w_fd_list.append(int(result[0]))
            t_fd_list.append(int(result[1]))
            continue
        # NOTE(review): the original repeated this exact "cm" check twice;
        # the second copy could never be reached and has been dropped.
        # Presumably one of them targeted a full-width unit - confirm.
        result = regex(fd, r'(\d+.?\d*)cm')
        if len(result) > 0:
            w_fd_list.append(int(Decimal(result[0]) * 10))
            t_fd_list.append(int(Decimal(result[0]) * 10))
            continue
        result = regex(fd, r'(\d+.?\d*)mm')
        if len(result) > 0:
            w_fd_list.append(int(result[0]))
            t_fd_list.append(int(result[0]))
            continue
        w_fd_list.append(0)
        t_fd_list.append(0)
    df['wide_min_focus_distance'] = w_fd_list
    df['telephoto_min_focus_distance'] = t_fd_list
    del df['最短撮影距離']

    # maximum magnification (doubled for Micro Four Thirds).
    # Each entry is (pattern, is_ratio); patterns are tried in this order.
    magnification_patterns = [
        (r'(\d+.?\d*)/(\d+.?\d*)倍', True),
        (r'(\d+.?\d*):(\d+.?\d*)', True),
        (r'(\d+.?\d*)倍', False),
        (r'(\d+.?\d*)', False),
    ]
    mag_list: List[float] = []
    for val1, val2 in zip(df['最大撮影倍率'].values, df['mount'].values):
        value = Decimal(0)
        if val1 == val1:  # NaN (missing cell) is the only value != itself
            for pattern, is_ratio in magnification_patterns:
                result = regex(val1, pattern)
                if len(result) > 0:
                    value = Decimal(result[0])
                    if is_ratio:
                        value = value / Decimal(result[1])
                    break
        if val2 == 'マイクロフォーサーズ':
            mag_list.append(float(value * 2))
        else:
            mag_list.append(float(value))
    df['max_photographing_magnification'] = mag_list
    del df['最大撮影倍率']

    # filter diameter (-1 when missing; one lens is forced to -1 by name).
    fd_list: List[int] = []
    for fd, name in zip(df['フィルター径'].values, df['name'].values):
        if fd != fd or name == 'LAOWA 10-18mm F4.5-5.6 FE ZOOM':
            fd_list.append(-1)
            continue
        result = regex(fd, r'(\d+.?\d*)mm')
        if len(result) == 0:
            fd_list.append(-1)
            continue
        fd_list.append(int(result[0]))
    df['filter_diameter'] = fd_list
    del df['フィルター径']

    df['is_drip_proof'] = False
    df['has_image_stabilization'] = False

    # is_inner_zoom: set when the wide and telephoto focal lengths are equal.
    df['is_inner_zoom'] = [
        bool(wide == tele)
        for wide, tele in zip(df['wide_focal_length'],
                              df['telephoto_focal_length'])
    ]

    # overall diameter / length.
    d, le = extract_numbers(
        df['サイズ'], [r'(\d+\.?\d*)[^\d]+(\d+\.?\d*)(mm|mm)'], [])
    df['overall_diameter'] = [float(x) for x in d]
    df['overall_length'] = [float(x) for x in le]
    del df['サイズ']

    # weight in grams (-1 when it cannot be parsed).
    weight: List[int] = []
    for f in df['質量']:
        result = regex(f, r'([\d,]+)(g|g)')
        if len(result) > 0:
            weight.append(int(result[0].replace(',', '')))
        else:
            weight.append(-1)
    df['weight'] = weight
    del df['質量']

    df['price'] = 0
    return df
def get_sigma_lens_list(scraping: IScrapingService) -> DataFrame:
    """Scrape the SIGMA Japan site and return one normalized row per lens.

    Current lenses are read from the lens search page and split by the
    ``data-lens-mount`` attribute into Micro Four Thirds and L-mount lists;
    discontinued lenses whose name contains ``DN`` are added as Micro Four
    Thirds.  Each product page is converted with ``item_page_to_raw_dict``
    and the raw columns are turned into the normalized columns below.

    Args:
        scraping: Service used to fetch and parse the pages.

    Returns:
        DataFrame with ``maker``, ``product_number``, ``name``, ``url``,
        ``mount`` and the normalized numeric/boolean spec columns.  ``-1``
        marks a filter diameter or weight that could not be determined.
    """
    # Collect the (name, URL) pairs of the current lenses, per mount.
    page = scraping.get_page('https://www.sigma-global.com/jp/lenses/',
                             cache=False)
    lens_list_mft: List[Tuple[str, str]] = []
    lens_list_l: List[Tuple[str, str]] = []
    for li_element in page.find('div.p-lens-search__main').find_all('li'):
        lens_link = li_element.find('a').attrs['href']
        if 'lenses' not in lens_link:
            continue
        h4_element = li_element.find('h4')
        if h4_element is None:
            continue
        lens_name = h4_element.text
        mount_attr = li_element.attrs['data-lens-mount']
        if 'micro-four-thirds' in mount_attr:
            lens_list_mft.append((lens_name, lens_link))
        if 'l-mount' in mount_attr:
            lens_list_l.append((lens_name, lens_link))
        # Special case: this lens is always added to the L-mount list.
        if '35mm F1.4 DG HSM' in lens_name:
            lens_list_l.append((lens_name, lens_link))

    # Collect the discontinued lenses.
    page = scraping.get_page(
        'https://www.sigma-global.com/jp/lenses/discontinued/', cache=False)
    lens_list_old: List[Tuple[str, str]] = []
    for li_element in page.find_all('li.p-support-service__item'):
        a_element = li_element.find('a')
        lens_link = a_element.attrs['href']
        lens_name = a_element.find('h4 > span').text
        lens_list_old.append((lens_name, lens_link))

    # Fetch the raw spec of every lens.
    lens_raw_data_list: List[Dict[str, str]] = []
    for lens_list, lens_mount in [(lens_list_mft, 'マイクロフォーサーズ'),
                                  (lens_list_l, 'ライカL')]:
        for lens_name, lens_link in lens_list:
            # Append the product line to the name when the URL implies it.
            if 'lenses/c' in lens_link and '| Contemporary' not in lens_name:
                lens_name2 = lens_name + ' | Contemporary'
            elif 'lenses/a' in lens_link and '| Art' not in lens_name:
                lens_name2 = lens_name + ' | Art'
            else:
                lens_name2 = lens_name
            page = scraping.get_page(lens_link)
            temp_dict: Dict[str, str] = {
                'mount': lens_mount,
                'name': lens_name2,
                'url': lens_link
            }
            temp_dict.update(item_page_to_raw_dict(page, lens_mount))
            lens_raw_data_list.append(temp_dict)
    for lens_name, lens_link in lens_list_old:
        if 'DN' not in lens_name:
            # No "DN" in the name = not a mirrorless lens, so skip it.
            continue
        page = scraping.get_page(lens_link)
        old_dict: Dict[str, str] = {
            'mount': 'マイクロフォーサーズ',
            'name': lens_name,
            'url': lens_link
        }
        old_raw_dict = item_page_to_raw_dict(page, '')
        # NOTE(review): the source's indentation was lost; the record is
        # assumed to be kept only when the page yielded data - confirm.
        if len(old_raw_dict) > 0:
            old_dict.update(old_raw_dict)
            lens_raw_data_list.append(old_dict)
    df = DataFrame.from_records(lens_raw_data_list)

    # Shape the frame for conversion.
    df['maker'] = 'SIGMA'
    df['product_number'] = df['エディションナンバー']
    for unused_column in ('エディションナンバー', 'レンズ構成枚数', '画角',
                          '絞り羽根枚数', '最小絞り', '付属品',
                          '対応マウント / バーコード'):
        del df[unused_column]

    # focal_length: x2 for Micro Four Thirds, x1.5 for "DC" lenses.
    w, t = extract_numbers(df['name'], [r'(\d+)-(\d+)mm'], [r'(\d+)mm'])
    wide_focal_length: List[int] = []
    telephoto_focal_length: List[int] = []
    for wf, tf, mount, name in zip(w, t, df['mount'], df['name']):
        if mount == 'マイクロフォーサーズ':
            wide_focal_length.append(int(wf) * 2)
            telephoto_focal_length.append(int(tf) * 2)
        elif 'DC' in name:
            wide_focal_length.append(int(1.5 * int(wf)))
            telephoto_focal_length.append(int(1.5 * int(tf)))
        else:
            wide_focal_length.append(int(wf))
            telephoto_focal_length.append(int(tf))
    df['wide_focal_length'] = wide_focal_length
    df['telephoto_focal_length'] = telephoto_focal_length

    # f_number
    w, t = extract_numbers(
        df['name'], [r'F(\d+\.?\d*)-(\d+\.?\d*)'], [r'F(\d+\.?\d*)'])
    df['wide_f_number'] = [float(x) for x in w]
    df['telephoto_f_number'] = [float(x) for x in t]

    # min_focus_distance (cm in the source, scaled by 10 to mm)
    w, t = extract_numbers(df['最短撮影距離'], [
        r'(\d+\.?\d*)-(\d+\.?\d*)cm',
        r'(\d+\.?\d*) \(W\)-(\d+\.?\d*) \(T\)cm',
        r'(\d+\.?\d*)\(W\) - (\d+\.?\d*)\(T\)cm',
        # Was an f-string (f'...'), which made "\d" an invalid escape
        # sequence; the raw string has exactly the same value.
        # NOTE(review): "(W)"/"(T)" are capture groups here, not literal
        # brackets - presumably full-width brackets were meant; confirm.
        r'(\d+\.?\d*)(W)-(\d+\.?\d*)(T)cm'
    ], [r'(\d+\.?\d*)cm'])
    df['wide_min_focus_distance'] = [int(Decimal(x).scaleb(1)) for x in w]
    df['telephoto_min_focus_distance'] = [int(Decimal(x).scaleb(1)) for x in t]
    del df['最短撮影距離']

    # max_photographing_magnification: "1:N" ratios, smallest N wins.
    m: List[float] = []
    for raw_text, mount, name in zip(df['最大撮影倍率'], df['mount'],
                                     df['name']):
        # The original replaced ':' with itself (a no-op); presumably the
        # full-width colon was meant, so that is what is normalized here.
        text = raw_text.replace('\uff1a', ':')
        temp = regex(text, r'.*1:(\d+\.?\d*).*1:(\d+\.?\d*).*')
        if len(temp) > 0:
            if float(temp[0]) < float(temp[1]):
                denominator = temp[0]
            else:
                denominator = temp[1]
        else:
            temp = regex(text, r'.*1:(\d+\.?\d*).*')
            denominator = temp[0]
        if mount == 'マイクロフォーサーズ':
            factor = Decimal('2')
        elif 'DC' in name:
            factor = Decimal('1.5')
        else:
            factor = Decimal('1')
        m.append(float((factor / Decimal(denominator)).quantize(
            Decimal('0.01'))))
    df['max_photographing_magnification'] = m
    del df['最大撮影倍率']

    # filter_diameter (-1 when missing or unparsable)
    filter_diameter: List[float] = []
    for f in df['フィルターサイズ']:
        result = regex(f, r'(\d+.?\d*)mm') if f == f else []
        if len(result) > 0:
            filter_diameter.append(float(result[0]))
        else:
            filter_diameter.append(-1)
    df['filter_diameter'] = filter_diameter
    del df['フィルターサイズ']

    # is_drip_proof
    df['is_drip_proof'] = df['name'].map(lambda x: 'DC' in x or 'DG' in x)

    # has_image_stabilization
    df['has_image_stabilization'] = df['name'].map(lambda x: 'OS' in x)

    # is_inner_zoom: set when wide and telephoto focal lengths are equal.
    df['is_inner_zoom'] = [
        bool(wide == tele)
        for wide, tele in zip(df['wide_focal_length'],
                              df['telephoto_focal_length'])
    ]

    # overall_diameter, overall_length
    overall_diameter, overall_length = extract_numbers(
        df['最大径 × 長さ'], [r'(\d+\.?\d*)mm[^\d]*(\d+\.?\d*)mm'], [])
    # Values filled in by hand for lenses whose page lacks the data.
    manual_size = {
        '19mm F2.8 EX DN': ('60.6', '45.7'),
        '30mm F2.8 EX DN': ('60.6', '38.6'),
        '19mm F2.8 DN | Art': ('60.8', '45.7'),
        '30mm F2.8 DN | Art': ('60.8', '40.5'),
        '60mm F2.8 DN | Art': ('60.8', '55.5'),
    }
    for i, name in enumerate(df['name'].values):
        if name in manual_size:
            overall_diameter[i], overall_length[i] = manual_size[name]
    df['overall_diameter'] = [float(x) for x in overall_diameter]
    df['overall_length'] = [float(x) for x in overall_length]
    del df['最大径 × 長さ']

    # weight
    manual_weight = {
        '19mm F2.8 EX DN': 140,
        '30mm F2.8 EX DN': 130,
        '19mm F2.8 DN | Art': 160,
        '30mm F2.8 DN | Art': 140,
        '60mm F2.8 DN | Art': 190,
    }
    weight: List[int] = []
    for name, f in zip(df['name'].values, df['質量'].values):
        if f != f:
            # Missing weight: use the hand-maintained value.  The original
            # appended nothing for an unlisted lens, which left ``weight``
            # shorter than the frame; -1 keeps the lengths aligned.
            weight.append(manual_weight.get(name, -1))
            continue
        result = regex(f, r'([\d,]+)g')
        if len(result) > 0:
            weight.append(int(result[0].replace(',', '')))
        else:
            weight.append(int(f))
    df['weight'] = weight
    del df['質量']

    # price
    price: List[int] = []
    for f in df['希望小売価格']:
        result = regex(f, r'([\d,]+) *円')
        if len(result) > 0:
            price.append(int(result[0].replace(',', '')))
        else:
            price.append(26240)  # ad-hoc fix carried over from the original
    df['price'] = price
    del df['希望小売価格']
    return df
def get_samyang_lens_list(scraping: IScrapingService) -> DataFrame:
    """Scrape the Kenko Tokina SAMYANG pages and return one row per lens.

    Only lenses whose ``data-spec3`` attribute mentions Micro Four Thirds
    are read.  Every ``<table>`` on a product page becomes one raw record;
    size and weight cells that list several mounts are reduced to the
    Micro Four Thirds line (falling back to the Sony E line, then to the
    first line).  Focal lengths and magnifications are doubled.

    Args:
        scraping: Service used to fetch and parse the pages.

    Returns:
        DataFrame with ``maker``, ``name``, ``url``, ``mount``, ``price``
        and the normalized numeric/boolean spec columns.  ``-1`` marks a
        value that could not be parsed.
    """
    # Collect (name, URL, mount) of every Micro Four Thirds lens.
    lens_list: List[Tuple[str, str, str]] = []
    page = scraping.get_page(
        'https://www.kenko-tokina.co.jp/camera-lens/samyang/', cache=False)
    for li_element in page.find_all('li.col.list_item'):
        link_element = li_element.find('h3 > a')
        lens_name = link_element.text
        lens_url = link_element.attrs['href']
        if 'data-spec3' in li_element.attrs:
            mount_info = li_element.attrs['data-spec3']
            if 'マイクロフォーサーズ' in mount_info:
                lens_list.append((lens_name, lens_url, 'マイクロフォーサーズ'))

    # Read the spec tables.
    per_mount_headers = ('大きさ', '全長', '質量', '重さ')
    temp_list: List[Dict[str, str]] = []
    for lens_name, lens_url, lens_mount in lens_list:
        page = scraping.get_page(lens_url)
        for table_element in page.find_all('table'):
            temp: Dict[str, str] = {
                'name': lens_name,
                'url': lens_url,
                'mount': lens_mount
            }
            for tr_element in table_element.find_all('tr'):
                th_element = tr_element.find('th')
                td_element = tr_element.find('td')
                if th_element is None or td_element is None:
                    # A row without a header or a value cell carries no spec.
                    continue
                if th_element.text in per_mount_headers:
                    # Size and weight differ per mount, so pick the line for
                    # the preferred mount out of the <br>-separated lines.
                    cells = td_element.html.replace('<td>', '').replace(
                        '</td>', '').replace('\n', '').split('<br>')
                    chosen = ''
                    for keyword in ('マイクロフォーサーズ', 'ソニーE'):
                        chosen = next(
                            (cell for cell in cells if keyword in cell), '')
                        if chosen != '':
                            break
                    if chosen == '':
                        chosen = cells[0]
                    temp[th_element.text] = chosen.replace('\n', '')
                else:
                    temp[th_element.text] = td_element.text
            # The original guarded this with ``len(temp) > 0``, which is
            # always true because ``temp`` starts with three keys.
            temp_list.append(temp)
    df = DataFrame.from_records(temp_list)

    # Shape the frame for conversion.
    df['maker'] = 'SAMYANG'
    df['price'] = 0
    for unused_column in ('レンズ構成', 'レンズフード', 'マウント', 'JANコード',
                          'フォーマットサイズ', '絞り羽根', '絞り羽根枚数',
                          '付属品', 'JANコード:'):
        del df[unused_column]

    # focal length (doubled).  The original built the telephoto column from
    # ``w`` as well; ``t`` is used here so each column reads its own list.
    w, t = extract_numbers(df['焦点距離'], [], [r'(\d+\.?\d*)mm'])
    df['wide_focal_length'] = [int(Decimal(x) * 2) for x in w]
    df['telephoto_focal_length'] = [int(Decimal(x) * 2) for x in t]
    del df['焦点距離']
    del df['画角']

    # f-number, taken from the lens name.
    w, t = extract_numbers(df['name'], [], [r'F(\d+\.?\d*)'])
    df['wide_f_number'] = [float(x) for x in w]
    df['telephoto_f_number'] = [float(x) for x in t]
    del df['明るさ']
    del df['絞り']

    # minimum focus distance in millimetres (0 when nothing matches).
    min_focus_list: List[int] = []
    for fd in df['最短撮影距離'].values:
        result = regex(fd, r'(\d+.?\d*)cm')
        if len(result) > 0:
            min_focus_list.append(int(Decimal(result[0]) * 10))
            continue
        result = regex(fd, r'(\d+.?\d*)( *)m')
        if len(result) > 0:
            min_focus_list.append(int(Decimal(result[0]) * 1000))
            continue
        min_focus_list.append(0)
    df['wide_min_focus_distance'] = min_focus_list
    df['telephoto_min_focus_distance'] = list(min_focus_list)
    del df['最短撮影距離']

    # maximum magnification (doubled for Micro Four Thirds).
    mag_list: List[float] = []
    for val1, val2 in zip(df['最大撮影倍率'].values, df['mount'].values):
        value = Decimal(0)
        if val1 == val1:  # skip NaN (missing cell)
            result = regex(val1, r'(\d+.?\d*)倍')
            if len(result) > 0:
                value = Decimal(result[0])
        if val2 == 'マイクロフォーサーズ':
            mag_list.append(float(value * 2))
        else:
            mag_list.append(float(value))
    df['max_photographing_magnification'] = mag_list
    del df['最大撮影倍率']

    # filter diameter: the pages use one of two header names.
    fd_list: List[int] = []
    for record in df.to_dict(orient='records'):
        text = record['フィルターサイズ']
        if text != text:
            text = record['フィルター径']
        if text != text:
            # Both cells missing: record "unknown" instead of parsing NaN.
            fd_list.append(-1)
            continue
        result = regex(text, r'(\d+.?\d*)mm')
        if len(result) == 0:
            fd_list.append(-1)
            continue
        fd_list.append(int(result[0]))
    df['filter_diameter'] = fd_list
    del df['フィルターサイズ']
    del df['フィルター径']

    df['is_drip_proof'] = False
    df['has_image_stabilization'] = False

    # is_inner_zoom: set when wide and telephoto focal lengths are equal.
    df['is_inner_zoom'] = [
        bool(wide == tele)
        for wide, tele in zip(df['wide_focal_length'],
                              df['telephoto_focal_length'])
    ]

    # overall diameter / length: either one combined cell or two cells.
    diameter_list: List[float] = []
    length_list: List[float] = []
    for record in df.to_dict(orient='records'):
        text = record['大きさ']
        if text == text:
            result = regex(text, r'(\d+.?\d*)[^\d.]+(\d+.?\d*) *mm')
            if len(result) != 2:
                diameter_list.append(-1)
                length_list.append(-1)
            else:
                # The first number goes to the length and the second to the
                # diameter, exactly as in the original.
                length_list.append(float(result[0].replace('×', '')))
                diameter_list.append(float(result[1]))
            continue

        text = record['最大径']
        result = regex(text, r'(\d+.?\d*)') if text == text else []
        diameter_list.append(float(result[0]) if len(result) > 0 else -1)

        text = record['全長']
        result = regex(text, r'(\d+.?\d*)') if text == text else []
        length_list.append(float(result[0]) if len(result) > 0 else -1)
    df['overall_diameter'] = diameter_list
    df['overall_length'] = length_list
    del df['大きさ']
    del df['最大径']
    del df['全長']

    # weight in grams, rounded half up; two possible header names.
    weight_list: List[int] = []
    for record in df.to_dict(orient='records'):
        text = record['質量']
        if text != text:
            text = record['重さ']
        if text != text:
            weight_list.append(-1)
            continue
        result = regex(text, r'(\d+.?\d*)g')
        if len(result) == 0:
            weight_list.append(-1)
            continue
        weight_list.append(int(float(result[0]) + 0.5))
    df['weight'] = weight_list
    del df['質量']
    del df['重さ']
    return df
def get_olympus_lens_list(scraping: IScrapingService) -> DataFrame:
    """Scrape the Olympus Japan site and return one row per M.ZUIKO lens.

    Lens names and product numbers come from the current line-up page and
    the discontinued-products page.  For every lens the spec page and the
    product top page are merged into one raw record, header variants are
    unified, and the raw columns are converted into the normalized columns
    below.  Focal lengths are doubled.

    Args:
        scraping: Service used to fetch and parse the pages.

    Returns:
        DataFrame with ``maker``, ``name``, ``product_number``, ``url``,
        ``mount`` and the normalized numeric/boolean spec columns.  ``-1``
        marks a filter diameter, weight or price that could not be parsed.
    """
    base_url = 'https://www.olympus-imaging.jp/product/dslr/mlens/'

    # Collect (name, product number) from the current line-up ...
    page = scraping.get_page(
        'https://www.olympus-imaging.jp/product/dslr/mlens/index.html',
        cache=False)
    lens_list: List[Tuple[str, str]] = []
    for a_element in page.find_all('h2.productName > a'):
        lens_name = a_element.text.split('/')[0].replace('\n', '')
        if 'M.ZUIKO' not in lens_name:
            continue
        lens_product_number = a_element.attrs['href'].replace(
            '/product/dslr/mlens/', '').replace('/index.html', '')
        lens_list.append((lens_name, lens_product_number))

    # ... and from the discontinued-products page.
    page = scraping.get_page(
        'https://www.olympus-imaging.jp/product/dslr/record/index.html',
        cache=False)
    for section_element in page.find_all('div.section'):
        heading_element = section_element.find('div.mb15 > h2')
        link_element = section_element.find('li > a')
        if heading_element is None or link_element is None:
            continue
        lens_name = heading_element.text
        if 'M.ZUIKO DIGITAL' not in lens_name:
            continue
        lens_product_number = link_element.attrs['href'].replace(
            '/product/dslr/mlens/', '').replace('/index.html', '')
        lens_list.append((lens_name, lens_product_number))

    # Columns that are not needed downstream.
    del_column_list = [
        'レンズ構成',
        'フォーカシング方式',
        'AF方式',
        '特長',
        'マウント規格',
        '画角',
        '最近接撮影範囲',
        '絞り羽枚数',
        '同梱品',
        '主な同梱品',
        '別売りアクセサリー',
        '別売アクセサリー',
        '製品名',
        'JANコード',
        'JAN',
        '発売日',
        'オンラインショップ',
        'フード',
        '最大口径比',
        '最小口径比',
        '最大口径比/最小口径比',
        '35mm判換算最大撮影倍率',
        '最大撮影倍率(35mm判換算)',
        '手ぶれ補正性能',
        'ズーム',
        'ズーム方式',
        '35mm判換算',
        '絞り範囲',
    ]
    # Header variants that must share one column name so the records can be
    # combined.  The original also "renamed" '大きさ 最大径×全長' to itself
    # and then deleted it, which threw the canonical size value away; that
    # entry is intentionally not reproduced.
    # NOTE(review): presumably that entry once differed by a full-width
    # character - add the real variant here if one exists.
    rename_list = [
        ('大きさ 最大径×長さ', '大きさ 最大径×全長'),
        ('大きさ 最大径 × 全長', '大きさ 最大径×全長'),
        ('大きさ 最大径 x 全長', '大きさ 最大径×全長'),
        ('防滴性能 / 防塵機構', '防滴処理'),
        ('防滴性能/防塵機構', '防滴処理'),
        ('防滴性能 / 防塵機構搭載', '防滴処理'),
        ('価格', '希望小売価格'),
    ]

    # Fetch the raw spec of every lens.
    lens_data_list: List[Dict[str, str]] = []
    for lens_name, lens_product_number in lens_list:
        # Spec page (one product uses a different URL layout).
        if lens_product_number != '14-42_35-56':
            spec_url = f'{base_url}{lens_product_number}/spec.html'
        else:
            spec_url = f'{base_url}{lens_product_number}/spec/index.html'
        page = scraping.get_page(spec_url)
        temp_dict: Dict[str, str] = {}
        for tr_element in page.find('table').find_all('tr'):
            # The header text may sit in <span>, in <strong> or directly in
            # <th>, so the three places are tried in that order.
            th_element = tr_element.find('th > span')
            if th_element is None:
                th_element = tr_element.find('th > strong')
            if th_element is None:
                th_element = tr_element.find('th')
            td_element = tr_element.find('td')
            if th_element is None or td_element is None:
                # Rows without a header or a value cell carry no spec.
                continue
            temp_dict[th_element.text] = td_element.text

        # Product top page.
        index_url = f'{base_url}{lens_product_number}/index.html'
        page = scraping.get_page(index_url)
        temp_dict['URL'] = index_url
        table_element = page.find('table')
        # th/td are paired differently from the spec page because, on the
        # M.ZUIKO DIGITAL ED 30mm F3.5 Macro top page only, the list price
        # alone could not be read with the row-based approach.
        for th_element, td_element in zip(table_element.find_all('th'),
                                          table_element.find_all('td')):
            label_element = th_element.find('span')
            if label_element is None:
                label_element = th_element.find('strong')
            if label_element is None:
                label_element = th_element
            temp_dict[label_element.text] = td_element.text

        # Add the required columns.  The original replaced ' ' with itself
        # (a no-op); presumably the full-width space was meant, so that is
        # what is normalized here.
        temp_dict['name'] = lens_name.replace('\u3000', ' ')
        temp_dict['product_number'] = lens_product_number

        # Drop the unneeded columns.
        for column in del_column_list:
            if column in temp_dict:
                del temp_dict[column]

        # Unify the header variants.
        for old_key, new_key in rename_list:
            if old_key in temp_dict:
                temp_dict[new_key] = temp_dict.pop(old_key)
        lens_data_list.append(temp_dict)
    df = DataFrame.from_records(lens_data_list)

    # Shape the frame for conversion.
    df['maker'] = 'OLYMPUS'

    # focal_length (doubled)
    w, t = extract_numbers(df['焦点距離'],
                           [r'(\d+)-(\d+)mm', r'(\d+) - (\d+)mm'],
                           [r'(\d+)mm'])
    df['wide_focal_length'] = [int(x) * 2 for x in w]
    df['telephoto_focal_length'] = [int(x) * 2 for x in t]
    # M.ZUIKO DIGITAL ED 150-400mm F4.5 TC1.25x IS PRO has a built-in
    # teleconverter, so its telephoto end is overridden.  ``.loc`` writes to
    # the frame itself; the original chained assignment needed
    # SettingWithCopyWarning to be silenced and may write to a copy.
    df.loc[df['product_number'] == '150-400_45ispro',
           'telephoto_focal_length'] = 1000
    del df['焦点距離']

    # f_number
    w, t = extract_numbers(
        df['name'], [r'F(\d+\.?\d*)-(\d+\.?\d*)'], [r'F(\d+\.?\d*)'])
    df['wide_f_number'] = [float(x) for x in w]
    df['telephoto_f_number'] = [float(x) for x in t]

    # min_focus_distance (metres in the source, scaled by 1000 to mm)
    w, t = extract_numbers(df['最短撮影距離'], [
        r'(\d+\.?\d+)m(.+) / (\d+\.?\d+)m(.+)',
        r'(\d+\.?\d+)m \(.+\) / (\d+\.?\d+)m \(.+\)',
        r'(\d+\.?\d+)m.+/(\d+\.?\d+)m.+'
    ], [r'(\d+\.?\d+)m', r'(\d+\.?\d+) m'])
    df['wide_min_focus_distance'] = [int(Decimal(x).scaleb(3)) for x in w]
    df['telephoto_min_focus_distance'] = [int(Decimal(x).scaleb(3)) for x in t]
    del df['最短撮影距離']

    # max_photographing_magnification
    # NOTE(review): several patterns below contain unescaped parentheses
    # (capture groups rather than literal brackets) and one pattern appears
    # twice; presumably they once used full-width brackets/colons - confirm
    # before changing them.
    w, t = extract_numbers(df['最大撮影倍率'], [
        r'(\d+\.?\d+)倍 \(Wide\) / (\d+\.?\d+)倍 \(Tele\)',
        r'Wide:(\d+\.?\d+)倍/Tele:(\d+\.?\d+)倍',
        r'(\d+\.?\d+)倍(Wide) / (\d+\.?\d+)倍(Tele)',
        r'Wide:(\d+\.?\d+)倍/Tele:(\d+\.?\d+)倍',
        r'(\d+\.?\d+)倍(Wide)/ (\d+\.?\d+)倍(Tele)'
    ], [
        r'(\d+\.?\d+)倍(35mm判換算\d+\.?\d+倍相当)',
        r'(\d+\.?\d+)倍(35mm判換算 \d+\.?\d+倍相当)',
        r'(\d+\.?\d+)倍(Wide / Tele)',
        r'(\d+\.?\d+)倍 \(35mm判換算 \d+\.?\d+倍相当\)',
        r'(\d+\.?\d+)倍(35mm判換算 \d+\.?\d+倍)',
        r'(\d+\.?\d+)倍(マクロモード時)(35mm判換算 \d+\.?\d+倍)'
    ])
    m: List[float] = []
    for a, b, text in zip(w, t, df['最大撮影倍率'].values):
        if a == b:
            magnification = float(Decimal(a) * 2)
        elif '換算' in text:
            # The text mentions a converted ("換算") value: take the larger
            # of the two numbers without doubling.
            magnification = max(float(Decimal(a)), float(Decimal(b)))
        else:
            magnification = max(float(Decimal(a)), float(Decimal(b))) * 2
        m.append(magnification)
    df['max_photographing_magnification'] = m
    del df['最大撮影倍率']

    # filter_diameter (-1 when missing or unparsable)
    filter_diameter: List[float] = []
    for f in df['フィルターサイズ']:
        result = regex(f, r'(\d+.?\d*)mm') if f == f else []
        if len(result) > 0:
            filter_diameter.append(float(result[0]))
        else:
            filter_diameter.append(-1)
    df['filter_diameter'] = filter_diameter
    del df['フィルターサイズ']

    # is_drip_proof: true when the cell is present and non-empty.
    df['is_drip_proof'] = df['防滴処理'].map(lambda x: x == x and x != '')
    del df['防滴処理']

    # has_image_stabilization
    df['has_image_stabilization'] = df['name'].map(lambda x: 'IS' in x)
    del df['レンズ内手ぶれ補正機構']

    # is_inner_zoom: equal focal lengths, or a listed product number.
    inner_zoom_products = ('7-14_28pro', '40-150_28pro', '150-400_45ispro')
    df['is_inner_zoom'] = [
        bool(wide == tele) or product_number in inner_zoom_products
        for wide, tele, product_number in zip(df['wide_focal_length'],
                                              df['telephoto_focal_length'],
                                              df['product_number'])
    ]

    # overall_diameter, overall_length
    d, le = extract_numbers(df['大きさ 最大径×全長'], [
        r'φ(\d+.?\d*)x(\d+.?\d*)mm',
        r'Ø(\d+.?\d*)×(\d+.?\d*)mm',
        r'Φ (\d+.?\d*) mm x (\d+.?\d*) mm',
        r'⌀(\d+.?\d*) x (\d+.?\d*)mm',
        r'Ø(\d+.?\d*) × (\d+.?\d*)mm',
        r'Ø(\d+.?\d*) x (\d+.?\d*)mm',
        r'Ø(\d+.?\d*)mm x (\d+.?\d*)mm',
        r'Ø(\d+.?\d*)x (\d+.?\d*)mm',
        r'Ø(\d+.?\d*)x(\d+.?\d*)mm',
        r'⌀(\d+.?\d*)×(\d+.?\d*)mm',
        r'Ø(\d+.?\d*)mm × (\d+.?\d*)mm',
        r'φ(\d+.?\d*)×(\d+.?\d*)mm'
    ], [])
    df['overall_diameter'] = [float(x) for x in d]
    df['overall_length'] = [float(x) for x in le]
    del df['大きさ 最大径×全長']

    # weight
    weight: List[int] = []
    for f in df['質量']:
        result = regex(f, r'([\d,]+)[^\d]*(g|g)')
        if len(result) > 0:
            weight.append(int(result[0].replace(',', '')))
        else:
            weight.append(-1)
    df['weight'] = weight
    del df['質量']

    # price
    price: List[int] = []
    for f in df['希望小売価格']:
        result = regex(f, r'([\d,]+)円')
        if len(result) > 0:
            price.append(int(result[0].replace(',', '')))
        else:
            price.append(-1)
    df['price'] = price
    del df['希望小売価格']

    # mount / url
    df['mount'] = 'マイクロフォーサーズ'
    df['url'] = df['URL']
    del df['URL']
    return df
def get_panasonic_old_lens_list(scraping: IScrapingService) -> DataFrame:
    """Scrape Panasonic's older product-database pages, one row per lens.

    Links matching ``http://panasonic.jp/dc/p-db/*.html`` are taken from
    the G-series lens page; for each link the matching ``*_spec.html`` page
    is read and its first table is turned into a raw record.  Focal lengths
    are doubled and every row is labelled Micro Four Thirds.

    Args:
        scraping: Service used to fetch and parse the pages.

    Returns:
        DataFrame with ``maker``, ``name``, ``product_number``, ``url``,
        ``mount`` and the normalized numeric/boolean spec columns.
        Magnification and price are fixed at 0 and ``is_drip_proof`` at
        False because the pages do not list them; ``-1`` marks a filter
        diameter or weight that could not be parsed.
    """
    # Open the information page.
    page = scraping.get_page(
        'https://panasonic.jp/dc/products/g_series_lens.html', cache=False)

    # Collect the product URLs together with their product numbers.
    product_by_url: Dict[str, str] = {}
    for a_element in page.find_all('a'):
        if 'href' not in a_element.attrs:
            # Anchors without an href would raise KeyError below.
            continue
        link_url = a_element.attrs['href']
        matched = regex(link_url, r'http://panasonic\.jp/dc/p-db/(.+)\.html')
        if len(matched) > 0:
            product_by_url[link_url] = matched[0]

    # Read each spec page.  Sorting gives a stable row order; the original
    # iterated a set, whose order can differ from run to run.
    temp_list: List[Dict[str, str]] = []
    for link_url in sorted(product_by_url):
        page = scraping.get_page(link_url.replace('.html', '_spec.html'))
        table_element = page.find('table')
        temp_dict: Dict[str, str] = {
            'リンク': link_url,
            '型番': product_by_url[link_url],
        }
        for th_element, td_element in zip(table_element.find_all('th'),
                                          table_element.find_all('td')):
            temp_dict[th_element.text] = td_element.text
        temp_list.append(temp_dict)
    df = DataFrame.from_records(temp_list)

    # Shape the frame for conversion.
    df['maker'] = 'Panasonic'
    df['name'] = df['レンズ名称']
    del df['レンズ名称']
    df['product_number'] = df['型番']
    del df['型番']

    # focal length (doubled)
    w, t = extract_numbers(df['焦点距離'],
                           [r'(\d+)mm~(\d+)mm', r'(\d+)-(\d+)mm'],
                           [r'(\d+)mm'])
    df['wide_focal_length'] = [int(x) * 2 for x in w]
    df['telephoto_focal_length'] = [int(x) * 2 for x in t]
    del df['焦点距離']

    # f-number, taken from the lens name.
    w, t = extract_numbers(
        df['name'], [r'F(\d+\.?\d*)-(\d+\.?\d*)'], [r'F(\d+\.?\d*)'])
    df['wide_f_number'] = [float(x) for x in w]
    df['telephoto_f_number'] = [float(x) for x in t]
    del df['開放絞り']
    del df['絞り形式']
    del df['最小絞り']

    # minimum focus distance (metres in the source, scaled by 1000 to mm)
    w, t = extract_numbers(
        df['最短撮影距離'],
        [r'(\d+\.?\d+)m / (\d+\.?\d+)m', r'(\d+\.?\d+)m~∞.*(\d+\.?\d+)m~∞'],
        [r'(\d+\.?\d+)m', r'(\d+\.?\d+)m~∞'])
    df['wide_min_focus_distance'] = [int(Decimal(x).scaleb(3)) for x in w]
    df['telephoto_min_focus_distance'] = [int(Decimal(x).scaleb(3)) for x in t]
    del df['最短撮影距離']

    df['max_photographing_magnification'] = 0.0  # not listed on these pages

    # filter diameter (-1 when unparsable)
    filter_diameter: List[float] = []
    for f in df['フィルター径']:
        result = regex(f, r'(\d+.?\d*)mm')
        if len(result) > 0:
            filter_diameter.append(float(result[0]))
        else:
            filter_diameter.append(-1)
    df['filter_diameter'] = filter_diameter
    del df['フィルター径']

    df['is_drip_proof'] = False  # not listed on these pages
    df['has_image_stabilization'] = df['name'].map(lambda x: 'O.I.S.' in x)

    # is_inner_zoom: equal focal lengths, or a listed product number.
    inner_zoom_products = ('H-F007014', 'H-E08018', 'H-PS45175', 'S-E70200',
                           'S-R70200')
    df['is_inner_zoom'] = [
        bool(wide == tele) or product_number in inner_zoom_products
        for wide, tele, product_number in zip(df['wide_focal_length'],
                                              df['telephoto_focal_length'],
                                              df['product_number'])
    ]

    # overall diameter / length
    d, le = extract_numbers(df['外形寸法'],
                            [r'(\d+\.?\d*)mm[^\d]*(\d+\.?\d*)mm'], [])
    df['overall_diameter'] = [float(x) for x in d]
    df['overall_length'] = [float(x) for x in le]
    del df['外形寸法']

    # weight in grams (-1 when unparsable)
    weight: List[int] = []
    for f in df['質量']:
        result = regex(f, r'([\d,]+)g')
        if len(result) > 0:
            weight.append(int(result[0].replace(',', '')))
        else:
            weight.append(-1)
    df['weight'] = weight
    del df['質量']

    df['price'] = 0  # not listed on these pages
    df['mount'] = 'マイクロフォーサーズ'
    del df['レンズ構成']
    del df['マウント']
    df['url'] = df['リンク']
    del df['リンク']
    return df
def get_panasonic_lens_list(scraping: IScrapingService) -> DataFrame:
    """Scrape Panasonic's lens comparison page, one row per lens.

    The first table whose text contains ``LUMIX G`` is read as the Micro
    Four Thirds line-up and the first table containing ``LUMIX S`` as the
    L-mount line-up.  In both tables each lens is a column, so every table
    row becomes one DataFrame column.  The two frames are renamed to shared
    column names, concatenated and converted to the normalized columns.

    Args:
        scraping: Service used to fetch and parse the page.

    Returns:
        DataFrame with ``maker``, ``name``, ``product_number``, ``url``,
        ``mount`` and the normalized numeric/boolean spec columns.  ``-1``
        marks a filter diameter, weight or price that could not be parsed.
    """
    # Open the comparison page.
    page = scraping.get_page('https://panasonic.jp/dc/comparison.html',
                             cache=False)

    # Collect the Micro Four Thirds table.
    df1 = DataFrame()
    for table_element in page.find_all('table'):
        if 'LUMIX G' not in table_element.full_text:
            continue
        df1['レンズ名'] = [
            cleansing(x.text) for x in table_element.find_all('th p')
        ]
        df1['URL'] = [
            'https://panasonic.jp' + x.attrs['href']
            for x in table_element.find_all('th a')
        ]
        for tr_element in table_element.find_all('tbody > tr'):
            th_element = tr_element.find('th')
            if th_element is None:
                # Same guard as the L-mount table: a row without a header
                # cell has no column name to store its values under.
                continue
            df1[cleansing(th_element.text)] = [
                cleansing(x.text) for x in tr_element.find_all('td')
            ]
        break

    # Collect the L-mount table.
    df2 = DataFrame()
    for table_element in page.find_all('table'):
        if 'LUMIX S' not in table_element.full_text:
            continue
        df2['レンズ名'] = [
            cleansing(x.text) for x in table_element.find_all('th p')
        ]
        df2['URL'] = [
            'https://panasonic.jp' + x.attrs['href']
            for x in table_element.find_all('th a')
        ]
        for tr_element in table_element.find_all('tbody > tr'):
            th_element = tr_element.find('th')
            if th_element is None:
                continue
            df2[cleansing(th_element.text)] = [
                cleansing(x.text) for x in tr_element.find_all('td')
            ]
        # The size row alone sits directly under <tbody> instead of inside
        # a <tr>, so it is picked up separately.
        stray_th_element = table_element.find('tbody > th')
        if stray_th_element is not None:
            df2[cleansing(stray_th_element.text)] = [
                cleansing(x.text)
                for x in table_element.find_all('tbody > td')
            ]
        break

    # Rename the columns so that the two frames can be concatenated.
    df1 = convert_columns(
        df1, {
            'レンズ名': 'name',
            'URL': 'url',
            '品番': 'product_number',
            '35mm判換算焦点距離': 'focal_length',
            '最短撮影距離': 'min_focus_distance',
            '最大撮影倍率': 'max_photographing_magnification',
            '手ブレ補正': 'has_image_stabilization',
            'フィルターサイズ': 'filter_diameter',
            '最大径×全長': 'overall_size',
            '質量': 'weight',
            '防塵・防滴': 'is_drip_proof',
            'メーカー希望小売価格': 'price',
        }, [
            'レンズ構成',
            '絞り羽根 / 形状',
            '最小絞り値',
            'レンズコーティング',
            '対角線画角',
            'レンズキャップ',
        ])
    df1['mount'] = 'マイクロフォーサーズ'

    df2 = convert_columns(
        df2, {
            'レンズ名': 'name',
            'URL': 'url',
            '品番': 'product_number',
            '焦点距離': 'focal_length',
            '撮影距離範囲': 'min_focus_distance',
            '手ブレ補正': 'has_image_stabilization',
            'フィルター径': 'filter_diameter',
            '防塵・防滴': 'is_drip_proof',
            '最大撮影倍率': 'max_photographing_magnification',
            '最大径×全長': 'overall_size',
            '質量': 'weight',
            'メーカー希望小売価格': 'price'
        }, [
            'レンズ構成',
            'マウント',
            '絞り羽根 / 形状',
            '開放絞り',
            '最小絞り',
        ])
    df2['mount'] = 'ライカL'

    # Concatenate.
    df = pandas.concat([df1, df2])

    # Shape the frame for conversion.
    df['maker'] = 'Panasonic'

    # focal_length (the Micro Four Thirds table already lists the
    # 35mm-equivalent value, so nothing is doubled here)
    w, t = extract_numbers(df['focal_length'],
                           [r'(\d+)mm~(\d+)mm', r'(\d+)-(\d+)mm'],
                           [r'(\d+)mm'])
    df['wide_focal_length'] = [int(x) for x in w]
    df['telephoto_focal_length'] = [int(x) for x in t]
    del df['focal_length']

    # f_number
    w, t = extract_numbers(
        df['name'], [r'F(\d+\.?\d*)-(\d+\.?\d*)'], [r'F(\d+\.?\d*)'])
    df['wide_f_number'] = [float(x) for x in w]
    df['telephoto_f_number'] = [float(x) for x in t]

    # min_focus_distance (metres in the source, scaled by 1000 to mm)
    w, t = extract_numbers(
        df['min_focus_distance'],
        [r'(\d+\.?\d+)m / (\d+\.?\d+)m', r'(\d+\.?\d+)m~∞.*(\d+\.?\d+)m~∞'],
        [r'(\d+\.?\d+)m', r'(\d+\.?\d+)m~∞'])
    df['wide_min_focus_distance'] = [int(Decimal(x).scaleb(3)) for x in w]
    df['telephoto_min_focus_distance'] = [int(Decimal(x).scaleb(3)) for x in t]
    del df['min_focus_distance']

    # max_photographing_magnification (doubled for Micro Four Thirds)
    m: List[float] = []
    for raw_text, mount in zip(df['max_photographing_magnification'],
                               df['mount']):
        number_text = raw_text.replace('倍', '')
        if mount == 'マイクロフォーサーズ':
            m.append(float(Decimal(number_text) * 2))
        else:
            m.append(float(number_text))
    df['max_photographing_magnification'] = m

    # filter_diameter (-1 when unparsable)
    filter_diameter: List[float] = []
    for f in df['filter_diameter']:
        result = regex(f, r'(\d+.?\d*)mm')
        if len(result) > 0:
            filter_diameter.append(float(result[0]))
        else:
            filter_diameter.append(-1)
    df['filter_diameter'] = filter_diameter

    # is_drip_proof
    df['is_drip_proof'] = df['is_drip_proof'].map(lambda x: x == '○')

    # has_image_stabilization
    df['has_image_stabilization'] = df['has_image_stabilization'].map(
        lambda x: x != '-')

    # is_inner_zoom: equal focal lengths, or a listed product number.
    inner_zoom_products = ('H-F007014', 'H-E08018', 'H-PS45175', 'S-E70200',
                           'S-R70200')
    df['is_inner_zoom'] = [
        bool(wide == tele) or product_number in inner_zoom_products
        for wide, tele, product_number in zip(df['wide_focal_length'],
                                              df['telephoto_focal_length'],
                                              df['product_number'])
    ]

    # overall_diameter, overall_length
    d, le = extract_numbers(df['overall_size'],
                            [r'(\d+\.?\d*)mm[^\d]*(\d+\.?\d*)mm'], [])
    df['overall_diameter'] = [float(x) for x in d]
    df['overall_length'] = [float(x) for x in le]
    del df['overall_size']

    # weight in grams (-1 when unparsable)
    weight: List[int] = []
    for f in df['weight']:
        result = regex(f, r'([\d,]+)g')
        if len(result) > 0:
            weight.append(int(result[0].replace(',', '')))
        else:
            weight.append(-1)
    df['weight'] = weight

    # price in yen (-1 when unparsable)
    price: List[int] = []
    for f in df['price']:
        result = regex(f, r'([\d,]+) *円')
        if len(result) > 0:
            price.append(int(result[0].replace(',', '')))
        else:
            price.append(-1)
    df['price'] = price
    return df
def _leica_min_focus_mm(text: str) -> Tuple[int, int]:
    """Parse a Leica 'Working range' cell into (wide, telephoto) distances.

    Patterns are tried in a fixed order; the first one that matches wins.
    Values given in metres are multiplied by 1000, values given with an
    ``mm`` suffix are used as-is. ``(0, 0)`` is returned when nothing matches.
    """
    # Zoom lenses: two "<n>,<n> m to infinity" figures with a decimal comma.
    zoom = regex(text, r'(\d+,\d*) m to infinity.+(\d+,\d*) m to infinity')
    if len(zoom) > 0:
        return (int(Decimal(zoom[0].replace(',', '.')).scaleb(3)),
                int(Decimal(zoom[1].replace(',', '.')).scaleb(3)))

    # Single figure in metres, written in either direction.
    for pattern in (r'(\d+\.?\d*) m to infinity', r'∞ to (\d+\.?\d*) m'):
        metres = regex(text, pattern)
        if len(metres) > 0:
            value = int(Decimal(metres[0]).scaleb(3))
            return value, value

    # Single figure with an "mm" suffix.
    millimetres = regex(text, r'(\d+\.?\d*)mm to infinity')
    if len(millimetres) > 0:
        value = int(millimetres[0])
        return value, value

    return 0, 0


def _leica_leading_dimension_mm(text: str) -> float:
    """Return the first ``mm`` figure of a Leica diameter/length cell.

    Three layouts are recognised: ``"<a>/<b> mm"`` (the first figure is
    taken), ``"...: <a> mm"`` and plain ``"<a> mm"``. Thin spaces (U+2009)
    are replaced by regular spaces before matching.
    """
    normalized = text.replace('\u2009', ' ')
    if '/' in text:
        pattern = r'(\d+\.?\d*)/\d+ mm'
    elif ':' in text:
        pattern = r': (\d+\.?\d*) mm'
    else:
        pattern = r'(\d+\.?\d*) mm'
    return float(regex(normalized, pattern)[0])


def _leica_weight_g(text: str) -> int:
    """Return the weight from a Leica 'Weight' cell as an integer.

    Dots inside the number are stripped before conversion (the code treats
    them as digit-group separators). For ``"<a>/<b> g"`` the first figure is
    used. A cell without a ``" g"`` suffix is converted with ``int`` as-is.
    """
    normalized = text.replace('\u2009', ' ')
    single = regex(normalized, r'([\d.]+) g')
    if len(single) == 0:
        return int(normalized)
    paired = regex(normalized, r'([\d.]+)/[\d.]+ g')
    digits = paired[0] if len(paired) > 0 else single[0]
    return int(digits.replace('.', ''))


def get_leica_lens_list(scraping: IScrapingService) -> DataFrame:
    """Scrape the Leica SL lens listing and return one row per lens.

    The paginated listing on leica-camera.com is walked until a page yields
    no product teasers, every matching product page is fetched, and its
    table rows are collected as raw key/value pairs. The raw columns are
    then converted into the common lens columns (``name``, ``url``,
    ``maker``, ``mount``, ``product_number``, focal lengths, f-numbers,
    minimum focus distances, magnification, filter diameter, flags,
    dimensions, ``weight`` and ``price``).

    Args:
        scraping: Service used to download and parse the pages.

    Returns:
        DataFrame with the converted lens data. ``price`` is always 0 and
        ``is_drip_proof`` is always False for this maker.
    """
    # Collect the (name, URL) pairs of all lenses from the listing pages.
    lens_list: List[Tuple[str, str]] = []
    page_index = 0
    while True:
        page_url = ('https://leica-camera.com/en-US/photography/lenses/sl'
                    f'?field_pim_categories=&page={page_index}')
        page = scraping.get_page(page_url, cache=False)
        article_elements = page.find_all('article.content-teasers-item')
        if len(article_elements) == 0:
            # An empty page marks the end of the pagination.
            break
        page_index += 1
        for article_element in article_elements:
            title_element = article_element.find(
                'div.field--name-external-field-main-product-title')
            link_element = article_element.find('a.node-link')
            # Skip teasers that lack a title or a link instead of crashing.
            if title_element is None or link_element is None:
                continue
            lens_name = title_element.full_text
            if lens_name is None:
                continue
            # Keep only Leica SL lenses; lens hoods share the same listing.
            if 'SL' not in lens_name or 'Leica' not in lens_name or 'hood' in lens_name:
                continue
            lens_url = 'https://leica-camera.com' + link_element.attrs['href']
            lens_list.append((lens_name, lens_url))

    # Fetch the raw spec table of every lens.
    lens_raw_data_list: List[Dict[str, str]] = []
    for lens_name, lens_url in lens_list:
        page = scraping.get_page(lens_url)
        temp: Dict[str, str] = {'レンズ名': lens_name, 'URL': lens_url}
        for tr_element in page.find_all('tr'):
            th_elements = tr_element.find_all('th')
            td_elements = tr_element.find_all('td')
            if len(td_elements) >= 2:
                # <td>key</td><td>value</td>
                temp[td_elements[0].text] = td_elements[1].text
            elif len(th_elements) >= 1 and len(td_elements) >= 1:
                # <th>key</th><td>value</td>
                temp[th_elements[0].text] = td_elements[0].text
        lens_raw_data_list.append(temp)
    df = DataFrame.from_records(lens_raw_data_list)

    # Reshape for conversion: unify differently spelled headers and drop
    # the columns that are not needed.
    df['maker'] = 'LEICA'
    df['mount'] = 'ライカL'
    df = convert_columns(df, {
        'レンズ名': 'name',
        'URL': 'url',
        'Order Number': 'Order number',
        'Largest scale': 'Largest reproduction ratio',
        'Filter thread': 'Filter mount',
        'Length': 'Length to bayonet mount',
        'Diameter': 'Largest diameter',
        'Black, anodized': 'Order number',
        'Length to bayonet flange': 'Length to bayonet mount',
        'Focus range': 'Working range',
    }, [
        'Field angle (diagonal, horizontal, vertical)',
        'Number of lenses/groups',
        'Number of asph. surfaces / lenses',
        'Entrance pupil position',
        'Smallest object field',
        'Setting/function',
        'Aperture setting range',
        'Lowest value',
        'Bayonet/sensor format',
        'View angle (diagonal/horizontal/vertical) Full-frame (24 × 36 mm)',
        'Number of lenses/assemblies',
        'Number of aspherical surfaces',
        'Position of the entrance pupil before the bayonet',
        'Setting',
        'Setting/Function',
        'Smallest aperture',
        'Bayonet',
        'Lens hood',
        'Full-frame (24 × 36 mm)',
        'Angle of view (diagonal, horizontal, vertical)',
        'Number of elements/groups',
        'Position of entrance pupil',
        'Smallest value',
        'Number of aspherical lenses',
    ])

    # product_number
    df['product_number'] = df['Order number'].map(lambda x: str(x).replace(' ', ''))
    del df['Order number']

    # wide_focal_length, telephoto_focal_length
    w, t = extract_numbers(
        df['name'],
        [r'SL (\d+)-(\d+)mm f', r'SL(\d+)-(\d+) f', r'SL (\d+)-(\d+) f'],
        [r'SL(\d+) f', r'SL (\d+) f', r'SL 1:\d+\.?\d*/(\d+)'])
    df['wide_focal_length'] = w
    df['telephoto_focal_length'] = t

    # wide_f_number, telephoto_f_number
    w, t = extract_numbers(
        df['name'],
        [r'f/(\d+\.?\d*)-(\d+\.?\d*)'],
        [r'f/(\d+\.?\d*)', r'SL 1:(\d+\.?\d*)/\d+'])
    df['wide_f_number'] = w
    df['telephoto_f_number'] = t

    # wide_min_focus_distance, telephoto_min_focus_distance
    focus_pairs = [_leica_min_focus_mm(text) for text in df['Working range'].values]
    df['wide_min_focus_distance'] = [pair[0] for pair in focus_pairs]
    df['telephoto_min_focus_distance'] = [pair[1] for pair in focus_pairs]
    del df['Working range']

    # max_photographing_magnification: "1:<d>" becomes 1/<d>, rounded to 0.01.
    magnifications: List[float] = []
    for ratio_text in df['Largest reproduction ratio']:
        denominator = regex(ratio_text.replace(',', '.'), r'1:(\d+\.?\d*)')
        ratio = (Decimal('1') / Decimal(denominator[0])).quantize(Decimal('0.01'))
        magnifications.append(float(ratio))
    df['max_photographing_magnification'] = magnifications
    del df['Largest reproduction ratio']

    # filter_diameter: values look like "E67"; strip the prefix.
    df['filter_diameter'] = df['Filter mount'].map(lambda x: int(str(x).replace('E', '')))
    del df['Filter mount']

    # is_drip_proof, has_image_stabilization, is_inner_zoom
    df['is_drip_proof'] = False
    # A lens is stabilized only when its O.I.S. row is actually present.
    # Lenses without that row hold NaN here, and NaN is truthy in Python, so
    # a plain truthiness test would flag every lens as stabilized.
    df['has_image_stabilization'] = [
        bool(pandas.notna(value) and value)
        for value in df['O.I.S. Performance as per CIPA']
    ]
    # Primes (wide == telephoto) count as inner zoom, plus the zooms that
    # are listed here explicitly.
    inner_zoom_names = ['Leica APO-Vario-Elmarit-SL 90-280 f/2.8-4']
    df['is_inner_zoom'] = [
        bool(name in inner_zoom_names or wide == tele)
        for name, wide, tele in zip(
            df['name'], df['wide_focal_length'], df['telephoto_focal_length'])
    ]
    del df['O.I.S. Performance as per CIPA']

    # overall_diameter, overall_length
    df['overall_diameter'] = [
        _leica_leading_dimension_mm(text) for text in df['Largest diameter']
    ]
    df['overall_length'] = [
        _leica_leading_dimension_mm(text) for text in df['Length to bayonet mount']
    ]
    del df['Largest diameter']
    del df['Length to bayonet mount']

    # weight
    df['weight'] = [_leica_weight_g(text) for text in df['Weight'].values]
    del df['Weight']

    # No price is scraped for this maker.
    df['price'] = 0
    return df
def get_cosina_lens_list(scraping: IScrapingService) -> DataFrame:
    """Scrape the Cosina (Voigtlander) Micro Four Thirds lens pages.

    The index page is read for links whose image ``alt`` text contains
    ``mm`` and whose ``href`` contains ``mft``. Each linked page is then
    fetched, and its spec table rows and price heading are converted into
    the common lens columns.

    Args:
        scraping: Service used to download and parse the pages.

    Returns:
        DataFrame with one row per lens. Focal lengths are the scraped
        values multiplied by 2. ``is_drip_proof`` and
        ``has_image_stabilization`` are always False and ``is_inner_zoom``
        is always True for this maker.

    Raises:
        Exception: If the index page lists a lens whose name is not a key
            of ``lens_name_table``.
    """
    base_url = 'http://www.cosina.co.jp/seihin/voigtlander/mft-mount/'

    # Collect the (name, URL) pairs of all lenses from the index page.
    lens_list: List[Tuple[str, str]] = []
    page = scraping.get_page(base_url + 'index.html', encoding='cp932', cache=False)
    for a_element in page.find_all('td > a'):
        img_element = a_element.find('img')
        # Text-only anchors, or anchors without alt/href, cannot be lens
        # links; skip them instead of failing on a missing attribute.
        if img_element is None:
            continue
        if 'alt' not in img_element.attrs or 'href' not in a_element.attrs:
            continue
        lens_name = img_element.attrs['alt']
        href = a_element.attrs['href']
        if 'mm' not in lens_name or 'mft' not in href:
            continue
        if lens_name not in lens_name_table:
            print(lens_name)
            raise Exception('未対応のレンズが含まれています')
        lens_list.append((lens_name_table[lens_name], base_url + href))

    # Fetch the spec table of every lens.
    temp_list: List[Dict[str, str]] = []
    for lens_name, lens_url in lens_list:
        page = scraping.get_page(lens_url, encoding='cp932')
        temp: Dict[str, str] = {'レンズ名': lens_name, 'URL': lens_url}
        for tr_element in page.find_all('tr'):
            td_elements = tr_element.find_all('td')
            if len(td_elements) < 2:
                continue
            # Only rows whose first cell carries a bgcolor attribute are
            # read as key/value rows.
            if 'bgcolor' not in td_elements[0].attrs:
                continue
            key = td_elements[0].full_text
            value = td_elements[1].full_text
            if key == '' or value == '':
                continue
            temp[key] = value
        # The price is taken from an <h2> heading, not from the table.
        for h2_element in page.find_all('h2'):
            text = h2_element.text
            if '希望小売価格' in text:
                temp['希望小売価格'] = text.replace('\n', '')
        temp_list.append(temp)
    df = DataFrame.from_records(temp_list)

    # Reshape for conversion.
    df['maker'] = 'COSINA'
    df['name'] = df['レンズ名']
    del df['レンズ名']
    df['product_number'] = ''

    # Focal length: the scraped value is doubled.
    w, t = extract_numbers(df['焦点距離'], [], [r'(\d+\.?\d*)mm'])
    df['wide_focal_length'] = [int(Decimal(x) * 2) for x in w]
    # Built from ``t`` like every other wide/telephoto pair in this function
    # (it was previously built from ``w``).
    df['telephoto_focal_length'] = [int(Decimal(x) * 2) for x in t]
    for column in ('焦点距離', '画角', 'レンズ構成'):
        del df[column]

    # F-number, parsed from the lens name.
    w, t = extract_numbers(df['name'], [], [r'F(\d+\.?\d*)'])
    df['wide_f_number'] = [float(x) for x in w]
    df['telephoto_f_number'] = [float(x) for x in t]
    for column in ('口径比', '最小絞り', '絞り羽根枚数'):
        del df[column]

    # Minimum focus distance: metres multiplied by 1000.
    w, t = extract_numbers(df['最短撮影距離'], [], [r'(\d+\.?\d+)m'])
    df['wide_min_focus_distance'] = [int(Decimal(x).scaleb(3)) for x in w]
    df['telephoto_min_focus_distance'] = [int(Decimal(x).scaleb(3)) for x in t]
    del df['最短撮影距離']

    # Maximum magnification: "1:<d>" becomes 1/<d>, rounded to 0.01. Both the
    # half-width and the full-width colon are accepted. Group 0 is the colon,
    # so the denominator is group 1.
    denominators: List[str] = [
        regex(record, r'1(:|：)(\d+\.?\d*)')[1] for record in df['最大撮影倍率']
    ]
    df['max_photographing_magnification'] = [
        float((Decimal(1) / Decimal(x)).quantize(Decimal('0.01')))
        for x in denominators
    ]
    del df['最大撮影倍率']

    # Filter diameter.
    filter_sizes: List[int] = [
        int(regex(record, r'(\d+)mm')[0]) for record in df['フィルターサイズ']
    ]
    df['filter_diameter'] = filter_sizes
    del df['フィルターサイズ']

    df['is_drip_proof'] = False
    df['has_image_stabilization'] = False
    df['is_inner_zoom'] = True

    # Overall diameter and length.
    di, le = extract_numbers(df['最大径×全長'], [r'φ(\d+\.?\d*)×(\d+\.?\d*)mm'], [])
    df['overall_diameter'] = di
    df['overall_length'] = le
    del df['最大径×全長']

    # Weight.
    weight, _ = extract_numbers(df['重量'], [], [r'(\d+)g'])
    df['weight'] = weight
    del df['重量']

    # Price: accept both the half-width and the full-width yen sign.
    price, _ = extract_numbers(df['希望小売価格'], [], [r'[¥￥]([\d,]+)'])
    df['price'] = [int(x.replace(',', '')) for x in price]
    del df['希望小売価格']

    df['mount'] = 'マイクロフォーサーズ'
    df['url'] = df['URL']
    del df['URL']

    # Leftover columns that are not converted. They are dropped only when
    # present, so a page lacking one of these rows does not abort the whole
    # scrape. The "その他" header is matched with either colon form.
    for column in ('レンズフード', 'その他:', 'その他：'):
        if column in df:
            del df[column]
    return df