def handle(self, *args, **kwargs):
    """Scrape one category of db.netkeiba.com listings and store the rows.

    The first of ``trainer``, ``jockey``, ``race``, ``horse`` or
    ``raceresult`` present in ``args`` (checked in that order) selects what
    is scraped; further targets are ignored and nothing happens when none is
    given.  ``kwargs`` is accepted but not used.

    Rows are written with ``<Model>.objects.update_or_create``, so re-running
    the command updates existing records instead of duplicating them.
    """
    requested = set(args)
    known_targets = ('trainer', 'jockey', 'race', 'horse', 'raceresult')
    if requested.isdisjoint(known_targets):
        # Nothing to scrape, so do not start a browser process at all.
        return

    base_url = 'http://db.netkeiba.com/'
    dr = webdriver.PhantomJS()
    try:
        if 'trainer' in requested:
            self._scrape_people(dr, base_url + '?pid=trainer_top', Trainer)
        elif 'jockey' in requested:
            self._scrape_people(dr, base_url + '?pid=jockey_top', Jockey)
        elif 'race' in requested:
            self._scrape_races(dr, base_url + '?pid=race_search_detail')
        elif 'horse' in requested:
            self._scrape_horses(dr, base_url + '?pid=horse_top')
        elif 'raceresult' in requested:
            self._scrape_race_results(dr, base_url + '?pid=race_search_detail')
    finally:
        # Always shut the headless browser down; otherwise every run (and
        # every crash) leaves a PhantomJS process behind.
        dr.quit()


def _collect_result_rows(self, dr, header_rows, progress=None, max_pages=None):
    """Return the ``<tr>`` rows of every result page reachable from ``dr``.

    ``header_rows`` leading rows of each result table are skipped.
    ``progress`` is an optional format string with one ``{}`` placeholder; it
    is printed with the number of the *next* page before each page is read.
    ``max_pages`` optionally caps how many pages are read.

    Paging stops when the current page has no result table, when the pager
    link for the next page cannot be clicked, or when ``max_pages`` is hit.
    """
    from selenium.common.exceptions import WebDriverException

    rows = []
    next_page = 2
    pages_read = 0
    while True:
        # Parse the page once per iteration and reuse the result.
        tables = BeautifulSoup(dr.page_source).find_all(
            'table', class_='nk_tb_common race_table_01'
        )
        if not tables:
            break
        if progress is not None:
            print(progress.format(next_page))
        rows.extend(tables[0].find_all('tr')[header_rows:])
        pages_read += 1
        if max_pages is not None and pages_read >= max_pages:
            break
        try:
            dr.find_element_by_css_selector(
                'div.pager > a[href="javascript:paging(\'{}\')"]'.format(next_page)
            ).click()
        except WebDriverException:
            # No (clickable) link to the next page: this was the last one.
            # Only Selenium errors are treated this way so that Ctrl-C and
            # programming errors are no longer swallowed.
            break
        next_page += 1
        # Short pause after each page change before reading the page source.
        time.sleep(1)
    return rows


def _scrape_people(self, dr, url, model):
    """Scrape the trainer or jockey search at ``url`` into ``model``.

    Both searches share the same form and are parsed by ``_parse_table``;
    records are keyed on ``name``.
    """
    dr.get(url)
    # NOTE(review): the meaning of these search-form controls is not visible
    # from this code; the click sequence is kept exactly as it was.
    dr.find_element_by_id('check_01').click()
    dr.find_element_by_class_name('form_side_btn').click()
    for box in range(1, 5):
        dr.find_element_by_id('check_bel_{}'.format(box)).click()
    dr.find_element_by_css_selector('input[value=検索]').click()

    # The first two rows of each table are skipped (presumably headers).
    for row in self._collect_result_rows(dr, 2):
        parsed = self._parse_table(row.find_all('td'))
        defaults = parsed.copy()
        defaults.pop('name')
        model.objects.update_or_create(name=parsed['name'], defaults=defaults)


def _store_race_rows(self, rows):
    """Upsert listing ``rows`` as ``Race`` records keyed on name and date."""
    for row in rows:
        parsed = self._parse_race_table(row.find_all('td'))
        defaults = parsed.copy()
        defaults.pop('name')
        defaults.pop('date')
        try:
            Race.objects.update_or_create(
                name=parsed['name'],
                date=parsed['date'],
                defaults=defaults
            )
        except Exception as e:
            # Deliberate best effort: one bad row is reported and skipped so
            # the remaining rows are still stored.
            print('race:{} error:{}'.format(row, e))


def _scrape_races(self, dr, url):
    """Scrape the race search at ``url`` one ``check_Jyo_NN`` box at a time."""
    # NOTE(review): a commented-out line in the original used range(1, 11);
    # the narrower range is kept as found.  Widen it to cover all boxes.
    for venue in range(9, 11):
        dr.get(url)
        dr.find_element_by_id('check_track_1').click()
        dr.find_element_by_id('check_track_2').click()
        print("race:jyo_num:{}".format(venue))
        venue_box_id = 'check_Jyo_{0:02d}'.format(venue)
        dr.find_element_by_id(venue_box_id).click()
        # Third option of the "list" select (presumably results per page).
        count_list = Select(dr.find_element_by_css_selector('select[name=list]'))
        count_list.options[2].click()
        dr.find_element_by_css_selector('input[value=検索]').click()

        rows = self._collect_result_rows(
            dr, 1, "race:jyo_num{}\tnext_page_num:{{}}".format(venue)
        )
        # Kept from the original: toggle the box again once paging is done.
        dr.find_element_by_id(venue_box_id).click()
        self._store_race_rows(rows)


def _scrape_horses(self, dr, url):
    """Scrape the horse search at ``url`` for each configured age window."""
    # NOTE(review): a commented-out line in the original used range(1, 8, 2);
    # the narrower range is kept as found.
    for under_age in range(7, 8, 2):
        over_age = under_age + 1
        dr.get(url)
        dr.find_element_by_id('check_02').click()
        dr.find_element_by_class_name('form_side_btn').click()
        count_list = Select(dr.find_element_by_css_selector('select[name=list]'))
        count_list.options[2].click()
        under_age_list = Select(
            dr.find_element_by_css_selector('select[name=under_age]')
        )
        over_age_list = Select(
            dr.find_element_by_css_selector('select[name=over_age]')
        )
        print("horse:age_range:{}-{}".format(under_age, over_age))
        # The loop values are used directly as option indexes.
        under_age_list.options[under_age].click()
        over_age_list.options[over_age].click()
        dr.find_element_by_css_selector('input[value=検索]').click()

        rows = self._collect_result_rows(dr, 2, "horse:next_page_num:{}")
        for row in rows:
            parsed = self._parse_horse_table(row.find_all('td'))
            defaults = parsed.copy()
            defaults.pop('name')
            Horse.objects.update_or_create(name=parsed['name'], defaults=defaults)


def _scrape_race_results(self, dr, url):
    """Scrape the race search at ``url`` once per year option (indexes 1-11).

    All ten ``check_Jyo_NN`` boxes are ticked for every search and the listed
    rows are stored as ``Race`` records.
    """
    for year_index in range(1, 12):
        dr.get(url)
        dr.find_element_by_id('check_track_1').click()
        dr.find_element_by_id('check_track_2').click()
        print("raceresult:year:{}".format(year_index))
        start_year_list = Select(
            dr.find_element_by_css_selector('select[name=start_year]')
        )
        end_year_list = Select(
            dr.find_element_by_css_selector('select[name=end_year]')
        )
        # Same option index for start and end restricts the search to one
        # entry of the year lists.
        start_year_list.options[year_index].click()
        end_year_list.options[year_index].click()
        for venue in range(1, 11):
            dr.find_element_by_id('check_Jyo_{0:02d}'.format(venue)).click()
        count_list = Select(dr.find_element_by_css_selector('select[name=list]'))
        count_list.options[2].click()
        dr.find_element_by_css_selector('input[value=検索]').click()

        # NOTE(review): the original loop stopped unconditionally after the
        # first result page; that limit is preserved via max_pages=1.  Drop
        # the argument to walk every page.
        # TODO: per-race result pages are not scraped yet.  The original
        # clicked each race link under an ipdb breakpoint without reading the
        # opened page, which only navigated the shared driver away from the
        # listing; those clicks and the breakpoints have been removed.
        rows = self._collect_result_rows(
            dr,
            1,
            "raceresult:year:{}\tnext_page_num:{{}}".format(year_index),
            max_pages=1,
        )
        self._store_race_rows(rows)