Ejemplo n.º 1
0
    def handle(self, *args, **kwargs):
        """Scrape one kind of record from db.netkeiba.com and store it.

        The first matching keyword among the positional ``args`` selects
        what is scraped, checked in this order: ``'trainer'``, ``'jockey'``,
        ``'race'``, ``'horse'``, ``'raceresult'``.  If none of them is
        present, the browser is started and shut down without doing anything.

        The PhantomJS driver is always shut down on exit, including when a
        scraping step raises.
        """
        args = set(args)
        baseurl = 'http://db.netkeiba.com/'
        dr = webdriver.PhantomJS()
        try:
            if 'trainer' in args:
                self._scrape_people(dr, baseurl + '?pid=trainer_top', Trainer)
            elif 'jockey' in args:
                self._scrape_people(dr, baseurl + '?pid=jockey_top', Jockey)
            elif 'race' in args:
                self._scrape_races(dr, baseurl)
            elif 'horse' in args:
                self._scrape_horses(dr, baseurl)
            elif 'raceresult' in args:
                self._scrape_race_results(dr, baseurl)
        finally:
            # Without this the headless browser process outlives the command.
            dr.quit()

    def _find_result_table(self, dr):
        """Return the first result table on the current page, or None."""
        tables = BeautifulSoup(dr.page_source).find_all(
            'table', class_='nk_tb_common race_table_01'
        )
        return tables[0] if tables else None

    def _click_next_page(self, dr, page_number):
        """Click the pager link for ``page_number``; return False on failure."""
        selector = 'div.pager > a[href="javascript:paging(\'{}\')"]'.format(
            page_number
        )
        try:
            dr.find_element_by_css_selector(selector).click()
        except Exception:
            # A missing pager link is the normal end-of-results signal. Any
            # other driver error also ends pagination, but Ctrl-C and
            # SystemExit are no longer swallowed.
            return False
        return True

    def _collect_rows(self, dr, skip_rows, log_prefix=None):
        """Walk every result page and return all table rows as one list.

        ``skip_rows`` is the number of leading ``<tr>`` elements dropped from
        each page's table.  When ``log_prefix`` is given, a progress line
        carrying the next page number is printed for every page visited.
        """
        pages = []
        next_page = 2
        while True:
            table = self._find_result_table(dr)
            if table is None:
                break
            if log_prefix is not None:
                print("{}next_page_num:{}".format(log_prefix, next_page))
            pages.append(table.find_all('tr')[skip_rows:])
            if not self._click_next_page(dr, next_page):
                break
            next_page += 1
            # Pause between page loads before reading the next page.
            time.sleep(1)
        return list(chain.from_iterable(pages))

    def _store_rows(self, model, rows, parser, lookup_fields=('name',),
                    error_label=None):
        """Parse each row and upsert it into ``model``.

        ``parser`` receives the row's ``<td>`` cells and must return a dict.
        The ``lookup_fields`` entries identify the record; every other key is
        passed as ``defaults``.  When ``error_label`` is set, a failing
        ``update_or_create`` is printed and skipped instead of propagating.
        Parsing errors and missing lookup keys always propagate.
        """
        for row in rows:
            parsed = parser(row.find_all('td'))
            defaults = parsed.copy()
            lookup = {field: defaults.pop(field) for field in lookup_fields}
            if error_label is None:
                model.objects.update_or_create(defaults=defaults, **lookup)
                continue
            try:
                model.objects.update_or_create(defaults=defaults, **lookup)
            except Exception as e:
                print('{}:{} error:{}'.format(error_label, row, e))

    def _scrape_people(self, dr, url, model):
        """Scrape the trainer or jockey listing at ``url`` into ``model``."""
        dr.get(url)
        dr.find_element_by_id('check_01').click()
        dr.find_element_by_class_name('form_side_btn').click()
        for n in range(1, 5):
            dr.find_element_by_id('check_bel_{}'.format(n)).click()
        # The submit button is labelled with the Japanese word for "search".
        dr.find_element_by_css_selector('input[value=検索]').click()
        rows = self._collect_rows(dr, skip_rows=2)
        self._store_rows(model, rows, self._parse_table)

    def _scrape_races(self, dr, baseurl):
        """Scrape race listings, one search per ``check_Jyo_NN`` checkbox."""
        # Only checkboxes 09 and 10 are processed; the full set appears to be
        # range(1, 11) -- TODO confirm and widen.
        for jyo in range(9, 11):
            dr.get(baseurl + '?pid=race_search_detail')
            dr.find_element_by_id('check_track_1').click()
            dr.find_element_by_id('check_track_2').click()
            print("race:jyo_num:{}".format(jyo))
            jyo_checkbox_id = 'check_Jyo_{0:02d}'.format(jyo)
            dr.find_element_by_id(jyo_checkbox_id).click()
            count_list = Select(
                dr.find_element_by_css_selector('select[name=list]')
            )
            count_list.options[2].click()
            dr.find_element_by_css_selector('input[value=検索]').click()
            rows = self._collect_rows(
                dr, skip_rows=1, log_prefix="race:jyo_num{}\t".format(jyo)
            )
            # Toggle the checkbox again once paging is done.
            dr.find_element_by_id(jyo_checkbox_id).click()
            self._store_rows(
                Race, rows, self._parse_race_table,
                lookup_fields=('name', 'date'), error_label='race',
            )

    def _scrape_horses(self, dr, baseurl):
        """Scrape horse listings, one search per age range."""
        # Only the range starting at option 7 is processed; the full set
        # appears to be range(1, 8, 2) -- TODO confirm and widen.
        for under_age in range(7, 8, 2):
            dr.get(baseurl + '?pid=horse_top')
            dr.find_element_by_id('check_02').click()
            dr.find_element_by_class_name('form_side_btn').click()
            count_list = Select(
                dr.find_element_by_css_selector('select[name=list]')
            )
            count_list.options[2].click()
            under_age_list = Select(
                dr.find_element_by_css_selector('select[name=under_age]')
            )
            over_age_list = Select(
                dr.find_element_by_css_selector('select[name=over_age]')
            )
            over_age = under_age + 1
            print("horse:age_range:{}-{}".format(under_age, over_age))
            # The ages are used as indexes into the <option> lists.
            under_age_list.options[under_age].click()
            over_age_list.options[over_age].click()
            dr.find_element_by_css_selector('input[value=検索]').click()
            rows = self._collect_rows(dr, skip_rows=2, log_prefix="horse:")
            self._store_rows(Horse, rows, self._parse_horse_table)

    def _scrape_race_results(self, dr, baseurl):
        """Scrape race results, one search per year option (indexes 1-11).

        NOTE(review): this branch looks unfinished.  It stores the rows of
        the search-result table into ``Race`` rather than data from the
        individual race pages it opens -- confirm the intended behaviour.
        """
        for year_index in range(1, 12):
            dr.get(baseurl + '?pid=race_search_detail')
            dr.find_element_by_id('check_track_1').click()
            dr.find_element_by_id('check_track_2').click()
            print("raceresult:year:{}".format(year_index))
            start_count_list = Select(
                dr.find_element_by_css_selector('select[name=start_year]')
            )
            end_count_list = Select(
                dr.find_element_by_css_selector('select[name=end_year]')
            )
            start_count_list.options[year_index].click()
            end_count_list.options[year_index].click()
            for num in range(1, 11):
                dr.find_element_by_id('check_Jyo_{0:02d}'.format(num)).click()
            count_list = Select(
                dr.find_element_by_css_selector('select[name=list]')
            )
            count_list.options[2].click()
            dr.find_element_by_css_selector('input[value=検索]').click()

            pages = []
            next_page = 2
            while True:
                table = self._find_result_table(dr)
                if table is None:
                    break
                print("raceresult:year:{}\tnext_page_num:{}".format(
                    year_index, next_page
                ))
                result_rows = table.find_all('tr')[1:]
                for race in result_rows:
                    css_selector = 'a[href="{}"]'.format(
                        race.find_all('td')[4].find('a').attrs['href']
                    )
                    # NOTE(review): clicking presumably navigates away from
                    # the result list, so later lookups may fail -- verify.
                    dr.find_element_by_css_selector(css_selector).click()
                    # NOTE(review): appends the same rows once per race;
                    # duplicates collapse later in update_or_create.
                    pages.append(result_rows)
                self._click_next_page(dr, next_page)
                # NOTE(review): only the first result page is processed.
                # To enable paging, replace this break with the
                # click-result check, page increment and time.sleep(1)
                # used in _collect_rows.
                break

            self._store_rows(
                Race, chain.from_iterable(pages), self._parse_race_table,
                lookup_fields=('name', 'date'), error_label='race',
            )