def get_weights(self):
    """Return runner weights as display strings and as total pounds.

    Returns:
        A ``(wgt, lbs)`` tuple. ``wgt`` holds ``'<st>-<lb>'`` strings built
        from the paired ``st``/``lb`` text values; ``lbs`` holds the same
        weights as integers, computed as ``st * 14 + lb``.
    """
    stones = xpath(self.doc, 'span', 'st', 'data-ending', fn='/text()')
    pounds = xpath(self.doc, 'span', 'lb', 'data-ending', fn='/text()')

    # Pair the values once; zip() truncates to the shorter of the two lists.
    pairs = list(zip(stones, pounds))

    weights = [f'{stone}-{pound}' for stone, pound in pairs]
    total_pounds = [int(stone) * 14 + int(pound) for stone, pound in pairs]
    return weights, total_pounds
def get_numbers(self):
    """Return the saddle cloth number texts with leading/trailing '.' removed."""
    cloth_numbers = []
    for raw_number in xpath(self.doc, 'span', 'rp-horseTable__saddleClothNo',
                            'class', fn='/text()'):
        cloth_numbers.append(raw_number.strip('.'))
    return cloth_numbers
def get_starting_prices(self):
    """Return the starting price text for each runner.

    Any 'No Odds' substring is removed and the remainder is stripped of
    surrounding whitespace, so a runner without odds yields ''.
    """
    prices = []
    for raw_price in xpath(self.doc, 'span', 'rp-horseTable__horse__price',
                           'class', fn='/text()'):
        without_placeholder = raw_price.replace('No Odds', '')
        prices.append(without_placeholder.strip())
    return prices
def get_draws(self):
    """Return the draw text for each runner without padding or parentheses."""
    cleaned = []
    for raw_draw in xpath(self.doc, 'sup', 'rp-horseTable__pos__draw',
                          'class', fn='/text()'):
        # Turn non-breaking spaces into ordinary ones so strip() removes them.
        text = raw_draw.replace(u'\xa0', u' ')
        text = text.strip()
        cleaned.append(text.strip("()"))
    return cleaned
def get_names_horse(self):
    """Return each horse name followed by its nationality suffix.

    Names are passed through ``self.clean`` and joined with the matching
    entry from ``self.get_nationaliies()`` using a single space.
    """
    names = xpath(self.doc, 'a', 'link-horseName', fn='/text()')
    nationalities = self.get_nationaliies()
    return [
        f"{self.clean(name)} {nationality}"
        for name, nationality in zip(names, nationalities)
    ]
def get_positions(self):
    """Return the stripped finishing position text for each runner.

    Only every second matched text node is kept (indices 0, 2, 4, ...).

    Raises:
        VoidRaceError: if the first position reads 'VOI'.

    NOTE(review): another ``get_positions`` definition appears later in this
    source; if both live in the same class, the later one takes effect.
    """
    matches = xpath(self.doc, 'span', 'text-horsePosition', fn='/text()')

    # Equivalent to deleting the odd-indexed entries.
    finishing = [text.strip() for text in matches[::2]]

    if finishing and finishing[0] == 'VOI':
        raise VoidRaceError(f'VoidRaceError: {self.url}')
    return finishing
def get_positions(self):
    """Return the stripped finishing position text for each runner.

    Only every second matched text node is kept (indices 0, 2, 4, ...).

    Returns:
        A list of position strings; empty when the document contains no
        position elements.

    Raises:
        VoidRaceError: if the first position reads 'VOI'. The message
            carries the race URL so the failing page can be identified.
    """
    positions = xpath(self.doc, 'span', 'text-horsePosition', fn='/text()')
    del positions[1::2]
    positions = [pos.strip() for pos in positions]

    # Guard the index: a page without any positions must not raise IndexError.
    if positions and positions[0] == 'VOI':
        raise VoidRaceError(f'VoidRaceError: {self.url}')

    return positions
def get_distance_btn(self):
    """Return overall and individual beaten distances for every runner.

    Each ``rp-horseTable__pos__length`` element is inspected for child
    ``span`` elements. With exactly two children, the first gives the
    beaten distance and the second (with '[' / ']' stripped) the overall
    distance. Otherwise the first child supplies both values, except for
    'dht', which reuses the previous overall distance when there is one.
    Missing text counts as '0'.

    All collected values go through ``self.distance_to_decimal`` and both
    lists are padded with '' up to ``len(self.runner_info['pos'])``.

    Returns:
        A ``(ovr_btn, btn)`` tuple of lists.

    Note:
        An AttributeError while converting the beaten distances prints the
        race URL and terminates the process via ``sys.exit()``.
    """
    beaten = []
    overall = []

    for element in xpath(self.doc, 'span', 'rp-horseTable__pos__length', 'class'):
        spans = element.findall('span')
        first_text = spans[0].text

        if len(spans) == 2:
            second_text = spans[1].text
            beaten.append('0' if first_text is None else first_text)
            overall.append('0' if second_text is None else second_text.strip('[]'))
            continue

        if first_text is None:
            beaten.append('0')
            overall.append('0')
        elif first_text == 'dht':
            beaten.append(first_text)
            # Dead heat: repeat the previous overall distance, or fall back
            # to the 'dht' marker itself when nothing precedes it.
            overall.append(overall[-1] if overall else first_text)
        else:
            beaten.append(first_text)
            overall.append(first_text)

    try:
        beaten = [self.distance_to_decimal(dist) for dist in beaten]
    except AttributeError:
        print('btn error: ', self.url)
        sys.exit()

    overall = [self.distance_to_decimal(dist) for dist in overall]

    # Multiplying by a non-positive count yields an empty list, so these are
    # no-ops when a list is already long enough.
    runner_count = len(self.runner_info['pos'])
    overall.extend([''] * (runner_count - len(overall)))
    beaten.extend([''] * (runner_count - len(beaten)))

    return overall, beaten
def get_nationaliies(self):
    """Return the stripped country text per runner, using '(GB)' for blanks."""
    countries = xpath(self.doc, 'span', 'rp-horseTable__horse__country',
                      'class', fn='/text()')
    # An empty stripped string is falsy, so ``or`` supplies the default.
    return [country.strip() or '(GB)' for country in countries]
def get_race_urls_date(dates, region):
    """Collect result-page URLs for the given dates, limited to one region.

    A results index URL is built for each entry of ``dates`` and all of them
    are fetched through ``get_documents``. Every course-name link whose href
    has, as its third '/'-separated component, an id found in
    ``courses(region)`` is kept.

    Args:
        dates: Iterable of values interpolated into the results URL.
        region: Passed to ``courses`` to obtain the accepted course ids.

    Returns:
        A sorted list of unique absolute race URLs.
    """
    day_pages = [f'https://www.racingpost.com/results/{d}' for d in dates]
    documents = asyncio.run(get_documents(day_pages))
    wanted_ids = {course[0] for course in courses(region)}

    found = set()
    for document in documents:
        for link in xpath(document[1], 'a', 'link-listCourseNameLink'):
            href = link.attrib['href']
            if href.split('/')[2] not in wanted_ids:
                continue
            found.add('https://www.racingpost.com' + href)

    return sorted(found)
def get_prizemoney(self):
    """Return one prize money string per runner, aligned with positions.

    The prize texts are stripped and have ',' and '£' removed. The first
    entry is dropped and the list is padded with '' up to the number of
    positions; when no prize text exists at all, every runner gets ''.
    For each 'DSQ' position a '' is inserted at that index and the last
    entry is removed, shifting later prizes down by one place.
    """
    amounts = []
    for text in xpath(self.doc, 'div', 'text-prizeMoney', fn='/text()'):
        amounts.append(text.strip().replace(",", '').replace('£', ''))

    positions = self.runner_info['pos']

    if amounts:
        del amounts[0]
        # A non-positive multiplier gives an empty list, i.e. no padding.
        amounts.extend([''] * (len(positions) - len(amounts)))
    else:
        amounts = [''] * len(positions)

    for index, position in enumerate(positions):
        if position != 'DSQ':
            continue
        amounts.insert(index, '')
        amounts.pop()

    return amounts
def __init__(self, url, document, code, fields):
    """Parse a race result document into race-level and runner-level data.

    Populates ``self.race_info`` (one value per race) and
    ``self.runner_info`` (mostly one list per field), then builds
    ``self.csv_data`` from ``fields``.

    Args:
        url: Race URL; components 4-7 of ``url.split('/')`` are read as
            course id, course, date and race id respectively.
        document: Parsed page handed to the ``find``/``xpath`` helpers.
        code: Stored unchanged as ``race_info['code']``.
        fields: Passed to ``self.create_csv_data``.
    """
    self.url = url
    self.doc = document
    self.race_info = {}
    self.runner_info = {}

    # presumably 'https://www.racingpost.com/results/<id>/<course>/<date>/<race_id>'
    # -- TODO confirm against callers
    url_split = self.url.split('/')

    self.race_info['code'] = code
    self.race_info['date'] = convert_date(url_split[6])
    self.race_info['course'] = self.get_course(url_split[5])
    self.race_info['course_id'] = url_split[4]
    self.race_info['region'] = get_region(url_split[4])
    self.race_info['race_id'] = url_split[7]
    self.race_info['going'] = find(
        self.doc, 'span', 'rp-raceTimeCourseName_condition', property='class')
    self.race_info['surface'] = get_surface(self.race_info['going'])
    self.race_info['off'] = find(self.doc, 'span', 'text-raceTime')
    self.race_info['race_name'] = find(
        self.doc, 'h2', 'rp-raceTimeCourseName__title', property='class')
    self.race_info['class'] = find(
        self.doc, 'span', 'rp-raceTimeCourseName_class', property='class').strip('()')
    self.race_info['race_name'] = self.clean(self.race_info['race_name'])

    # First fallback when the page has no explicit class text.
    if self.race_info['class'] == '':
        self.race_info['class'] = self.get_race_class()

    self.race_info['pattern'] = self.get_race_pattern()
    self.race_info['race_name'] = self.clean_race_name(
        self.race_info['race_name'])
    self.race_info['age_band'], self.race_info[
        'rating_band'] = self.parse_race_bands()

    # Second fallback: only tried when a rating band was parsed.
    if self.race_info[
            'class'] == '' and self.race_info['rating_band'] != '':
        self.race_info['class'] = self.get_class_from_rating()

    self.race_info['sex_rest'] = self.sex_restricted()
    self.race_info['dist'], self.race_info['dist_y'],\
        self.race_info['dist_f'], self.race_info['dist_m'] = self.get_race_distances()
    self.race_info['type'] = self.get_race_type()
    self.race_info['ran'] = self.get_num_runners()

    pedigree = Pedigree(
        xpath(self.doc, 'tr', 'block-pedigreeInfoFullResults', fn='/td'))

    self.runner_info['sire_id'] = pedigree.id_sires
    self.runner_info['sire'] = pedigree.sires
    self.runner_info['dam_id'] = pedigree.id_dams
    self.runner_info['dam'] = pedigree.dams
    self.runner_info['damsire_id'] = pedigree.id_damsires
    self.runner_info['damsire'] = pedigree.damsires
    self.runner_info['sex'] = self.get_sexs(pedigree.pedigrees)
    self.runner_info['comment'] = self.get_comments()

    # 'pos' must be stored before get_prizemoney() and get_distance_btn(),
    # both of which read self.runner_info['pos'].
    self.runner_info['pos'] = self.get_positions()
    self.runner_info['prize'] = self.get_prizemoney()
    self.runner_info['draw'] = self.get_draws()
    self.runner_info['ovr_btn'], self.runner_info[
        'btn'] = self.get_distance_btn()
    self.runner_info['sp'] = self.get_starting_prices()
    self.runner_info['dec'] = self.get_decimal_odds()
    self.runner_info['num'] = self.get_numbers()

    # Use the count of parsed runner numbers when the page gave no runner
    # count; otherwise normalise the parsed value to int.
    if not self.race_info['ran']:
        self.race_info['ran'] = len(self.runner_info['num'])
    else:
        self.race_info['ran'] = int(self.race_info['ran'])

    self.runner_info['age'] = self.get_horse_ages()
    self.runner_info['horse'] = self.get_names_horse()
    self.runner_info['horse_id'] = self.get_ids_horse()
    self.runner_info['jockey'] = self.get_names_jockey()
    self.runner_info['jockey_id'] = self.get_ids_jockey()
    self.runner_info['trainer'] = self.get_names_trainer()
    self.runner_info['trainer_id'] = self.get_ids_trainer()
    self.runner_info['owner'] = self.get_names_owner()
    self.runner_info['owner_id'] = self.get_ids_owner()
    self.runner_info['hg'] = self.get_headgear()
    self.runner_info['wgt'], self.runner_info['lbs'] = self.get_weights()
    self.runner_info['or'] = xpath(self.doc, 'td', 'OR', 'data-ending', fn='/text()')
    self.runner_info['rpr'] = xpath(self.doc, 'td', 'RPR', 'data-ending', fn='/text()')
    self.runner_info['ts'] = xpath(self.doc, 'td', 'TS', 'data-ending', fn='/text()')
    self.runner_info['silk_url'] = xpath(
        self.doc, 'img', 'rp-horseTable__silk', 'class', fn='/@src')
    self.runner_info['time'] = self.get_finishing_times()
    self.runner_info['secs'] = self.time_to_seconds(
        self.runner_info['time'])

    # Runs after all runner fields are populated, just before CSV rows are built.
    self.clean_non_completions()
    self.csv_data = self.create_csv_data(fields)
def get_names_trainer(self):
    """Return the cleaned trainer name for each runner.

    Every second matched text node is used, the same ``[::2]`` stride that
    ``get_ids_trainer`` applies to the trainer hrefs, so names and ids stay
    aligned index-for-index. (The previous ``[::2][::2]`` kept only every
    fourth entry and produced a names list half as long as the ids list.)

    Returns:
        A list of trainer names, stripped and passed through ``self.clean``.
    """
    trainers = xpath(self.doc, 'a', 'link-trainerName', fn='/text()')
    return [self.clean(trainer.strip()) for trainer in trainers[::2]]
def get_names_jockey(self):
    """Return the cleaned jockey name for each runner.

    Only every second matched text node is used (indices 0, 2, 4, ...).
    """
    matches = xpath(self.doc, 'a', 'link-jockeyName', fn='/text()')
    names = []
    for raw_name in matches[::2]:
        names.append(self.clean(raw_name.strip()))
    return names
def get_ids_trainer(self):
    """Return the trainer id for each runner.

    The id is the fourth '/'-separated component of the link href; only
    every second matched href is used (indices 0, 2, 4, ...).
    """
    hrefs = xpath(self.doc, 'a', 'link-trainerName', fn='/@href')
    ids = []
    for href in hrefs[::2]:
        parts = href.split('/')
        ids.append(parts[3])
    return ids
def get_ids_jockey(self):
    """Return the jockey id for each runner.

    The id is the fourth '/'-separated component of the link href; only
    every second matched href is used (indices 0, 2, 4, ...).
    """
    hrefs = xpath(self.doc, 'a', 'link-jockeyName', fn='/@href')
    ids = []
    for href in hrefs[::2]:
        parts = href.split('/')
        ids.append(parts[3])
    return ids
def get_ids_horse(self):
    """Return the horse id for each runner.

    The id is the fourth '/'-separated component of each horse link href.
    """
    ids = []
    for href in xpath(self.doc, 'a', 'link-horseName', fn='/@href'):
        parts = href.split('/')
        ids.append(parts[3])
    return ids
def get_horse_ages(self):
    """Return the whitespace-stripped age text for each runner."""
    stripped_ages = []
    for raw_age in xpath(self.doc, 'td', 'horse-age', fn='/text()'):
        stripped_ages.append(raw_age.strip())
    return stripped_ages