def _fill_meta(self, doc):
    """Collect team, score and game-info metadata from a parsed document.

    Reads the tables whose ``id`` is ``Visitor``, ``Home`` and ``GameInfo``.

    Args:
        doc: Parsed document exposing ``xpath()`` (presumably an lxml
            element -- confirm against the caller).

    Returns:
        dict with the keys:
            ``home`` / ``away``: results of ``TP.team_name_parser`` applied
                to the second remaining text node of each team table.
            ``final``: dict with ``home`` / ``away`` holding the first
                remaining text node of each team table (presumably the
                final score -- confirm).
            ``attendance``: int, ``0`` when no attendance figure is found.
            ``date``: first date-like match in the game info, else ``""``.
            ``location``: text following ``at`` up to the next ``;`` in the
                joined game info, else ``""``.

    Raises:
        IndexError: if an ``xpath`` lookup for one of the tables is empty.
        ValueError: if a team table does not leave exactly two text nodes
            after filtering.
    """

    def team_scr(doc, t):
        # Text nodes of the table with id ``t``, dropping nodes equal to the
        # id itself (case-insensitive), nodes containing CRLF, and nodes
        # mentioning "game".
        xp = "".join(['//table[@id="', t, '"]'])
        team = doc.xpath(xp)[0]
        team = [
            s
            for s in team.xpath(".//text()")
            if s.lower() != t.lower() and "\r\n" not in s and "game" not in s.lower()
        ]
        return team

    final = {}
    final["away"], at = tuple(team_scr(doc, "Visitor"))
    final["home"], ht = tuple(team_scr(doc, "Home"))

    # clean team names
    away = TP.team_name_parser(at)
    home = TP.team_name_parser(ht)

    # Flatten the GameInfo table into one "; "-separated string so the
    # regexes below can scan it in a single pass.
    game_info = doc.xpath('//table[@id="GameInfo"]')[0].xpath(".//text()")
    game_info = "; ".join(s.strip() for s in game_info if s.strip() != "")

    att = re.findall(r"(?<=[aA]ttendance\s)(\d*\,?\d*)", game_info)
    # Every part of the pattern is optional, so the first match may be ""
    # or "," (e.g. "Attendance N/A"); int("") would raise ValueError, so
    # treat a digit-less match the same as no match.
    digits = att[0].replace(",", "") if att else ""
    att = int(digits) if digits else 0

    date = re.findall(r"\w+\,?\s\w+\s\d+\,?\s\d+", game_info)
    date = date[0] if date else ""

    loc = re.findall(r"(?<=at\W)([^\;]*)", game_info)
    loc = loc[0] if loc else ""

    return {
        "home": home,
        "away": away,
        "final": final,
        "attendance": att,
        "date": date,
        "location": loc,
    }
def _fill_meta(self, doc):
    """Pull the team names and the per-team leading value out of ``doc``.

    Looks up the tables whose ``id`` is ``Visitor`` and ``Home``. Each table
    must leave exactly two text nodes after filtering: the first is stored
    under ``final`` and the second is passed to ``TP.team_name_parser``.

    Args:
        doc: Parsed document exposing ``xpath()`` (presumably an lxml
            element -- confirm against the caller).

    Returns:
        dict with keys ``home``, ``away`` (parsed team names) and ``final``
        (dict with ``away`` / ``home`` entries).
    """

    def table_texts(tree, table_id):
        # Keep only the text nodes that are not the table id itself
        # (case-insensitive), contain no CRLF, and do not mention "game".
        table = tree.xpath('//table[@id="' + table_id + '"]')[0]
        wanted = table_id.lower()
        kept = []
        for text in table.xpath('.//text()'):
            lowered = text.lower()
            if lowered == wanted:
                continue
            if '\r\n' in text:
                continue
            if 'game' in lowered:
                continue
            kept.append(text)
        return kept

    # Unpacking enforces that each table yields exactly two text nodes.
    away_value, away_raw = table_texts(doc, 'Visitor')
    home_value, home_raw = table_texts(doc, 'Home')

    away_name = TP.team_name_parser(away_raw)
    home_name = TP.team_name_parser(home_raw)

    return {
        'home': home_name,
        'away': away_name,
        'final': {'away': away_value, 'home': home_value},
    }
def _fill_meta(self, doc):
    """Collect team, score and game-info metadata from a parsed document.

    Reads the tables whose ``id`` is ``Visitor``, ``Home`` and ``GameInfo``.

    Args:
        doc: Parsed document exposing ``xpath()`` (presumably an lxml
            element -- confirm against the caller).

    Returns:
        dict with the keys:
            ``home`` / ``away``: results of ``TP.team_name_parser`` applied
                to the second remaining text node of each team table.
            ``final``: dict with ``home`` / ``away`` holding the first
                remaining text node of each team table (presumably the
                final score -- confirm).
            ``attendance``: int, ``0`` when no attendance figure is found.
            ``date``: first date-like match in the game info, else ``''``.
            ``location``: text following ``at`` up to the next ``;`` in the
                joined game info, else ``''``.

    Raises:
        IndexError: if an ``xpath`` lookup for one of the tables is empty.
        ValueError: if a team table does not leave exactly two text nodes
            after filtering.
    """

    def team_scr(doc, t):
        # Text nodes of the table with id ``t``, dropping nodes equal to the
        # id itself (case-insensitive), nodes containing CRLF, and nodes
        # mentioning "game".
        xp = ''.join(['//table[@id="', t, '"]'])
        team = doc.xpath(xp)[0]
        team = [
            s
            for s in team.xpath('.//text()')
            if s.lower() != t.lower() and '\r\n' not in s and 'game' not in s.lower()
        ]
        return team

    final = {}
    final['away'], at = tuple(team_scr(doc, 'Visitor'))
    final['home'], ht = tuple(team_scr(doc, 'Home'))

    # clean team names
    away = TP.team_name_parser(at)
    home = TP.team_name_parser(ht)

    # Flatten the GameInfo table into one '; '-separated string so the
    # regexes below can scan it in a single pass.
    game_info = doc.xpath('//table[@id="GameInfo"]')[0].xpath('.//text()')
    game_info = '; '.join(s.strip() for s in game_info if s.strip() != '')

    att = re.findall(r'(?<=[aA]ttendance\s)(\d*\,?\d*)', game_info)
    # Every part of the pattern is optional, so the first match may be ''
    # or ',' (e.g. "Attendance N/A"); int('') would raise ValueError, so
    # treat a digit-less match the same as no match.
    digits = att[0].replace(',', '') if att else ''
    att = int(digits) if digits else 0

    date = re.findall(r'\w+\,?\s\w+\s\d+\,?\s\d+', game_info)
    date = date[0] if date else ''

    loc = re.findall(r'(?<=at\W)([^\;]*)', game_info)
    loc = loc[0] if loc else ''

    return {
        'home': home,
        'away': away,
        'final': final,
        'attendance': att,
        'date': date,
        'location': loc,
    }
def _fill_meta(self, doc):
    """Collect team, score and game-info metadata from a parsed document.

    Reads the tables whose ``id`` is ``Visitor``, ``Home`` and ``GameInfo``.

    Args:
        doc: Parsed document exposing ``xpath()`` (presumably an lxml
            element -- confirm against the caller).

    Returns:
        dict with the keys:
            ``home`` / ``away``: results of ``TP.team_name_parser`` applied
                to the second remaining text node of each team table.
            ``final``: dict with ``home`` / ``away`` holding the first
                remaining text node of each team table (presumably the
                final score -- confirm).
            ``attendance``: int, ``0`` when no attendance figure is found.
            ``date``: first date-like match in the game info, else ``''``.
            ``location``: text following ``at`` up to the next ``;`` in the
                joined game info, else ``''``.

    Raises:
        IndexError: if an ``xpath`` lookup for one of the tables is empty.
        ValueError: if a team table does not leave exactly two text nodes
            after filtering.
    """

    def team_scr(doc, t):
        # Text nodes of the table with id ``t``, dropping nodes equal to the
        # id itself (case-insensitive), nodes containing CRLF, and nodes
        # mentioning "game".
        xp = ''.join(['//table[@id="', t, '"]'])
        team = doc.xpath(xp)[0]
        team = [s for s in team.xpath('.//text()')
                if s.lower() != t.lower()
                and '\r\n' not in s
                and 'game' not in s.lower()]
        return team

    final = {}
    final['away'], at = tuple(team_scr(doc, 'Visitor'))
    final['home'], ht = tuple(team_scr(doc, 'Home'))

    # clean team names
    away = TP.team_name_parser(at)
    home = TP.team_name_parser(ht)

    # Flatten the GameInfo table into one '; '-separated string so the
    # regexes below can scan it in a single pass.
    game_info = doc.xpath('//table[@id="GameInfo"]')[0].xpath('.//text()')
    game_info = '; '.join(s.strip() for s in game_info if s.strip() != '')

    att = re.findall(r'(?<=[aA]ttendance\s)(\d*\,?\d*)', game_info)
    # Every part of the pattern is optional, so the first match may be ''
    # or ',' (e.g. "Attendance N/A"); int('') would raise ValueError, so
    # treat a digit-less match the same as no match.
    digits = att[0].replace(',', '') if att else ''
    att = int(digits) if digits else 0

    date = re.findall(r'\w+\,?\s\w+\s\d+\,?\s\d+', game_info)
    date = date[0] if date else ''

    loc = re.findall(r'(?<=at\W)([^\;]*)', game_info)
    loc = loc[0] if loc else ''

    return {
        'home': home,
        'away': away,
        'final': final,
        'attendance': att,
        'date': date,
        'location': loc,
    }