def get_team_schedule(team, year):
    """Fetch a team's season schedule from sports-reference.com.

    :param team: string school identifier as used in sports-reference URLs
    :param year: string or int season year
    :return: pandas.DataFrame containing the schedule of the team for the
        given year, or None when the request fails; returns None implicitly
        when no 'Schedule and Results Table' caption is found
    """
    base_url = "https://www.sports-reference.com/cbb/schools/"
    url = base_url + team + '/' + str(year) + '-schedule.html'
    response = get_request(url, headers={"User-Agent": "Mozilla/5.0"})
    if response is None:
        return None
    page = BeautifulSoup(response.text, 'lxml')
    # The schedule table is identified by its caption text, not by id/class.
    for caption in page.find_all('caption'):
        if caption.get_text() != 'Schedule and Results Table':
            continue
        schedule_table = caption.find_parent('table')
        rows = parse_table(schedule_table)
        header = get_table_header(schedule_table)
        return pd.DataFrame(rows,
                            index=np.arange(1, len(rows) + 1),
                            columns=header)
def get_schools_stats(year):
    """Fetch per-school season statistics from sports-reference.com.

    :param year: string or int season year
    :return: pandas.DataFrame of school statistics, augmented with a boolean
        'NCAA' tournament column and a 'Link names' URL-slug column; None
        when the HTTP request fails
    """
    base_url = "https://www.sports-reference.com/cbb/seasons/"
    url = base_url + str(year) + '-school-stats.html'
    response = get_request(url, headers={"User-Agent": "Mozilla/5.0"})
    if response is None:
        return None
    stats_table = BeautifulSoup(response.text, 'lxml').find_all('table')[0]
    # The school's URL slug sits in the 4th path segment of each link.
    slugs = [anchor['href'].split('/')[3]
             for anchor in stats_table.find_all('a', href=True)]
    rows = parse_table(stats_table)
    frame = pd.DataFrame(rows,
                         index=np.arange(1, len(rows) + 1),
                         columns=get_table_header(stats_table, index=1))
    school_col = frame.columns[0]
    # Tournament teams carry a trailing 'NCAA' marker in the school name.
    frame['NCAA'] = [name.endswith('NCAA') for name in frame[school_col]]
    frame[school_col] = frame[school_col].str.replace('NCAA', '').str.strip()
    frame['Link names'] = slugs
    return frame
def parse(self, msg):
    """Parse a table-formatted message into this object's data dict.

    'frame' values are stored both raw (as 'frame_string') and parsed into a
    Frame object; 'deaths' values are parsed as a list; everything else is
    stored verbatim.  (Py2: uses dict.iteritems.)
    """
    for key, value in utils.parse_table(msg).iteritems():
        if key == 'frame':
            self.d['frame_string'] = value
            self.d['frame'] = frame.Frame.parse_from(value)
        elif key == 'deaths':
            self.d['deaths'] = utils.parse_list(value)
        else:
            self.d[key] = value
def parse_membership_fees(self):
    """Parse the membership-fee table for each pricing tab.

    :return: dict mapping each tab id ('hot_desk', 'dedicated_desk',
        'private_office') to the parsed rows of that tab's fee table
    """
    res = {}
    tabs_ids = ["hot_desk", "dedicated_desk", "private_office"]
    for tab_id in tabs_ids:
        table = self.main_soup.select_one("#%s table" % tab_id)
        data = parse_table(table)
        # BUG FIX: was res["tab_id"] = data — the literal string key was
        # overwritten on every iteration, so only the last tab's data
        # survived (under the wrong key). Key by the actual tab id.
        res[tab_id] = data
    # NOTE: the original also selected ".tab-content" into an unused local;
    # that dead lookup has been removed.
    return res
def test_cinder_lvs_volume_on_node(host):
    """Check every cinder volume (and each of its snapshots) exists as an LV
    on the backing node (VOLEXISTS / SNAPEXISTS)."""
    # List all volumes with their backing hosts via the utility container.
    list_cmd = "{} cinder list --all-t --fields os-vol-host-attr:host '".format(
        os_pre)
    volume_rows = utils.parse_table(host.run(list_cmd).stdout)[1]
    for volume_id, volume_host in volume_rows:
        # Host field looks like 'node@backend'; keep the short node name.
        node = volume_host.split('@')[0].split('.')[0]
        # VOLEXISTS test
        host.run_expect(
            [0], "{} {} lvs | grep volume-{}".format(ssh_pre, node, volume_id))
        snap_cmd = "{} cinder snapshot-list --all- --volume-id={} '".format(
            os_pre, volume_id)
        snapshot_rows = utils.parse_table(host.run(snap_cmd).stdout)[1]
        for snapshot in snapshot_rows:
            # SNAPEXISTS test
            host.run_expect(
                [0], "{} {} lvs | grep _snapshot-{}".format(
                    ssh_pre, node, snapshot[1]))
def team_extractor(local=False):
    """Scrape the teams page and return a dict keyed by normalized team name.

    :param local: when True, parse the cached local copy instead of fetching
    :return: dict of normalized team name -> [last, second-last, third-last
        columns of the source row]
    """
    soup = get_html_soup(LOCAL_PATH, LINK, local)
    content = soup.find(id='pageContent').find(attrs={'class': 'content'})
    rows = []
    for table in content.find_all('table'):
        rows.extend(parse_table(table))
    # Skip empty rows; later duplicates overwrite earlier ones, as before.
    return {normalize(row[-4]): [row[-1], row[-2], row[-3]]
            for row in rows if len(row) > 0}
def team_rate_extractor(local=False):
    """Scrape team ratings and return team names ordered by rating, best first.

    :param local: when True, parse the cached local copy instead of fetching
    :return: tuple of normalized team names sorted by descending rating
    """
    soup = get_html_soup(LOCAL_PATH, LINK, local)
    content = soup.find(id='pageContent').find(attrs={'class': 'content'})
    rows = []
    for table in content.find_all('table'):
        rows.extend(parse_table(table))
    ratings = []
    for row in rows:
        if not row:
            continue
        # Wider rows carry the team name five columns from the end.
        name_index = -5 if len(row) >= 5 else -2
        ratings.append([normalize(row[name_index]), row[-1]])
    ratings.sort(key=lambda pair: float(pair[1]), reverse=True)
    # Transpose to peel off just the ordered names (a tuple, as before).
    return list(zip(*ratings))[0]
def test_cinder_verify_attach(host):
    """For each attached cinder volume, confirm the volume id appears in the
    owning instance's libvirt domain XML (ATTATCHED test)."""
    # List all volumes (with attachments) via the utility container.
    listing = host.run("{} cinder list --all-t '".format(os_pre)).stdout
    for row in utils.parse_table(listing)[1]:
        volume_id = row[0]
        attachment = row[7]
        if not attachment:
            # Unattached volume — nothing to verify.
            continue
        show_cmd = "{} openstack server show {} -f json '".format(
            os_pre, attachment)
        server = json.loads(host.run(show_cmd).stdout)
        hypervisor = server['OS-EXT-SRV-ATTR:hypervisor_hostname'].split(
            '.')[0]
        instance_name = server['OS-EXT-SRV-ATTR:instance_name']
        # ATTATCHED test
        host.run_expect(
            [0], "{} {} virsh dumpxml {} | grep {}".format(
                ssh_pre, hypervisor, instance_name, volume_id))
def get_ken_pomeroys_rating(year):
    """Fetch Ken Pomeroy's team ratings from kenpom.com for one season.

    :param year: string or int season year
    :return: pandas.DataFrame of ratings (seed digits stripped from team
        names), or None when the HTTP request fails
    """
    url = "https://kenpom.com/index.php?y=" + str(year)
    response = get_request(
        url,
        headers={
            "User-Agent":
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/7046A194A"
        })
    if response is None:
        return None
    ratings_table = BeautifulSoup(response.text, 'lxml').find_all('table')[0]
    raw_rows = np.array(parse_table(ratings_table))
    columns = get_table_header(ratings_table, index=1)
    # Keep only the value columns; odd indices past 5 are rank sub-columns.
    mask = [1, 2, 3, 4, 5, 7, 9, 11, 13, 15, 17, 19]
    cleaned_rows = [row[mask] for row in raw_rows]
    frame = pd.DataFrame(cleaned_rows,
                         index=np.arange(1, len(raw_rows) + 1),
                         columns=columns)
    team_col = frame.columns[0]
    # Team names carry tournament seeds ("Duke 1") — strip digits/spaces.
    frame[team_col] = frame[team_col].str.strip('0123456789 ')
    return frame
def get_espn_bpi(year):
    """Scrape ESPN's BPI rankings for a season (pages 1-8).

    :param year: string or int season year
    :return: pandas.DataFrame of all parsed pages.  Legacy behavior is kept
        for a failed request: the partially collected row list is returned
        as-is.  When no page yields a table, an empty DataFrame is returned
        (the original raised NameError on unbound `table` in that case).
    """
    base_url = "http://www.espn.com/mens-college-basketball/bpi/_/view/bpi/season/"
    data = []
    columns = None
    for i in range(1, 9):
        url = base_url + str(year) + '/page/' + str(i)
        r = get_request(
            url,
            headers={
                "User-Agent":
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/7046A194A"
            })
        if r is None:
            # Legacy: a failed request short-circuits with the raw list.
            return data
        soup = BeautifulSoup(r.text, 'lxml')
        tables = soup.find_all('table')
        # BUG FIX: was `try: table = soup.find_all('table')[1] except: break`.
        # The bare except swallowed every error, and after an immediate break
        # `table` was unbound when building `columns` below.
        if len(tables) < 2:
            break
        table = tables[1]
        data.extend(parse_table(table))
        if columns is None:
            columns = ['ranking'] + get_table_header(table, index=0)
    if columns is None:
        # No page produced a table — return an empty frame instead of crashing.
        return pd.DataFrame(data)
    return pd.DataFrame(data, columns=columns)
# Build the efficiency-corrected signal-yield distribution in (Y, PT) bins
# and fill it into a ROOT TH2F.  Python 2 script (uses xrange); relies on
# star-imported constants from config (nBins_Y, nBins_PT, x_* limits, os).
import inspect
from pprint import pprint
from itertools import product
from config import *
import ROOT as R
from ROOT import RooFit as RF

# Fix the RooFit random seed so repeated fits are reproducible.
R.RooRandom.randomGenerator().SetSeed(54321)
import shapes
import utils

# Directory containing this script (resolved via the current frame).
local_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))

# parse_table returns (values, errors) grids per (Y, PT) bin: [0] = values,
# [1] = associated errors.
efficiencies = utils.parse_table("../efficiencies/Efficiencies.txt", nBins_Y, nBins_PT)[0]
efficiencies_err = utils.parse_table("../efficiencies/Efficiencies.txt", nBins_Y, nBins_PT)[1]
yields = utils.parse_table("../fitting/Signal_Yields.txt", nBins_Y, nBins_PT)[0]
yields_err = utils.parse_table("../fitting/Signal_Yields.txt", nBins_Y, nBins_PT)[1]

# 2D histogram: X axis = rapidity (eta label), Y axis = transverse momentum.
yield_distr = R.TH2F("yield_distr", "Yield distribution",
                     nBins_Y, x_Y_LL, x_Y_UL,
                     nBins_PT, x_PT_LL, x_PT_UL)
yield_distr.GetXaxis().SetTitle("#eta")
yield_distr.GetYaxis().SetTitle("P^{T} [GeV/c]")

# NOTE(review): these tables are built as nBins_PT rows x nBins_Y columns,
# but the loop below indexes [i][j] with i ranging over nBins_Y — this is
# only consistent if nBins_Y == nBins_PT; confirm against config.
yield_distr_table = [[0 for x in xrange(nBins_Y)] for x in xrange(nBins_PT)]
yield_distr_table_err = [[0 for x in xrange(nBins_Y)] for x in xrange(nBins_PT)]

for i, j in product(range(nBins_Y), range(nBins_PT)):
    # Efficiency-corrected yield per bin.
    yield_distr_table[i][j] = float(yields[i][j]) / float(efficiencies[i][j])
    # Propagate relative errors of yield and efficiency in quadrature.
    yield_distr_table_err[i][j] = float(yields[i][j]) / float(efficiencies[i][j]) * R.TMath.sqrt((float(efficiencies_err[i][j]) / float(efficiencies[i][j]))**2 + (float(yields_err[i][j]) / float(yields[i][j]))**2)
    # ROOT bins are 1-based.
    yield_distr.SetBinContent(i + 1, j + 1, yield_distr_table[i][j])
def get_list(self):
    """Run `<binary> list` and return its output parsed into a table."""
    raw_output = subprocess.check_output([self._binpath, 'list'])
    return parse_table(raw_output)
# Setup for the efficiency-corrected signal-yield distribution in (Y, PT)
# bins.  Python 2 script (uses xrange); relies on star-imported constants
# from config (nBins_Y, nBins_PT, x_* axis limits, os).
import inspect
from pprint import pprint
from itertools import product
from config import *
import ROOT as R
from ROOT import RooFit as RF

# Fix the RooFit random seed so repeated fits are reproducible.
R.RooRandom.randomGenerator().SetSeed(54321)
import shapes
import utils

# Directory containing this script (resolved via the current frame).
local_dir = os.path.dirname(
    os.path.abspath(inspect.getfile(inspect.currentframe())))

# parse_table returns (values, errors) grids per (Y, PT) bin: [0] = values,
# [1] = associated errors.
efficiencies = utils.parse_table("../efficiencies/Efficiencies.txt", nBins_Y,
                                 nBins_PT)[0]
efficiencies_err = utils.parse_table("../efficiencies/Efficiencies.txt",
                                     nBins_Y, nBins_PT)[1]
yields = utils.parse_table("../fitting/Signal_Yields.txt", nBins_Y,
                           nBins_PT)[0]
yields_err = utils.parse_table("../fitting/Signal_Yields.txt", nBins_Y,
                               nBins_PT)[1]

# 2D histogram: X axis = rapidity (eta label), Y axis = transverse momentum.
yield_distr = R.TH2F("yield_distr", "Yield distribution", nBins_Y, x_Y_LL,
                     x_Y_UL, nBins_PT, x_PT_LL, x_PT_UL)
yield_distr.GetXaxis().SetTitle("#eta")
yield_distr.GetYaxis().SetTitle("P^{T} [GeV/c]")

# Accumulator tables, shaped nBins_PT rows x nBins_Y columns.
# NOTE(review): later code presumably indexes these — verify the row/column
# order matches, since it is only symmetric when nBins_Y == nBins_PT.
yield_distr_table = [[0 for x in xrange(nBins_Y)] for x in xrange(nBins_PT)]
yield_distr_table_err = [[0 for x in xrange(nBins_Y)] for x in xrange(nBins_PT)]