def raw_symm_cleaner(symm_directory):
    """Clean raw symmetry CSV exports and concatenate them into SYMM_LIST.

    Args:
        symm_directory: (:obj:`str`) Directory containing the raw CSV files.
    """
    clean_list = []
    for file in os.listdir(symm_directory):
        # Read in the test file; the first 9 rows are export metadata.
        # os.listdir yields bare filenames, so join with the directory
        # (the original passed the bare name to read_csv).
        dataset = pd.read_csv(os.path.join(symm_directory, file), header=9)
        dataset = pd.DataFrame(data=dataset)

        # Change data types to correct ones
        dataset['Date'] = pd.to_datetime(dataset['Date'], format='%m/%d/%Y')

        # Remove last column
        dataset = dataset.iloc[:, :-1]

        clean_list.append(dataset)
        print(file)

    # Concatenate all raw files into a single .csv file with clean data
    big_clean_list = pd.concat(clean_list)
    if not os.path.exists(SYMM_LIST):
        big_clean_list.to_csv(SYMM_LIST, index=False, mode='w')
        # Set the file permissions so that anyone can use it. Bitwise OR is
        # required to combine permission classes; the original boolean `or`
        # short-circuited to S_IRWXO alone.
        oschmod.set_mode(SYMM_LIST,
                         stat.S_IRWXO | stat.S_IRWXU | stat.S_IRWXG)
def _create_relay_info_file(self):
    """Write the relay connection information to a file on disk.

    The file is placed in ``self.credentials_folder`` and named
    ``<resource_group>-<vm_name>-relay_info``; any existing file at that
    path is deleted first so it is effectively overwritten.

    Returns:
        The path of the relay-info file that was written.
    """
    relay_info_dir = self.credentials_folder
    relay_info_filename = None
    if not os.path.isdir(relay_info_dir):
        os.makedirs(relay_info_dir)
    # Filename is only built when both identifiers are present.
    # NOTE(review): if either is missing, relay_info_filename stays None and
    # os.path.join below would raise — presumably callers guarantee both.
    if self.vm_name and self.resource_group_name:
        relay_info_filename = self.resource_group_name + "-" + self.vm_name + "-relay_info"
    relay_info_path = os.path.join(relay_info_dir, relay_info_filename)
    # Overwrite relay_info if it already exists in that folder.
    file_utils.delete_file(
        relay_info_path,
        f"{relay_info_path} already exists, and couldn't be overwritten.")
    file_utils.write_to_file(
        relay_info_path, 'w',
        connectivity_utils.format_relay_info_string(self.relay_info),
        f"Couldn't write relay information to file {relay_info_path}.",
        'utf-8')
    # 0o644: owner read/write, group/other read-only.
    oschmod.set_mode(relay_info_path, 0o644)
    # pylint: disable=broad-except
    # Best-effort expiry banner: failure to format/print the expiration
    # must not fail the whole operation, so any error is only logged.
    try:
        expiration = datetime.datetime.fromtimestamp(self.relay_info.expires_on)
        expiration = expiration.strftime("%Y-%m-%d %I:%M:%S %p")
        colorama.init()
        print(Fore.GREEN +
              f"Generated relay information {relay_info_path} is valid until {expiration} "
              "in local time." + Style.RESET_ALL)
    except Exception as e:
        logger.warning("Couldn't determine relay information expiration. Error: %s", str(e))
    return relay_info_path
def raw_accel_cleaner(accel_directory):
    """Clean raw acceleration CSV exports and concatenate them into ACCEL_LIST.

    Renames the vendor "Acceleration Band N" columns into the medium/high
    acceleration and deceleration metrics used downstream.

    Args:
        accel_directory: (:obj:`str`) Directory containing the raw CSV files.
    """
    clean_list = []
    for file in os.listdir(accel_directory):
        # Read in the test file; the first 9 rows are export metadata.
        # os.listdir yields bare filenames, so join with the directory
        # (the original passed the bare name to read_csv).
        dataset = pd.read_csv(os.path.join(accel_directory, file), header=9)
        dataset = pd.DataFrame(data=dataset)

        # Change data types to correct ones
        dataset['Date'] = pd.to_datetime(dataset['Date'], format='%m/%d/%Y')
        dataset['Total Duration'] = pd.to_datetime(dataset['Total Duration'],
                                                   format='%H:%M:%S')

        # Remove last column
        dataset = dataset.iloc[:, :-1]

        # Change names of Acceleration Bands
        dataset["Medium Decelerations"] = dataset[
            "Acceleration Band 3 Total Effort Count"]
        dataset = dataset.drop(
            columns=['Acceleration Band 3 Total Effort Count'], axis=1)
        dataset["Medium Accelerations"] = dataset[
            "Acceleration Band 6 Total Effort Count"]
        dataset = dataset.drop(
            columns=['Acceleration Band 6 Total Effort Count'], axis=1)
        dataset["High Accelerations"] = dataset[
            "Acceleration Band 7 Total Effort Count"] + dataset[
                "Acceleration Band 8 Total Effort Count"]
        dataset = dataset.drop(columns=[
            'Acceleration Band 7 Total Effort Count',
            'Acceleration Band 8 Total Effort Count'
        ], axis=1)
        dataset["High Decelerations"] = dataset[
            "Acceleration Band 1 Total Effort Count"] + dataset[
                "Acceleration Band 2 Total Effort Count"]
        dataset = dataset.drop(columns=[
            'Acceleration Band 1 Total Effort Count',
            'Acceleration Band 2 Total Effort Count'
        ], axis=1)

        clean_list.append(dataset)
        print(file)

    # Concatenate all raw files into a single .csv file with clean data
    big_clean_list = pd.concat(clean_list)
    if not os.path.exists(ACCEL_LIST):
        big_clean_list.to_csv(ACCEL_LIST, index=False, mode='w')
        # Set the file permissions so that anyone can use it. Bitwise OR is
        # required; the original boolean `or` evaluated to S_IRWXO alone.
        oschmod.set_mode(ACCEL_LIST,
                         stat.S_IRWXO | stat.S_IRWXU | stat.S_IRWXG)
def generate_grobid(overwrite=False):
    '''Convert a pdf to a .tei.xml file via Grobid.

    On first use, downloads and builds the latest Grobid release into
    ./cache. Then starts the Grobid server and batch-converts the PDFs in
    ``pdf_src`` into TEI XML under ``xml_src``.

    Args:
        overwrite: When True, discard previously generated XML and
            re-convert every PDF.
    '''
    base = 'https://github.com/kermitt2/grobid/'
    # get latest Grobid release: GitHub redirects releases/latest to the
    # tagged release URL, whose last path segment is the version string.
    version = requests.get(base + 'releases/latest').url.split('/')[-1]
    if not os.path.exists(f'./cache/grobid-{version}'):
        print('\nInstalling Grobid!')
        try:
            print('Downloading and extracting...')
            zip_path, _ = urllib.request.urlretrieve(
                f'{base}archive/refs/tags/{version}.zip')
            with zipfile.ZipFile(zip_path, 'r') as f:
                f.extractall('./cache')
            print('Installing...')
            # The gradle wrapper is not executable after zip extraction.
            oschmod.set_mode(f'./cache/grobid-{version}/gradlew', '+x')
            subprocess.run(
                f'cd ./cache/grobid-{version} '
                '&& ./gradlew clean install', shell=True)
            exec_dir = f'./cache/grobid-{version}/grobid-home/'
            # Mark the bundled native converter binaries executable too.
            for folder in [exec_dir + 'pdf2xml', exec_dir + 'pdfalto']:
                for root, _, files in os.walk(folder):
                    for f in files:
                        oschmod.set_mode(os.path.join(root, f), '+x')
        except Exception as e:
            # Best-effort install: report and fall through (the conversion
            # below will then fail loudly if Grobid is truly absent).
            print(e)
            print('\nFailed to install Grobid!')
    print('\nConverting PDFs to XMLs via Grobid - this may take some time...')
    # Kill untracked server if exists
    subprocess.run(['./gradlew', '--stop'],
                   cwd=f'./cache/grobid-{version}',
                   stderr=subprocess.DEVNULL)
    p = subprocess.Popen(['./gradlew', 'run'],
                         cwd=f'./cache/grobid-{version}',
                         stdout=subprocess.DEVNULL)
    # Fixed 20-second startup grace period, shown as a progress bar.
    for _ in tqdm(range(20), desc='Initiating Grobid server'):
        time.sleep(1)  # wait for Grodid to run, might need to be longer
    if overwrite:
        shutil.rmtree('./cache/xml')
    client = GrobidClient(config_path='./resources/config.json')
    client.process('processFulltextDocument', pdf_src,
                   tei_coordinates=False, output=xml_src, force=overwrite)
    # Shut the Grobid server back down once conversion finishes.
    p.terminate()
def test_safe_write(self, fixed_lines, expec_code, expec_newlines, expec_err, chmod):
    """Round-trip ``iou.safe_write`` through a temp file and verify output.

    The whole body runs inside ``pytest.raises``: error cases raise
    ``expec_err`` naturally, while the success path escapes by raising
    ``sysu.Pass`` after all assertions hold.
    """
    joined_code = "".join(fixed_lines)
    with pytest.raises(expec_err):
        with sysu.reopenable_temp_file(joined_code) as tmp_path:
            set_mode(str(tmp_path), chmod)
            iou.safe_write(tmp_path, fixed_lines, "utf-8", expec_newlines)
            # Text-mode read must reproduce the expected code exactly.
            with open(tmp_path) as stream:
                assert stream.read() == expec_code
            # Binary read proves the requested newline bytes were written.
            with open(tmp_path, "rb") as stream:
                assert expec_newlines.encode() in stream.readline()
            raise sysu.Pass()
def make_log_dir(log_dir):
    """
    Create logging directory if it does not exist.

    Args:
        log_dir: (:obj:`str`) Path to a directory.
    """
    # exist_ok avoids the check-then-create race the previous
    # `if not os.path.exists(...): os.makedirs(...)` pattern had.
    os.makedirs(log_dir, exist_ok=True)
    # Restrict the log directory to the owner only.
    oschmod.set_mode(log_dir, 0o700)
def test_safe_read(self, content, expec_code, expec_newlines, expec_err, chmod):
    """Exercise ``iou.safe_read`` against a temp file with a given mode.

    Error cases raise ``expec_err`` inside ``pytest.raises``; the success
    path escapes by raising ``sysu.Pass`` once assertions hold.
    """
    with pytest.raises(expec_err):
        # Normalize the fixture's line endings to the expected style first.
        if expec_newlines:
            content = content.replace(os.linesep, expec_newlines)
        with sysu.reopenable_temp_file(content) as tmp_path:
            set_mode(str(tmp_path), chmod)
            # default param: permissions: tuple = (os.R_OK, os.W_OK).
            parsed_code, _, detected_newlines = iou.safe_read(tmp_path)
            assert parsed_code == expec_code
            assert detected_newlines == expec_newlines
            raise sysu.Pass()
def main():
    """Provide main function for CLI."""
    parser = argparse.ArgumentParser(
        description='Change the mode (permissions) of a file or directory')
    parser.add_argument('-R', action='store_true',
                        help='apply mode recursively')
    parser.add_argument('mode', nargs=1,
                        help='octal or symbolic mode of the object')
    parser.add_argument('object', nargs=1, help='file or directory')
    args = parser.parse_args()

    # nargs=1 wraps each positional in a single-element list; unwrap them.
    mode = args.mode[0]
    obj = args.object[0]

    # Dispatch to the recursive variant only when -R was given.
    setter = oschmod.set_mode_recursive if args.R else oschmod.set_mode
    setter(obj, mode)
def run(self):
    """Watch ``DIRECTORY_TO_WATCH`` and dispatch filesystem events.

    Configures a ContainerFilesHandler, opens up the watched directory's
    permissions, starts the observer, then blocks in a poll-sleep loop
    until ``self.stop_execution`` becomes truthy.
    """
    event_handler = ContainerFilesHandler()
    event_handler.log_filename = self.log_filename
    event_handler.EXCLUDES = self.EXCLUDES
    event_handler.module_name = self.module_name
    # set 777 permission and allow container read/write/execute
    oschmod.set_mode(self.DIRECTORY_TO_WATCH, '777')
    self.observer.schedule(event_handler,
                           self.DIRECTORY_TO_WATCH,
                           recursive=True)
    self.observer.start()
    # The observer works on its own threads; this loop only keeps the
    # method alive and checks the stop flag ten times per second.
    while not self.stop_execution:
        try:
            time.sleep(0.1)
        except Exception:
            # Deliberate best-effort: an interrupted sleep should not end
            # the watch loop.
            pass
def bw(bw_csv_path='C:\\Users\\Kyle Voigt\\Desktop\\fb_catapult\\raw_files\\bodyweight.csv'):
    """Clean the bodyweight CSV and write per-player median weights to BW_LIST.

    Args:
        bw_csv_path: (:obj:`str`) Path to the raw bodyweight CSV. Defaults
            to the original hard-coded location, so existing callers keep
            working unchanged.
    """
    # Read in the test file
    dataset = pd.read_csv(bw_csv_path)
    dataset = pd.DataFrame(data=dataset)

    # Change data types to correct ones
    dataset['Date'] = pd.to_datetime(dataset['Date'], format='%m/%d/%Y')
    dataset['Bodyweight (lbs)'] = pd.to_numeric(dataset['Bodyweight (lbs)'])
    # Drop rows without a recorded bodyweight before converting to kg.
    dataset = dataset[dataset['Bodyweight (lbs)'].notna()]
    dataset['Bodyweight (kg)'] = dataset['Bodyweight (lbs)'] / 2.205

    # Find each athlete's median bodyweight in kg
    kg_df = dataset.groupby(['Player Name'], as_index=False).median()
    if not os.path.exists(BW_LIST):
        kg_df.to_csv(BW_LIST, index=False, mode='w')
        # Set the file permissions so that anyone can use it. Bitwise OR is
        # required; the original boolean `or` evaluated to S_IRWXO alone.
        oschmod.set_mode(BW_LIST,
                         stat.S_IRWXO | stat.S_IRWXU | stat.S_IRWXG)
def install(self):
    """
    Execute the watchmaker workers against the system.

    Upon successful execution, the system will be properly provisioned,
    according to the defined configuration and workers.
    """
    self.log.info('Start time: %s', datetime.datetime.now())
    self.log.info('Workers to execute: %s', self.config.keys())

    # Create watchmaker directories
    try:
        os.makedirs(self.system_params['workingdir'])
        # Restrict the prep directory to the owner only.
        oschmod.set_mode(self.system_params['prepdir'], 0o700)
    except OSError:
        # An "already exists" OSError is acceptable; only escalate when the
        # working directory is genuinely missing after the attempt.
        if not os.path.exists(self.system_params['workingdir']):
            msg = ('Unable to create directory - {0}'.format(
                self.system_params['workingdir']))
            self.log.critical(msg)
            raise

    workers_manager = self.workers_manager(
        system_params=self.system_params, workers=self.config)

    try:
        workers_manager.worker_cadence()
    except Exception:
        # Log at critical and re-raise so the failure surfaces to callers.
        msg = 'Execution of the workers cadence has failed.'
        self.log.critical(msg)
        raise

    if self.no_reboot:
        self.log.info('Detected `no-reboot` switch. System will not be '
                      'rebooted.')
    else:
        self.log.info(
            'Reboot scheduled. System will reboot after the script '
            'exits.')
        subprocess.call(self.system_params['restart'], shell=True)
    self.log.info('Stop time: %s', datetime.datetime.now())
def generate_teis(missing_jats):
    """Batch-convert JATS XML files to TEI via the Pub2TEI XSLT stylesheets.

    Files are moved into a temporary directory for the conversion and moved
    back afterwards.

    Args:
        missing_jats: Filenames (relative to ``jats_src``) that still need
            a TEI conversion.
    """
    # Put xmls in a temp dur for batch conversion
    temp_dir = os.getcwd() + '/cache/temp_jats/'
    for f in missing_jats:
        os.renames(jats_src + f, temp_dir + f)
    try:
        print('\nConverting XMLs from JAR to TEI!')
        # The Saxon jar may not carry an executable bit after checkout.
        oschmod.set_mode('./resources/Pub2TEI/Samples/saxon9he.jar', '+x')
        xslt_args = [
            '--parserFeature?uri=http%3A//apache.org/xml/features/nonvalidating/load-external-dtd:false',
            '-dtd:off', '-a:off', '-expand:off',
            '-xsl:./Stylesheets/Publishers.xsl',
            f'-s:{temp_dir}', f'-o:{xml_src}'
        ]
        # Run from inside Pub2TEI so the relative stylesheet paths resolve.
        subprocess.run(['java', '-jar', './Samples/saxon9he.jar', *xslt_args],
                       cwd=f'./resources/Pub2TEI')
    except Exception as e:
        # NOTE(review): quit() terminates the interpreter here, leaving the
        # moved files stranded in temp_dir — confirm this is intended.
        print('An error occured: ', e)
        quit()
    # Return xmls to jar dir
    for f in missing_jats:
        os.renames(temp_dir + f, jats_src + f)
def test_set_recursive():
    """Check file permissions are recursively set."""
    # Build testdir1/testdir2/testdir3 with one file at the top of the
    # tree and one at the bottom.
    topdir = 'testdir1'
    testdir = os.path.join(topdir, 'testdir2', 'testdir3')
    os.makedirs(testdir)
    file_one = os.path.join(topdir, 'file1')
    file_two = os.path.join(testdir, 'file2')
    for target in (file_one, file_two):
        with open(target, "w+") as fileh:
            fileh.write("contents")

    # set permissions to badness: 0o777 spelled out in stat flags
    triple7 = (stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR
               | stat.S_IRGRP | stat.S_IWGRP | stat.S_IXGRP
               | stat.S_IROTH | stat.S_IWOTH | stat.S_IXOTH)
    for target in (topdir, testdir, file_one, file_two):
        oschmod.set_mode(target, triple7)
    time.sleep(1)  # modes aren't always ready to go immediately

    # set permissions - the test
    file_mode = 0o600
    dir_mode = 0o700
    oschmod.set_mode_recursive(topdir, file_mode, dir_mode)
    time.sleep(1)  # modes aren't always ready to go immediately

    # check it out: every directory got dir_mode, every file file_mode
    assert oschmod.get_mode(topdir) == dir_mode
    assert oschmod.get_mode(os.path.join(topdir, 'testdir2')) == dir_mode
    assert oschmod.get_mode(testdir) == dir_mode
    assert oschmod.get_mode(file_one) == file_mode
    assert oschmod.get_mode(file_two) == file_mode

    # clean up
    shutil.rmtree(topdir)
def test_permissions():
    """Tests for stuff.

    Creates throwaway files, applies three representative permission
    masks, and checks get_mode round-trips each one.
    """
    test_dir = "tests"

    def _make_random_file(directory):
        # Create a small file with a random .txt name; return its path.
        path = os.path.join(
            directory,
            ''.join(random.choice(string.ascii_letters) for i in range(10))
            + '.txt')
        with open(path, 'w+') as file_hdl:
            file_hdl.write(path)
        return path

    # The three mode combinations the original spelled out inline.
    modes = [
        stat.S_IRUSR | stat.S_IWUSR,
        stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR | stat.S_IRGRP
        | stat.S_IWGRP | stat.S_IROTH | stat.S_IWOTH,
        stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR | stat.S_IRGRP
        | stat.S_IWGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IWOTH
        | stat.S_IXOTH,
    ]
    for mode in modes:
        path = _make_random_file(test_dir)
        oschmod.set_mode(path, mode)
        assert oschmod.get_mode(path) == mode

    # Clean up every generated .txt file.
    file_list = glob.glob(os.path.join(test_dir, "*txt"))
    for file_path in file_list:
        try:
            os.remove(file_path)
        except FileNotFoundError:
            print("Error while deleting file : ", file_path)
def prepare_logging(log_dir, log_level):
    """
    Prepare the logger for handling messages to a file and/or to stdout.

    Args:
        log_dir: (:obj:`str`)
            Path to a directory. If set, Watchmaker logs to a file named
            ``watchmaker.log`` in the specified directory. Both the
            directory and the file will be created if necessary. If the
            file already exists, Watchmaker appends to it rather than
            overwriting it. If this argument evaluates to ``False``, then
            logging to a file is disabled. Watchmaker will always output
            to stdout/stderr.

        log_level: (:obj:`str`)
            Level to log at. Case-insensitive. Valid options include,
            from least to most verbose:

            - ``critical``
            - ``error``
            - ``warning``
            - ``info``
            - ``debug``
    """
    logformat = (
        '%(asctime)s [%(name)s][%(levelname)-5s][%(process)s]: %(message)s'
    )
    level = LOG_LEVELS[str(log_level).lower()]

    logging.basicConfig(format=logformat, level=level)

    if not log_dir:
        logging.warning(
            'Watchmaker will not be logging to a file!'
        )
    else:
        make_log_dir(log_dir)
        log_filename = os.sep.join((log_dir, 'watchmaker.log'))
        hdlr = logging.FileHandler(log_filename)
        # Restrict the log file to the owner (read/write only).
        oschmod.set_mode(log_filename, 0o600)
        hdlr.setLevel(level)
        hdlr.setFormatter(logging.Formatter(logformat))
        logging.getLogger().addHandler(hdlr)

    # On Windows, mirror log records into the NT event log as well.
    if HAS_PYWIN32:
        ehdlr = logging.handlers.NTEventLogHandler('Watchmaker')
        ehdlr.setLevel(level)
        ehdlr.setFormatter(logging.Formatter(logformat))
        logging.getLogger().addHandler(ehdlr)

    if HAS_PYWIN32 and EC2_CONFIG_DEPS:
        try:
            _enable_ec2_config_event_log()
            _configure_ec2_config_event_log()
        except (IOError, OSError) as exc:  # noqa: B014
            if exc.errno == errno.ENOENT:
                # PY2/PY3-compatible check for FileNotFoundError
                # EC2_CONFIG or EC2_LOG_CONFIG do not exist
                pass
            else:
                raise

    if HAS_PYWIN32 and EC2_LAUNCH_DEPS:
        try:
            _configure_ec2_launch_event_log()
            _schedule_ec2_launch_event_log()
        except (IOError, OSError) as exc:  # noqa: B014
            if exc.errno == errno.ENOENT:
                # PY2/PY3-compatible check for FileNotFoundError
                # EC2_LAUNCH_LOG_CONFIG or 'powershell.exe' do not exist
                pass
            else:
                raise
        except subprocess.CalledProcessError:
            # EC2_LAUNCH_SEND_EVENTS does not exist
            pass
# This function is designed to turn 8-bit codes into words - copyright belongs to @manmoleculo
def bit2word(bits):
    """Decode a flat sequence of 0/1 values into the string it encodes.

    Each consecutive group of 8 bits is read as one character code; any
    trailing bits that do not fill a full byte are ignored.
    """
    chars = []
    for b in range(int(len(bits) / 8)):
        byte = bits[b * 8:(b + 1) * 8]
        chars.append(chr(int(''.join([str(bit) for bit in byte]), 2)))
    return ''.join(chars)


# Get the secret message from the user and put it between { and }
# NOTE: the original used `raw_input`, which only exists on Python 2, yet
# `print(i, end='')` below requires Python 3 -- use `input` so the script
# actually runs on Python 3.
message = "{" + input("Enter your secret message: ") + '}'
print("Your message is : " + message)
msg_bit = (word2bit(message))
print("Your message will turn into ==> " + (''.join(str(x) for x in msg_bit)))
time.sleep(1)
print('\nTransfer begins now...')
# Transmit one bit per 0.1 s by toggling the file's permission mode:
# 0o777 signals a 1-bit, 0o444 signals a 0-bit.
for i in msg_bit:
    if i:
        oschmod.set_mode(filename, 0o777)  # Octal for -rwxrwxrwx access mode
        # os.chmod(filename, 0o777)  # in case oschmod does not work
    else:
        oschmod.set_mode(filename, 0o444)  # Octal for -r--r--r-- access mode
        # os.chmod(filename, 0o444)  # in case oschmod does not work
    time.sleep(0.1)
    sys.stdout.flush()  # You did not see anything, did you?!
    print(i, end='')
print('\n')
def raw_pl_cleaner(pl_directory):
    """Clean raw player-load CSV exports and concatenate them into PL_LIST.

    Collapses the eight vendor "Player Load Band N" columns into the
    active/medium/high player-load metrics used downstream.

    Args:
        pl_directory: (:obj:`str`) Directory containing the raw CSV files.
    """
    clean_list = []
    for file in os.listdir(pl_directory):
        # Read in the test file; the first 9 rows are export metadata.
        # os.listdir yields bare filenames, so join with the directory
        # (the original passed the bare name to read_csv).
        dataset = pd.read_csv(os.path.join(pl_directory, file), header=9)
        dataset = pd.DataFrame(data=dataset)

        # Change data types to correct ones
        dataset['Date'] = pd.to_datetime(dataset['Date'], format='%m/%d/%Y')
        dataset['Total Duration'] = pd.to_datetime(dataset['Total Duration'],
                                                   format='%H:%M:%S')
        # All eight band columns arrive as strings; coerce each to numeric.
        band_cols = [
            'Player Load Band %d Total Player Load' % band
            for band in range(1, 9)
        ]
        for column in band_cols:
            dataset[column] = pd.to_numeric(dataset[column])

        # Remove last column
        dataset = dataset.iloc[:, :-1]

        # Change names of Contact Load Bands: bands 2-8 count as "active",
        # 4-8 as "medium", 6-8 as "high".
        dataset["Active Player Load"] = (
            dataset["Player Load Band 2 Total Player Load"] +
            dataset["Player Load Band 3 Total Player Load"] +
            dataset["Player Load Band 4 Total Player Load"] +
            dataset["Player Load Band 5 Total Player Load"] +
            dataset["Player Load Band 6 Total Player Load"] +
            dataset["Player Load Band 7 Total Player Load"] +
            dataset["Player Load Band 8 Total Player Load"])
        dataset["Medium Player Load"] = (
            dataset["Player Load Band 4 Total Player Load"] +
            dataset["Player Load Band 5 Total Player Load"] +
            dataset["Player Load Band 6 Total Player Load"] +
            dataset["Player Load Band 7 Total Player Load"] +
            dataset["Player Load Band 8 Total Player Load"])
        dataset["High Player Load"] = (
            dataset["Player Load Band 6 Total Player Load"] +
            dataset["Player Load Band 7 Total Player Load"] +
            dataset["Player Load Band 8 Total Player Load"])
        # The raw band columns are no longer needed once aggregated.
        dataset = dataset.drop(columns=band_cols, axis=1)

        clean_list.append(dataset)
        print(file)

    # Concatenate all raw files into a single .csv file with clean data
    big_clean_list = pd.concat(clean_list)
    if not os.path.exists(PL_LIST):
        big_clean_list.to_csv(PL_LIST, index=False, mode='w')
        # Set the file permissions so that anyone can use it. Bitwise OR is
        # required; the original boolean `or` evaluated to S_IRWXO alone.
        oschmod.set_mode(PL_LIST,
                         stat.S_IRWXO | stat.S_IRWXU | stat.S_IRWXG)
def raw_throws_cleaner(throws_directory):
    """Clean raw throw-load CSV exports and concatenate them into THROWS_LIST.

    Renames the vendor throw-band columns into easy/moderate/hard throw and
    throw-load metrics, derives active-throw totals, and computes hard-throw
    percentage metrics.

    Args:
        throws_directory: (:obj:`str`) Directory containing the raw CSV files.
    """
    clean_list = []
    for file in os.listdir(throws_directory):
        # Read in the test file; the first 9 rows are export metadata.
        # os.listdir yields bare filenames, so join with the directory
        # (the original passed the bare name to read_csv).
        dataset = pd.read_csv(os.path.join(throws_directory, file), header=9)
        dataset = pd.DataFrame(data=dataset)

        # Change data types to correct ones
        dataset['Date'] = pd.to_datetime(dataset['Date'], format='%m/%d/%Y')
        dataset['Total Duration'] = pd.to_datetime(dataset['Total Duration'],
                                                   format='%H:%M:%S')

        # Remove last column
        dataset = dataset.iloc[:, :-1]

        # Change names of different throw bands
        dataset['Easy Throws'] = dataset['Throw Load Band 1 Total Throws']
        dataset = dataset.drop(columns=['Throw Load Band 1 Total Throws'],
                               axis=1)
        dataset['Moderate Throws'] = dataset['Throw Load Band 2 Total Throws']
        dataset = dataset.drop(columns=['Throw Load Band 2 Total Throws'],
                               axis=1)
        dataset['Hard Throws'] = dataset[
            'Throw Load Band 3 Total Throws'] + dataset[
                'Throw Load Band 4 Total Throws'] + dataset[
                    'Throw Load Band 5 Total Throws']
        dataset = dataset.drop(columns=[
            'Throw Load Band 3 Total Throws',
            'Throw Load Band 4 Total Throws',
            'Throw Load Band 5 Total Throws'
        ], axis=1)

        # Change names of different throw load bands
        dataset['Easy Throw Load'] = dataset['Throw Load Band 1 Total Load']
        dataset = dataset.drop(columns=['Throw Load Band 1 Total Load'],
                               axis=1)
        dataset['Moderate Throw Load'] = dataset[
            'Throw Load Band 2 Total Load']
        dataset = dataset.drop(columns=['Throw Load Band 2 Total Load'],
                               axis=1)
        dataset['Hard Throw Load'] = dataset[
            'Throw Load Band 3 Total Load'] + dataset[
                'Throw Load Band 4 Total Load'] + dataset[
                    'Throw Load Band 5 Total Load']
        dataset = dataset.drop(columns=[
            'Throw Load Band 3 Total Load', 'Throw Load Band 4 Total Load',
            'Throw Load Band 5 Total Load'
        ], axis=1)

        # Create metric that gets rid of easy throws
        dataset['Active Throws'] = dataset['Moderate Throws'] + dataset[
            'Hard Throws']
        dataset['Active Throw Load'] = dataset[
            'Moderate Throw Load'] + dataset['Hard Throw Load']
        dataset = dataset.drop(columns=['Easy Throws', 'Moderate Throws'],
                               axis=1)
        dataset = dataset.drop(
            columns=['Easy Throw Load', 'Moderate Throw Load'], axis=1)

        # Create metrics for % hard throws and % hard throw load
        dataset['Hard Throw %'] = (dataset['Hard Throws'] /
                                   dataset['Total Throw Count']) * 100
        dataset['Hard Throw Load %'] = (dataset['Hard Throw Load'] /
                                        dataset['Total Throw Load']) * 100

        clean_list.append(dataset)
        print(file)

    # Concatenate all raw files into a single .csv file with clean data
    big_clean_list = pd.concat(clean_list)
    if not os.path.exists(THROWS_LIST):
        big_clean_list.to_csv(THROWS_LIST, index=False, mode='w')
        # Set the file permissions so that anyone can use it. Bitwise OR is
        # required; the original boolean `or` evaluated to S_IRWXO alone.
        oschmod.set_mode(THROWS_LIST,
                         stat.S_IRWXO | stat.S_IRWXU | stat.S_IRWXG)
def raw_nordic_cleaner(nordic_directory):
    """Clean raw NordBord CSVs, merge player context, and write NORDIC_LIST.

    Normalizes dates/names, joins in bodyweight and roster data from the
    database, and derives force-per-bodyweight metrics.

    Args:
        nordic_directory: (:obj:`str`) Directory containing the raw CSV files.
    """
    clean_list = []
    for file in os.listdir(nordic_directory):
        # Read in the test file. os.listdir yields bare filenames, so join
        # with the directory (the original passed the bare name to read_csv).
        dataset = pd.read_csv(os.path.join(nordic_directory, file))
        dataset = pd.DataFrame(data=dataset)
        dataset['Date'] = pd.to_datetime(dataset['Date UTC'],
                                         format='%d/%m/%Y')
        dataset['Time PST'] = pd.to_datetime(dataset['Time UTC'],
                                             format='%I:%M %p')
        dataset = dataset.drop(
            columns=['ExternalId', 'Notes', 'Date UTC', 'Time UTC'], axis=1)
        dataset = dataset.rename(columns={"Name": "Player Name"})

        # Make sure player names are correct
        dataset = dataset.replace({
            'Abdul-Malik McClain': 'Malik McClain',
            'Adonis Russell-Otey': 'Adonis Otey',
            'Briton Allen': 'Briton Allen',
            'Gary Bryant Jr.': 'Gary Bryant',
            'James Toland': 'James Toland',
            'Joshua Jackson, Jr.': 'Josh Jackson',
            'Justin Dedich': 'Justin Dedich',
            'Keoantay Ingram': 'Keaontay Ingram',
            'Maxzell Williams': 'Max Williams',
            'Thomas Maurice': 'Tommy Maurice',
            'Matt Boermeester': 'Matt Boermeester',
            'Michael Jackson III': 'Michael Jackson',
            'Taylor McNamara': 'Taylor McNamara',
            'Trevor Trout': 'Trevor Trout',
            'Tuli Tuipulotu ': 'Tuli Tuipulotu',
            'Tyler Petite': 'Tyler Petite'
        })

        # Fetch bodyweight table
        bw_df = fetch_table(select_bw_table)
        bw_df = pd.DataFrame(
            bw_df,
            columns=["Player Name", "Bodyweight (lbs)", "Bodyweight (kg)"])
        bw_df["Bodyweight (lbs)"] = pd.to_numeric(bw_df["Bodyweight (lbs)"])
        bw_df["Bodyweight (kg)"] = pd.to_numeric(bw_df["Bodyweight (kg)"])

        # Fetch roster table
        roster_df = fetch_table(select_clean_roster_table)
        roster_df = pd.DataFrame(roster_df,
                                 columns=["Player Name", "Position", "Group"])

        # Fetch schedule table
        schedule_df = fetch_table(select_schedule_table)
        schedule_df = pd.DataFrame(schedule_df, columns=[
            "Date", "Day", "Enemy", "Day_Type", "Day_ID", "Attire",
            "MD_Minus"
        ])
        schedule_df["Date"] = pd.to_datetime(schedule_df["Date"],
                                             format='%Y-%m-%d')

        # Join bodyweight and roster context onto each player's rows.
        merged_df = dataset.merge(bw_df, how='left', on=['Player Name'])
        merged_df = merged_df.merge(roster_df, how='left',
                                    on=['Player Name'])

        # Create force divided by bodyweight metrics (lbs and kgs)
        merged_df['L Max Force / BW (lbs)'] = merged_df[
            'L Max Force (N)'] / merged_df['Bodyweight (lbs)']
        merged_df['R Max Force / BW (lbs)'] = merged_df[
            'R Max Force (N)'] / merged_df['Bodyweight (lbs)']
        merged_df['L Max Force / BW (kg)'] = merged_df[
            'L Max Force (N)'] / merged_df['Bodyweight (kg)']
        merged_df['R Max Force / BW (kg)'] = merged_df[
            'R Max Force (N)'] / merged_df['Bodyweight (kg)']
        merged_df['L Avg Force / BW (lbs)'] = merged_df[
            'L Avg Force (N)'] / merged_df['Bodyweight (lbs)']
        merged_df['R Avg Force / BW (lbs)'] = merged_df[
            'R Avg Force (N)'] / merged_df['Bodyweight (lbs)']
        merged_df['L Avg Force / BW (kg)'] = merged_df[
            'L Avg Force (N)'] / merged_df['Bodyweight (kg)']
        merged_df['R Avg Force / BW (kg)'] = merged_df[
            'R Avg Force (N)'] / merged_df['Bodyweight (kg)']

        merged_df = merged_df.sort_values(by=['Player Name', 'Date'])
        clean_list.append(merged_df)
        print(file)

    # Concatenate all raw files into a single .csv file with clean data
    big_clean_list = pd.concat(clean_list)
    if not os.path.exists(NORDIC_LIST):
        big_clean_list.to_csv(NORDIC_LIST, index=False, mode='w')
        # Set the file permissions so that anyone can use it. Bitwise OR is
        # required; the original boolean `or` evaluated to S_IRWXO alone.
        oschmod.set_mode(NORDIC_LIST,
                         stat.S_IRWXO | stat.S_IRWXU | stat.S_IRWXG)
def raw_velo_cleaner(velo_directory):
    """Clean raw velocity CSV exports and concatenate them into VELO_LIST.

    Aggregates the vendor velocity bands into distance and effort metrics
    at the > 5 mph, > 12 mph, and > 16 mph thresholds.

    Args:
        velo_directory: (:obj:`str`) Directory containing the raw CSV files.
    """
    clean_list = []
    for file in os.listdir(velo_directory):
        # Read in the test file; the first 9 rows are export metadata.
        # os.listdir yields bare filenames, so join with the directory
        # (the original passed the bare name to read_csv).
        dataset = pd.read_csv(os.path.join(velo_directory, file), header=9)
        dataset = pd.DataFrame(data=dataset)

        # Change data types to correct ones
        dataset['Date'] = pd.to_datetime(dataset['Date'], format='%m/%d/%Y')
        dataset['Total Duration'] = pd.to_datetime(dataset['Total Duration'],
                                                   format='%H:%M:%S')

        # Remove last column
        dataset = dataset.iloc[:, :-1]

        # Change names of different velocity distance bands. Each threshold
        # metric is computed before the bands it consumed are dropped.
        dataset['Distance > 5 mph'] = (
            dataset['Velocity Band 2 Total Distance'] +
            dataset['Velocity Band 3 Total Distance'] +
            dataset['Velocity Band 4 Total Distance'] +
            dataset['Velocity Band 5 Total Distance'] +
            dataset['Velocity Band 6 Total Distance'] +
            dataset['Velocity Band 7 Total Distance'] +
            dataset['Velocity Band 8 Total Distance'])
        dataset = dataset.drop(columns=[
            'Velocity Band 1 Total Distance',
            'Velocity Band 2 Total Distance',
            'Velocity Band 3 Total Distance',
            'Velocity Band 4 Total Distance'
        ], axis=1)
        dataset['Distance > 12 mph'] = (
            dataset['Velocity Band 5 Total Distance'] +
            dataset['Velocity Band 6 Total Distance'] +
            dataset['Velocity Band 7 Total Distance'] +
            dataset['Velocity Band 8 Total Distance'])
        dataset = dataset.drop(columns=[
            'Velocity Band 5 Total Distance',
            'Velocity Band 6 Total Distance'
        ], axis=1)
        dataset['Distance > 16 mph'] = (
            dataset['Velocity Band 7 Total Distance'] +
            dataset['Velocity Band 8 Total Distance'])
        dataset = dataset.drop(columns=[
            'Velocity Band 7 Total Distance',
            'Velocity Band 8 Total Distance'
        ], axis=1)

        # Change names of different velocity effort bands
        dataset['Efforts > 5 mph'] = (
            dataset['Velocity Band 2 Total Effort Count'] +
            dataset['Velocity Band 3 Total Effort Count'] +
            dataset['Velocity Band 4 Total Effort Count'] +
            dataset['Velocity Band 5 Total Effort Count'] +
            dataset['Velocity Band 6 Total Effort Count'] +
            dataset['Velocity Band 7 Total Effort Count'] +
            dataset['Velocity Band 8 Total Effort Count'])
        dataset = dataset.drop(columns=[
            'Velocity Band 2 Total Effort Count',
            'Velocity Band 3 Total Effort Count',
            'Velocity Band 4 Total Effort Count'
        ], axis=1)
        dataset['Efforts > 12 mph'] = (
            dataset['Velocity Band 5 Total Effort Count'] +
            dataset['Velocity Band 6 Total Effort Count'] +
            dataset['Velocity Band 7 Total Effort Count'] +
            dataset['Velocity Band 8 Total Effort Count'])
        dataset = dataset.drop(columns=[
            'Velocity Band 5 Total Effort Count',
            'Velocity Band 6 Total Effort Count'
        ], axis=1)
        dataset['Efforts > 16 mph'] = (
            dataset['Velocity Band 7 Total Effort Count'] +
            dataset['Velocity Band 8 Total Effort Count'])
        dataset = dataset.drop(columns=[
            'Velocity Band 7 Total Effort Count',
            'Velocity Band 8 Total Effort Count',
            'Relative Max Horizontal Power'
        ], axis=1)

        clean_list.append(dataset)
        print(file)

    # Concatenate all raw files into a single .csv file with clean data
    big_clean_list = pd.concat(clean_list)
    if not os.path.exists(VELO_LIST):
        big_clean_list.to_csv(VELO_LIST, index=False, mode='w')
        # Set the file permissions so that anyone can use it. Bitwise OR is
        # required; the original boolean `or` evaluated to S_IRWXO alone.
        oschmod.set_mode(VELO_LIST,
                         stat.S_IRWXO | stat.S_IRWXU | stat.S_IRWXG)
def _write_cert_file(certificate_contents, cert_file):
    """Write the certificate contents to ``cert_file`` and return its path.

    The file is written UTF-8 encoded and then set to mode 0o644
    (owner read/write, group/other read-only).
    """
    file_body = f"[email protected] {certificate_contents}"
    with open(cert_file, 'w', encoding='utf-8') as handle:
        handle.write(file_body)
    oschmod.set_mode(cert_file, 0o644)
    return cert_file
def ewma_calculation():
    """Build acute:chronic workload ratios (ACWR, 7-day vs 28-day rolling
    sums) and EWMA equivalents (span 7 vs span 35) per player per day, and
    write the merged result to EWMA_LIST.

    Fixes vs the previous version:
    - "Hard Contacts %" was coerced from the wrong column
      ("Hard Contact Load %"); it now reads its own column.
    - Output-file permissions are combined with bitwise `|` instead of
      boolean `or` (which granted only S_IRWXO).
    - An unused nested `total_seconds` helper was removed.
    """

    def _fetch_frame(query, columns):
        # Fetch a postgres table into a DataFrame and parse the shared
        # Date / Total Duration columns when present.
        df = pd.DataFrame(fetch_table(query), columns=columns)
        df["Date"] = pd.to_datetime(df["Date"], format='%Y-%m-%d')
        if "Total Duration" in columns:
            df["Total Duration"] = pd.to_datetime(df["Total Duration"],
                                                  format='%H:%M:%S')
        return df

    def _coerce_numeric(df, cols):
        # Column-by-column pd.to_numeric, preserving original behavior.
        for col in cols:
            df[col] = pd.to_numeric(df[col])

    def _session_sum(df, extra_drops=()):
        # Keep whole-session rows only (Period Name == 'Session' and
        # Period Number == 0), drop bookkeeping columns, then sum per
        # player per day.
        df = df[df['Period Name'] == 'Session']
        df = df[df['Period Number'] == 0]
        df = df.drop(columns=['Period Name', 'Period Number', *extra_drops],
                     axis=1)
        return df.groupby(by=['Player Name', 'Date'], as_index=False).sum()

    # Fetch table from postgres -> Games Table
    games_df = _fetch_frame(select_games_table, [
        "Date", "Game_Num_Year", "Enemy", "Enemy_Abr", "Location", "Result",
        "Points_For", "Points_Against", "Game_Type"
    ])

    # Fetch table from postgres -> Roster Table
    roster_df = pd.DataFrame(fetch_table(select_clean_roster_table),
                             columns=["Player Name", "Position", "Group"])

    # Fetch table from postgres -> Accel Table
    accel_df = _fetch_frame(select_clean_accel_table, [
        "Player Name", "Period Name", "Period Number", "Date",
        "Total Duration", "Medium Decelerations", "Medium Accelerations",
        "High Accelerations", "High Decelerations"
    ])
    accel_df = _session_sum(accel_df)

    # Fetch table from postgres -> IMA Table
    ima_df = _fetch_frame(select_clean_ima_table, [
        "Player Name", "Period Name", "Period Number", "Date",
        "Total Duration", "IMA CoD Right High", "IMA CoD Left High",
        "IMA Accel High", "IMA Decel High", "IMA Jump Count High Band",
        "High IMAs", "IMA Explosive %", "Total Jumps", "Hard CoD"
    ])
    ima_df["IMA Explosive %"] = pd.to_numeric(ima_df["IMA Explosive %"])
    ima_df = _session_sum(ima_df, extra_drops=('IMA Explosive %',))

    # Fetch table from postgres -> Linemen Contacts Table
    contacts_df = _fetch_frame(select_clean_line_contacts_table, [
        "Player Name", "Period Name", "Period Number", "Date",
        "Total Duration", "Total Contact Load", "Total Contacts",
        "Light Contact Load", "Medium Contact Load", "Hard Contact Load",
        "Light Contacts", "Medium Contacts", "Hard Contacts",
        "Hard Contact Load %", "Hard Contacts %", "Active Contact Load",
        "Active Contacts"
    ])
    # BUG FIX: "Hard Contacts %" previously read "Hard Contact Load %".
    _coerce_numeric(contacts_df, [
        "Total Contact Load", "Light Contact Load", "Medium Contact Load",
        "Hard Contact Load", "Active Contact Load", "Hard Contact Load %",
        "Hard Contacts %"
    ])
    contacts_df = _session_sum(
        contacts_df, extra_drops=('Hard Contact Load %', 'Hard Contacts %'))

    # Fetch table from postgres -> Metabolic Power Table
    met_power_df = _fetch_frame(select_clean_met_power_table, [
        "Player Name", "Period Name", "Period Number", "Date",
        "Total Duration", "Meta Energy (Cal/kg)",
        "Total Metabolic Power Average Power",
        "Active Metabolic Power Average Power",
        "High Metabolic Power Average Power",
        "Total Metabolic Power Distance", "Active Metabolic Power Distance",
        "Explosive Distance", "High Metabolic Power Distance",
        "Total Metabolic Power Efforts", "Active Metabolic Power Efforts",
        "High Metabolic Power Efforts"
    ])
    _coerce_numeric(met_power_df, [
        "Meta Energy (Cal/kg)", "Total Metabolic Power Average Power",
        "Active Metabolic Power Average Power",
        "High Metabolic Power Average Power",
        "Total Metabolic Power Distance", "Active Metabolic Power Distance",
        "Explosive Distance", "High Metabolic Power Distance",
        "Total Metabolic Power Efforts", "Active Metabolic Power Efforts",
        "High Metabolic Power Efforts"
    ])
    met_power_df = _session_sum(met_power_df,
                                extra_drops=('Meta Energy (Cal/kg)',))

    # Fetch table from postgres -> Player Load Table
    pl_df = _fetch_frame(select_clean_pl_table, [
        "Player Name", "Period Name", "Period Number", "Date",
        "Total Duration", "Total Player Load", "Active Player Load",
        "Medium Player Load", "High Player Load"
    ])
    _coerce_numeric(pl_df, [
        "Total Player Load", "Active Player Load", "Medium Player Load",
        "High Player Load"
    ])
    pl_df = _session_sum(pl_df)

    # Fetch table from postgres -> Throws Table
    throws_df = _fetch_frame(select_clean_throws_table, [
        "Player Name", "Period Name", "Period Number", "Date",
        "Total Duration", "Total Throw Count", "Total Throw Load",
        "Hard Throws", "Hard Throw Load", "Active Throws",
        "Active Throw Load", "Hard Throw %", "Hard Throw Load %"
    ])
    _coerce_numeric(throws_df, [
        "Total Throw Count", "Total Throw Load", "Hard Throws",
        "Hard Throw Load", "Active Throws", "Active Throw Load",
        "Hard Throw %", "Hard Throw Load %"
    ])
    throws_df = _session_sum(throws_df,
                             extra_drops=('Hard Throw %',
                                          'Hard Throw Load %'))

    # Fetch table from postgres -> Velocity Table
    velo_df = _fetch_frame(select_clean_velo_table, [
        "Player Name", "Period Name", "Period Number", "Date",
        "Total Duration", "Total Distance", "Active Distance",
        "Maximum Velocity", "Distance > 5 mph", "Distance > 12 mph",
        "Distance > 16 mph", "Efforts > 5 mph", "Efforts > 12 mph",
        "Efforts > 16 mph"
    ])
    _coerce_numeric(velo_df, [
        "Total Distance", "Active Distance", "Maximum Velocity",
        "Distance > 5 mph", "Distance > 12 mph", "Distance > 16 mph",
        "Efforts > 5 mph", "Efforts > 12 mph", "Efforts > 16 mph"
    ])
    velo_df = _session_sum(velo_df,
                           extra_drops=('Maximum Velocity',
                                        'Distance > 5 mph'))

    # Fetch table from postgres -> Schedule Table
    schedule_df = _fetch_frame(select_schedule_table, [
        "Date", "Day", "Enemy", "Day_Type", "Day_ID", "Attire", "MD_Minus"
    ])

    # Merge all dataframes into one massive dataframe!!!
    # NOTE(review): velo_df is merged on Date only (no Player Name) — this
    # is how player rows fan out per date; preserved as-is.
    merged_df = schedule_df.merge(games_df, how='outer', on=['Date', 'Enemy'])
    merged_df = merged_df.merge(velo_df, how='left', on=['Date'])
    merged_df = merged_df.merge(roster_df, how='left', on=['Player Name'])
    for frame in (throws_df, pl_df, met_power_df, contacts_df, ima_df,
                  accel_df):
        merged_df = merged_df.merge(frame, how='left',
                                    on=['Date', 'Player Name'])

    # Rolling-window ACWR: acute = 7-day sum, chronic = 28-day sum / 4
    merged_df = merged_df.set_index(merged_df['Date'])
    merged_df = merged_df.sort_index()
    merged_df = merged_df.drop(
        columns=['Date', 'Points_For', 'Points_Against', 'Game_Num_Year'],
        axis=1)
    rolling_7D_df = merged_df.groupby(by=['Player Name']).rolling('7D').sum()
    rolling_28D_df = merged_df.groupby(
        by=['Player Name']).rolling('28D').sum() / 4
    rolling_df = rolling_7D_df.merge(rolling_28D_df,
                                     how='left',
                                     on=['Player Name', 'Date'],
                                     suffixes=("_ATL", "_CTL"))

    # (short label, metric column) pairs shared by the ACWR and EWMA
    # ratio columns; order matters for the output column order.
    ratio_bases = [
        ('TD', 'Total Distance'),
        ('AD', 'Active Distance'),
        ('D>12', 'Distance > 12 mph'),
        ('D>16', 'Distance > 16 mph'),
        ('E>12', 'Efforts > 12 mph'),
        ('E>16', 'Efforts > 16 mph'),
        ('TPL', 'Total Player Load'),
        ('APL', 'Active Player Load'),
        ('Hard Accel', 'High Accelerations'),
        ('Hard Decel', 'High Decelerations'),
        ('IMA Accel High', 'IMA Accel High'),
        ('IMA Decel High', 'IMA Decel High'),
        ('Expl Dist', 'Explosive Distance'),
        ('High IMA', 'High IMAs'),
        ('Total Contact Load', 'Total Contact Load'),
        ('Active Contact Load', 'Active Contact Load'),
        ('Total Contacts', 'Total Contacts'),
        ('Active Contacts', 'Active Contacts'),
    ]
    for label, base in ratio_bases:
        rolling_df[f'{label} ACWR'] = (
            rolling_df[f'{base}_ATL'] / rolling_df[f'{base}_CTL'])

    # Exponentially weighted moving-average version of the same ratios
    ewma_7D_df = merged_df.groupby(by=['Player Name']).ewm(
        span=7, adjust=False).mean()
    ewma_28D_df = merged_df.groupby(by=['Player Name']).ewm(
        span=35, adjust=False).mean()
    ewma_df = ewma_7D_df.merge(ewma_28D_df,
                               how='left',
                               on=['Player Name', 'Date'],
                               suffixes=("_EW_ATL", "_EW_CTL"))
    for label, base in ratio_bases:
        ewma_df[f'{label} EWMA'] = (
            ewma_df[f'{base}_EW_ATL'] / ewma_df[f'{base}_EW_CTL'])

    pd.set_option('display.max_columns', None)
    pd.set_option('display.max_rows', None)

    # Merge ACWR and EWMA tables together
    remerged_df = rolling_df.merge(ewma_df,
                                   how='left',
                                   on=['Player Name', 'Date'])

    # Drop the intermediate EWMA ATL/CTL columns; only the ratios remain.
    ew_bases = [
        'Total Distance', 'Active Distance', 'Distance > 12 mph',
        'Distance > 16 mph', 'Efforts > 5 mph', 'Efforts > 12 mph',
        'Efforts > 16 mph', 'Total Throw Count', 'Total Throw Load',
        'Hard Throws', 'Hard Throw Load', 'Active Throws',
        'Active Throw Load', 'Total Player Load', 'Active Player Load',
        'Medium Player Load', 'High Player Load',
        'Total Metabolic Power Average Power',
        'Active Metabolic Power Average Power',
        'High Metabolic Power Average Power',
        'Total Metabolic Power Distance',
        'Active Metabolic Power Distance', 'Explosive Distance',
        'High Metabolic Power Distance', 'Total Metabolic Power Efforts',
        'Active Metabolic Power Efforts', 'High Metabolic Power Efforts',
        'Total Contact Load', 'Total Contacts', 'Light Contact Load',
        'Medium Contact Load', 'Hard Contact Load', 'Light Contacts',
        'Medium Contacts', 'Hard Contacts', 'Active Contact Load',
        'Active Contacts', 'IMA CoD Right High', 'IMA CoD Left High',
        'IMA Accel High', 'IMA Decel High', 'IMA Jump Count High Band',
        'High IMAs', 'Total Jumps', 'Hard CoD', 'Medium Decelerations',
        'Medium Accelerations', 'High Accelerations', 'High Decelerations',
    ]
    remerged_df = remerged_df.drop(
        columns=([f'{base}_EW_ATL' for base in ew_bases] +
                 [f'{base}_EW_CTL' for base in ew_bases]),
        axis=1)

    # A chronic load of 0 yields inf ratios; normalize them to 1.
    remerged_df = remerged_df.replace(to_replace=[np.inf, -np.inf], value=1)
    # Get indexes for rolling averages as columns
    remerged_df = remerged_df.reset_index()

    if not os.path.exists(EWMA_LIST):
        remerged_df.to_csv(EWMA_LIST, index=False, mode='w')
        # BUG FIX: bitwise `|`, not boolean `or`, to combine mode flags.
        # NOTE(review): chmod assumed to belong inside this guard with
        # to_csv — confirm against original indentation.
        oschmod.set_mode(EWMA_LIST,
                         stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
def raw_met_power_cleaner(met_power_directory):
    """Clean every raw metabolic-power export in *met_power_directory* and
    write one combined CSV to MET_POWER_LIST.

    Per-band average power / distance / effort columns are collapsed into
    Total / Active / Explosive / High aggregates; the per-band duration
    columns are parsed, then discarded.

    Parameters
    ----------
    met_power_directory : str
        Directory containing the raw metabolic-power CSV exports.
    """
    # Column-name templates for the six metabolic power bands.
    avg_cols = [f'Metabolic Power Band {i} Average Power'
                for i in range(1, 7)]
    dist_cols = [f'Metabolic Power Band {i} Total Distance'
                 for i in range(1, 7)]
    dur_cols = [f'Metabolic Power Band {i} Total Duration'
                for i in range(1, 7)]
    # NOTE: effort counts start at band 2 in the raw export.
    effort_cols = [f'Metabolic Power Band {i} Total # Efforts'
                   for i in range(2, 7)]

    def _band_sum(df, cols):
        # Plain `+` chain so NaNs propagate exactly as before.
        return sum(df[c] for c in cols)

    clean_list = []
    for file in os.listdir(met_power_directory):
        # BUG FIX: os.listdir() yields bare names; join with the directory.
        dataset = pd.read_csv(os.path.join(met_power_directory, file),
                              header=9)
        dataset = pd.DataFrame(data=dataset)

        # Change data types to correct ones
        dataset['Date'] = pd.to_datetime(dataset['Date'], format='%m/%d/%Y')
        dataset['Total Duration'] = pd.to_datetime(dataset['Total Duration'],
                                                   format='%H:%M:%S')
        for col in dur_cols:
            dataset[col] = pd.to_datetime(dataset[col], format='%H:%M:%S')

        # Remove last column
        dataset = dataset.iloc[:, :-1]

        # Average power: Total = bands 1-6, Active = 2-6, High = 5-6
        dataset["Total Metabolic Power Average Power"] = _band_sum(
            dataset, avg_cols)
        dataset["Active Metabolic Power Average Power"] = _band_sum(
            dataset, avg_cols[1:])
        dataset["High Metabolic Power Average Power"] = _band_sum(
            dataset, avg_cols[4:])
        dataset = dataset.drop(columns=avg_cols, axis=1)

        # Distance: Total = 1-6, Active = 2-6, Explosive = 3-6, High = 5-6
        dataset["Total Metabolic Power Distance"] = _band_sum(
            dataset, dist_cols)
        dataset["Active Metabolic Power Distance"] = _band_sum(
            dataset, dist_cols[1:])
        dataset["Explosive Distance"] = _band_sum(dataset, dist_cols[2:])
        dataset["High Metabolic Power Distance"] = _band_sum(
            dataset, dist_cols[4:])
        dataset = dataset.drop(columns=dist_cols, axis=1)

        # Per-band durations are not carried into the clean table.
        dataset = dataset.drop(columns=dur_cols, axis=1)

        # Efforts: Total = bands 2-6, Active = 3-6, High = 5-6
        dataset["Total Metabolic Power Efforts"] = _band_sum(
            dataset, effort_cols)
        dataset["Active Metabolic Power Efforts"] = _band_sum(
            dataset, effort_cols[1:])
        dataset["High Metabolic Power Efforts"] = _band_sum(
            dataset, effort_cols[3:])
        dataset = dataset.drop(columns=effort_cols, axis=1)

        clean_list.append(dataset)
        print(file)

    # Concatenate all raw files into a single .csv file with clean data
    big_clean_list = pd.concat(clean_list)
    if not os.path.exists(MET_POWER_LIST):
        big_clean_list.to_csv(MET_POWER_LIST, index=False, mode='w')
        # BUG FIX: bitwise `|`, not boolean `or`, to combine mode flags.
        # NOTE(review): chmod assumed to belong inside this guard with
        # to_csv — confirm against original indentation.
        oschmod.set_mode(MET_POWER_LIST,
                         stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
def all_time_speed():
    """Write MAX_SPEED_LIST: each player's daily max velocity, their
    all-time max velocity, and the daily percentage of the all-time max.

    Reads the clean roster and velocity tables from postgres, keeps only
    whole-session rows (Period Number == 0), and aggregates with max().
    """
    # Fetch table from postgres -> Roster Table
    roster_df = pd.DataFrame(fetch_table(select_clean_roster_table),
                             columns=["Player Name", "Position", "Group"])

    # Fetch table from postgres -> Velocity Table
    velo_cols = [
        "Player Name", "Period Name", "Period Number", "Date",
        "Total Duration", "Total Distance", "Active Distance",
        "Max Velocity (mph)", "Distance > 5 mph", "Distance > 12 mph",
        "Distance > 16 mph", "Efforts > 5 mph", "Efforts > 12 mph",
        "Efforts > 16 mph"
    ]
    velo_df = pd.DataFrame(fetch_table(select_clean_velo_table),
                           columns=velo_cols)
    velo_df["Date"] = pd.to_datetime(velo_df["Date"], format='%Y-%m-%d')
    velo_df["Total Duration"] = pd.to_datetime(velo_df["Total Duration"],
                                               format='%H:%M:%S')
    # Everything after "Total Duration" is a numeric metric.
    for col in velo_cols[5:]:
        velo_df[col] = pd.to_numeric(velo_df[col])

    # Merge this stuff together right meow
    merged_df = velo_df.merge(roster_df, how='left', on=['Player Name'])

    # Keep only player, period info, date, and max speed
    merged_df = merged_df.drop(columns=[
        "Total Duration", "Total Distance", "Active Distance",
        "Distance > 5 mph", "Distance > 12 mph", "Distance > 16 mph",
        "Efforts > 5 mph", "Efforts > 12 mph", "Efforts > 16 mph",
        "Position", "Group"
    ], axis=1)

    # Daily max speed per player (whole-session rows only)
    merged_df = merged_df[merged_df['Period Number'] == 0]
    merged_df = merged_df.groupby(by=['Player Name', 'Date'],
                                  as_index=False).max()
    merged_df = merged_df.drop(columns=["Period Name", "Period Number"],
                               axis=1)

    # All-time max speed per player
    max_speed_df = merged_df.groupby(by=['Player Name'],
                                     as_index=False).max()
    max_speed_df = max_speed_df.drop(columns=["Date"], axis=1)

    # Merge both datasets; the _x/_y suffixes come from the shared
    # "Max Velocity (mph)" column.
    remerged_df = merged_df.merge(max_speed_df,
                                  how='left',
                                  on=['Player Name'])
    remerged_df = remerged_df.rename(
        columns={
            "Max Velocity (mph)_x": "Daily Max Velocity (mph)",
            "Max Velocity (mph)_y": "All-Time Max Velocity (mph)"
        })
    remerged_df['% All-Time Max Velocity'] = (
        remerged_df["Daily Max Velocity (mph)"] /
        remerged_df["All-Time Max Velocity (mph)"]) * 100

    if not os.path.exists(MAX_SPEED_LIST):
        remerged_df.to_csv(MAX_SPEED_LIST, index=False, mode='w')
        # BUG FIX: bitwise `|`, not boolean `or`, to combine mode flags.
        # NOTE(review): chmod assumed to belong inside this guard with
        # to_csv — confirm against original indentation.
        oschmod.set_mode(MAX_SPEED_LIST,
                         stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
def raw_ima_cleaner(ima_directory):
    """Clean every raw IMA export in *ima_directory* and write one
    combined CSV to IMA_LIST.

    The per-direction IMA band columns are summed into Low/Medium/High
    IMA totals, plus derived IMA Explosive %, Total Jumps, and Hard CoD
    columns; the low/medium raw columns are then dropped.

    Parameters
    ----------
    ima_directory : str
        Directory containing the raw IMA CSV exports.
    """
    clean_list = []
    for file in os.listdir(ima_directory):
        # BUG FIX: os.listdir() yields bare names; join with the directory.
        dataset = pd.read_csv(os.path.join(ima_directory, file), header=9)
        dataset = pd.DataFrame(data=dataset)

        # Change data types to correct ones
        dataset['Date'] = pd.to_datetime(dataset['Date'], format='%m/%d/%Y')
        dataset['Total Duration'] = pd.to_datetime(dataset['Total Duration'],
                                                   format='%H:%M:%S')
        # Remove last column
        dataset = dataset.iloc[:, :-1]

        # Sum the four IMA movement directions within each intensity band
        for band in ('Low', 'Medium', 'High'):
            dataset[f"{band} IMAs"] = (dataset[f"IMA CoD Right {band}"] +
                                       dataset[f"IMA CoD Left {band}"] +
                                       dataset[f"IMA Accel {band}"] +
                                       dataset[f"IMA Decel {band}"])

        # Share of high-band IMAs among medium+high
        dataset['IMA Explosive %'] = (
            (dataset["High IMAs"]) /
            (dataset["High IMAs"] + dataset["Medium IMAs"])) * 100
        # Jumps across all three jump bands
        dataset['Total Jumps'] = (dataset['IMA Jump Count Low Band'] +
                                  dataset['IMA Jump Count Med Band'] +
                                  dataset['IMA Jump Count High Band'])
        # High-band changes of direction only
        dataset['Hard CoD'] = (dataset['IMA CoD Right High'] +
                               dataset['IMA CoD Left High'])

        # Drop the raw low/medium columns now folded into the aggregates
        dataset = dataset.drop(columns=[
            'IMA CoD Right Low', 'IMA CoD Right Medium', 'IMA CoD Left Low',
            'IMA CoD Left Medium', 'IMA Accel Low', 'IMA Accel Medium'
        ], axis=1)
        dataset = dataset.drop(columns=[
            'IMA Decel Low', 'IMA Decel Medium', 'IMA Jump Count Low Band',
            'IMA Jump Count Med Band', 'Low IMAs', 'Medium IMAs'
        ], axis=1)

        clean_list.append(dataset)
        print(file)

    # Concatenate all raw files into a single .csv file with clean data
    big_clean_list = pd.concat(clean_list)
    if not os.path.exists(IMA_LIST):
        big_clean_list.to_csv(IMA_LIST, index=False, mode='w')
        # BUG FIX: bitwise `|`, not boolean `or`, to combine mode flags.
        # NOTE(review): chmod assumed to belong inside this guard with
        # to_csv — confirm against original indentation.
        oschmod.set_mode(IMA_LIST,
                         stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
def per_minute():
    """Build per-minute training metrics and write them to PER_MIN_LIST.

    Fetches every cleaned table from postgres, merges them into a single
    wide dataframe keyed on player / period / date, converts session
    duration to minutes, derives per-minute rates for the volume metrics,
    and writes the result to the PER_MIN_LIST csv (only if that file does
    not already exist).
    """

    def total_seconds(timedelta):
        """Convert timedeltas to seconds.

        Accepts a time delta from python's datetime library, a numpy
        ndarray with dtype timedelta64, or a pandas Series of timedeltas,
        and returns the total seconds as a float (or an array/Series of
        floats). Cannot operate on entire DataFrames; use
        ``df.apply(total_seconds)`` for those. Beware: floats are less
        precise than native timedelta objects.
        """
        try:
            seconds = timedelta.total_seconds()
        except AttributeError:  # no method total_seconds (numpy input)
            # use nanoseconds to get highest possible precision in output
            one_second = np.timedelta64(1000000000, 'ns')
            seconds = timedelta / one_second
        return seconds

    # Fetch table from postgres -> Games Table
    games_df = fetch_table(select_games_table)
    games_df = pd.DataFrame(games_df, columns=[
        "Date", "Game_Num_Year", "Enemy", "Enemy_Abr", "Location", "Result",
        "Points_For", "Points_Against", "Game_Type"
    ])
    games_df["Date"] = pd.to_datetime(games_df["Date"], format='%Y-%m-%d')

    # Fetch table from postgres -> Roster Table
    roster_df = fetch_table(select_clean_roster_table)
    roster_df = pd.DataFrame(roster_df,
                             columns=["Player Name", "Position", "Group"])

    # Fetch table from postgres -> Accel Table
    accel_df = fetch_table(select_clean_accel_table)
    accel_df = pd.DataFrame(accel_df, columns=[
        "Player Name", "Period Name", "Period Number", "Date",
        "Total Duration", "Medium Decelerations", "Medium Accelerations",
        "High Accelerations", "High Decelerations"
    ])
    accel_df["Date"] = pd.to_datetime(accel_df["Date"], format='%Y-%m-%d')
    accel_df["Total Duration"] = pd.to_datetime(accel_df["Total Duration"],
                                                format='%H:%M:%S')

    # Fetch table from postgres -> IMA Table
    ima_df = fetch_table(select_clean_ima_table)
    ima_df = pd.DataFrame(ima_df, columns=[
        "Player Name", "Period Name", "Period Number", "Date",
        "Total Duration", "IMA CoD Right High", "IMA CoD Left High",
        "IMA Accel High", "IMA Decel High", "IMA Jump Count High Band",
        "High IMAs", "IMA Explosive %", "Total Jumps", "Hard CoD"
    ])
    ima_df["Date"] = pd.to_datetime(ima_df["Date"], format='%Y-%m-%d')
    ima_df["Total Duration"] = pd.to_datetime(ima_df["Total Duration"],
                                              format='%H:%M:%S')
    ima_df["IMA Explosive %"] = pd.to_numeric(ima_df["IMA Explosive %"])

    # Fetch table from postgres -> Linemen Contacts Table
    contacts_df = fetch_table(select_clean_line_contacts_table)
    contacts_df = pd.DataFrame(contacts_df, columns=[
        "Player Name", "Period Name", "Period Number", "Date",
        "Total Duration", "Total Contact Load", "Total Contacts",
        "Light Contact Load", "Medium Contact Load", "Hard Contact Load",
        "Light Contacts", "Medium Contacts", "Hard Contacts",
        "Hard Contact Load %", "Hard Contacts %", "Active Contact Load",
        "Active Contacts"
    ])
    contacts_df["Date"] = pd.to_datetime(contacts_df["Date"],
                                         format='%Y-%m-%d')
    contacts_df["Total Duration"] = pd.to_datetime(
        contacts_df["Total Duration"], format='%H:%M:%S')
    # BUG FIX: "Hard Contacts %" was previously converted from the
    # "Hard Contact Load %" column, silently overwriting its values.
    for col in ("Total Contact Load", "Light Contact Load",
                "Medium Contact Load", "Hard Contact Load",
                "Active Contact Load", "Hard Contact Load %",
                "Hard Contacts %"):
        contacts_df[col] = pd.to_numeric(contacts_df[col])

    # Fetch table from postgres -> Metabolic Power Table
    met_power_df = fetch_table(select_clean_met_power_table)
    met_power_df = pd.DataFrame(met_power_df, columns=[
        "Player Name", "Period Name", "Period Number", "Date",
        "Total Duration", "Meta Energy (Cal/kg)",
        "Total Metabolic Power Average Power",
        "Active Metabolic Power Average Power",
        "High Metabolic Power Average Power",
        "Total Metabolic Power Distance", "Active Metabolic Power Distance",
        "Explosive Distance", "High Metabolic Power Distance",
        "Total Metabolic Power Efforts", "Active Metabolic Power Efforts",
        "High Metabolic Power Efforts"
    ])
    met_power_df["Date"] = pd.to_datetime(met_power_df["Date"],
                                          format='%Y-%m-%d')
    met_power_df["Total Duration"] = pd.to_datetime(
        met_power_df["Total Duration"], format='%H:%M:%S')
    # Everything past the five key columns is numeric.
    for col in met_power_df.columns[5:]:
        met_power_df[col] = pd.to_numeric(met_power_df[col])

    # Fetch table from postgres -> Player Load Table
    pl_df = fetch_table(select_clean_pl_table)
    pl_df = pd.DataFrame(pl_df, columns=[
        "Player Name", "Period Name", "Period Number", "Date",
        "Total Duration", "Total Player Load", "Active Player Load",
        "Medium Player Load", "High Player Load"
    ])
    pl_df["Date"] = pd.to_datetime(pl_df["Date"], format='%Y-%m-%d')
    pl_df["Total Duration"] = pd.to_datetime(pl_df["Total Duration"],
                                             format='%H:%M:%S')
    for col in pl_df.columns[5:]:
        pl_df[col] = pd.to_numeric(pl_df[col])

    # Fetch table from postgres -> Symmetry Table (no duration column)
    symm_df = fetch_table(select_clean_symm_table)
    symm_df = pd.DataFrame(symm_df, columns=[
        "Player Name", "Period Name", "Period Number", "Date", "Footstrikes",
        "Running Deviation", "Running Imbalance", "Running Series Count"
    ])
    symm_df["Date"] = pd.to_datetime(symm_df["Date"], format='%Y-%m-%d')
    for col in symm_df.columns[4:]:
        symm_df[col] = pd.to_numeric(symm_df[col])

    # Fetch table from postgres -> Throws Table
    throws_df = fetch_table(select_clean_throws_table)
    throws_df = pd.DataFrame(throws_df, columns=[
        "Player Name", "Period Name", "Period Number", "Date",
        "Total Duration", "Total Throw Count", "Total Throw Load",
        "Hard Throws", "Hard Throw Load", "Active Throws",
        "Active Throw Load", "Hard Throw %", "Hard Throw Load %"
    ])
    throws_df["Date"] = pd.to_datetime(throws_df["Date"], format='%Y-%m-%d')
    throws_df["Total Duration"] = pd.to_datetime(throws_df["Total Duration"],
                                                 format='%H:%M:%S')
    for col in throws_df.columns[5:]:
        throws_df[col] = pd.to_numeric(throws_df[col])

    # Fetch table from postgres -> Velocity Table
    velo_df = fetch_table(select_clean_velo_table)
    velo_df = pd.DataFrame(velo_df, columns=[
        "Player Name", "Period Name", "Period Number", "Date",
        "Total Duration", "Total Distance", "Active Distance",
        "Maximum Velocity", "Distance > 5 mph", "Distance > 12 mph",
        "Distance > 16 mph", "Efforts > 5 mph", "Efforts > 12 mph",
        "Efforts > 16 mph"
    ])
    velo_df["Date"] = pd.to_datetime(velo_df["Date"], format='%Y-%m-%d')
    velo_df["Total Duration"] = pd.to_datetime(velo_df["Total Duration"],
                                               format='%H:%M:%S')
    for col in velo_df.columns[5:]:
        velo_df[col] = pd.to_numeric(velo_df[col])

    # Fetch table from postgres -> All-Time Max Speed Table
    all_time_df = fetch_table(select_clean_all_max_speed_table)
    all_time_df = pd.DataFrame(all_time_df, columns=[
        "Player Name", "Date", "Daily Max Velocity (mph)",
        "All-Time Max Velocity (mph)", "% All-Time Max Velocity"
    ])
    all_time_df["Date"] = pd.to_datetime(all_time_df["Date"],
                                         format='%Y-%m-%d')
    for col in all_time_df.columns[2:]:
        all_time_df[col] = pd.to_numeric(all_time_df[col])

    # Fetch table from postgres -> Schedule Table
    schedule_df = fetch_table(select_schedule_table)
    schedule_df = pd.DataFrame(schedule_df, columns=[
        "Date", "Day", "Enemy", "Day_Type", "Day_ID", "Attire", "MD_Minus"
    ])
    schedule_df["Date"] = pd.to_datetime(schedule_df["Date"],
                                         format='%Y-%m-%d')

    # Fetch table from postgres -> Bodyweight Table
    bw_df = fetch_table(select_bw_table)
    bw_df = pd.DataFrame(
        bw_df, columns=["Player Name", "Bodyweight (lbs)", "Bodyweight (kg)"])
    bw_df["Bodyweight (lbs)"] = pd.to_numeric(bw_df["Bodyweight (lbs)"])
    bw_df["Bodyweight (kg)"] = pd.to_numeric(bw_df["Bodyweight (kg)"])

    # Fetch table from postgres -> Week Table
    week_df = fetch_table(select_week_table)
    week_df = pd.DataFrame(week_df, columns=["Date", "Phase", "Week_Num"])
    week_df["Date"] = pd.to_datetime(week_df["Date"], format='%Y-%m-%d')

    # Merge all dataframes into one massive dataframe.
    period_keys = [
        'Player Name', 'Period Name', 'Period Number', 'Date',
        'Total Duration'
    ]
    merged_df = accel_df.merge(ima_df, how='left', on=period_keys)
    merged_df = merged_df.merge(contacts_df, how='left', on=period_keys)
    merged_df = merged_df.merge(met_power_df, how='left', on=period_keys)
    merged_df = merged_df.merge(pl_df, how='left', on=period_keys)
    # Symmetry rows carry no duration, so join without it.
    merged_df = merged_df.merge(
        symm_df, how='left',
        on=['Player Name', 'Period Name', 'Period Number', 'Date'])
    merged_df = merged_df.merge(throws_df, how='left', on=period_keys)
    merged_df = merged_df.merge(velo_df, how='left', on=period_keys)
    merged_df = merged_df.merge(roster_df, how='left', on=['Player Name'])
    merged_df = merged_df.merge(games_df, how='left', on=['Date'])
    merged_df = merged_df.merge(all_time_df, how='left',
                                on=['Player Name', 'Date'])
    merged_df = merged_df.merge(schedule_df, how='left', on=['Date'])
    merged_df = merged_df.merge(week_df, how='left', on=['Date'])
    merged_df = merged_df.merge(bw_df, how='left', on=['Player Name'])

    # games_df and schedule_df both carried an "Enemy" column, which
    # pandas suffixed _x/_y during the merges; rename them apart.
    merged_df = merged_df.rename(columns={
        "Enemy_x": "Opponent",
        "Enemy_y": "Enemy"
    })

    # Calculate Calories from Meta Energy (Cal/kg)
    merged_df['Calories'] = (merged_df['Meta Energy (Cal/kg)'] *
                             merged_df['Bodyweight (kg)'])

    # "Total Duration" was parsed against a 1900-01-01 base date;
    # subtracting that base recovers a timedelta, which becomes minutes.
    zero_date = dt.datetime(1900, 1, 1, 0, 0)
    merged_df['Total Duration (mins)'] = (merged_df['Total Duration'] -
                                          zero_date)
    merged_df['Minutes'] = total_seconds(
        merged_df['Total Duration (mins)']) / 60

    # NOTE: per_minute_df intentionally aliases merged_df (no copy), so
    # the per-minute columns added below land in both frames; each frame
    # then drops the columns it does not want.
    per_minute_df = merged_df

    # Derive the per-minute rate for every volume metric.
    minutes = per_minute_df['Minutes']
    rate_columns = [
        'High Accelerations', 'High Decelerations', 'High IMAs',
        'Total Contact Load', 'Total Contacts', 'Active Contact Load',
        'Active Contacts', 'Total Metabolic Power Distance',
        'Active Metabolic Power Distance', 'Explosive Distance',
        'Total Player Load', 'Active Player Load', 'Medium Player Load',
        'High Player Load', 'Total Throw Count', 'Total Throw Load',
        'Hard Throws', 'Hard Throw Load', 'Active Throws',
        'Active Throw Load', 'Total Distance', 'Active Distance',
        'Distance > 5 mph', 'Distance > 12 mph', 'Distance > 16 mph',
        'Efforts > 5 mph', 'Efforts > 12 mph', 'Efforts > 16 mph'
    ]
    per_minute_names = [col + ' per Minute' for col in rate_columns]
    for col, name in zip(rate_columns, per_minute_names):
        per_minute_df[name] = per_minute_df[col] / minutes

    # Drop the raw totals from the per-minute frame.
    per_minute_df = per_minute_df.drop(rate_columns, axis=1)
    # Drop metrics that have no per-minute counterpart.
    per_minute_df = per_minute_df.drop([
        'Medium Decelerations', 'Medium Accelerations', 'IMA CoD Right High',
        'IMA CoD Left High', 'IMA Accel High', 'IMA Decel High',
        'IMA Jump Count High Band', 'IMA Explosive %', 'Total Jumps',
        'Hard CoD', 'Daily Max Velocity (mph)', 'All-Time Max Velocity (mph)',
        '% All-Time Max Velocity', 'Light Contact Load',
        'Medium Contact Load', 'Hard Contact Load', 'Light Contacts',
        'Medium Contacts', 'Hard Contacts', 'Hard Contact Load %',
        'Hard Contacts %', 'Meta Energy (Cal/kg)',
        'Total Metabolic Power Average Power',
        'Active Metabolic Power Average Power',
        'High Metabolic Power Average Power', 'High Metabolic Power Distance',
        'Total Metabolic Power Efforts', 'Active Metabolic Power Efforts',
        'High Metabolic Power Efforts', 'Footstrikes', 'Running Deviation',
        'Running Imbalance', 'Running Series Count', 'Hard Throw %',
        'Hard Throw Load %', 'Maximum Velocity', 'Total Duration (mins)'
    ], axis=1)
    # Drop roster/game/schedule/week/bodyweight context columns.
    per_minute_df = per_minute_df.drop([
        'Position', 'Group', 'Game_Num_Year', 'Opponent', 'Enemy_Abr',
        'Location', 'Result', 'Points_For', 'Points_Against', 'Game_Type',
        'Day', 'Enemy', 'Day_Type', 'Day_ID', 'Attire', 'MD_Minus', 'Phase',
        'Week_Num', 'Bodyweight (lbs)', 'Bodyweight (kg)'
    ], axis=1)

    # merged_df keeps the raw totals, so strip the per-minute rates
    # (present there because of the aliasing above).
    merged_df = merged_df.drop(per_minute_names, axis=1)

    # Zero-minute rows divide by zero; map the resulting infinities to 1.
    per_minute_df = per_minute_df.replace(to_replace=[np.inf, -np.inf],
                                          value=1)

    if not os.path.exists(PER_MIN_LIST):
        per_minute_df.to_csv(PER_MIN_LIST, index=False, mode='w')
        # Set the file permissions so that anyone can use it.
        # BUG FIX: the flags were combined with boolean "or", which
        # evaluates to just S_IRWXO (0o007); bitwise "|" gives the
        # intended rwx for user, group, and others (0o777).
        oschmod.set_mode(PER_MIN_LIST,
                         stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
def week_metrics():
    """Aggregate session metrics into weekly totals and write WEEK_LIST.

    Fetches the cleaned tables from postgres, restricts velocity data to
    whole-session rows, merges everything on date/player keys, sums the
    volume metrics per player-week while taking the max of weekly top
    speed, and writes the result to the WEEK_LIST csv (only if that file
    does not already exist).
    """
    # Fetch table from postgres -> Roster Table
    roster_df = fetch_table(select_clean_roster_table)
    roster_df = pd.DataFrame(roster_df,
                             columns=["Player Name", "Position", "Group"])

    # Fetch table from postgres -> Schedule Table
    schedule_df = fetch_table(select_schedule_table)
    schedule_df = pd.DataFrame(schedule_df, columns=[
        "Date", "Day", "Enemy", "Day_Type", "Day_ID", "Attire", "MD_Minus"
    ])
    schedule_df["Date"] = pd.to_datetime(schedule_df["Date"],
                                         format='%Y-%m-%d')

    # Fetch table from postgres -> Week Table
    week_df = fetch_table(select_week_table)
    week_df = pd.DataFrame(week_df, columns=["Date", "Phase", "Week_Num"])
    week_df["Date"] = pd.to_datetime(week_df["Date"], format='%Y-%m-%d')
    # Week number is a grouping label, not a quantity to sum.
    week_df['Week_Num'] = week_df['Week_Num'].astype(str)

    # Fetch table from postgres -> Accel Table
    accel_df = fetch_table(select_clean_accel_table)
    accel_df = pd.DataFrame(accel_df, columns=[
        "Player Name", "Period Name", "Period Number", "Date",
        "Total Duration", "Medium Decelerations", "Medium Accelerations",
        "High Decelerations", "High Accelerations"
    ])
    accel_df["Date"] = pd.to_datetime(accel_df["Date"], format='%Y-%m-%d')
    accel_df["Total Duration"] = pd.to_datetime(accel_df["Total Duration"],
                                                format='%H:%M:%S')

    # Fetch table from postgres -> Linemen Contacts Table
    line_contacts_df = fetch_table(select_clean_line_contacts_table)
    line_contacts_df = pd.DataFrame(line_contacts_df, columns=[
        "Player Name", "Period Name", "Period Number", "Date",
        "Total Duration", "Total Contact Load", "Total Contacts",
        "Light Contact Load", "Medium Contact Load", "Hard Contact Load",
        "Light Contacts", "Medium Contacts", "Hard Contacts",
        "Hard Contact Load %", "Hard Contacts %", "Active Contact Load",
        "Active Contacts"
    ])
    line_contacts_df["Date"] = pd.to_datetime(line_contacts_df["Date"],
                                              format='%Y-%m-%d')
    line_contacts_df["Total Duration"] = pd.to_datetime(
        line_contacts_df["Total Duration"], format='%H:%M:%S')
    # Percentages cannot be meaningfully summed into weekly totals.
    line_contacts_df = line_contacts_df.drop(
        columns=['Hard Contact Load %', 'Hard Contacts %'], axis=1)
    # BUG FIX: the Light/Medium/Hard load columns were all converted from
    # "Total Contact Load", clobbering each band with the total; convert
    # each column from itself.
    for col in ("Total Contact Load", "Light Contact Load",
                "Medium Contact Load", "Hard Contact Load",
                "Active Contact Load"):
        line_contacts_df[col] = pd.to_numeric(line_contacts_df[col])

    # Fetch table from postgres -> Player Load Table
    pl_df = fetch_table(select_clean_pl_table)
    pl_df = pd.DataFrame(pl_df, columns=[
        "Player Name", "Period Name", "Period Number", "Date",
        "Total Duration", "Total Player Load", "Active Player Load",
        "Medium Player Load", "High Player Load"
    ])
    pl_df["Date"] = pd.to_datetime(pl_df["Date"], format='%Y-%m-%d')
    pl_df["Total Duration"] = pd.to_datetime(pl_df["Total Duration"],
                                             format='%H:%M:%S')
    for col in pl_df.columns[5:]:
        pl_df[col] = pd.to_numeric(pl_df[col])

    # Fetch table from postgres -> Throws Table
    throws_df = fetch_table(select_clean_throws_table)
    throws_df = pd.DataFrame(throws_df, columns=[
        "Player Name", "Period Name", "Period Number", "Date",
        "Total Duration", "Total Throw Count", "Total Throw Load",
        "Hard Throws", "Hard Throw Load", "Active Throws",
        "Active Throw Load", "Hard Throw %", "Hard Throw Load %"
    ])
    throws_df["Date"] = pd.to_datetime(throws_df["Date"], format='%Y-%m-%d')
    throws_df["Total Duration"] = pd.to_datetime(throws_df["Total Duration"],
                                                 format='%H:%M:%S')
    # Percentages cannot be meaningfully summed into weekly totals.
    throws_df = throws_df.drop(columns=['Hard Throw %', 'Hard Throw Load %'],
                               axis=1)
    for col in ("Total Throw Load", "Hard Throw Load", "Active Throw Load"):
        throws_df[col] = pd.to_numeric(throws_df[col])

    # Fetch table from postgres -> Velocity Table
    velo_df = fetch_table(select_clean_velo_table)
    velo_df = pd.DataFrame(velo_df, columns=[
        "Player Name", "Period Name", "Period Number", "Date",
        "Total Duration", "Total Distance", "Active Distance",
        "Max Velocity (mph)", "Distance > 5 mph", "Distance > 12 mph",
        "Distance > 16 mph", "Efforts > 5 mph", "Efforts > 12 mph",
        "Efforts > 16 mph"
    ])
    velo_df["Date"] = pd.to_datetime(velo_df["Date"], format='%Y-%m-%d')
    velo_df["Total Duration"] = pd.to_datetime(velo_df["Total Duration"],
                                               format='%H:%M:%S')
    for col in velo_df.columns[5:]:
        velo_df[col] = pd.to_numeric(velo_df[col])

    # Keep only whole-session rows (period name "Session", number 0).
    velo_df = velo_df[velo_df['Period Name'] == 'Session']
    velo_df = velo_df[velo_df['Period Number'] == 0]

    # Derive a string Year key so weeks never merge across seasons.
    for frame in (schedule_df, week_df, velo_df):
        frame['Year'] = pd.to_datetime(frame['Date']).dt.to_period('Y')
        frame['Year'] = frame['Year'].astype(str)

    # Merge this stuff together right meow.
    merged_df = schedule_df.merge(week_df, how='left', on=['Date', 'Year'])
    merged_df = merged_df.merge(velo_df, how='left', on=['Date', 'Year'])
    merged_df = merged_df.merge(roster_df, how='left', on=['Player Name'])
    period_keys = [
        'Player Name', 'Period Name', 'Period Number', 'Date',
        'Total Duration'
    ]
    merged_df = merged_df.merge(throws_df, how='left', on=period_keys)
    merged_df = merged_df.merge(pl_df, how='left', on=period_keys)
    merged_df = merged_df.merge(line_contacts_df, how='left', on=period_keys)
    merged_df = merged_df.merge(accel_df, how='left', on=period_keys)

    # Weekly max speed must be a max, not a sum, so aggregate it apart
    # from the volume metrics and re-join.
    group_keys = ['Player Name', 'Week_Num', 'Year', 'Position', 'Phase']
    max_vel_df = merged_df.groupby(
        by=group_keys, as_index=False)['Max Velocity (mph)'].max()
    week_dist_df = merged_df.groupby(by=group_keys, as_index=False).sum()
    week_dist_df = week_dist_df.drop(columns=['Max Velocity (mph)'], axis=1)
    remerged_df = week_dist_df.merge(max_vel_df, how='left', on=group_keys)
    remerged_df = remerged_df.drop(
        columns=['Distance > 5 mph', 'Period Number'], axis=1)

    if not os.path.exists(WEEK_LIST):
        remerged_df.to_csv(WEEK_LIST, index=False, mode='w')
        # Set the file permissions so that anyone can use it.
        # BUG FIX: the flags were combined with boolean "or", which
        # evaluates to just S_IRWXO (0o007); bitwise "|" gives the
        # intended rwx for user, group, and others (0o777).
        oschmod.set_mode(WEEK_LIST,
                         stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)