Esempio n. 1
0
def raw_symm_cleaner(symm_directory):
    """Clean every raw symmetry CSV in *symm_directory* and write SYMM_LIST.

    Args:
        symm_directory: Path to a directory of raw export files (CSV with
            a 9-line preamble before the header row).

    Side effects:
        Writes the concatenated, cleaned data to the module-level
        SYMM_LIST path if it does not already exist, then opens its
        permissions so anyone can use it.
    """
    clean_list = []
    for file in os.listdir(symm_directory):
        # BUG FIX: os.listdir() yields bare file names, so join with the
        # directory instead of relying on the current working directory.
        dataset = pd.read_csv(os.path.join(symm_directory, file), header=9)

        # Change data types to correct ones
        dataset['Date'] = pd.to_datetime(dataset['Date'], format='%m/%d/%Y')

        # Remove last column
        dataset = dataset.iloc[:, :-1]

        clean_list.append(dataset)
        print(file)

    # Concatenate all raw files into a single .csv file with clean data
    big_clean_list = pd.concat(clean_list)

    if not os.path.exists(SYMM_LIST):
        big_clean_list.to_csv(SYMM_LIST, index=False, mode='w')
        # Set the file permissions so that anyone can use it.
        # BUG FIX: permission bits must be combined with bitwise OR; the
        # logical `or` used before evaluated to just stat.S_IRWXO.
        oschmod.set_mode(SYMM_LIST,
                         stat.S_IRWXO | stat.S_IRWXU | stat.S_IRWXG)
Esempio n. 2
0
    def _create_relay_info_file(self):
        """Write the relay information to a file in the credentials folder.

        Builds ``<resource_group>-<vm_name>-relay_info`` under
        ``self.credentials_folder`` (creating the folder if needed),
        overwrites any existing copy, sets it world-readable, and prints
        the expiration time of the relay information.

        Returns:
            str: Full path of the relay info file that was written.
        """
        relay_info_dir = self.credentials_folder
        relay_info_filename = None
        if not os.path.isdir(relay_info_dir):
            os.makedirs(relay_info_dir)

        if self.vm_name and self.resource_group_name:
            relay_info_filename = self.resource_group_name + "-" + self.vm_name + "-relay_info"

        # NOTE(review): if vm_name or resource_group_name is falsy,
        # relay_info_filename stays None and os.path.join raises
        # TypeError — confirm callers always set both before calling.
        relay_info_path = os.path.join(relay_info_dir, relay_info_filename)
        # Overwrite relay_info if it already exists in that folder.
        file_utils.delete_file(relay_info_path, f"{relay_info_path} already exists, and couldn't be overwritten.")
        file_utils.write_to_file(relay_info_path, 'w', connectivity_utils.format_relay_info_string(self.relay_info),
                                 f"Couldn't write relay information to file {relay_info_path}.", 'utf-8')
        # 0o644: owner read/write, group and others read-only.
        oschmod.set_mode(relay_info_path, 0o644)
        # pylint: disable=broad-except
        try:
            # Expiration reporting is best-effort: any failure here is
            # logged as a warning and the file path is still returned.
            expiration = datetime.datetime.fromtimestamp(self.relay_info.expires_on)
            expiration = expiration.strftime("%Y-%m-%d %I:%M:%S %p")
            colorama.init()
            print(Fore.GREEN + f"Generated relay information {relay_info_path} is valid until {expiration} "
                  "in local time." + Style.RESET_ALL)
        except Exception as e:
            logger.warning("Couldn't determine relay information expiration. Error: %s", str(e))

        return relay_info_path
Esempio n. 3
0
def raw_accel_cleaner(accel_directory):
    """Clean every raw acceleration CSV in *accel_directory*.

    Renames the "Acceleration Band N Total Effort Count" columns into
    readable accel/decel metrics, concatenates all files, and writes
    the result to the module-level ACCEL_LIST path if it does not
    already exist.
    """
    clean_list = []
    for file in os.listdir(accel_directory):
        # BUG FIX: os.listdir() yields bare file names, so join with the
        # directory instead of relying on the current working directory.
        dataset = pd.read_csv(os.path.join(accel_directory, file), header=9)

        # Change data types to correct ones
        dataset['Date'] = pd.to_datetime(dataset['Date'], format='%m/%d/%Y')
        dataset['Total Duration'] = pd.to_datetime(dataset['Total Duration'],
                                                   format='%H:%M:%S')

        # Remove last column
        dataset = dataset.iloc[:, :-1]

        # Map acceleration bands onto named metrics, then drop the raw
        # band columns: band 3 -> medium decel, band 6 -> medium accel,
        # bands 7+8 -> high accel, bands 1+2 -> high decel.
        band = 'Acceleration Band %d Total Effort Count'
        dataset["Medium Decelerations"] = dataset[band % 3]
        dataset["Medium Accelerations"] = dataset[band % 6]
        dataset["High Accelerations"] = dataset[band % 7] + dataset[band % 8]
        dataset["High Decelerations"] = dataset[band % 1] + dataset[band % 2]
        dataset = dataset.drop(columns=[band % n for n in (1, 2, 3, 6, 7, 8)])

        clean_list.append(dataset)
        print(file)

    # Concatenate all raw files into a single .csv file with clean data
    big_clean_list = pd.concat(clean_list)

    if not os.path.exists(ACCEL_LIST):
        big_clean_list.to_csv(ACCEL_LIST, index=False, mode='w')
        # Set the file permissions so that anyone can use it.
        # BUG FIX: permission bits must be combined with bitwise OR; the
        # logical `or` used before evaluated to just stat.S_IRWXO.
        oschmod.set_mode(ACCEL_LIST,
                         stat.S_IRWXO | stat.S_IRWXU | stat.S_IRWXG)
def generate_grobid(overwrite=False):
    ''' Convert a pdf to a .tei.xml file via Grobid

    Args:
        overwrite: When True, delete previously generated XML output and
            force Grobid to reprocess every PDF.
    '''
    base = 'https://github.com/kermitt2/grobid/'
    # get latest Grobid release (GitHub redirects /releases/latest to the tag)
    version = requests.get(base + 'releases/latest').url.split('/')[-1]

    if not os.path.exists(f'./cache/grobid-{version}'):
        print('\nInstalling Grobid!')
        try:
            print('Downloading and extracting...')
            zip_path, _ = urllib.request.urlretrieve(
                f'{base}archive/refs/tags/{version}.zip')
            with zipfile.ZipFile(zip_path, 'r') as archive:
                archive.extractall('./cache')

            print('Installing...')
            oschmod.set_mode(f'./cache/grobid-{version}/gradlew', '+x')
            subprocess.run(
                f'cd ./cache/grobid-{version} '
                '&& ./gradlew clean install',
                shell=True)
            # Grobid's bundled pdf2xml/pdfalto binaries must be executable.
            exec_dir = f'./cache/grobid-{version}/grobid-home/'
            for folder in [exec_dir + 'pdf2xml', exec_dir + 'pdfalto']:
                for root, _, files in os.walk(folder):
                    for fname in files:
                        oschmod.set_mode(os.path.join(root, fname), '+x')

        except Exception as e:
            # Best-effort install: report the failure and continue, the
            # server startup below will fail visibly if install is broken.
            print(e)
            print('\nFailed to install Grobid!')

    print('\nConverting PDFs to XMLs via Grobid - this may take some time...')

    # Kill untracked server if exists
    subprocess.run(['./gradlew', '--stop'],
                   cwd=f'./cache/grobid-{version}',
                   stderr=subprocess.DEVNULL)

    p = subprocess.Popen(['./gradlew', 'run'],
                         cwd=f'./cache/grobid-{version}',
                         stdout=subprocess.DEVNULL)
    for _ in tqdm(range(20), desc='Initiating Grobid server'):
        time.sleep(1)  # wait for Grobid to run, might need to be longer

    if overwrite:
        # BUG FIX: rmtree raises FileNotFoundError when ./cache/xml does
        # not exist yet (e.g. first run with overwrite=True).
        shutil.rmtree('./cache/xml', ignore_errors=True)

    client = GrobidClient(config_path='./resources/config.json')
    client.process('processFulltextDocument',
                   pdf_src,
                   tei_coordinates=False,
                   output=xml_src,
                   force=overwrite)
    p.terminate()
Esempio n. 5
0
 def test_safe_write(self, fixed_lines, expec_code, expec_newlines,
                     expec_err, chmod):
     """Write fixed_lines through iou.safe_write and verify the result.

     The temp file is chmod-ed before the write so permission-related
     failures can be provoked; ``expec_err`` is the exception the whole
     flow is expected to raise (sysu.Pass marks the success path).
     """
     with pytest.raises(expec_err):
         with sysu.reopenable_temp_file("".join(fixed_lines)) as tmp_path:
             set_mode(str(tmp_path), chmod)
             iou.safe_write(tmp_path, fixed_lines, "utf-8", expec_newlines)
             # Round-trip check: content and newline style must survive.
             with open(tmp_path) as tmp:
                 assert tmp.read() == expec_code
             with open(tmp_path, "rb") as tmp:
                 assert expec_newlines.encode() in tmp.readline()
         raise sysu.Pass()
Esempio n. 6
0
def make_log_dir(log_dir):
    """
    Create logging directory if it does not exist.

    A freshly created directory is restricted to the owner (0o700);
    a pre-existing path is left untouched.

    Args:
        log_dir: (:obj:`str`)
        Path to a directory.

    """
    if not os.path.exists(log_dir):
        try:
            os.makedirs(log_dir)
        except OSError:
            # BUG FIX (race): another process may create the directory
            # between the exists() check and makedirs(); treat that as
            # "already exists" instead of crashing.
            if os.path.exists(log_dir):
                return
            raise
        oschmod.set_mode(log_dir, 0o700)
Esempio n. 7
0
 def test_safe_read(self, content, expec_code, expec_newlines, expec_err,
                    chmod):
     """Read content back through iou.safe_read and verify code/newlines.

     The temp file is chmod-ed before the read so permission-related
     failures can be provoked; ``expec_err`` is the exception the whole
     flow is expected to raise (sysu.Pass marks the success path).
     """
     with pytest.raises(expec_err):
         if expec_newlines:
             # Rewrite the platform line separator into the newline
             # style the test expects safe_read to detect.
             content = content.replace(os.linesep, expec_newlines)
         with sysu.reopenable_temp_file(content) as tmp_path:
             set_mode(str(tmp_path), chmod)
             # default param: permissions: tuple = (os.R_OK, os.W_OK).
             source_code, _, newlines = iou.safe_read(tmp_path)
             assert source_code == expec_code
             assert newlines == expec_newlines
         raise sysu.Pass()
Esempio n. 8
0
def main():
    """Provide main function for CLI."""
    arg_parser = argparse.ArgumentParser(
        description='Change the mode (permissions) of a file or directory')
    arg_parser.add_argument('-R', action='store_true',
                            help='apply mode recursively')
    arg_parser.add_argument(
        'mode', nargs=1, help='octal or symbolic mode of the object')
    arg_parser.add_argument('object', nargs=1, help='file or directory')

    parsed = arg_parser.parse_args()
    target_mode = parsed.mode[0]
    target = parsed.object[0]
    # Dispatch to the recursive variant only when -R was given.
    setter = oschmod.set_mode_recursive if parsed.R else oschmod.set_mode
    setter(target, target_mode)
Esempio n. 9
0
    def run(self):
        """Watch DIRECTORY_TO_WATCH for container file events until stopped.

        Wires a ContainerFilesHandler up to the observer and then idles;
        setting ``self.stop_execution`` to True ends the loop.
        """
        event_handler = ContainerFilesHandler()
        event_handler.log_filename = self.log_filename
        event_handler.EXCLUDES = self.EXCLUDES
        event_handler.module_name = self.module_name

        # set 777 permission and allow container read/write/execute
        oschmod.set_mode(self.DIRECTORY_TO_WATCH, '777')

        self.observer.schedule(event_handler,
                               self.DIRECTORY_TO_WATCH,
                               recursive=True)
        self.observer.start()
        # Idle until asked to stop; the broad except keeps a stray
        # interruption of sleep() from killing the watcher thread.
        while not self.stop_execution:
            try:
                time.sleep(0.1)
            except Exception:
                pass
Esempio n. 10
0
def bw(csv_path='C:\\Users\\Kyle Voigt\\Desktop\\fb_catapult\\raw_files\\bodyweight.csv'):
    """Clean the bodyweight export and write median bodyweights to BW_LIST.

    Args:
        csv_path: Location of the raw bodyweight CSV.  Defaults to the
            previously hard-coded export path, so existing callers are
            unaffected.

    Side effects:
        Writes per-player median bodyweights to the module-level
        BW_LIST path if it does not already exist.
    """
    # Read in the test file
    dataset = pd.read_csv(csv_path)

    # Change data types to correct ones
    dataset['Date'] = pd.to_datetime(dataset['Date'], format='%m/%d/%Y')
    dataset['Bodyweight (lbs)'] = pd.to_numeric(dataset['Bodyweight (lbs)'])
    dataset = dataset[dataset['Bodyweight (lbs)'].notna()]
    dataset['Bodyweight (kg)'] = dataset['Bodyweight (lbs)'] / 2.205

    # Find each athlete's median bodyweight in kg
    kg_df = dataset.groupby(['Player Name'], as_index=False).median()

    if not os.path.exists(BW_LIST):
        kg_df.to_csv(BW_LIST, index=False, mode='w')
        # Set the file permissions so that anyone can use it.
        # BUG FIX: permission bits must be combined with bitwise OR; the
        # logical `or` used before evaluated to just stat.S_IRWXO.
        oschmod.set_mode(BW_LIST, stat.S_IRWXO | stat.S_IRWXU | stat.S_IRWXG)
Esempio n. 11
0
    def install(self):
        """
        Execute the watchmaker workers against the system.

        Upon successful execution, the system will be properly provisioned,
        according to the defined configuration and workers.
        """
        self.log.info('Start time: %s', datetime.datetime.now())
        self.log.info('Workers to execute: %s', self.config.keys())

        # Create watchmaker directories
        try:
            os.makedirs(self.system_params['workingdir'])
            # NOTE(review): the mode is set on 'prepdir' while the path
            # created above is 'workingdir' — confirm prepdir is an
            # ancestor of workingdir created by the same makedirs call.
            oschmod.set_mode(self.system_params['prepdir'], 0o700)
        except OSError:
            # makedirs raises if the directory already exists; only
            # treat it as fatal when the directory truly isn't there.
            if not os.path.exists(self.system_params['workingdir']):
                msg = ('Unable to create directory - {0}'.format(
                    self.system_params['workingdir']))
                self.log.critical(msg)
                raise

        workers_manager = self.workers_manager(
            system_params=self.system_params, workers=self.config)

        try:
            workers_manager.worker_cadence()
        except Exception:
            msg = 'Execution of the workers cadence has failed.'
            self.log.critical(msg)
            raise

        if self.no_reboot:
            self.log.info('Detected `no-reboot` switch. System will not be '
                          'rebooted.')
        else:
            # Reboot via the platform-specific restart command; the
            # script exits first, then the system restarts.
            self.log.info(
                'Reboot scheduled. System will reboot after the script '
                'exits.')
            subprocess.call(self.system_params['restart'], shell=True)
        self.log.info('Stop time: %s', datetime.datetime.now())
def generate_teis(missing_jats):
    """Convert JATS XML files to TEI via Pub2TEI's Saxon stylesheets.

    Args:
        missing_jats: File names (relative to the module-level jats_src
            directory) that still need conversion.

    The files are moved into a temp directory for batch conversion and
    always moved back afterwards, even if the conversion fails.
    """
    # Put xmls in a temp dir for batch conversion
    temp_dir = os.getcwd() + '/cache/temp_jats/'
    for f in missing_jats:
        os.renames(jats_src + f, temp_dir + f)
    try:
        print('\nConverting XMLs from JAR to TEI!')
        oschmod.set_mode('./resources/Pub2TEI/Samples/saxon9he.jar', '+x')
        xslt_args = [
            '--parserFeature?uri=http%3A//apache.org/xml/features/nonvalidating/load-external-dtd:false',
            '-dtd:off', '-a:off', '-expand:off',
            '-xsl:./Stylesheets/Publishers.xsl', f'-s:{temp_dir}',
            f'-o:{xml_src}'
        ]
        subprocess.run(['java', '-jar', './Samples/saxon9he.jar', *xslt_args],
                       cwd='./resources/Pub2TEI')
    except Exception as e:
        print('An error occured: ', e)
        # BUG FIX: quit() is provided by the site module and may not
        # exist in all environments; raise SystemExit directly instead.
        raise SystemExit(1)
    finally:
        # BUG FIX: restore the xmls to the jats dir even when the
        # conversion fails, so files are never stranded in temp_dir.
        for f in missing_jats:
            os.renames(temp_dir + f, jats_src + f)
Esempio n. 13
0
def test_set_recursive():
    """Check file permissions are recursively set."""
    # Build a three-level directory tree with a file at two depths.
    topdir = 'testdir1'
    testdir = os.path.join(topdir, 'testdir2', 'testdir3')
    os.makedirs(testdir)

    file_one = os.path.join(topdir, 'file1')
    file_two = os.path.join(testdir, 'file2')
    for target in (file_one, file_two):
        with open(target, "w+") as handle:
            handle.write("contents")

    # Start everything at fully-open 777 permissions.
    triple7 = (stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR
               | stat.S_IRGRP | stat.S_IWGRP | stat.S_IXGRP
               | stat.S_IROTH | stat.S_IWOTH | stat.S_IXOTH)
    for target in (topdir, testdir, file_one, file_two):
        oschmod.set_mode(target, triple7)
    time.sleep(1)  # modes aren't always ready to go immediately

    # The call under test: distinct modes for files and directories.
    file_mode = 0o600
    dir_mode = 0o700
    oschmod.set_mode_recursive(topdir, file_mode, dir_mode)
    time.sleep(1)  # modes aren't always ready to go immediately

    # Every directory must carry dir_mode, every file file_mode.
    assert oschmod.get_mode(topdir) == dir_mode
    assert oschmod.get_mode(os.path.join(topdir, 'testdir2')) == dir_mode
    assert oschmod.get_mode(testdir) == dir_mode
    assert oschmod.get_mode(file_one) == file_mode
    assert oschmod.get_mode(file_two) == file_mode

    # clean up
    shutil.rmtree(topdir)
Esempio n. 14
0
def _create_random_file(test_dir):
    """Create a .txt file with a random 10-letter name; return its path."""
    path = os.path.join(
        test_dir,
        ''.join(random.choice(string.ascii_letters)
                for i in range(10)) + '.txt')
    with open(path, 'w+') as file_hdl:
        file_hdl.write(path)
    return path


def test_permissions():
    """Tests for stuff."""
    test_dir = "tests"

    # Each mode is applied to a fresh file and read back via oschmod.
    modes = [
        # owner read/write only
        stat.S_IRUSR | stat.S_IWUSR,
        # owner rwx, group/other rw
        stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR | stat.S_IRGRP
        | stat.S_IWGRP | stat.S_IROTH | stat.S_IWOTH,
        # full 777
        stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR | stat.S_IRGRP
        | stat.S_IWGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IWOTH
        | stat.S_IXOTH,
    ]
    for mode in modes:
        path = _create_random_file(test_dir)
        oschmod.set_mode(path, mode)
        assert oschmod.get_mode(path) == mode

    # clean up all generated files
    file_list = glob.glob(os.path.join(test_dir, "*txt"))
    for file_path in file_list:
        try:
            os.remove(file_path)
        except FileNotFoundError:
            print("Error while deleting file : ", file_path)
Esempio n. 15
0
def prepare_logging(log_dir, log_level):
    """
    Prepare the logger for handling messages to a file and/or to stdout.

    Args:
        log_dir: (:obj:`str`)
            Path to a directory. If set, Watchmaker logs to a file named
            ``watchmaker.log`` in the specified directory. Both the directory
            and the file will be created if necessary. If the file already
            exists, Watchmaker appends to it rather than overwriting it. If
            this argument evaluates to ``False``, then logging to a file is
            disabled. Watchmaker will always output to stdout/stderr.

        log_level: (:obj:`str`)
            Level to log at. Case-insensitive. Valid options include,
            from least to most verbose:

            - ``critical``
            - ``error``
            - ``warning``
            - ``info``
            - ``debug``

    """
    logformat = (
        '%(asctime)s [%(name)s][%(levelname)-5s][%(process)s]: %(message)s'
    )
    # Look up the numeric logging level from the module-level map.
    level = LOG_LEVELS[str(log_level).lower()]

    logging.basicConfig(format=logformat, level=level)

    if not log_dir:
        logging.warning(
            'Watchmaker will not be logging to a file!'
        )
    else:
        make_log_dir(log_dir)
        log_filename = os.sep.join((log_dir, 'watchmaker.log'))
        hdlr = logging.FileHandler(log_filename)
        # Keep the log file private to the owner (0o600).
        oschmod.set_mode(log_filename, 0o600)
        hdlr.setLevel(level)
        hdlr.setFormatter(logging.Formatter(logformat))
        logging.getLogger().addHandler(hdlr)

    # On Windows hosts with pywin32, mirror logs to the NT event log.
    if HAS_PYWIN32:
        ehdlr = logging.handlers.NTEventLogHandler('Watchmaker')
        ehdlr.setLevel(level)
        ehdlr.setFormatter(logging.Formatter(logformat))
        logging.getLogger().addHandler(ehdlr)

    if HAS_PYWIN32 and EC2_CONFIG_DEPS:
        try:
            _enable_ec2_config_event_log()
            _configure_ec2_config_event_log()
        except (IOError, OSError) as exc:       # noqa: B014
            if exc.errno == errno.ENOENT:
                # PY2/PY3-compatible check for FileNotFoundError
                # EC2_CONFIG or EC2_LOG_CONFIG do not exist
                pass
            else:
                raise
    if HAS_PYWIN32 and EC2_LAUNCH_DEPS:
        try:
            _configure_ec2_launch_event_log()
            _schedule_ec2_launch_event_log()
        except (IOError, OSError) as exc:       # noqa: B014
            if exc.errno == errno.ENOENT:
                # PY2/PY3-compatible check for FileNotFoundError
                # EC2_LAUNCH_LOG_CONFIG or 'powershell.exe' do not exist
                pass
            else:
                raise
        except subprocess.CalledProcessError:
            # EC2_LAUNCH_SEND_EVENTS does not exist
            pass

# Copyright for this decoding routine belongs to @manmoleculo.
def bit2word(bits):
    """Decode a flat bit sequence (8 bits per character, MSB first) to text.

    Any trailing bits that do not fill a whole byte are ignored, matching
    the original behavior.
    """
    n_chars = len(bits) // 8
    return ''.join(
        chr(int(''.join(str(bit) for bit in bits[i * 8:(i + 1) * 8]), 2))
        for i in range(n_chars))


# Get the secret message from the user and put it between { and }
# BUG FIX: raw_input() only exists on Python 2, but this script already
# uses Python 3 syntax (print(..., end='')); use input() instead.
message = "{" + input("Enter your secret message: ") + '}'
print("Your message is : " + message)
msg_bit = (word2bit(message))
print("Your message will turn into ==> " + (''.join(str(x) for x in msg_bit)))
time.sleep(1)

# Signal each bit by toggling the watched file's permission mode.
print('\nTransfer begins now...')
for i in msg_bit:
    if i:
        oschmod.set_mode(filename, 0o777)  # Octal for -rwxrwxrwx access mode
        # os.chmod(filename, 0o777)  # in case oschmod does not work
    else:
        oschmod.set_mode(filename, 0o444)  # Octal for -r--r--r-- access mode
        # os.chmod(filename, 0o444)  # in case oschmod does not work
    time.sleep(0.1)
    sys.stdout.flush()  # You did not see anything, did you?!
    print(i, end='')
print('\n')
Esempio n. 17
0
def raw_pl_cleaner(pl_directory):
    """Clean every raw player-load CSV in *pl_directory*.

    Converts the eight "Player Load Band N Total Player Load" columns to
    numeric, aggregates them into Active/Medium/High player-load
    metrics, concatenates all files, and writes the result to the
    module-level PL_LIST path if it does not already exist.
    """
    # The eight raw band columns, in band order.
    band_cols = ['Player Load Band %d Total Player Load' % n
                 for n in range(1, 9)]

    clean_list = []
    for file in os.listdir(pl_directory):
        # BUG FIX: os.listdir() yields bare file names, so join with the
        # directory instead of relying on the current working directory.
        dataset = pd.read_csv(os.path.join(pl_directory, file), header=9)

        # Change data types to correct ones
        dataset['Date'] = pd.to_datetime(dataset['Date'], format='%m/%d/%Y')
        dataset['Total Duration'] = pd.to_datetime(dataset['Total Duration'],
                                                   format='%H:%M:%S')
        # One loop replaces eight duplicated to_numeric statements.
        for col in band_cols:
            dataset[col] = pd.to_numeric(dataset[col])

        # Remove last column
        dataset = dataset.iloc[:, :-1]

        # Aggregate bands into named load metrics: Active = bands 2-8,
        # Medium = bands 4-8, High = bands 6-8.  Plain series addition
        # (not DataFrame.sum) keeps NaN propagation identical to the
        # original chained `+` expressions.
        dataset["Active Player Load"] = sum(dataset[c]
                                            for c in band_cols[1:])
        dataset["Medium Player Load"] = sum(dataset[c]
                                            for c in band_cols[3:])
        dataset["High Player Load"] = sum(dataset[c]
                                          for c in band_cols[5:])
        dataset = dataset.drop(columns=band_cols)

        clean_list.append(dataset)
        print(file)

    # Concatenate all raw files into a single .csv file with clean data
    big_clean_list = pd.concat(clean_list)

    if not os.path.exists(PL_LIST):
        big_clean_list.to_csv(PL_LIST, index=False, mode='w')
        # Set the file permissions so that anyone can use it.
        # BUG FIX: permission bits must be combined with bitwise OR; the
        # logical `or` used before evaluated to just stat.S_IRWXO.
        oschmod.set_mode(PL_LIST,
                         stat.S_IRWXO | stat.S_IRWXU | stat.S_IRWXG)
Esempio n. 18
0
def raw_throws_cleaner(throws_directory):
    """Clean every raw throw-load CSV in *throws_directory*.

    Aggregates the "Throw Load Band N" columns into Hard/Active throw
    counts and loads plus percentage metrics, concatenates all files,
    and writes the result to the module-level THROWS_LIST path if it
    does not already exist.
    """
    throws_band = 'Throw Load Band %d Total Throws'
    load_band = 'Throw Load Band %d Total Load'

    clean_list = []
    for file in os.listdir(throws_directory):
        # BUG FIX: os.listdir() yields bare file names, so join with the
        # directory instead of relying on the current working directory.
        dataset = pd.read_csv(os.path.join(throws_directory, file), header=9)

        # Change data types to correct ones
        dataset['Date'] = pd.to_datetime(dataset['Date'], format='%m/%d/%Y')
        dataset['Total Duration'] = pd.to_datetime(dataset['Total Duration'],
                                                   format='%H:%M:%S')

        # Remove last column
        dataset = dataset.iloc[:, :-1]

        # Bands 3-5 are hard throws; band 2 is moderate.  The original
        # code also created Easy/Moderate columns only to drop them
        # later, so those intermediates are kept as plain series here.
        moderate_throws = dataset[throws_band % 2]
        moderate_load = dataset[load_band % 2]
        dataset['Hard Throws'] = (dataset[throws_band % 3] +
                                  dataset[throws_band % 4] +
                                  dataset[throws_band % 5])
        dataset['Hard Throw Load'] = (dataset[load_band % 3] +
                                      dataset[load_band % 4] +
                                      dataset[load_band % 5])

        # Metrics that exclude the easy band entirely.
        dataset['Active Throws'] = moderate_throws + dataset['Hard Throws']
        dataset['Active Throw Load'] = (moderate_load +
                                        dataset['Hard Throw Load'])

        # Drop all raw band columns in one pass.
        dataset = dataset.drop(
            columns=[throws_band % n for n in range(1, 6)] +
                    [load_band % n for n in range(1, 6)])

        # Create metrics for % hard throws and % hard throw load
        dataset['Hard Throw %'] = (dataset['Hard Throws'] /
                                   dataset['Total Throw Count']) * 100
        dataset['Hard Throw Load %'] = (dataset['Hard Throw Load'] /
                                        dataset['Total Throw Load']) * 100

        clean_list.append(dataset)
        print(file)

    # Concatenate all raw files into a single .csv file with clean data
    big_clean_list = pd.concat(clean_list)

    if not os.path.exists(THROWS_LIST):
        big_clean_list.to_csv(THROWS_LIST, index=False, mode='w')
        # Set the file permissions so that anyone can use it.
        # BUG FIX: permission bits must be combined with bitwise OR; the
        # logical `or` used before evaluated to just stat.S_IRWXO.
        oschmod.set_mode(THROWS_LIST,
                         stat.S_IRWXO | stat.S_IRWXU | stat.S_IRWXG)
Esempio n. 19
0
def raw_nordic_cleaner(nordic_directory):
    """Clean every raw nordic-test CSV in *nordic_directory*.

    Normalizes dates and player names, merges in bodyweight and roster
    tables, derives force-per-bodyweight metrics, concatenates all
    files, and writes the result to the module-level NORDIC_LIST path
    if it does not already exist.
    """
    # PERF: these reference tables are loop-invariant, so fetch them
    # once instead of once per raw file (the original re-queried the
    # database on every iteration).
    bw_df = pd.DataFrame(
        fetch_table(select_bw_table),
        columns=["Player Name", "Bodyweight (lbs)", "Bodyweight (kg)"])
    bw_df["Bodyweight (lbs)"] = pd.to_numeric(bw_df["Bodyweight (lbs)"])
    bw_df["Bodyweight (kg)"] = pd.to_numeric(bw_df["Bodyweight (kg)"])

    roster_df = pd.DataFrame(fetch_table(select_clean_roster_table),
                             columns=["Player Name", "Position", "Group"])

    # NOTE(review): schedule_df is fetched but never merged below —
    # preserved from the original; confirm whether it should be used.
    schedule_df = pd.DataFrame(fetch_table(select_schedule_table),
                               columns=[
                                   "Date", "Day", "Enemy", "Day_Type",
                                   "Day_ID", "Attire", "MD_Minus"
                               ])
    schedule_df["Date"] = pd.to_datetime(schedule_df["Date"],
                                         format='%Y-%m-%d')

    clean_list = []
    for file in os.listdir(nordic_directory):
        # BUG FIX: os.listdir() yields bare file names, so join with the
        # directory instead of relying on the current working directory.
        dataset = pd.read_csv(os.path.join(nordic_directory, file))
        dataset['Date'] = pd.to_datetime(dataset['Date UTC'],
                                         format='%d/%m/%Y')
        dataset['Time PST'] = pd.to_datetime(dataset['Time UTC'],
                                             format='%I:%M %p')
        dataset = dataset.drop(
            columns=['ExternalId', 'Notes', 'Date UTC', 'Time UTC'], axis=1)
        dataset = dataset.rename(columns={"Name": "Player Name"})

        # Make sure player names are correct
        dataset = dataset.replace({
            'Abdul-Malik McClain': 'Malik McClain',
            'Adonis Russell-Otey': 'Adonis Otey',
            'Briton  Allen': 'Briton Allen',
            'Gary Bryant Jr.': 'Gary Bryant',
            'James  Toland': 'James Toland',
            'Joshua Jackson, Jr.': 'Josh Jackson',
            'Justin  Dedich': 'Justin Dedich',
            'Keoantay Ingram': 'Keaontay Ingram',
            'Maxzell Williams': 'Max Williams',
            'Thomas Maurice': 'Tommy Maurice',
            'Matt  Boermeester': 'Matt Boermeester',
            'Michael Jackson III': 'Michael Jackson',
            'Taylor  McNamara': 'Taylor McNamara',
            'Trevor  Trout': 'Trevor Trout',
            'Tuli Tuipulotu ': 'Tuli Tuipulotu',
            'Tyler  Petite': 'Tyler Petite'
        })

        # Attach bodyweight and roster info
        merged_df = dataset.merge(bw_df, how='left', on=['Player Name'])
        merged_df = merged_df.merge(roster_df, how='left', on=['Player Name'])

        # Create force divided by bodyweight metrics (lbs and kgs).
        # Loop order (Max/Avg, then lbs/kg, then L/R) reproduces the
        # exact column order of the original hand-written assignments.
        for agg in ('Max', 'Avg'):
            for unit in ('lbs', 'kg'):
                for side in ('L', 'R'):
                    merged_df['%s %s Force / BW (%s)' % (side, agg, unit)] = (
                        merged_df['%s %s Force (N)' % (side, agg)] /
                        merged_df['Bodyweight (%s)' % unit])

        merged_df = merged_df.sort_values(by=['Player Name', 'Date'])

        clean_list.append(merged_df)
        print(file)

    # Concatenate all raw files into a single .csv file with clean data
    big_clean_list = pd.concat(clean_list)

    if not os.path.exists(NORDIC_LIST):
        big_clean_list.to_csv(NORDIC_LIST, index=False, mode='w')
        # Set the file permissions so that anyone can use it.
        # BUG FIX: permission bits must be combined with bitwise OR; the
        # logical `or` used before evaluated to just stat.S_IRWXO.
        oschmod.set_mode(NORDIC_LIST,
                         stat.S_IRWXO | stat.S_IRWXU | stat.S_IRWXG)
Esempio n. 20
0
def raw_velo_cleaner(velo_directory):
    """Clean raw velocity export files and write one combined CSV.

    Reads every file in ``velo_directory`` (each export carries a 9-row
    preamble before the real header), parses dates and durations, collapses
    the eight raw velocity bands into three summary distance columns and
    three summary effort columns (> 5 / 12 / 16 mph), and concatenates
    everything into ``VELO_LIST`` — but only if that file does not already
    exist.

    Args:
        velo_directory: Directory containing the raw velocity CSV exports.
    """
    clean_list = []
    for file in os.listdir(velo_directory):

        # Read the export relative to its directory.  (Bug fix: previously
        # only the bare filename was passed to read_csv, so files were
        # resolved against the CWD instead of velo_directory.)
        dataset = pd.read_csv(os.path.join(velo_directory, file), header=9)

        # Change data types to correct ones
        dataset['Date'] = pd.to_datetime(dataset['Date'], format='%m/%d/%Y')
        dataset['Total Duration'] = pd.to_datetime(dataset['Total Duration'],
                                                   format='%H:%M:%S')

        # Remove last column (trailing junk column in the raw export)
        dataset = dataset.iloc[:, :-1]

        # Collapse velocity distance bands into named thresholds.
        # Bands 2-8 together cover everything above 5 mph.
        dataset['Distance > 5 mph'] = (
            dataset['Velocity Band 2 Total Distance'] +
            dataset['Velocity Band 3 Total Distance'] +
            dataset['Velocity Band 4 Total Distance'] +
            dataset['Velocity Band 5 Total Distance'] +
            dataset['Velocity Band 6 Total Distance'] +
            dataset['Velocity Band 7 Total Distance'] +
            dataset['Velocity Band 8 Total Distance'])
        dataset = dataset.drop(columns=[
            'Velocity Band 1 Total Distance', 'Velocity Band 2 Total Distance',
            'Velocity Band 3 Total Distance', 'Velocity Band 4 Total Distance'
        ],
                               axis=1)

        # Bands 5-8 cover everything above 12 mph.
        dataset['Distance > 12 mph'] = (
            dataset['Velocity Band 5 Total Distance'] +
            dataset['Velocity Band 6 Total Distance'] +
            dataset['Velocity Band 7 Total Distance'] +
            dataset['Velocity Band 8 Total Distance'])
        dataset = dataset.drop(columns=[
            'Velocity Band 5 Total Distance', 'Velocity Band 6 Total Distance'
        ],
                               axis=1)

        # Bands 7-8 cover everything above 16 mph.
        dataset['Distance > 16 mph'] = (
            dataset['Velocity Band 7 Total Distance'] +
            dataset['Velocity Band 8 Total Distance'])
        dataset = dataset.drop(columns=[
            'Velocity Band 7 Total Distance', 'Velocity Band 8 Total Distance'
        ],
                               axis=1)

        # Collapse velocity effort bands the same way.
        dataset['Efforts > 5 mph'] = (
            dataset['Velocity Band 2 Total Effort Count'] +
            dataset['Velocity Band 3 Total Effort Count'] +
            dataset['Velocity Band 4 Total Effort Count'] +
            dataset['Velocity Band 5 Total Effort Count'] +
            dataset['Velocity Band 6 Total Effort Count'] +
            dataset['Velocity Band 7 Total Effort Count'] +
            dataset['Velocity Band 8 Total Effort Count'])
        dataset = dataset.drop(columns=[
            'Velocity Band 2 Total Effort Count',
            'Velocity Band 3 Total Effort Count',
            'Velocity Band 4 Total Effort Count'
        ],
                               axis=1)

        dataset['Efforts > 12 mph'] = (
            dataset['Velocity Band 5 Total Effort Count'] +
            dataset['Velocity Band 6 Total Effort Count'] +
            dataset['Velocity Band 7 Total Effort Count'] +
            dataset['Velocity Band 8 Total Effort Count'])
        dataset = dataset.drop(columns=[
            'Velocity Band 5 Total Effort Count',
            'Velocity Band 6 Total Effort Count'
        ],
                               axis=1)

        dataset['Efforts > 16 mph'] = (
            dataset['Velocity Band 7 Total Effort Count'] +
            dataset['Velocity Band 8 Total Effort Count'])
        dataset = dataset.drop(columns=[
            'Velocity Band 7 Total Effort Count',
            'Velocity Band 8 Total Effort Count',
            'Relative Max Horizontal Power'
        ],
                               axis=1)

        clean_list.append(dataset)
        print(file)

    # Concatenate all raw files into a single .csv file with clean data
    big_clean_list = pd.concat(clean_list)

    if not os.path.exists(VELO_LIST):
        big_clean_list.to_csv(VELO_LIST, index=False, mode='w')
        # Set the file permissions so that anyone can use it.
        # Bug fix: the original used boolean `or`, which evaluates to the
        # first nonzero operand (S_IRWXO == 0o007) — only "others" got rwx.
        # Bitwise OR gives the intended 0o777.
        oschmod.set_mode(VELO_LIST,
                         stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
Esempio n. 21
0
def _write_cert_file(certificate_contents, cert_file):
    """Write *certificate_contents* to *cert_file* and make it world-readable.

    The contents are prefixed with the certificate type marker, the file is
    written as UTF-8 text, its mode is set to 0o644, and the path is returned
    for caller convenience.
    """
    contents = f"[email protected] {certificate_contents}"
    with open(cert_file, 'w', encoding='utf-8') as cert_handle:
        cert_handle.write(contents)
    oschmod.set_mode(cert_file, 0o644)
    return cert_file
Esempio n. 22
0
def ewma_calculation():
    """Compute ACWR and EWMA workload ratios per player/date and write a CSV.

    Fetches the cleaned games, roster, accel, IMA, linemen-contact,
    metabolic-power, player-load, throw, velocity and schedule tables from
    postgres, reduces each GPS table to its per-session totals, merges them
    into one table keyed on (Player Name, Date), then computes:

    * acute:chronic workload ratios from a 7-day rolling sum vs a 28-day
      rolling sum (divided by 4 to express a weekly chronic load), and
    * the equivalent ratios from exponentially weighted moving averages.

    The merged result is written to ``EWMA_LIST`` only if that file does not
    already exist.
    """
    # (Removed an unused inner `total_seconds` helper that was never called.)

    # Fetch table from postgres -> Games Table
    games_df = fetch_table(select_games_table)
    games_df = pd.DataFrame(games_df,
                            columns=[
                                "Date", "Game_Num_Year", "Enemy", "Enemy_Abr",
                                "Location", "Result", "Points_For",
                                "Points_Against", "Game_Type"
                            ])
    games_df["Date"] = pd.to_datetime(games_df["Date"], format='%Y-%m-%d')

    # Fetch table from postgres -> Roster Table
    roster_df = fetch_table(select_clean_roster_table)
    roster_df = pd.DataFrame(roster_df,
                             columns=["Player Name", "Position", "Group"])

    # Fetch table from postgres -> Accel Table
    accel_df = fetch_table(select_clean_accel_table)
    accel_df = pd.DataFrame(accel_df,
                            columns=[
                                "Player Name", "Period Name", "Period Number",
                                "Date", "Total Duration",
                                "Medium Decelerations", "Medium Accelerations",
                                "High Accelerations", "High Decelerations"
                            ])

    accel_df["Date"] = pd.to_datetime(accel_df["Date"], format='%Y-%m-%d')
    accel_df["Total Duration"] = pd.to_datetime(accel_df["Total Duration"],
                                                format='%H:%M:%S')

    # Keep only whole-session rows, then sum per player per day.
    accel_df = accel_df[accel_df['Period Name'] == 'Session']
    accel_df = accel_df[accel_df['Period Number'] == 0]
    accel_df = accel_df.drop(columns=['Period Name', 'Period Number'], axis=1)

    accel_df = accel_df.groupby(by=['Player Name', 'Date'],
                                as_index=False).sum()

    # Fetch table from postgres -> IMA Table
    ima_df = fetch_table(select_clean_ima_table)
    ima_df = pd.DataFrame(ima_df,
                          columns=[
                              "Player Name", "Period Name", "Period Number",
                              "Date", "Total Duration", "IMA CoD Right High",
                              "IMA CoD Left High", "IMA Accel High",
                              "IMA Decel High", "IMA Jump Count High Band",
                              "High IMAs", "IMA Explosive %", "Total Jumps",
                              "Hard CoD"
                          ])

    ima_df["Date"] = pd.to_datetime(ima_df["Date"], format='%Y-%m-%d')
    ima_df["Total Duration"] = pd.to_datetime(ima_df["Total Duration"],
                                              format='%H:%M:%S')
    ima_df["IMA Explosive %"] = pd.to_numeric(ima_df["IMA Explosive %"])

    ima_df = ima_df[ima_df['Period Name'] == 'Session']
    ima_df = ima_df[ima_df['Period Number'] == 0]
    # Percentages don't sum meaningfully, so drop before aggregating.
    ima_df = ima_df.drop(
        columns=['Period Name', 'Period Number', 'IMA Explosive %'], axis=1)

    ima_df = ima_df.groupby(by=['Player Name', 'Date'], as_index=False).sum()

    # Fetch table from postgres -> Linemen Contacts Table
    contacts_df = fetch_table(select_clean_line_contacts_table)
    contacts_df = pd.DataFrame(
        contacts_df,
        columns=[
            "Player Name", "Period Name", "Period Number", "Date",
            "Total Duration", "Total Contact Load", "Total Contacts",
            "Light Contact Load", "Medium Contact Load", "Hard Contact Load",
            "Light Contacts", "Medium Contacts", "Hard Contacts",
            "Hard Contact Load %", "Hard Contacts %", "Active Contact Load",
            "Active Contacts"
        ])

    contacts_df["Date"] = pd.to_datetime(contacts_df["Date"],
                                         format='%Y-%m-%d')
    contacts_df["Total Duration"] = pd.to_datetime(
        contacts_df["Total Duration"], format='%H:%M:%S')
    contacts_df["Total Contact Load"] = pd.to_numeric(
        contacts_df["Total Contact Load"])
    contacts_df["Light Contact Load"] = pd.to_numeric(
        contacts_df["Light Contact Load"])
    contacts_df["Medium Contact Load"] = pd.to_numeric(
        contacts_df["Medium Contact Load"])
    contacts_df["Hard Contact Load"] = pd.to_numeric(
        contacts_df["Hard Contact Load"])
    contacts_df["Active Contact Load"] = pd.to_numeric(
        contacts_df["Active Contact Load"])
    contacts_df["Hard Contact Load %"] = pd.to_numeric(
        contacts_df["Hard Contact Load %"])
    # Bug fix: this previously converted "Hard Contact Load %" again
    # (copy-paste error), overwriting "Hard Contacts %" with the wrong
    # column's values.
    contacts_df["Hard Contacts %"] = pd.to_numeric(
        contacts_df["Hard Contacts %"])

    contacts_df = contacts_df[contacts_df['Period Name'] == 'Session']
    contacts_df = contacts_df[contacts_df['Period Number'] == 0]
    contacts_df = contacts_df.drop(columns=[
        'Period Name', 'Period Number', 'Hard Contact Load %',
        'Hard Contacts %'
    ],
                                   axis=1)

    contacts_df = contacts_df.groupby(by=['Player Name', 'Date'],
                                      as_index=False).sum()

    # Fetch table from postgres -> Metabolic Power Table
    met_power_df = fetch_table(select_clean_met_power_table)
    met_power_df = pd.DataFrame(
        met_power_df,
        columns=[
            "Player Name", "Period Name", "Period Number", "Date",
            "Total Duration", "Meta Energy (Cal/kg)",
            "Total Metabolic Power Average Power",
            "Active Metabolic Power Average Power",
            "High Metabolic Power Average Power",
            "Total Metabolic Power Distance",
            "Active Metabolic Power Distance", "Explosive Distance",
            "High Metabolic Power Distance", "Total Metabolic Power Efforts",
            "Active Metabolic Power Efforts", "High Metabolic Power Efforts"
        ])

    met_power_df["Date"] = pd.to_datetime(met_power_df["Date"],
                                          format='%Y-%m-%d')
    met_power_df["Total Duration"] = pd.to_datetime(
        met_power_df["Total Duration"], format='%H:%M:%S')
    met_power_df["Meta Energy (Cal/kg)"] = pd.to_numeric(
        met_power_df["Meta Energy (Cal/kg)"])
    met_power_df["Total Metabolic Power Average Power"] = pd.to_numeric(
        met_power_df["Total Metabolic Power Average Power"])
    met_power_df["Active Metabolic Power Average Power"] = pd.to_numeric(
        met_power_df["Active Metabolic Power Average Power"])
    met_power_df["High Metabolic Power Average Power"] = pd.to_numeric(
        met_power_df["High Metabolic Power Average Power"])
    met_power_df["Total Metabolic Power Distance"] = pd.to_numeric(
        met_power_df["Total Metabolic Power Distance"])
    met_power_df["Active Metabolic Power Distance"] = pd.to_numeric(
        met_power_df["Active Metabolic Power Distance"])
    met_power_df["Explosive Distance"] = pd.to_numeric(
        met_power_df["Explosive Distance"])
    met_power_df["High Metabolic Power Distance"] = pd.to_numeric(
        met_power_df["High Metabolic Power Distance"])
    met_power_df["Total Metabolic Power Efforts"] = pd.to_numeric(
        met_power_df["Total Metabolic Power Efforts"])
    met_power_df["Active Metabolic Power Efforts"] = pd.to_numeric(
        met_power_df["Active Metabolic Power Efforts"])
    met_power_df["High Metabolic Power Efforts"] = pd.to_numeric(
        met_power_df["High Metabolic Power Efforts"])

    met_power_df = met_power_df[met_power_df['Period Name'] == 'Session']
    met_power_df = met_power_df[met_power_df['Period Number'] == 0]
    met_power_df = met_power_df.drop(
        columns=['Period Name', 'Period Number', 'Meta Energy (Cal/kg)'],
        axis=1)

    met_power_df = met_power_df.groupby(by=['Player Name', 'Date'],
                                        as_index=False).sum()

    # Fetch table from postgres -> Player Load Table
    pl_df = fetch_table(select_clean_pl_table)
    pl_df = pd.DataFrame(pl_df,
                         columns=[
                             "Player Name", "Period Name", "Period Number",
                             "Date", "Total Duration", "Total Player Load",
                             "Active Player Load", "Medium Player Load",
                             "High Player Load"
                         ])

    pl_df["Date"] = pd.to_datetime(pl_df["Date"], format='%Y-%m-%d')
    pl_df["Total Duration"] = pd.to_datetime(pl_df["Total Duration"],
                                             format='%H:%M:%S')
    pl_df["Total Player Load"] = pd.to_numeric(pl_df["Total Player Load"])
    pl_df["Active Player Load"] = pd.to_numeric(pl_df["Active Player Load"])
    pl_df["Medium Player Load"] = pd.to_numeric(pl_df["Medium Player Load"])
    pl_df["High Player Load"] = pd.to_numeric(pl_df["High Player Load"])

    pl_df = pl_df[pl_df['Period Name'] == 'Session']
    pl_df = pl_df[pl_df['Period Number'] == 0]
    pl_df = pl_df.drop(columns=['Period Name', 'Period Number'], axis=1)

    pl_df = pl_df.groupby(by=['Player Name', 'Date'], as_index=False).sum()

    # Fetch table from postgres -> Throws Table
    throws_df = fetch_table(select_clean_throws_table)
    throws_df = pd.DataFrame(throws_df,
                             columns=[
                                 "Player Name", "Period Name", "Period Number",
                                 "Date", "Total Duration", "Total Throw Count",
                                 "Total Throw Load", "Hard Throws",
                                 "Hard Throw Load", "Active Throws",
                                 "Active Throw Load", "Hard Throw %",
                                 "Hard Throw Load %"
                             ])

    throws_df["Date"] = pd.to_datetime(throws_df["Date"], format='%Y-%m-%d')
    throws_df["Total Duration"] = pd.to_datetime(throws_df["Total Duration"],
                                                 format='%H:%M:%S')
    throws_df["Total Throw Count"] = pd.to_numeric(
        throws_df["Total Throw Count"])
    throws_df["Total Throw Load"] = pd.to_numeric(
        throws_df["Total Throw Load"])
    throws_df["Hard Throws"] = pd.to_numeric(throws_df["Hard Throws"])
    throws_df["Hard Throw Load"] = pd.to_numeric(throws_df["Hard Throw Load"])
    throws_df["Active Throws"] = pd.to_numeric(throws_df["Active Throws"])
    throws_df["Active Throw Load"] = pd.to_numeric(
        throws_df["Active Throw Load"])
    throws_df["Hard Throw %"] = pd.to_numeric(throws_df["Hard Throw %"])
    throws_df["Hard Throw Load %"] = pd.to_numeric(
        throws_df["Hard Throw Load %"])

    throws_df = throws_df[throws_df['Period Name'] == 'Session']
    throws_df = throws_df[throws_df['Period Number'] == 0]
    throws_df = throws_df.drop(columns=[
        'Period Name', 'Period Number', 'Hard Throw %', 'Hard Throw Load %'
    ],
                               axis=1)

    throws_df = throws_df.groupby(by=['Player Name', 'Date'],
                                  as_index=False).sum()

    # Fetch table from postgres -> Velocity Table
    velo_df = fetch_table(select_clean_velo_table)
    velo_df = pd.DataFrame(velo_df,
                           columns=[
                               "Player Name", "Period Name", "Period Number",
                               "Date", "Total Duration", "Total Distance",
                               "Active Distance", "Maximum Velocity",
                               "Distance > 5 mph", "Distance > 12 mph",
                               "Distance > 16 mph", "Efforts > 5 mph",
                               "Efforts > 12 mph", "Efforts > 16 mph"
                           ])

    velo_df["Date"] = pd.to_datetime(velo_df["Date"], format='%Y-%m-%d')
    velo_df["Total Duration"] = pd.to_datetime(velo_df["Total Duration"],
                                               format='%H:%M:%S')

    velo_df["Total Distance"] = pd.to_numeric(velo_df["Total Distance"])
    velo_df["Active Distance"] = pd.to_numeric(velo_df["Active Distance"])
    velo_df["Maximum Velocity"] = pd.to_numeric(velo_df["Maximum Velocity"])

    velo_df["Distance > 5 mph"] = pd.to_numeric(velo_df["Distance > 5 mph"])
    velo_df["Distance > 12 mph"] = pd.to_numeric(velo_df["Distance > 12 mph"])
    velo_df["Distance > 16 mph"] = pd.to_numeric(velo_df["Distance > 16 mph"])

    velo_df["Efforts > 5 mph"] = pd.to_numeric(velo_df["Efforts > 5 mph"])
    velo_df["Efforts > 12 mph"] = pd.to_numeric(velo_df["Efforts > 12 mph"])
    velo_df["Efforts > 16 mph"] = pd.to_numeric(velo_df["Efforts > 16 mph"])

    velo_df = velo_df[velo_df['Period Name'] == 'Session']
    velo_df = velo_df[velo_df['Period Number'] == 0]
    velo_df = velo_df.drop(columns=[
        'Period Name', 'Period Number', 'Maximum Velocity', 'Distance > 5 mph'
    ],
                           axis=1)

    velo_df = velo_df.groupby(by=['Player Name', 'Date'], as_index=False).sum()

    # Fetch table from postgres -> Schedule Table
    schedule_df = fetch_table(select_schedule_table)
    schedule_df = pd.DataFrame(schedule_df,
                               columns=[
                                   "Date", "Day", "Enemy", "Day_Type",
                                   "Day_ID", "Attire", "MD_Minus"
                               ])

    schedule_df["Date"] = pd.to_datetime(schedule_df["Date"],
                                         format='%Y-%m-%d')

    # Merge all dataframes into one massive dataframe!!!
    merged_df = schedule_df.merge(games_df, how='outer', on=['Date', 'Enemy'])
    merged_df = merged_df.merge(velo_df, how='left', on=['Date'])
    merged_df = merged_df.merge(roster_df, how='left', on=['Player Name'])
    merged_df = merged_df.merge(throws_df,
                                how='left',
                                on=['Date', 'Player Name'])
    merged_df = merged_df.merge(pl_df, how='left', on=['Date', 'Player Name'])
    merged_df = merged_df.merge(met_power_df,
                                how='left',
                                on=['Date', 'Player Name'])
    merged_df = merged_df.merge(contacts_df,
                                how='left',
                                on=['Date', 'Player Name'])
    merged_df = merged_df.merge(ima_df, how='left', on=['Date', 'Player Name'])
    merged_df = merged_df.merge(accel_df,
                                how='left',
                                on=['Date', 'Player Name'])

    # Rolling-average method: time-indexed so rolling('7D') works on dates.
    merged_df = merged_df.set_index(merged_df['Date'])
    merged_df = merged_df.sort_index()
    merged_df = merged_df.drop(
        columns=['Date', 'Points_For', 'Points_Against', 'Game_Num_Year'],
        axis=1)
    rolling_7D_df = merged_df.groupby(by=['Player Name']).rolling('7D').sum()
    # Chronic load: 28-day sum scaled to a weekly equivalent.
    rolling_28D_df = merged_df.groupby(
        by=['Player Name']).rolling('28D').sum() / 4

    rolling_df = rolling_7D_df.merge(rolling_28D_df,
                                     how='left',
                                     on=['Player Name', 'Date'],
                                     suffixes=("_ATL", "_CTL"))

    # Acute:chronic workload ratios (ATL / CTL) per metric.
    rolling_df['TD ACWR'] = rolling_df['Total Distance_ATL'] / rolling_df[
        'Total Distance_CTL']
    rolling_df['AD ACWR'] = rolling_df['Active Distance_ATL'] / rolling_df[
        'Active Distance_CTL']
    rolling_df['D>12 ACWR'] = rolling_df['Distance > 12 mph_ATL'] / rolling_df[
        'Distance > 12 mph_CTL']
    rolling_df['D>16 ACWR'] = rolling_df['Distance > 16 mph_ATL'] / rolling_df[
        'Distance > 16 mph_CTL']
    rolling_df['E>12 ACWR'] = rolling_df['Efforts > 12 mph_ATL'] / rolling_df[
        'Efforts > 12 mph_CTL']
    rolling_df['E>16 ACWR'] = rolling_df['Efforts > 16 mph_ATL'] / rolling_df[
        'Efforts > 16 mph_CTL']
    rolling_df['TPL ACWR'] = rolling_df['Total Player Load_ATL'] / rolling_df[
        'Total Player Load_CTL']
    rolling_df['APL ACWR'] = rolling_df['Active Player Load_ATL'] / rolling_df[
        'Active Player Load_CTL']
    rolling_df['Hard Accel ACWR'] = rolling_df[
        'High Accelerations_ATL'] / rolling_df['High Accelerations_CTL']
    rolling_df['Hard Decel ACWR'] = rolling_df[
        'High Decelerations_ATL'] / rolling_df['High Decelerations_CTL']
    rolling_df['IMA Accel High ACWR'] = rolling_df[
        'IMA Accel High_ATL'] / rolling_df['IMA Accel High_CTL']
    rolling_df['IMA Decel High ACWR'] = rolling_df[
        'IMA Decel High_ATL'] / rolling_df['IMA Decel High_CTL']
    rolling_df['Expl Dist ACWR'] = rolling_df[
        'Explosive Distance_ATL'] / rolling_df['Explosive Distance_CTL']
    rolling_df['High IMA ACWR'] = rolling_df['High IMAs_ATL'] / rolling_df[
        'High IMAs_CTL']
    rolling_df['Total Contact Load ACWR'] = rolling_df[
        'Total Contact Load_ATL'] / rolling_df['Total Contact Load_CTL']
    rolling_df['Active Contact Load ACWR'] = rolling_df[
        'Active Contact Load_ATL'] / rolling_df['Active Contact Load_CTL']
    rolling_df['Total Contacts ACWR'] = rolling_df[
        'Total Contacts_ATL'] / rolling_df['Total Contacts_CTL']
    rolling_df['Active Contacts ACWR'] = rolling_df[
        'Active Contacts_ATL'] / rolling_df['Active Contacts_CTL']

    # Exponentially weighted moving averages.
    # NOTE(review): the "_28D" frame uses span=35, not 28 — confirm whether
    # that is intentional before renaming or changing it.
    ewma_7D_df = merged_df.groupby(by=['Player Name']).ewm(
        span=7, adjust=False).mean()
    ewma_28D_df = merged_df.groupby(by=['Player Name']).ewm(
        span=35, adjust=False).mean()
    ewma_df = ewma_7D_df.merge(ewma_28D_df,
                               how='left',
                               on=['Player Name', 'Date'],
                               suffixes=("_EW_ATL", "_EW_CTL"))

    ewma_df['TD EWMA'] = ewma_df['Total Distance_EW_ATL'] / ewma_df[
        'Total Distance_EW_CTL']
    ewma_df['AD EWMA'] = ewma_df['Active Distance_EW_ATL'] / ewma_df[
        'Active Distance_EW_CTL']
    ewma_df['D>12 EWMA'] = ewma_df['Distance > 12 mph_EW_ATL'] / ewma_df[
        'Distance > 12 mph_EW_CTL']
    ewma_df['D>16 EWMA'] = ewma_df['Distance > 16 mph_EW_ATL'] / ewma_df[
        'Distance > 16 mph_EW_CTL']
    ewma_df['E>12 EWMA'] = ewma_df['Efforts > 12 mph_EW_ATL'] / ewma_df[
        'Efforts > 12 mph_EW_CTL']
    ewma_df['E>16 EWMA'] = ewma_df['Efforts > 16 mph_EW_ATL'] / ewma_df[
        'Efforts > 16 mph_EW_CTL']
    ewma_df['TPL EWMA'] = ewma_df['Total Player Load_EW_ATL'] / ewma_df[
        'Total Player Load_EW_CTL']
    ewma_df['APL EWMA'] = ewma_df['Active Player Load_EW_ATL'] / ewma_df[
        'Active Player Load_EW_CTL']
    ewma_df['Hard Accel EWMA'] = ewma_df[
        'High Accelerations_EW_ATL'] / ewma_df['High Accelerations_EW_CTL']
    ewma_df['Hard Decel EWMA'] = ewma_df[
        'High Decelerations_EW_ATL'] / ewma_df['High Decelerations_EW_CTL']
    ewma_df['IMA Accel High EWMA'] = ewma_df[
        'IMA Accel High_EW_ATL'] / ewma_df['IMA Accel High_EW_CTL']
    ewma_df['IMA Decel High EWMA'] = ewma_df[
        'IMA Decel High_EW_ATL'] / ewma_df['IMA Decel High_EW_CTL']
    ewma_df['Expl Dist EWMA'] = ewma_df['Explosive Distance_EW_ATL'] / ewma_df[
        'Explosive Distance_EW_CTL']
    ewma_df['High IMA EWMA'] = ewma_df['High IMAs_EW_ATL'] / ewma_df[
        'High IMAs_EW_CTL']
    ewma_df['Total Contact Load EWMA'] = ewma_df[
        'Total Contact Load_EW_ATL'] / ewma_df['Total Contact Load_EW_CTL']
    ewma_df['Active Contact Load EWMA'] = ewma_df[
        'Active Contact Load_EW_ATL'] / ewma_df['Active Contact Load_EW_CTL']
    ewma_df['Total Contacts EWMA'] = ewma_df[
        'Total Contacts_EW_ATL'] / ewma_df['Total Contacts_EW_CTL']
    ewma_df['Active Contacts EWMA'] = ewma_df[
        'Active Contacts_EW_ATL'] / ewma_df['Active Contacts_EW_CTL']

    pd.set_option('display.max_columns', None)
    pd.set_option('display.max_rows', None)

    # Merge ACWR and EWMA tables together
    remerged_df = rolling_df.merge(ewma_df,
                                   how='left',
                                   on=['Player Name', 'Date'])

    # Drop all the EWMA columns for ATL and CTL (only the ratios are kept).
    remerged_df = remerged_df.drop(columns=[
        'Total Distance_EW_ATL', 'Active Distance_EW_ATL',
        'Distance > 12 mph_EW_ATL', 'Distance > 16 mph_EW_ATL',
        'Efforts > 5 mph_EW_ATL', 'Efforts > 12 mph_EW_ATL',
        'Efforts > 16 mph_EW_ATL', 'Total Throw Count_EW_ATL',
        'Total Throw Load_EW_ATL', 'Hard Throws_EW_ATL',
        'Hard Throw Load_EW_ATL', 'Active Throws_EW_ATL',
        'Active Throw Load_EW_ATL', 'Total Player Load_EW_ATL',
        'Active Player Load_EW_ATL', 'Medium Player Load_EW_ATL',
        'High Player Load_EW_ATL',
        'Total Metabolic Power Average Power_EW_ATL',
        'Active Metabolic Power Average Power_EW_ATL',
        'High Metabolic Power Average Power_EW_ATL',
        'Total Metabolic Power Distance_EW_ATL',
        'Active Metabolic Power Distance_EW_ATL', 'Explosive Distance_EW_ATL',
        'High Metabolic Power Distance_EW_ATL',
        'Total Metabolic Power Efforts_EW_ATL',
        'Active Metabolic Power Efforts_EW_ATL',
        'High Metabolic Power Efforts_EW_ATL', 'Total Contact Load_EW_ATL',
        'Total Contacts_EW_ATL', 'Light Contact Load_EW_ATL',
        'Medium Contact Load_EW_ATL', 'Hard Contact Load_EW_ATL',
        'Light Contacts_EW_ATL', 'Medium Contacts_EW_ATL',
        'Hard Contacts_EW_ATL', 'Active Contact Load_EW_ATL',
        'Active Contacts_EW_ATL', 'IMA CoD Right High_EW_ATL',
        'IMA CoD Left High_EW_ATL', 'IMA Accel High_EW_ATL',
        'IMA Decel High_EW_ATL', 'IMA Jump Count High Band_EW_ATL',
        'High IMAs_EW_ATL', 'Total Jumps_EW_ATL', 'Hard CoD_EW_ATL',
        'Medium Decelerations_EW_ATL', 'Medium Accelerations_EW_ATL',
        'High Accelerations_EW_ATL', 'High Decelerations_EW_ATL',
        'Total Distance_EW_CTL', 'Active Distance_EW_CTL',
        'Distance > 12 mph_EW_CTL', 'Distance > 16 mph_EW_CTL',
        'Efforts > 5 mph_EW_CTL', 'Efforts > 12 mph_EW_CTL',
        'Efforts > 16 mph_EW_CTL', 'Total Throw Count_EW_CTL',
        'Total Throw Load_EW_CTL', 'Hard Throws_EW_CTL',
        'Hard Throw Load_EW_CTL', 'Active Throws_EW_CTL',
        'Active Throw Load_EW_CTL', 'Total Player Load_EW_CTL',
        'Active Player Load_EW_CTL', 'Medium Player Load_EW_CTL',
        'High Player Load_EW_CTL',
        'Total Metabolic Power Average Power_EW_CTL',
        'Active Metabolic Power Average Power_EW_CTL',
        'High Metabolic Power Average Power_EW_CTL',
        'Total Metabolic Power Distance_EW_CTL',
        'Active Metabolic Power Distance_EW_CTL', 'Explosive Distance_EW_CTL',
        'High Metabolic Power Distance_EW_CTL',
        'Total Metabolic Power Efforts_EW_CTL',
        'Active Metabolic Power Efforts_EW_CTL',
        'High Metabolic Power Efforts_EW_CTL', 'Total Contact Load_EW_CTL',
        'Total Contacts_EW_CTL', 'Light Contact Load_EW_CTL',
        'Medium Contact Load_EW_CTL', 'Hard Contact Load_EW_CTL',
        'Light Contacts_EW_CTL', 'Medium Contacts_EW_CTL',
        'Hard Contacts_EW_CTL', 'Active Contact Load_EW_CTL',
        'Active Contacts_EW_CTL', 'IMA CoD Right High_EW_CTL',
        'IMA CoD Left High_EW_CTL', 'IMA Accel High_EW_CTL',
        'IMA Decel High_EW_CTL', 'IMA Jump Count High Band_EW_CTL',
        'High IMAs_EW_CTL', 'Total Jumps_EW_CTL', 'Hard CoD_EW_CTL',
        'Medium Decelerations_EW_CTL', 'Medium Accelerations_EW_CTL',
        'High Accelerations_EW_CTL', 'High Decelerations_EW_CTL'
    ],
                                   axis=1)

    # Inf from division by a zero chronic load is pinned to a neutral ratio.
    remerged_df = remerged_df.replace(to_replace=[np.inf, -np.inf], value=1)

    # Get indexes for rolling averages as columns
    remerged_df = remerged_df.reset_index()

    if not os.path.exists(EWMA_LIST):
        remerged_df.to_csv(EWMA_LIST, index=False, mode='w')
        # Set the file permissions so that anyone can use it.
        # Bug fix: the original used boolean `or`, which evaluates to the
        # first nonzero operand (S_IRWXO == 0o007) — only "others" got rwx.
        # Bitwise OR gives the intended 0o777.
        oschmod.set_mode(EWMA_LIST,
                         stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
Esempio n. 23
0
def raw_met_power_cleaner(met_power_directory):
    """Clean raw metabolic-power CSV exports and write one combined CSV.

    Reads every file in ``met_power_directory`` (exports with 9 header rows),
    parses the date/duration columns, collapses the six per-band metabolic
    power columns into Total/Active/High/Explosive aggregates, concatenates
    all cleaned frames, and writes the result to ``MET_POWER_LIST`` if that
    file does not already exist (then opens its permissions to rwx for
    user, group, and others).

    Parameters
    ----------
    met_power_directory : str
        Directory containing the raw metabolic-power CSV files.
    """
    clean_list = []
    for file in os.listdir(met_power_directory):
        # BUG FIX: os.listdir returns bare file names; join with the
        # directory so reading works regardless of the current working dir.
        dataset = pd.read_csv(os.path.join(met_power_directory, file),
                              header=9)

        # Change data types to correct ones.
        dataset['Date'] = pd.to_datetime(dataset['Date'], format='%m/%d/%Y')
        dataset['Total Duration'] = pd.to_datetime(dataset['Total Duration'],
                                                   format='%H:%M:%S')
        duration_cols = [
            'Metabolic Power Band {} Total Duration'.format(b)
            for b in range(1, 7)
        ]
        for col in duration_cols:
            dataset[col] = pd.to_datetime(dataset[col], format='%H:%M:%S')

        # Remove last (trailing/empty) column of the export.
        dataset = dataset.iloc[:, :-1]

        # Collapse the six average-power bands into aggregates.
        # skipna=False keeps the original `+`-chain semantics: any NaN in a
        # band propagates to the aggregate instead of being treated as 0.
        avg_cols = [
            'Metabolic Power Band {} Average Power'.format(b)
            for b in range(1, 7)
        ]
        dataset['Total Metabolic Power Average Power'] = dataset[
            avg_cols].sum(axis=1, skipna=False)
        dataset['Active Metabolic Power Average Power'] = dataset[
            avg_cols[1:]].sum(axis=1, skipna=False)  # bands 2-6
        dataset['High Metabolic Power Average Power'] = dataset[
            avg_cols[4:]].sum(axis=1, skipna=False)  # bands 5-6
        dataset = dataset.drop(columns=avg_cols)

        # Collapse the six total-distance bands into aggregates.
        dist_cols = [
            'Metabolic Power Band {} Total Distance'.format(b)
            for b in range(1, 7)
        ]
        dataset['Total Metabolic Power Distance'] = dataset[dist_cols].sum(
            axis=1, skipna=False)
        dataset['Active Metabolic Power Distance'] = dataset[
            dist_cols[1:]].sum(axis=1, skipna=False)  # bands 2-6
        dataset['Explosive Distance'] = dataset[dist_cols[2:]].sum(
            axis=1, skipna=False)  # bands 3-6
        dataset['High Metabolic Power Distance'] = dataset[
            dist_cols[4:]].sum(axis=1, skipna=False)  # bands 5-6
        dataset = dataset.drop(columns=dist_cols)

        # The per-band duration columns are not needed downstream.
        dataset = dataset.drop(columns=duration_cols)

        # Collapse the effort bands (bands 2-6 only; band 1 has no efforts).
        effort_cols = [
            'Metabolic Power Band {} Total # Efforts'.format(b)
            for b in range(2, 7)
        ]
        dataset['Total Metabolic Power Efforts'] = dataset[effort_cols].sum(
            axis=1, skipna=False)
        dataset['Active Metabolic Power Efforts'] = dataset[
            effort_cols[1:]].sum(axis=1, skipna=False)  # bands 3-6
        dataset['High Metabolic Power Efforts'] = dataset[
            effort_cols[3:]].sum(axis=1, skipna=False)  # bands 5-6
        dataset = dataset.drop(columns=effort_cols)

        clean_list.append(dataset)
        print(file)

    # Concatenate all raw files into a single .csv file with clean data.
    big_clean_list = pd.concat(clean_list)

    if not os.path.exists(MET_POWER_LIST):
        big_clean_list.to_csv(MET_POWER_LIST, index=False, mode='w')
        # BUG FIX: the original used boolean `or`, which evaluates to the
        # first truthy operand (S_IRWXO), granting rwx to *others only*.
        # Permission bits must be combined with bitwise OR (rwx for all).
        oschmod.set_mode(MET_POWER_LIST,
                         stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
Esempio n. 24
0
def all_time_speed():
    """Build per-player all-time max-speed data and write it to a CSV.

    Fetches the roster and velocity tables from Postgres, reduces the
    velocity data (Period Number 0 only) to one max row per player per
    date, joins each daily max against the player's all-time max, computes
    '% All-Time Max Velocity', and writes the result to ``MAX_SPEED_LIST``
    if that file does not already exist (then opens its permissions to rwx
    for user, group, and others).
    """
    # Fetch table from postgres -> Roster Table
    roster_df = fetch_table(select_clean_roster_table)
    roster_df = pd.DataFrame(roster_df,
                             columns=["Player Name", "Position", "Group"])

    # Fetch table from postgres -> Velocity Table
    velo_df = fetch_table(select_clean_velo_table)
    velo_df = pd.DataFrame(velo_df,
                           columns=[
                               "Player Name", "Period Name", "Period Number",
                               "Date", "Total Duration", "Total Distance",
                               "Active Distance", "Max Velocity (mph)",
                               "Distance > 5 mph", "Distance > 12 mph",
                               "Distance > 16 mph", "Efforts > 5 mph",
                               "Efforts > 12 mph", "Efforts > 16 mph"
                           ])
    velo_df["Date"] = pd.to_datetime(velo_df["Date"], format='%Y-%m-%d')
    velo_df["Total Duration"] = pd.to_datetime(velo_df["Total Duration"],
                                               format='%H:%M:%S')

    # Coerce every metric column fetched from Postgres to numeric dtype.
    numeric_cols = [
        "Total Distance", "Active Distance", "Max Velocity (mph)",
        "Distance > 5 mph", "Distance > 12 mph", "Distance > 16 mph",
        "Efforts > 5 mph", "Efforts > 12 mph", "Efforts > 16 mph"
    ]
    for col in numeric_cols:
        velo_df[col] = pd.to_numeric(velo_df[col])

    # Attach roster info (Position/Group) to each velocity row.
    merged_df = velo_df.merge(roster_df, how='left', on=['Player Name'])

    # Keep only max speed, player name, period name/number, and date.
    merged_df = merged_df.drop(columns=numeric_cols[:2] + numeric_cols[3:] +
                               ["Position", "Group"])

    # Period Number 0 is the whole-session row; take the max per date.
    merged_df = merged_df[merged_df['Period Number'] == 0]
    merged_df = merged_df.groupby(by=['Player Name', 'Date'],
                                  as_index=False).max()

    merged_df = merged_df.drop(columns=["Period Name", "Period Number"])

    # All-time max per player (Date dropped - it belongs to the daily rows).
    max_speed_df = merged_df.groupby(by=['Player Name'], as_index=False).max()
    max_speed_df = max_speed_df.drop(columns=["Date"])

    # Join daily maxes against the all-time max.
    remerged_df = merged_df.merge(max_speed_df, how='left', on=['Player Name'])

    # The merge suffixes _x/_y the duplicated velocity column; rename both.
    remerged_df = remerged_df.rename(
        columns={
            "Max Velocity (mph)_x": "Daily Max Velocity (mph)",
            "Max Velocity (mph)_y": "All-Time Max Velocity (mph)"
        })

    remerged_df['% All-Time Max Velocity'] = (
        remerged_df["Daily Max Velocity (mph)"] /
        remerged_df["All-Time Max Velocity (mph)"]) * 100

    if not os.path.exists(MAX_SPEED_LIST):
        remerged_df.to_csv(MAX_SPEED_LIST, index=False, mode='w')
        # BUG FIX: the original used boolean `or`, which evaluates to the
        # first truthy operand (S_IRWXO), granting rwx to *others only*.
        # Permission bits must be combined with bitwise OR (rwx for all).
        oschmod.set_mode(MAX_SPEED_LIST,
                         stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
Esempio n. 25
0
def raw_ima_cleaner(ima_directory):
    """Clean raw IMA CSV exports and write one combined CSV.

    Reads every file in ``ima_directory`` (exports with 9 header rows),
    parses date/duration columns, derives High IMAs, IMA Explosive %,
    Total Jumps, and Hard CoD from the raw IMA band columns, drops the raw
    band columns, concatenates all cleaned frames, and writes the result
    to ``IMA_LIST`` if that file does not already exist (then opens its
    permissions to rwx for user, group, and others).

    Parameters
    ----------
    ima_directory : str
        Directory containing the raw IMA CSV files.
    """
    clean_list = []
    for file in os.listdir(ima_directory):
        # BUG FIX: os.listdir returns bare file names; join with the
        # directory so reading works regardless of the current working dir.
        dataset = pd.read_csv(os.path.join(ima_directory, file), header=9)

        # Change data types to correct ones.
        dataset['Date'] = pd.to_datetime(dataset['Date'], format='%m/%d/%Y')
        dataset['Total Duration'] = pd.to_datetime(dataset['Total Duration'],
                                                   format='%H:%M:%S')

        # Remove last (trailing/empty) column of the export.
        dataset = dataset.iloc[:, :-1]

        # Sum the four movement types within the medium and high IMA bands.
        # (The original also built a 'Low IMAs' total but dropped it again
        # without using it, so it is intentionally omitted here.)
        dataset["Medium IMAs"] = (dataset["IMA CoD Right Medium"] +
                                  dataset["IMA CoD Left Medium"] +
                                  dataset["IMA Accel Medium"] +
                                  dataset["IMA Decel Medium"])
        dataset["High IMAs"] = (dataset["IMA CoD Right High"] +
                                dataset["IMA CoD Left High"] +
                                dataset["IMA Accel High"] +
                                dataset["IMA Decel High"])

        # Share of medium+high IMAs that were high intensity.
        dataset['IMA Explosive %'] = (
            (dataset["High IMAs"]) /
            (dataset["High IMAs"] + dataset["Medium IMAs"])) * 100

        # Total jumps across all jump bands.
        dataset['Total Jumps'] = (dataset['IMA Jump Count Low Band'] +
                                  dataset['IMA Jump Count Med Band'] +
                                  dataset['IMA Jump Count High Band'])

        # Hard change-of-direction = high-band CoD in either direction.
        dataset['Hard CoD'] = (dataset['IMA CoD Right High'] +
                               dataset['IMA CoD Left High'])

        # Drop the raw band columns and intermediates no longer needed.
        dataset = dataset.drop(columns=[
            'IMA CoD Right Low', 'IMA CoD Right Medium', 'IMA CoD Left Low',
            'IMA CoD Left Medium', 'IMA Accel Low', 'IMA Accel Medium',
            'IMA Decel Low', 'IMA Decel Medium', 'IMA Jump Count Low Band',
            'IMA Jump Count Med Band', 'Medium IMAs'
        ])

        clean_list.append(dataset)
        print(file)

    # Concatenate all raw files into a single .csv file with clean data.
    big_clean_list = pd.concat(clean_list)

    if not os.path.exists(IMA_LIST):
        big_clean_list.to_csv(IMA_LIST, index=False, mode='w')
        # BUG FIX: the original used boolean `or`, which evaluates to the
        # first truthy operand (S_IRWXO), granting rwx to *others only*.
        # Permission bits must be combined with bitwise OR (rwx for all).
        oschmod.set_mode(IMA_LIST,
                         stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
Esempio n. 26
0
def per_minute():
    def total_seconds(timedelta):
        """Convert timedeltas to seconds
        In Python, time differences can take many formats. This function can take
        timedeltas in any format and return the corresponding number of seconds, as
        a float.
        Beware! Representing timedeltas as floats is not as precise as representing
        them as a timedelta object in datetime, numpy, or pandas.
        Parameters
        ----------
        timedelta : various
            Time delta from python's datetime library or from numpy or pandas. If
            it is from numpy, it can be an ndarray with dtype datetime64. If it is
            from pandas, it can also be a Series of datetimes. However, this
            function cannot operate on entire pandas DataFrames. To convert a
            DataFrame, do df.apply(to_seconds)
        Returns
        -------
        seconds : various
            Returns the total seconds in the input timedelta object(s) as float.
            If the input is a numpy ndarray or pandas Series, the output is the
            same, but with a float datatype.
        """
        try:
            seconds = timedelta.total_seconds()
        except AttributeError:  # no method total_seconds
            one_second = np.timedelta64(1000000000, 'ns')
            # use nanoseconds to get highest possible precision in output
            seconds = timedelta / one_second
        return seconds

    # Fetch table from postgres -> Games Table
    games_df = fetch_table(select_games_table)
    games_df = pd.DataFrame(games_df,
                            columns=[
                                "Date", "Game_Num_Year", "Enemy", "Enemy_Abr",
                                "Location", "Result", "Points_For",
                                "Points_Against", "Game_Type"
                            ])
    games_df["Date"] = pd.to_datetime(games_df["Date"], format='%Y-%m-%d')

    # Fetch table from postgres -> Roster Table
    roster_df = fetch_table(select_clean_roster_table)
    roster_df = pd.DataFrame(roster_df,
                             columns=["Player Name", "Position", "Group"])

    # Fetch table from postgres -> Accel Table
    accel_df = fetch_table(select_clean_accel_table)
    accel_df = pd.DataFrame(accel_df,
                            columns=[
                                "Player Name", "Period Name", "Period Number",
                                "Date", "Total Duration",
                                "Medium Decelerations", "Medium Accelerations",
                                "High Accelerations", "High Decelerations"
                            ])

    accel_df["Date"] = pd.to_datetime(accel_df["Date"], format='%Y-%m-%d')
    accel_df["Total Duration"] = pd.to_datetime(accel_df["Total Duration"],
                                                format='%H:%M:%S')

    # Fetch table from postgres -> IMA Table
    ima_df = fetch_table(select_clean_ima_table)
    ima_df = pd.DataFrame(ima_df,
                          columns=[
                              "Player Name", "Period Name", "Period Number",
                              "Date", "Total Duration", "IMA CoD Right High",
                              "IMA CoD Left High", "IMA Accel High",
                              "IMA Decel High", "IMA Jump Count High Band",
                              "High IMAs", "IMA Explosive %", "Total Jumps",
                              "Hard CoD"
                          ])

    ima_df["Date"] = pd.to_datetime(ima_df["Date"], format='%Y-%m-%d')
    ima_df["Total Duration"] = pd.to_datetime(ima_df["Total Duration"],
                                              format='%H:%M:%S')
    ima_df["IMA Explosive %"] = pd.to_numeric(ima_df["IMA Explosive %"])

    # Fetch table from postgres -> Linemen Contacts Table
    contacts_df = fetch_table(select_clean_line_contacts_table)
    contacts_df = pd.DataFrame(
        contacts_df,
        columns=[
            "Player Name", "Period Name", "Period Number", "Date",
            "Total Duration", "Total Contact Load", "Total Contacts",
            "Light Contact Load", "Medium Contact Load", "Hard Contact Load",
            "Light Contacts", "Medium Contacts", "Hard Contacts",
            "Hard Contact Load %", "Hard Contacts %", "Active Contact Load",
            "Active Contacts"
        ])

    contacts_df["Date"] = pd.to_datetime(contacts_df["Date"],
                                         format='%Y-%m-%d')
    contacts_df["Total Duration"] = pd.to_datetime(
        contacts_df["Total Duration"], format='%H:%M:%S')
    contacts_df["Total Contact Load"] = pd.to_numeric(
        contacts_df["Total Contact Load"])
    contacts_df["Light Contact Load"] = pd.to_numeric(
        contacts_df["Light Contact Load"])
    contacts_df["Medium Contact Load"] = pd.to_numeric(
        contacts_df["Medium Contact Load"])
    contacts_df["Hard Contact Load"] = pd.to_numeric(
        contacts_df["Hard Contact Load"])
    contacts_df["Active Contact Load"] = pd.to_numeric(
        contacts_df["Active Contact Load"])
    contacts_df["Hard Contact Load %"] = pd.to_numeric(
        contacts_df["Hard Contact Load %"])
    contacts_df["Hard Contacts %"] = pd.to_numeric(
        contacts_df["Hard Contact Load %"])

    # Fetch table from postgres -> Metabolic Power Table
    met_power_df = fetch_table(select_clean_met_power_table)
    met_power_df = pd.DataFrame(
        met_power_df,
        columns=[
            "Player Name", "Period Name", "Period Number", "Date",
            "Total Duration", "Meta Energy (Cal/kg)",
            "Total Metabolic Power Average Power",
            "Active Metabolic Power Average Power",
            "High Metabolic Power Average Power",
            "Total Metabolic Power Distance",
            "Active Metabolic Power Distance", "Explosive Distance",
            "High Metabolic Power Distance", "Total Metabolic Power Efforts",
            "Active Metabolic Power Efforts", "High Metabolic Power Efforts"
        ])

    met_power_df["Date"] = pd.to_datetime(met_power_df["Date"],
                                          format='%Y-%m-%d')
    met_power_df["Total Duration"] = pd.to_datetime(
        met_power_df["Total Duration"], format='%H:%M:%S')
    met_power_df["Meta Energy (Cal/kg)"] = pd.to_numeric(
        met_power_df["Meta Energy (Cal/kg)"])
    met_power_df["Total Metabolic Power Average Power"] = pd.to_numeric(
        met_power_df["Total Metabolic Power Average Power"])
    met_power_df["Active Metabolic Power Average Power"] = pd.to_numeric(
        met_power_df["Active Metabolic Power Average Power"])
    met_power_df["High Metabolic Power Average Power"] = pd.to_numeric(
        met_power_df["High Metabolic Power Average Power"])
    met_power_df["Total Metabolic Power Distance"] = pd.to_numeric(
        met_power_df["Total Metabolic Power Distance"])
    met_power_df["Active Metabolic Power Distance"] = pd.to_numeric(
        met_power_df["Active Metabolic Power Distance"])
    met_power_df["Explosive Distance"] = pd.to_numeric(
        met_power_df["Explosive Distance"])
    met_power_df["High Metabolic Power Distance"] = pd.to_numeric(
        met_power_df["High Metabolic Power Distance"])
    met_power_df["Total Metabolic Power Efforts"] = pd.to_numeric(
        met_power_df["Total Metabolic Power Efforts"])
    met_power_df["Active Metabolic Power Efforts"] = pd.to_numeric(
        met_power_df["Active Metabolic Power Efforts"])
    met_power_df["High Metabolic Power Efforts"] = pd.to_numeric(
        met_power_df["High Metabolic Power Efforts"])

    # Fetch table from postgres -> Player Load Table
    pl_df = fetch_table(select_clean_pl_table)
    pl_df = pd.DataFrame(pl_df,
                         columns=[
                             "Player Name", "Period Name", "Period Number",
                             "Date", "Total Duration", "Total Player Load",
                             "Active Player Load", "Medium Player Load",
                             "High Player Load"
                         ])

    pl_df["Date"] = pd.to_datetime(pl_df["Date"], format='%Y-%m-%d')
    pl_df["Total Duration"] = pd.to_datetime(pl_df["Total Duration"],
                                             format='%H:%M:%S')
    pl_df["Total Player Load"] = pd.to_numeric(pl_df["Total Player Load"])
    pl_df["Active Player Load"] = pd.to_numeric(pl_df["Active Player Load"])
    pl_df["Medium Player Load"] = pd.to_numeric(pl_df["Medium Player Load"])
    pl_df["High Player Load"] = pd.to_numeric(pl_df["High Player Load"])

    # Fetch table from postgres -> Symmetry Table
    symm_df = fetch_table(select_clean_symm_table)
    symm_df = pd.DataFrame(symm_df,
                           columns=[
                               "Player Name", "Period Name", "Period Number",
                               "Date", "Footstrikes", "Running Deviation",
                               "Running Imbalance", "Running Series Count"
                           ])

    symm_df["Date"] = pd.to_datetime(symm_df["Date"], format='%Y-%m-%d')
    symm_df["Footstrikes"] = pd.to_numeric(symm_df["Footstrikes"])
    symm_df["Running Deviation"] = pd.to_numeric(symm_df["Running Deviation"])
    symm_df["Running Imbalance"] = pd.to_numeric(symm_df["Running Imbalance"])
    symm_df["Running Series Count"] = pd.to_numeric(
        symm_df["Running Series Count"])

    # Fetch table from postgres -> Throws Table
    throws_df = fetch_table(select_clean_throws_table)
    throws_df = pd.DataFrame(throws_df,
                             columns=[
                                 "Player Name", "Period Name", "Period Number",
                                 "Date", "Total Duration", "Total Throw Count",
                                 "Total Throw Load", "Hard Throws",
                                 "Hard Throw Load", "Active Throws",
                                 "Active Throw Load", "Hard Throw %",
                                 "Hard Throw Load %"
                             ])

    throws_df["Date"] = pd.to_datetime(throws_df["Date"], format='%Y-%m-%d')
    throws_df["Total Duration"] = pd.to_datetime(throws_df["Total Duration"],
                                                 format='%H:%M:%S')
    throws_df["Total Throw Count"] = pd.to_numeric(
        throws_df["Total Throw Count"])
    throws_df["Total Throw Load"] = pd.to_numeric(
        throws_df["Total Throw Load"])
    throws_df["Hard Throws"] = pd.to_numeric(throws_df["Hard Throws"])
    throws_df["Hard Throw Load"] = pd.to_numeric(throws_df["Hard Throw Load"])
    throws_df["Active Throws"] = pd.to_numeric(throws_df["Active Throws"])
    throws_df["Active Throw Load"] = pd.to_numeric(
        throws_df["Active Throw Load"])
    throws_df["Hard Throw %"] = pd.to_numeric(throws_df["Hard Throw %"])
    throws_df["Hard Throw Load %"] = pd.to_numeric(
        throws_df["Hard Throw Load %"])

    # Fetch table from postgres -> Velocity Table
    velo_df = fetch_table(select_clean_velo_table)
    velo_df = pd.DataFrame(velo_df,
                           columns=[
                               "Player Name", "Period Name", "Period Number",
                               "Date", "Total Duration", "Total Distance",
                               "Active Distance", "Maximum Velocity",
                               "Distance > 5 mph", "Distance > 12 mph",
                               "Distance > 16 mph", "Efforts > 5 mph",
                               "Efforts > 12 mph", "Efforts > 16 mph"
                           ])

    velo_df["Date"] = pd.to_datetime(velo_df["Date"], format='%Y-%m-%d')
    velo_df["Total Duration"] = pd.to_datetime(velo_df["Total Duration"],
                                               format='%H:%M:%S')

    velo_df["Total Distance"] = pd.to_numeric(velo_df["Total Distance"])
    velo_df["Active Distance"] = pd.to_numeric(velo_df["Active Distance"])
    velo_df["Maximum Velocity"] = pd.to_numeric(velo_df["Maximum Velocity"])

    velo_df["Distance > 5 mph"] = pd.to_numeric(velo_df["Distance > 5 mph"])
    velo_df["Distance > 12 mph"] = pd.to_numeric(velo_df["Distance > 12 mph"])
    velo_df["Distance > 16 mph"] = pd.to_numeric(velo_df["Distance > 16 mph"])

    velo_df["Efforts > 5 mph"] = pd.to_numeric(velo_df["Efforts > 5 mph"])
    velo_df["Efforts > 12 mph"] = pd.to_numeric(velo_df["Efforts > 12 mph"])
    velo_df["Efforts > 16 mph"] = pd.to_numeric(velo_df["Efforts > 16 mph"])

    # Fetch table from postgres -> All-Time Max Speed Table
    all_time_df = fetch_table(select_clean_all_max_speed_table)
    all_time_df = pd.DataFrame(all_time_df,
                               columns=[
                                   "Player Name", "Date",
                                   "Daily Max Velocity (mph)",
                                   "All-Time Max Velocity (mph)",
                                   "% All-Time Max Velocity"
                               ])

    all_time_df["Date"] = pd.to_datetime(all_time_df["Date"],
                                         format='%Y-%m-%d')

    all_time_df["Daily Max Velocity (mph)"] = pd.to_numeric(
        all_time_df["Daily Max Velocity (mph)"])
    all_time_df["All-Time Max Velocity (mph)"] = pd.to_numeric(
        all_time_df["All-Time Max Velocity (mph)"])
    all_time_df["% All-Time Max Velocity"] = pd.to_numeric(
        all_time_df["% All-Time Max Velocity"])

    # Fetch table from postgres -> Schedule Table
    schedule_df = fetch_table(select_schedule_table)
    schedule_df = pd.DataFrame(schedule_df,
                               columns=[
                                   "Date", "Day", "Enemy", "Day_Type",
                                   "Day_ID", "Attire", "MD_Minus"
                               ])

    schedule_df["Date"] = pd.to_datetime(schedule_df["Date"],
                                         format='%Y-%m-%d')

    # Fetch table from postgres -> Bodyweight Table
    bw_df = fetch_table(select_bw_table)
    bw_df = pd.DataFrame(
        bw_df, columns=["Player Name", "Bodyweight (lbs)", "Bodyweight (kg)"])

    bw_df["Bodyweight (lbs)"] = pd.to_numeric(bw_df["Bodyweight (lbs)"])
    bw_df["Bodyweight (kg)"] = pd.to_numeric(bw_df["Bodyweight (kg)"])

    # Fetch table from postgres -> Week Table
    week_df = fetch_table(select_week_table)
    week_df = pd.DataFrame(week_df, columns=["Date", "Phase", "Week_Num"])

    week_df["Date"] = pd.to_datetime(week_df["Date"], format='%Y-%m-%d')

    # Merge all dataframes into one massive dataframe!!!
    merged_df = accel_df.merge(ima_df,
                               how='left',
                               on=[
                                   'Player Name', 'Period Name',
                                   'Period Number', 'Date', 'Total Duration'
                               ])
    merged_df = merged_df.merge(contacts_df,
                                how='left',
                                on=[
                                    'Player Name', 'Period Name',
                                    'Period Number', 'Date', 'Total Duration'
                                ])
    merged_df = merged_df.merge(met_power_df,
                                how='left',
                                on=[
                                    'Player Name', 'Period Name',
                                    'Period Number', 'Date', 'Total Duration'
                                ])
    merged_df = merged_df.merge(pl_df,
                                how='left',
                                on=[
                                    'Player Name', 'Period Name',
                                    'Period Number', 'Date', 'Total Duration'
                                ])
    merged_df = merged_df.merge(
        symm_df,
        how='left',
        on=['Player Name', 'Period Name', 'Period Number', 'Date'])
    merged_df = merged_df.merge(throws_df,
                                how='left',
                                on=[
                                    'Player Name', 'Period Name',
                                    'Period Number', 'Date', 'Total Duration'
                                ])
    merged_df = merged_df.merge(velo_df,
                                how='left',
                                on=[
                                    'Player Name', 'Period Name',
                                    'Period Number', 'Date', 'Total Duration'
                                ])
    merged_df = merged_df.merge(roster_df, how='left', on=['Player Name'])
    merged_df = merged_df.merge(games_df, how='left', on=['Date'])
    merged_df = merged_df.merge(all_time_df,
                                how='left',
                                on=['Player Name', 'Date'])
    merged_df = merged_df.merge(schedule_df, how='left', on=['Date'])
    merged_df = merged_df.merge(week_df, how='left', on=['Date'])
    merged_df = merged_df.merge(bw_df, how='left', on=['Player Name'])

    # Rename the enemy column
    merged_df = merged_df.rename(columns={
        "Enemy_x": "Opponent",
        "Enemy_y": "Enemy"
    })

    # Calculate Calories from Meta Energy (Cal/kg)
    merged_df['Calories'] = merged_df['Meta Energy (Cal/kg)'] * merged_df[
        'Bodyweight (kg)']

    # Create Total Duration in minutes
    zero_date = dt.datetime(1900, 1, 1, 0, 0)
    merged_df[
        'Total Duration (mins)'] = merged_df['Total Duration'] - zero_date
    merged_df['Minutes'] = total_seconds(
        merged_df['Total Duration (mins)']) / 60

    # Create a per minute dataframe
    per_minute_df = merged_df

    # Create metrics per minute
    # Accel
    per_minute_df['High Accelerations per Minute'] = per_minute_df[
        'High Accelerations'] / per_minute_df['Minutes']
    per_minute_df['High Decelerations per Minute'] = per_minute_df[
        'High Decelerations'] / per_minute_df['Minutes']
    # IMA
    per_minute_df['High IMAs per Minute'] = per_minute_df[
        'High IMAs'] / per_minute_df['Minutes']
    # Contact
    per_minute_df['Total Contact Load per Minute'] = per_minute_df[
        'Total Contact Load'] / per_minute_df['Minutes']
    per_minute_df['Total Contacts per Minute'] = per_minute_df[
        'Total Contacts'] / per_minute_df['Minutes']
    per_minute_df['Active Contact Load per Minute'] = per_minute_df[
        'Active Contact Load'] / per_minute_df['Minutes']
    per_minute_df['Active Contacts per Minute'] = per_minute_df[
        'Active Contacts'] / per_minute_df['Minutes']
    # Metabolic Power
    per_minute_df['Total Metabolic Power Distance per Minute'] = per_minute_df[
        'Total Metabolic Power Distance'] / per_minute_df['Minutes']
    per_minute_df[
        'Active Metabolic Power Distance per Minute'] = per_minute_df[
            'Active Metabolic Power Distance'] / per_minute_df['Minutes']
    per_minute_df['Explosive Distance per Minute'] = per_minute_df[
        'Explosive Distance'] / per_minute_df['Minutes']
    # Player Load
    per_minute_df['Total Player Load per Minute'] = per_minute_df[
        'Total Player Load'] / per_minute_df['Minutes']
    per_minute_df['Active Player Load per Minute'] = per_minute_df[
        'Active Player Load'] / per_minute_df['Minutes']
    per_minute_df['Medium Player Load per Minute'] = per_minute_df[
        'Medium Player Load'] / per_minute_df['Minutes']
    per_minute_df['High Player Load per Minute'] = per_minute_df[
        'High Player Load'] / per_minute_df['Minutes']
    # Throws
    per_minute_df['Total Throw Count per Minute'] = per_minute_df[
        'Total Throw Count'] / per_minute_df['Minutes']
    per_minute_df['Total Throw Load per Minute'] = per_minute_df[
        'Total Throw Load'] / per_minute_df['Minutes']
    per_minute_df['Hard Throws per Minute'] = per_minute_df[
        'Hard Throws'] / per_minute_df['Minutes']
    per_minute_df['Hard Throw Load per Minute'] = per_minute_df[
        'Hard Throw Load'] / per_minute_df['Minutes']
    per_minute_df['Active Throws per Minute'] = per_minute_df[
        'Active Throws'] / per_minute_df['Minutes']
    per_minute_df['Active Throw Load per Minute'] = per_minute_df[
        'Active Throw Load'] / per_minute_df['Minutes']
    # Distance
    per_minute_df['Total Distance per Minute'] = per_minute_df[
        'Total Distance'] / per_minute_df['Minutes']
    per_minute_df['Active Distance per Minute'] = per_minute_df[
        'Active Distance'] / per_minute_df['Minutes']
    per_minute_df['Distance > 5 mph per Minute'] = per_minute_df[
        'Distance > 5 mph'] / per_minute_df['Minutes']
    per_minute_df['Distance > 12 mph per Minute'] = per_minute_df[
        'Distance > 12 mph'] / per_minute_df['Minutes']
    per_minute_df['Distance > 16 mph per Minute'] = per_minute_df[
        'Distance > 16 mph'] / per_minute_df['Minutes']
    per_minute_df['Efforts > 5 mph per Minute'] = per_minute_df[
        'Efforts > 5 mph'] / per_minute_df['Minutes']
    per_minute_df['Efforts > 12 mph per Minute'] = per_minute_df[
        'Efforts > 12 mph'] / per_minute_df['Minutes']
    per_minute_df['Efforts > 16 mph per Minute'] = per_minute_df[
        'Efforts > 16 mph'] / per_minute_df['Minutes']

    # Drop columns we don't need from per minute
    per_minute_df = per_minute_df.drop([
        'Medium Decelerations', 'Medium Accelerations', 'High Accelerations',
        'High Decelerations', 'IMA CoD Right High'
    ],
                                       axis=1)
    per_minute_df = per_minute_df.drop([
        'IMA CoD Left High', 'IMA Accel High', 'IMA Decel High',
        'IMA Jump Count High Band', 'High IMAs'
    ],
                                       axis=1)
    per_minute_df = per_minute_df.drop([
        'IMA Explosive %', 'Total Jumps', 'Hard CoD', 'Total Contact Load',
        'Total Contacts'
    ],
                                       axis=1)
    per_minute_df = per_minute_df.drop([
        'Daily Max Velocity (mph)', 'All-Time Max Velocity (mph)',
        '% All-Time Max Velocity'
    ],
                                       axis=1)
    per_minute_df = per_minute_df.drop([
        'Light Contact Load', 'Medium Contact Load', 'Hard Contact Load',
        'Light Contacts', 'Medium Contacts', 'Hard Contacts',
        'Hard Contact Load %', 'Hard Contacts %', 'Active Contact Load',
        'Active Contacts', 'Meta Energy (Cal/kg)',
        'Total Metabolic Power Average Power',
        'Active Metabolic Power Average Power',
        'High Metabolic Power Average Power', 'Total Metabolic Power Distance',
        'Active Metabolic Power Distance', 'Explosive Distance',
        'High Metabolic Power Distance', 'Total Metabolic Power Efforts',
        'Active Metabolic Power Efforts', 'High Metabolic Power Efforts',
        'Total Player Load', 'Active Player Load', 'Medium Player Load',
        'High Player Load', 'Footstrikes', 'Running Deviation',
        'Running Imbalance', 'Running Series Count', 'Total Throw Count',
        'Total Throw Load', 'Hard Throws', 'Hard Throw Load', 'Active Throws',
        'Active Throw Load', 'Hard Throw %', 'Hard Throw Load %',
        'Total Distance', 'Active Distance', 'Maximum Velocity',
        'Distance > 5 mph', 'Distance > 12 mph', 'Distance > 16 mph',
        'Efforts > 5 mph', 'Efforts > 12 mph', 'Efforts > 16 mph',
        'Total Duration (mins)'
    ],
                                       axis=1)
    per_minute_df = per_minute_df.drop([
        'Position',
        'Group',
        'Game_Num_Year',
        'Opponent',
        'Enemy_Abr',
        'Location',
        'Result',
        'Points_For',
        'Points_Against',
        'Game_Type',
        'Day',
        'Enemy',
        'Day_Type',
        'Day_ID',
        'Attire',
        'MD_Minus',
        'Phase',
        'Week_Num',
        'Bodyweight (lbs)',
        'Bodyweight (kg)',
    ],
                                       axis=1)

    # Drop columns we don't need from merged
    merged_df = merged_df.drop([
        'High Accelerations per Minute', 'High Decelerations per Minute',
        'High IMAs per Minute', 'Total Contact Load per Minute',
        'Total Contacts per Minute', 'Active Contact Load per Minute',
        'Active Contacts per Minute',
        'Total Metabolic Power Distance per Minute',
        'Active Metabolic Power Distance per Minute',
        'Explosive Distance per Minute', 'Total Player Load per Minute',
        'Active Player Load per Minute', 'Medium Player Load per Minute',
        'High Player Load per Minute', 'Total Throw Count per Minute',
        'Total Throw Load per Minute', 'Hard Throws per Minute',
        'Hard Throw Load per Minute', 'Active Throws per Minute',
        'Active Throw Load per Minute', 'Total Distance per Minute',
        'Active Distance per Minute', 'Distance > 5 mph per Minute',
        'Distance > 12 mph per Minute', 'Distance > 16 mph per Minute',
        'Efforts > 5 mph per Minute', 'Efforts > 12 mph per Minute',
        'Efforts > 16 mph per Minute'
    ],
                               axis=1)

    per_minute_df = per_minute_df.replace(to_replace=[np.inf, -np.inf],
                                          value=1)

    if not os.path.exists(PER_MIN_LIST):
        per_minute_df.to_csv(PER_MIN_LIST, index=False, mode='w')
        # Set the file permissions so that anyone can use it
        oschmod.set_mode(PER_MIN_LIST, stat.S_IRWXO or stat.S_IRWXU
                         or stat.S_IRWXG)
Esempio n. 27
0
def week_metrics():
    """Build weekly per-player workload metrics and write them to WEEK_LIST.

    Fetches the roster, schedule, week, acceleration, linemen-contact,
    player-load, throws and velocity tables from Postgres, merges them on
    player/session keys, aggregates every numeric metric to player-week
    totals (max velocity is taken as the weekly maximum rather than a sum),
    and writes the result to the WEEK_LIST csv if it does not already exist.
    """
    # Fetch table from postgres -> Roster Table
    roster_df = fetch_table(select_clean_roster_table)
    roster_df = pd.DataFrame(roster_df,
                             columns=["Player Name", "Position", "Group"])

    # Fetch table from postgres -> Schedule Table (one row per calendar date)
    schedule_df = fetch_table(select_schedule_table)
    schedule_df = pd.DataFrame(schedule_df,
                               columns=[
                                   "Date", "Day", "Enemy", "Day_Type",
                                   "Day_ID", "Attire", "MD_Minus"
                               ])
    schedule_df["Date"] = pd.to_datetime(schedule_df["Date"],
                                         format='%Y-%m-%d')

    # Fetch table from postgres -> Week Table (maps dates to phase/week)
    week_df = fetch_table(select_week_table)
    week_df = pd.DataFrame(week_df, columns=["Date", "Phase", "Week_Num"])
    week_df["Date"] = pd.to_datetime(week_df["Date"], format='%Y-%m-%d')
    # Week_Num is used as a categorical grouping key, not a number.
    week_df['Week_Num'] = week_df['Week_Num'].astype(str)

    # Fetch table from postgres -> Accel Table
    accel_df = fetch_table(select_clean_accel_table)
    accel_df = pd.DataFrame(accel_df,
                            columns=[
                                "Player Name", "Period Name", "Period Number",
                                "Date", "Total Duration",
                                "Medium Decelerations", "Medium Accelerations",
                                "High Decelerations", "High Accelerations"
                            ])
    accel_df["Date"] = pd.to_datetime(accel_df["Date"], format='%Y-%m-%d')
    accel_df["Total Duration"] = pd.to_datetime(accel_df["Total Duration"],
                                                format='%H:%M:%S')

    # Fetch table from postgres -> Linemen Contacts Table
    line_contacts_df = fetch_table(select_clean_line_contacts_table)
    line_contacts_df = pd.DataFrame(
        line_contacts_df,
        columns=[
            "Player Name", "Period Name", "Period Number", "Date",
            "Total Duration", "Total Contact Load", "Total Contacts",
            "Light Contact Load", "Medium Contact Load", "Hard Contact Load",
            "Light Contacts", "Medium Contacts", "Hard Contacts",
            "Hard Contact Load %", "Hard Contacts %", "Active Contact Load",
            "Active Contacts"
        ])
    line_contacts_df["Date"] = pd.to_datetime(line_contacts_df["Date"],
                                              format='%Y-%m-%d')
    line_contacts_df["Total Duration"] = pd.to_datetime(
        line_contacts_df["Total Duration"], format='%H:%M:%S')

    # Percentages cannot be meaningfully summed per week, so drop them.
    line_contacts_df = line_contacts_df.drop(
        columns=['Hard Contact Load %', 'Hard Contacts %'])

    # BUG FIX: Light/Medium/Hard Contact Load were all being overwritten
    # with "Total Contact Load" (copy-paste error); convert each column
    # from its own source instead.
    for col in ("Total Contact Load", "Light Contact Load",
                "Medium Contact Load", "Hard Contact Load",
                "Active Contact Load"):
        line_contacts_df[col] = pd.to_numeric(line_contacts_df[col])

    # Fetch table from postgres -> Player Load Table
    pl_df = fetch_table(select_clean_pl_table)
    pl_df = pd.DataFrame(pl_df,
                         columns=[
                             "Player Name", "Period Name", "Period Number",
                             "Date", "Total Duration", "Total Player Load",
                             "Active Player Load", "Medium Player Load",
                             "High Player Load"
                         ])
    pl_df["Date"] = pd.to_datetime(pl_df["Date"], format='%Y-%m-%d')
    pl_df["Total Duration"] = pd.to_datetime(pl_df["Total Duration"],
                                             format='%H:%M:%S')
    for col in ("Total Player Load", "Active Player Load",
                "Medium Player Load", "High Player Load"):
        pl_df[col] = pd.to_numeric(pl_df[col])

    # Fetch table from postgres -> Throws Table
    throws_df = fetch_table(select_clean_throws_table)
    throws_df = pd.DataFrame(throws_df,
                             columns=[
                                 "Player Name", "Period Name", "Period Number",
                                 "Date", "Total Duration", "Total Throw Count",
                                 "Total Throw Load", "Hard Throws",
                                 "Hard Throw Load", "Active Throws",
                                 "Active Throw Load", "Hard Throw %",
                                 "Hard Throw Load %"
                             ])
    throws_df["Date"] = pd.to_datetime(throws_df["Date"], format='%Y-%m-%d')
    throws_df["Total Duration"] = pd.to_datetime(throws_df["Total Duration"],
                                                 format='%H:%M:%S')

    # Percentages cannot be meaningfully summed per week, so drop them.
    throws_df = throws_df.drop(columns=['Hard Throw %', 'Hard Throw Load %'])

    for col in ("Total Throw Load", "Hard Throw Load", "Active Throw Load"):
        throws_df[col] = pd.to_numeric(throws_df[col])

    # Fetch table from postgres -> Velocity Table
    velo_df = fetch_table(select_clean_velo_table)
    velo_df = pd.DataFrame(velo_df,
                           columns=[
                               "Player Name", "Period Name", "Period Number",
                               "Date", "Total Duration", "Total Distance",
                               "Active Distance", "Max Velocity (mph)",
                               "Distance > 5 mph", "Distance > 12 mph",
                               "Distance > 16 mph", "Efforts > 5 mph",
                               "Efforts > 12 mph", "Efforts > 16 mph"
                           ])
    velo_df["Date"] = pd.to_datetime(velo_df["Date"], format='%Y-%m-%d')
    velo_df["Total Duration"] = pd.to_datetime(velo_df["Total Duration"],
                                               format='%H:%M:%S')
    for col in ("Total Distance", "Active Distance", "Max Velocity (mph)",
                "Distance > 5 mph", "Distance > 12 mph", "Distance > 16 mph",
                "Efforts > 5 mph", "Efforts > 12 mph", "Efforts > 16 mph"):
        velo_df[col] = pd.to_numeric(velo_df[col])

    # Keep only whole-session rows (period "Session", number 0) so split
    # periods are not double-counted in the weekly sums.
    velo_df = velo_df[velo_df['Period Name'] == 'Session']
    velo_df = velo_df[velo_df['Period Number'] == 0]

    # Derive the Year as a string key so weeks from different seasons
    # with the same Week_Num are not merged together.
    schedule_df['Year'] = pd.to_datetime(schedule_df['Date']).dt.to_period('Y')
    week_df['Year'] = pd.to_datetime(week_df['Date']).dt.to_period('Y')
    velo_df['Year'] = pd.to_datetime(velo_df['Date']).dt.to_period('Y')
    schedule_df['Year'] = schedule_df['Year'].astype(str)
    week_df['Year'] = week_df['Year'].astype(str)
    velo_df['Year'] = velo_df['Year'].astype(str)

    # Merge everything onto the schedule/velocity spine.
    session_keys = [
        'Player Name', 'Period Name', 'Period Number', 'Date',
        'Total Duration'
    ]
    merged_df = schedule_df.merge(week_df, how='left', on=['Date', 'Year'])
    merged_df = merged_df.merge(velo_df, how='left', on=['Date', 'Year'])
    merged_df = merged_df.merge(roster_df, how='left', on=['Player Name'])
    merged_df = merged_df.merge(throws_df, how='left', on=session_keys)
    merged_df = merged_df.merge(pl_df, how='left', on=session_keys)
    merged_df = merged_df.merge(line_contacts_df, how='left', on=session_keys)
    merged_df = merged_df.merge(accel_df, how='left', on=session_keys)

    # Max velocity must be the weekly maximum, not a sum like the other
    # metrics, so aggregate it separately and merge it back in.
    week_keys = ['Player Name', 'Week_Num', 'Year', 'Position', 'Phase']
    max_vel_df = merged_df.groupby(
        by=week_keys, as_index=False)['Max Velocity (mph)'].max()

    week_dist_df = merged_df.groupby(by=week_keys, as_index=False).sum()
    week_dist_df = week_dist_df.drop(columns=['Max Velocity (mph)'])

    remerged_df = week_dist_df.merge(max_vel_df, how='left', on=week_keys)
    remerged_df = remerged_df.drop(
        columns=['Distance > 5 mph', 'Period Number'])

    if not os.path.exists(WEEK_LIST):
        remerged_df.to_csv(WEEK_LIST, index=False, mode='w')
        # Set the file permissions so that anyone can use it.
        # BUG FIX: the previous `stat.S_IRWXO or stat.S_IRWXU or ...`
        # short-circuited to S_IRWXO (0o007) only; permission flags must
        # be combined with bitwise OR.
        oschmod.set_mode(WEEK_LIST,
                         stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)