def test_path_resolution():
    """Test that paths are correctly resolved."""
    import os
    import pathlib

    from config import Path

    path = Path()
    expected = pathlib.Path(os.environ['HOME']) / 'bar'
    assert path.validate('~/foo/../bar') == expected
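
# The test above implies that config.Path.validate() expands '~' and collapses
# '..' segments. A minimal sketch of that resolution step using only the
# stdlib; resolve_path() is a hypothetical helper, not the library's code.
import pathlib


def resolve_path(value):
    """Resolve '~/foo/../bar' to $HOME/bar: expand the home dir, normalize '..'."""
    return pathlib.Path(value).expanduser().resolve()
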
def create_dataset_folder_structure():
    """
    Creates the folder structure for the new dataset.

    """

    path = Path(f'{DATASETS}/{FEATURES_DATASET}')
    if not os.path.exists(path):
        print(
            f'\nWARNING: The path does not exist. Creating new directory...\n{path}\n'
        )
        os.mkdir(path)

    try:
        for path in new_sensor_paths:
            if not os.path.exists(path):
                print(
                    f'\nWARNING: The path does not exist. Creating new directory...\n{path}\n'
                )
                os.mkdir(path)
            else:
                print("\nPath already exists!")
    except OSError:
        return False
    else:
        return True
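
# A hedged alternative to the exists-check-then-mkdir pattern above:
# os.makedirs(..., exist_ok=True) creates each directory (and any missing
# parents) in one call. ensure_dirs() is an illustrative helper, not part of
# the project.
import os


def ensure_dirs(paths):
    for path in paths:
        os.makedirs(path, exist_ok=True)
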
def test_path_validate(tmp_path):
    import pytest

    from config import ConfigError, Path

    not_existing_path = tmp_path / 'nope'
    existing_dir = tmp_path

    existing_file = tmp_path / 'yes'
    existing_file.touch()

    item = Path()
    assert item.validate(None) is None
    assert item.validate(not_existing_path) == not_existing_path
    assert item.validate(existing_dir) == existing_dir
    assert item.validate(existing_file) == existing_file

    item = Path(dir_okay=False)
    assert item.validate(None) is None
    assert item.validate(not_existing_path) == not_existing_path
    assert item.validate(existing_file) == existing_file
    with pytest.raises(ConfigError):
        item.validate(existing_dir)

    item = Path(file_okay=False)
    assert item.validate(None) is None
    assert item.validate(not_existing_path) == not_existing_path
    assert item.validate(existing_dir) == existing_dir
    with pytest.raises(ConfigError):
        item.validate(existing_file)

    item = Path(exists=False)
    assert item.validate(None) is None
    assert item.validate(not_existing_path) == not_existing_path
    with pytest.raises(ConfigError):
        item.validate(existing_file)
    with pytest.raises(ConfigError):
        item.validate(existing_dir)

    item = Path(allow_none=False)
    with pytest.raises(ConfigError):
        item.validate(None)
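
# A minimal sketch of a Path config item consistent with the tests above,
# assuming a ConfigError exception type. The real config library likely
# differs (for one, it also expands '~', as test_path_resolution implies).
import pathlib


class ConfigError(Exception):
    pass


class Path:
    def __init__(self, exists=None, dir_okay=True, file_okay=True, allow_none=True):
        self.exists = exists
        self.dir_okay = dir_okay
        self.file_okay = file_okay
        self.allow_none = allow_none

    def validate(self, value):
        if value is None:
            if not self.allow_none:
                raise ConfigError('None is not a valid path')
            return None
        path = pathlib.Path(value)
        if not self.dir_okay and path.is_dir():
            raise ConfigError(f'{path} is a directory')
        if not self.file_okay and path.is_file():
            raise ConfigError(f'{path} is a file')
        if self.exists is False and path.exists():
            raise ConfigError(f'{path} already exists')
        return path
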
def process_repository(session, status, repository, query_iter):
    query_iter = list(query_iter)
    zip_path = None
    tarzip = None
    if not repository.path.exists():
        if not repository.zip_path.exists():
            repository.processed |= consts.R_UNAVAILABLE_FILES
            session.add(repository)
            status.count += len(query_iter)
            return "Failed. Repository not found: {}".format(repository)
        tarzip = tarfile.open(str(repository.zip_path))
        zip_path = Path(repository.hash_dir2)

    shell = InteractiveShell.instance()
    # groupby assumes query_iter is already ordered by notebook (x[1]).
    group = groupby(query_iter, lambda x: x[1])
    for notebook, new_iter in group:
        # Materialize the group now: a groupby sub-iterator can be consumed only once.
        cells = list(new_iter)
        vprint(1, "Processing notebook: {}. Found {} cells".format(notebook, len(cells)))
        name = notebook.name
        vprint(2, "Loading notebook file")
        if tarzip:
            notebook = nbf.read(
                tarzip.extractfile(tarzip.getmember(str(zip_path / name))),
                nbf.NO_CONVERT
            )
        else:
            with open(str(repository.path / name)) as ofile:
                notebook = nbf.read(ofile, nbf.NO_CONVERT)
        notebook = nbf.convert(notebook, 4)
        metadata = notebook["metadata"]
        language_info = metadata.get("language_info", {})
        language_name = language_info.get("name", "unknown")

        for cell, _, _ in cells:
            vprint(2, "Loading cell {}".format(cell.index))

            index = int(cell.index)
            notebook_cell = notebook["cells"][index]
            source = notebook_cell.get("source", "")
            if language_name == "python" and notebook_cell.get("cell_type") == "code":
                try:
                    source = shell.input_transformer_manager.transform_cell(source)
                except (IndentationError, SyntaxError):
                    pass
            cell.source = source
            if cell.processed & consts.C_MARKED_FOR_EXTRACTION:
                cell.processed -= consts.C_MARKED_FOR_EXTRACTION
            session.add(cell)
        session.commit()
    return "ok"
def sort_dataset_by_age():
    """
    Sorts the Dataset created by create_dataset() into a new Age sorted Dataset.

    """

    data = read_csv(Path(f'{data_files_path}/subject_data'))
    limits = get_limits(ageGroups)
    sortedCount = 0

    # For every age bin
    for target_folder, limit in limits.items():
        # Get the indexes of all files to be copied to the target folder
        index_list = list(data[(data['Age'] >= limit[0])
                               & (data['Age'] <= limit[1])].index)
        subjectCount = 0
        # For every file to be copied
        for i in index_list:
            filename = data.iloc[i]['Filename']
            temp = sortedCount
            # Get the source and destination file paths
            for src, dest in zip(new_sensor_paths, sensor_dirs[target_folder]):
                # if the file exists in the source directory
                if os.path.exists(Path(f'{src}/{filename[:-4]}.csv')):
                    # copy it to the destination directory
                    copyfile(Path(f'{src}/{filename[:-4]}.csv'),
                             Path(f'{dest}/{filename[:-4]}.csv'))
                    # Count the subject once, even though up to three sensor
                    # files are copied for it.
                    if temp == sortedCount:
                        sortedCount += 1
                        subjectCount += 1

        print(f'\n# of Subjects in "{target_folder}" = {subjectCount}')

    print(
        f'\nTotal subjects sorted = {sortedCount}  ({round((sortedCount / len(data)) * 100, 2)}% of total data)\n'
    )
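
# Hypothetical sketch of the get_limits() helper used above: it maps each age
# bin's destination folder to its inclusive (low, high) bounds. The label
# format and folder naming here are illustrative assumptions.
def get_limits(age_groups):
    # e.g. ['1-13', '14-18'] -> {'Age_1-13': (1, 13), 'Age_14-18': (14, 18)}
    limits = {}
    for group in age_groups:
        low, high = (int(bound) for bound in group.split('-'))
        limits[f'Age_{group}'] = (low, high)
    return limits
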
def create_dataset(subs_list, indexing=True):
    """
    Creates the New Dataset using features calculated from the base data.

    Parameters
    ----------
    subs_list : list
        list of subjects to create the new dataset for
    indexing : bool, optional
        whether to write an index column (default = True)

    """

    S = None
    print(
        f'\nProcess - {current_process().name} has {len(subs_list)} files to work on.\n'
    )

    try:
        start = time()
        repo = (Subject(sub) for sub in subs_list)
        for sub in repo:
            S = sub
            for i in range(3):
                filePath = Path(
                    f'{new_sensor_paths[i]}/{sub.subject_id[:-4]}.csv')
                if not os.path.exists(filePath):
                    # Most expensive line of code in the module (Takes hours)
                    col_names, df, _, _, _ = feature_extractor(
                        sub, sensors[i].lower(), output_type='df')
                    df.to_csv(filePath, sep="\t", index=indexing)
                    print(
                        f"File generated - '{sub.subject_id[:-4]}.csv' by process : {current_process().name}"
                    )
                else:
                    print(f'File "{sub.subject_id[:-4]}.csv" already exists!')

        print(
            f'\nTime taken by - {current_process().name} : {time() - start:.2f} secs'
        )
    except Exception:
        print(f"Exception occurred in {current_process().name}\n")
        print(f'While working on this portion of the subs_list:\n'
              f'{subs_list}')
        # S may still be None if the failure happened before the first subject.
        print(f'Error occurred in FILE # {S.subject_id if S else "<unknown>"}\n')
        raise
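
# A hedged usage sketch for create_dataset(): splitting the subject list
# across worker processes, matching the current_process() logging above.
# Worker count and round-robin chunking are illustrative assumptions.
from multiprocessing import Pool


def run_parallel(subs_list, workers=4):
    chunks = [subs_list[i::workers] for i in range(workers)]
    with Pool(workers) as pool:
        pool.map(create_dataset, chunks)
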
def create_age_folder_structure():
    """
    Creates the folder structure for the Age Sorted Dataset.

    """

    try:
        new_dataset_path = Path(f'{DATASETS}/{FEATURES_DATASET}_Age_Sorted')
        if not os.path.exists(new_dataset_path):
            print(
                f'\nWARNING: The path does not exist. Creating new directory...\n{new_dataset_path}\n'
            )
            os.mkdir(new_dataset_path)
    except OSError:
        print(
            "ERROR in creating the sorted dataset directory within folder /Data Sets"
        )
        return False

    try:
        for folder, age_dir in age_dirs.items():
            if not os.path.exists(age_dir):
                os.mkdir(age_dir)
            else:
                print(f"The directory {folder} already exists.")
    except OSError:
        print(
            "ERROR in creating age based directories in /Data Sets/Dataset_Age_Sorted"
        )
        return False

    try:
        for sub_folder, sensor_dir in sensor_dirs.items():
            for sub_path in sensor_dir:
                if not os.path.exists(sub_path):
                    os.mkdir(sub_path)
                else:
                    print(f"The directory {sub_path} already exists.")
        return True
    except OSError:
        print(
            "ERROR in creating sensor directories in /Data Sets/Dataset_Age_Sorted/[age_Groups]"
        )
        return False
import argparse
from pathlib import Path

import psutil


def main():
    parser = argparse.ArgumentParser(
        description="Check pid")
    parser.add_argument("-c", "--count", action='store_true',
                        help="count active processes")
    parser.add_argument("-e", "--clear", action='store_true',
                        help="clear not running processes")
    parser.add_argument("-s", "--simplify", action='store_true',
                        help="simplify output")
    args = parser.parse_args()

    if not Path(".pid").exists():
        return

    with open(".pid", "r") as fil:
        pids = fil.readlines()

    new_pids = []
    for pid in pids:
        pid = pid.strip()
        if not pid:
            continue
        try:
            process = psutil.Process(int(pid))
            if not args.count:
                cmd = process.cmdline()
                if args.simplify and len(cmd) > 20:
                    cmd = cmd[:20]
                    cmd.append("...")
                print("{}: {}".format(pid, " ".join(cmd)))
            new_pids.append(pid)
        except psutil.NoSuchProcess:
            if not args.count and not args.clear:
                print("{}: <not found>".format(pid))
    if args.count:
        print(len(new_pids))
    if args.clear:
        with open(".pid", "w") as fil:
            fil.write("\n".join(new_pids) + "\n")
def file_exists(subs_list):
    """
    Checks whether feature-extracted files from a previous run already exist in the Dataset, and returns
    an updated list containing only the subjects whose files are still missing.

    Generating these files is expensive, so skipping existing ones avoids starting over from scratch.

    Parameters
    ----------
    subs_list : list
        Complete subjects list

    Returns
    -------
    updated_subs : list
        list of subject files which are not already in the new Dataset

    """
    updated_subs = []
    print('Checking for existing files in directories:\n')
    for dir_path in new_sensor_paths:
        print(f'{dir_path}')
    print()

    # Keep a subject whose feature file is missing from any sensor directory.
    for sub in subs_list:
        for sensor_path in new_sensor_paths:
            filePath = Path(f'{sensor_path}/{sub[:-4]}.csv')
            if not os.path.exists(filePath):
                updated_subs.append(sub)
                break
    updated_subs = sorted(set(updated_subs))
    print(f'There were {len(subs_list) - len(updated_subs)} existing files!\n')
    print(
        f'The updated subjects list now contains {len(updated_subs)} entries.\n'
    )
    return updated_subs
class Foo(Configurable):
    path = Path(allow_none=False)
# Configuration Variables
# ------------------------
GENERATE_DATASET = True
SORT_BY_AGE = False
TESTING = True
TEST_COUNT = 8  # Should be >= 4
# ------------------------

if TESTING:
    FEATURES_DATASET = FEATURES_DATASET + "_TEST"

new_sensor_paths = [
    Path(f"{DATASETS}/{FEATURES_DATASET}/{sensor}") for sensor in sensors
]

if not os.path.exists(DATASETS):
    print(
        f'\nWARNING: The path does not exist. Creating new directory...\n{DATASETS}\n'
    )
    os.mkdir(DATASETS)


def create_dataset_folder_structure():
    """
    Creates the folder structure for the new dataset_operations.

    """
Exemple #12
0
# Performance metric to optimize the model for
SCORING = 'f1_weighted'
# Set to True if TESTING with the Python CONSOLE
TESTING = False
# If True, the dataset is normalized before training & testing
DATA_NORMALIZATION = True
# If True, only a selected portion of the entire dataset is used for training+testing (# of rows = row_count)
DATA_REDUCE = False
# If True, generate a .csv file for the feature ranking
GEN_RANKING_FILE = False
# If True, a plot will be generated for the # of features used vs performance metric
PLOT = False
# If True, trained model is exported to TRAINED_MODEL_PATH
EXPORT_MODEL = False

# Paths
# Directory name for new data set which contains the training/testing data for the classifier
PROCESSED_DATASET = "Processed_Dataset"
# Directory path for new data set which contains the training/testing data for the classifier
PROCESSED_DATASET_PATH = Path(f'{DATASETS}/{PROCESSED_DATASET}')
# Path to the dataset file loaded for the ML classifier
DATA_PATH = Path(f"{PROCESSED_DATASET_PATH}/ds_all.csv")
# Trained Model directory name
TRAINED_MODEL_DIR = 'Trained Models'
# Trained Model directory path
TRAINED_MODEL_PATH = Path(f'{ROOT}/{TRAINED_MODEL_DIR}')
# Trained Model name
TRAINED_MODEL_NAME = 'step_detection_model_test.pkl'
# Trained Normalizer name
TRAINED_NORMALIZER_NAME = 'step_detection_min_max_norm_test.pkl'
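
# A hedged sketch of how EXPORT_MODEL might be honored using the paths above.
# joblib and the export_model() helper are assumptions; the project may
# serialize its model and normalizer differently.
import joblib


def export_model(model, normalizer):
    TRAINED_MODEL_PATH.mkdir(parents=True, exist_ok=True)
    joblib.dump(model, TRAINED_MODEL_PATH / TRAINED_MODEL_NAME)
    joblib.dump(normalizer, TRAINED_MODEL_PATH / TRAINED_NORMALIZER_NAME)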