Example No. 1
def test_path_resolution():
    '''Test that paths are correctly resolved'''
    import os
    import pathlib
    from config import Path
    path = Path()

    expected = pathlib.Path(os.environ['HOME']) / 'bar'
    assert path.validate('~/foo/../bar') == expected
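The assertion above relies on config.Path.validate() expanding "~" and collapsing ".." segments without requiring the path to exist. A minimal sketch of just that behaviour (an assumption, not the project's actual implementation, which Example No. 24 shows also takes validation options):

import os
import pathlib


class Path:
    """Sketch only: expand the user directory and normalise '..' lexically."""

    def validate(self, value):
        if value is None:
            return None
        # expanduser handles "~"; normpath collapses "foo/.." without touching the filesystem
        return pathlib.Path(os.path.normpath(os.path.expanduser(value)))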
Example No. 2
class Writer(object):
    """Write HTML fragments generated by docutils parser."""

    def __init__(self, config):
        #self.config = config
        self.path = Path(config)
        self.parser = Parser(config)

    def run(self):
        for source_abspath in self.parser.get_all_files():
            print source_abspath 
            fragment = self.parser.get_fragment(source_abspath)
            fragment_abspath = self.path.get_fragment_abspath(source_abspath)
            self.write_fragment(fragment, fragment_abspath)
        print "Done."

    def write_fragment(self, fragment, fragment_abspath):
        with self.open_fragment_file(fragment_abspath) as fout:
            fout.write(fragment.encode('utf-8') + '\n')
 
    def open_fragment_file(self, fragment_abspath):
        self.make_fragment_dir(fragment_abspath)
        return open(fragment_abspath, "w")

    def make_fragment_dir(self, fragment_abspath):
        fragment_dir = os.path.dirname(fragment_abspath)
        if not os.path.isdir(fragment_dir):
            os.makedirs(fragment_dir)
Example No. 3
def create_dataset_folder_structure():
    """
    Creates the folder structure for the new dataset.

    """

    path = Path(f'{DATASETS}/{FEATURES_DATASET}')
    if not os.path.exists(path):
        print(
            f'\nWARNING: The path does not exist. Creating new directory...\n{path}\n'
        )
        os.mkdir(path)

    try:
        for path in new_sensor_paths:
            if not os.path.exists(path):
                print(
                    f'\nWARNING: The path does not exist. Creating new directory...\n{path}\n'
                )
                os.mkdir(path)
            else:
                print("\nPath already exists!")
    except:
        return False
    else:
        return True
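The check-then-create pattern above can race if another process creates the directory between the os.path.exists() check and os.mkdir(). A compact alternative sketched with os.makedirs(exist_ok=True), available since Python 3.2; the function and parameter names below are made up, and the dataset root and sensor paths are passed in instead of being read from module globals:

import os


def ensure_dataset_dirs(dataset_root, sensor_paths):
    """Create the dataset root and every per-sensor directory, tolerating ones that exist."""
    try:
        os.makedirs(dataset_root, exist_ok=True)
        for path in sensor_paths:
            os.makedirs(path, exist_ok=True)
    except OSError:
        return False
    return True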
Example No. 4
    def __init__(self, config, graph):
        self.config = config
        self.graph = graph
        self.path = Path(config)
        self.parser = Parser(config)
        self.writer = Writer(config)
        self.loader = Loader(config, graph)
Example No. 5
def calculate_weigths_labels(dataset, dataloader, num_classes):
    # Accumulator for per-class pixel counts across the whole dataset
    z = np.zeros((num_classes, ))
    # Initialize tqdm
    tqdm_batch = tqdm(dataloader)
    print('Calculating classes weights')
    for sample in tqdm_batch:
        y = sample['label']
        y = y.detach().cpu().numpy()
        mask = (y >= 0) & (y < num_classes)
        labels = y[mask].astype(np.uint8)
        count_l = np.bincount(labels, minlength=num_classes)
        z += count_l
    tqdm_batch.close()
    total_frequency = np.sum(z)
    class_weights = []
    for frequency in z:
        class_weight = 1 / (np.log(1.02 + (frequency / total_frequency)))
        class_weights.append(class_weight)
    ret = np.array(class_weights)
    classes_weights_path = os.path.join(Path.db_root_dir(dataset),
                                        dataset + '_classes_weights.npy')
    np.save(classes_weights_path, ret)

    return ret
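For intuition, the 1 / log(1.02 + frequency / total) weighting keeps frequent classes near 1 while boosting rare classes sharply. A quick illustration with made-up frequencies:

import numpy as np

frequencies = np.array([0.90, 0.09, 0.01])  # hypothetical fractions of all labelled pixels
weights = 1 / np.log(1.02 + frequencies)    # same formula as calculate_weigths_labels
print(weights.round(2))                     # roughly [ 1.53  9.58 33.83]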
Example No. 6
    def collect_memory_snapshot(self, pid="sys"):
        """
        Collecting memory snapshot into csv files
        :pid: process or package, by default getting snapshot from system
        """

        meminfo = self._get_sys_meminfo(
        ) if pid == "sys" else self._get_ps_meminfo(pid)
        headers_row = [i for i in meminfo.keys()]
        values_row = [i for i in meminfo.values()]
        path = Path.sys() if pid == "sys" else Path.pid(pid)

        if os.path.isfile(path):
            CSV.append_row(path, values_row)
        else:
            CSV.append_row(path, headers_row)
            CSV.append_row(path, values_row)
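The CSV.append_row() helper is not shown in this fragment; a hypothetical stand-in (an assumption, not the project's actual CSV class) that appends one row per call could look like:

import csv


class CSV:
    @staticmethod
    def append_row(path, row):
        # Open in append mode so headers written on the first call are preserved
        with open(path, "a", newline="") as f:
            csv.writer(f).writerow(row)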
Example No. 7
    def _add_process(pid):
        with open(Path.template_process()) as file:
            soup = BeautifulSoup(file, features="html.parser")
        tag_graph = soup.find(id="graph")
        tag_graph.string = Graph.gen_pid_graph(pid)
        tag_meminfo = soup.find(id="meminfo")
        tag_meminfo.string = "<object height=100% width=100% type='text/plain' data=\"./{pid}.txt\"></object>".format(
            pid=pid)
        return soup
Example No. 8
    def __init__(self, config, graph):
        #self.changelog = ChangeLog(config)
        self.path = Path(config)

        changelog_dir = self.path.get_working_etc()
        self.changelog = ChangeLog(changelog_dir)
        self.changelog.initialize(config)

        self.parser = Parser(config)
        self.graph = graph
Example No. 9
def process_repository(session, status, repository, query_iter):
    query_iter = list(query_iter)
    zip_path = None
    tarzip = None
    if not repository.path.exists():
        if not repository.zip_path.exists():
            repository.processed |= consts.R_UNAVAILABLE_FILES
            session.add(repository)
            status.count += len(query_iter)
            return "Failed. Repository not found: {}".format(repository)
        tarzip = tarfile.open(str(repository.zip_path))
        zip_path = Path(repository.hash_dir2)

    shell = InteractiveShell.instance()
    group = groupby(
        query_iter,
        lambda x: (x[1])
    )
    for notebook, new_iter in group:
        cells = list(new_iter)
        vprint(1, "Processing notebook: {}. Found {} cells".format(notebook, len(cells)))
        name = notebook.name
        vprint(2, "Loading notebook file")
        if tarzip:
            notebook = nbf.read(
                tarzip.extractfile(tarzip.getmember(str(zip_path / name))),
                nbf.NO_CONVERT
            )
        else:
            with open(str(repository.path / name)) as ofile:
                notebook = nbf.read(ofile, nbf.NO_CONVERT)
        notebook = nbf.convert(notebook, 4)
        metadata = notebook["metadata"]
        language_info = metadata.get("language_info", {})
        language_name = language_info.get("name", "unknown")

        for cell, _, _ in cells:
            vprint(2, "Loading cell {}".format(cell.index))

            index = int(cell.index)
            notebook_cell = notebook["cells"][index]
            source = notebook_cell.get("source", "")
            if language_name == "python" and notebook_cell.get("cell_type") == "code":
                try:
                    source = shell.input_transformer_manager.transform_cell(source)
                except (IndentationError, SyntaxError):
                    pass
            cell.source = source
            if cell.processed & consts.C_MARKED_FOR_EXTRACTION:
                cell.processed -= consts.C_MARKED_FOR_EXTRACTION
            session.add(cell)
        session.commit()
    return "ok"
Example No. 10
def sort_dataset_by_age():
    """
    Sorts the Dataset created by create_dataset() into a new Age sorted Dataset.

    """

    data = read_csv(Path(f'{data_files_path}/subject_data'))
    limits = get_limits(ageGroups)
    sortedCount = 0

    # For every age bin
    for target_folder, limit in limits.items():
        # Get the indexes of all files to be copied to the target folder
        index_list = list(data[(data['Age'] >= limit[0])
                               & (data['Age'] <= limit[1])].index)
        subjectCount = 0
        # For every file to be copied
        for i in index_list:
            filename = data.iloc[i]['Filename']
            temp = sortedCount
            # Get the source and destination file paths
            for src, dest in zip(new_sensor_paths, sensor_dirs[target_folder]):
                # if the file exists in the source directory
                if os.path.exists(Path(f'{src}/{filename[:-4]}.csv')):
                    # copy it to the destination directory
                    copyfile(Path(f'{src}/{filename[:-4]}.csv'),
                             Path(f'{dest}/{filename[:-4]}.csv'))
                    if temp == sortedCount:
                        sortedCount += 1
                        subjectCount += 1
                        # print(f'src = {src}\ndest = {dest}\n\n')

        print(f'\n# of Subjects in "{target_folder}" = {subjectCount}')

    print(
        f'\nTotal subjects sorted = {sortedCount}  ({round((sortedCount / len(data)) * 100, 2)}% of total data)\n'
    )
Example No. 11
    def __init__(
            self,
            args,
            base_dir=Path.db_root_dir('pascal'),
            split='train',
    ):
        """
        :param base_dir: path to VOC dataset directory
        :param split: train/val
        :param transform: transform to apply
        """
        super().__init__()
        self._base_dir = base_dir
        self._image_dir = os.path.join(self._base_dir, 'JPEGImages')
        self._cat_dir = os.path.join(self._base_dir, 'SegmentationClass')

        if isinstance(split, str):
            self.split = [split]
        else:
            split.sort()
            self.split = split

        self.args = args

        _splits_dir = os.path.join(self._base_dir, 'ImageSets', 'Segmentation')

        self.im_ids = []
        self.images = []
        self.categories = []

        for splt in self.split:
            with open(os.path.join(_splits_dir, splt + '.txt'), "r") as f:
                lines = f.read().splitlines()

            for ii, line in enumerate(lines):
                _image = os.path.join(self._image_dir, line + ".jpg")
                _cat = os.path.join(self._cat_dir, line + ".png")
                assert os.path.isfile(_image)
                assert os.path.isfile(_cat)
                self.im_ids.append(line)
                self.images.append(_image)
                self.categories.append(_cat)

        assert (len(self.images) == len(self.categories))

        # Display stats
        print('Number of images in {}: {:d}'.format(split, len(self.images)))
Example No. 12
    def __init__(
            self,
            args,
            base_dir=Path.db_root_dir('sbd'),
            split='train',
    ):
        """
        :param base_dir: path to VOC dataset directory
        :param split: train/val
        :param transform: transform to apply
        """
        super().__init__()
        self._base_dir = base_dir
        self._dataset_dir = os.path.join(self._base_dir, 'dataset')
        self._image_dir = os.path.join(self._dataset_dir, 'img')
        self._cat_dir = os.path.join(self._dataset_dir, 'cls')

        if isinstance(split, str):
            self.split = [split]
        else:
            split.sort()
            self.split = split

        self.args = args

        # Get list of all images from the split and check that the files exist
        self.im_ids = []
        self.images = []
        self.categories = []
        for splt in self.split:
            with open(os.path.join(self._dataset_dir, splt + '.txt'),
                      "r") as f:
                lines = f.read().splitlines()

            for line in lines:
                _image = os.path.join(self._image_dir, line + ".jpg")
                _categ = os.path.join(self._cat_dir, line + ".mat")
                assert os.path.isfile(_image)
                assert os.path.isfile(_categ)
                self.im_ids.append(line)
                self.images.append(_image)
                self.categories.append(_categ)

        assert (len(self.images) == len(self.categories))

        # Display stats
        print('Number of images: {:d}'.format(len(self.images)))
Example No. 13
def create_dataset(subs_list, indexing=True):
    """
    Creates the New Dataset using features calculated from the base data.

    Parameters
    ----------
    subs_list : list
        list of subjects to create the new dataset for
    indexing : bool, optional
        whether to write the index column to the output files (default = True)

    """

    S = None
    print(
        f'\nProcess - {current_process().name} has {len(subs_list)} files to work on.\n'
    )

    try:
        start = time()
        repo = (Subject(sub) for sub in subs_list)
        for sub in repo:
            S = sub
            for i in range(3):
                filePath = Path(
                    f'{new_sensor_paths[i]}/{sub.subject_id[:-4]}.csv')
                if not os.path.exists(filePath):
                    # Most expensive line of code in the module (Takes hours)
                    col_names, df, _, _, _ = feature_extractor(
                        sub, sensors[i].lower(), output_type='df')
                    df.to_csv(filePath, sep="\t", index=indexing)
                    print(
                        f"File generated - '{sub.subject_id[:-4]}.csv' by process : {current_process().name}"
                    )
                else:
                    print(f'File "{sub.subject_id[:-4]}.csv" already exists!')

        print(
            f'\nTime taken by - {current_process().name} : {time() - start:.2f} secs'
        )
    except Exception as e:
        print(f"Exception occurred in {current_process().name}\n")
        print(f'While working on this portion of the subs_list:\n'
              f'{subs_list}')
        print(f'Error occurred in FILE # {S.subject_id}\n')
        raise e
Example No. 14
def create_age_folder_structure():
    """
    Creates the folder structure for the Age Sorted Dataset.

    """

    try:
        new_dataset_path = Path(f'{DATASETS}/{FEATURES_DATASET}_Age_Sorted')
        if not os.path.exists(new_dataset_path):
            print(
                f'\nWARNING: The path does not exist. Creating new directory...\n{new_dataset_path}\n'
            )
            os.mkdir(new_dataset_path)
    except:
        print(
            "ERROR in creating the sorted dataset_operations directory within folder /Data Sets"
        )
        return False

    try:
        for folder, age_dir in age_dirs.items():
            if not os.path.exists(age_dir):
                os.mkdir(age_dir)
            else:
                print(f"The directory {folder} already exists.")
    except:
        print(
            "ERROR in creating age based directories in /Data Sets/Dataset_Age_Sorted"
        )
        return False

    try:
        for sub_folder, sensor_dir in sensor_dirs.items():
            for sub_path in sensor_dir:
                if not os.path.exists(sub_path):
                    os.mkdir(sub_path)
                else:
                    print(f"The directory {sub_path} already exists.")
        return True
    except:
        print(
            "ERROR in creating sensor directories in /Data Sets/Dataset_Age_Sorted/[age_Groups]"
        )
        return False
Example No. 15
    def __init__(self,
                 args,
                 base_dir=Path.db_root_dir('coco'),
                 split='train',
                 year='2017'):
        super().__init__()
        ann_file = os.path.join(
            base_dir, 'annotations/instances_{}{}.json'.format(split, year))
        ids_file = os.path.join(
            base_dir, 'annotations/{}_ids_{}.pth'.format(split, year))
        self.img_dir = os.path.join(base_dir,
                                    'images/{}{}'.format(split, year))
        self.split = split
        self.coco = COCO(ann_file)
        self.coco_mask = mask
        if os.path.exists(ids_file):
            self.ids = torch.load(ids_file)
        else:
            ids = list(self.coco.imgs.keys())
            self.ids = self._preprocess(ids, ids_file)
        self.args = args
Example No. 16
def make_data_loader(args, **kwargs):

    if args.dataset:
        base_dirs = Path.db_root_dir(args.dataset)

        print('Training data:{}'.format(base_dirs['train']))
        train_loader = DataLoader(dataset=NYUDataset(base_dirs['train'],
                                                     istest=False),
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.workers)

        print('Validate data:{}'.format(base_dirs['val']))
        val_loader = DataLoader(
            dataset=NYUDataset(base_dirs['val'], istest=True),
            batch_size=args.batch_size,  # 1 * torch.cuda.device_count(), 1 for each GPU
            shuffle=False,
            num_workers=args.workers  # 1 * torch.cuda.device_count()
        )

        return train_loader, val_loader
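A hedged usage sketch, assuming args is an argparse-style namespace carrying the attributes the function reads (dataset, batch_size, workers); the values below are placeholders, not taken from any real training script:

import argparse

# Placeholder values; the real ones come from the training script's argument parser.
args = argparse.Namespace(dataset='nyu', batch_size=8, workers=4)
train_loader, val_loader = make_data_loader(args)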
Example No. 17
def main():
    parser = argparse.ArgumentParser(
        description="Check pid")
    parser.add_argument("-c", "--count", action='store_true',
                        help="count active processes")
    parser.add_argument("-e", "--clear", action='store_true',
                        help="clear not running processes")
    parser.add_argument("-s", "--simplify", action='store_true',
                        help="simplify output")
    args = parser.parse_args()

    if not Path(".pid").exists():
        return

    with open(".pid", "r") as fil:
        pids = fil.readlines()

    new_pids = []
    for pid in pids:
        pid = pid.strip()
        if not pid:
            continue
        try:
            process = psutil.Process(int(pid))
            if not args.count:
                cmd = process.cmdline()
                if args.simplify and len(cmd) > 20:
                    cmd = cmd[:20]
                    cmd.append("...")
                print("{}: {}".format(pid, " ".join(cmd)))
            new_pids.append(pid)
        except psutil.NoSuchProcess:
            if not args.count and not args.clear:
                print("{}: <not found>".format(pid))
    if args.count:
        print(len(new_pids))
    if args.clear:
        with open(".pid", "w") as fil:
            fil.write("\n".join(new_pids) + "\n")
Example No. 18
def file_exists(subs_list):
    """
    Checks to see if any previous files with feature extracted data exist in the Dataset and returns the
    updated list of files which don't exist in the Dataset.

    This is done because generating the files is expensive and this avoids having to start over from scratch.

    Parameters
    ----------
    subs_list : list
        Complete subjects list

    Returns
    -------
    updated_subs : list
        list of subject files which are not already in the new Dataset

    """
    updated_subs = []
    print(f'Checking for existing files in directories:\n')
    for dir in new_sensor_paths:
        print(f'{dir}')
        updated_subs += subs_list
    print()

    for sub in subs_list:
        for i in range(3):
            filePath = Path(f'{new_sensor_paths[i]}/{sub[:-4]}.csv')
            if os.path.exists(filePath):
                updated_subs.remove(sub)
    updated_subs = list(sorted(set(updated_subs)))
    print(f'There were {len(subs_list) - len(updated_subs)} existing files!\n')
    print(
        f'The updated subjects list now contains {len(updated_subs)} entries.\n'
    )
    return updated_subs
Example No. 19
    def __init__(self,
                 args,
                 root=Path.db_root_dir('cityscapes'),
                 split="train"):

        self.root = root
        self.split = split
        self.args = args
        self.files = {}

        self.images_base = os.path.join(self.root, 'leftImg8bit', self.split)
        self.annotations_base = os.path.join(self.root, 'gtFine_trainvaltest',
                                             'gtFine', self.split)

        self.files[split] = self.recursive_glob(rootdir=self.images_base,
                                                suffix='.png')

        self.void_classes = [
            0, 1, 2, 3, 4, 5, 6, 9, 10, 14, 15, 16, 18, 29, 30, -1
        ]
        self.valid_classes = [
            7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31,
            32, 33
        ]
        self.class_names = ['unlabelled', 'road', 'sidewalk', 'building', 'wall', 'fence', \
                            'pole', 'traffic_light', 'traffic_sign', 'vegetation', 'terrain', \
                            'sky', 'person', 'rider', 'car', 'truck', 'bus', 'train', \
                            'motorcycle', 'bicycle']

        self.ignore_index = 255
        self.class_map = dict(zip(self.valid_classes, range(self.NUM_CLASSES)))

        if not self.files[split]:
            raise Exception("No files for split=[%s] found in %s" %
                            (split, self.images_base))

        print("Found %d %s images" % (len(self.files[split]), split))
Example No. 20
    def _save_report(self, name):
        with open(os.path.join(Path.processing_dir(), "{}.html".format(name)),
                  "w") as file:
            file.write(
                str(self.soup).replace("&lt;", "<").replace("&gt;", ">"))
Example No. 21
class Command(object):

    def __init__(self, config, graph):
        self.config = config
        self.graph = graph
        self.path = Path(config)
        self.parser = Parser(config)
        self.writer = Writer(config)
        self.loader = Loader(config, graph)

    # Public methods
       
    def new(self, filename):
        # TODO: parse out docid, maybe sign docid

        try:
            assert filename.endswith(self.config.source_ext)
        except AssertionError as e:
            print "File name must end with %s" % self.config.source_ext
            sys.exit(1)

        source_dir = self.path.get_source_dir()
        source_abspath = os.path.join(source_dir, filename)
        content = self._build_initial_source(filename)

        print "Creating file:  %s" % source_abspath
        self._create_file(source_abspath, content)

        return source_abspath

    def edit(self, filename):
        # Open new file in the editor specified in the yaml config file
        editor = self.config.editor
        source_path = self.new(filename)
        process = "%s %s" % (editor, source_path)
        return subprocess.call(process.split())

    def init(self):
        # Make sure author is pre-loaded in the database
        data = dict(username=self.config.username, name=self.config.name)
        author = self.graph.people.get_or_create("username", self.config.username, data)

    def build(self):
        # Create HTML fragments
        self.writer.run()

    def update(self):
        # Update blog entries
        self.loader.update_entries()

    # Execute one of the above methods

    def _execute(self, command_name, command_args):
        command = getattr(self, command_name)
        return command(*command_args)

    # Private methods

    def _create_file(self, source_abspath, content):
        self._make_dir(source_abspath)
        with open(source_abspath, "w") as fout:
            fout.writelines(content)
                                     
    def _build_initial_source(self, filename):
        # generate the source from the template
        template_path = self.path.get_rst_template_path()
        template = get_template(template_path)
        params = self._get_params(filename)
        source = template.substitute(params)
        return source

    def _get_params(self, filename):
        # Get template params
        docid = uuid.uuid4().hex
        date = datetime.datetime.now().strftime("%Y-%m-%d")
        username = self.config.username or getpass.getuser()
        title = self._get_title(filename)
        title_line = "=" * len(title)
        params = dict(title=title, title_line=title_line, docid=docid, author=username, date=date)
        return params

    def _get_title(self, filename):
        stub = os.path.splitext(filename)[0]
        word_list = stub.split(self.config.separator)
        words = " ".join(word_list)
        title = titlecase(words)
        return title
        
    def _write_file(self, file_path, content):
        with open(file_path, "w") as fout:
            fout.write(content.encode('utf-8') + '\n')

    def _make_dir(self, path):
        # mkpath
        dirname = os.path.dirname(path)
        if not os.path.isdir(dirname):
            print "Creating dir:   %s" % dirname
            os.makedirs(dirname)
Example No. 22
    def initialize(self, config):
        self.config = config
        self.path = Path(config)
        assert self.db_abspath == self.path.get_changelog_abspath()
Example No. 23
class ChangeLog(PickleDB):

    db_name = "changelog"

    def initialize(self, config):
        self.config = config
        self.path = Path(config)
        assert self.db_abspath == self.path.get_changelog_abspath()

    def update(self):
        # If the changelog file exists, go ahead and read/write to it
        if self.exists() is False:
            print "CHANGELOG NOT FOUND: Will add/update all entries in database on push."
            # Remove the old changelog from git so it doesn't persist on the server
            self._remove_changelog()
            return 
        
        diff = self._get_diff()   
        if not diff:
            return

        self._write_diff(diff)
        self._display()
        
        return self.data

    def _display(self):
        print "CHANGELOG"
        for filename in self.data:
            status, timestamp = self.data[filename]
            print timestamp, status, filename 
        print
        
    def _write_diff(self, diff):
        source_dir = self.path.get_source_dir()
        start = self.path.get_working_dir()
        source_folder = os.path.relpath(source_dir, start)

        for status, filename in self._split_diff(diff):
            # filter out files that don't include the source_dir
            if re.search(source_folder, filename) and filename.endswith(self.config.source_ext):
                # Git diff is NOT sorted by modified time.
                # We need it ordered by time so use timestamp instead
                timestamp = self._current_timestamp()
                # remove it from the dict and add it back so more recent entries are always last
                self.data.pop(filename, None)
                self.data[filename] = (status, timestamp)
        self.write()

        # Add the changelog to git now that it has been updated.
        self._add_changelog()

        return self.data

    def _current_timestamp(self):
        return int(time.time())

    def _split_diff(self, diff):
        lines = [line.split('\t') for line in diff.strip().split('\n')]
        return lines

    def _get_diff(self):
        # git diff is NOT sorted by modified time
        #command = "git diff --cached --name-only"
        git_dir = self.path.get_git_dir()
        working_dir = self.path.get_working_dir()
        command = "git  diff --cached --name-status"
        return self._execute(command)

    def _add_changelog(self):
        # Add the changelog to git after it has been updated.
        command = "git add %s" % self.path.get_changelog_abspath()
        self._execute(command)

    def _remove_changelog(self):
        # Doing this so the old changelog doesn't persist on the server
        command = "git rm %s" % self.path.get_changelog_abspath()
        print self._execute(command)
        
    def _execute(self, command):
        # Setting Git env vars to ensure proper paths when running outside of working dir
        os.putenv("GIT_DIR", self.path.get_git_dir())
        os.putenv("GIT_WORK_TREE", self.path.get_working_dir()) 
        return execute(command)
Example No. 24
def test_path_validate(tmp_path):
    from config import Path

    not_existing_path = tmp_path / 'nope'
    existing_dir = tmp_path

    existing_file = tmp_path / 'yes'
    with existing_file.open('w'):
        pass

    item = Path()
    assert item.validate(None) is None
    assert item.validate(not_existing_path) == not_existing_path
    assert item.validate(existing_dir) == existing_dir
    assert item.validate(existing_file) == existing_file

    item = Path(dir_okay=False)
    assert item.validate(None) is None
    assert item.validate(not_existing_path) == not_existing_path
    assert item.validate(existing_file) == existing_file
    with pytest.raises(ConfigError):
        item.validate(existing_dir)


    item = Path(file_okay=False)
    assert item.validate(None) is None
    assert item.validate(not_existing_path) == not_existing_path
    assert item.validate(existing_dir) == existing_dir
    with pytest.raises(ConfigError):
        item.validate(existing_file)

    item = Path(exists=False)
    assert item.validate(None) is None
    assert item.validate(not_existing_path) == not_existing_path
    with pytest.raises(ConfigError):
        item.validate(existing_file)
    with pytest.raises(ConfigError):
        item.validate(existing_dir)


    item = Path(allow_none=False)
    with pytest.raises(ConfigError):
        item.validate(None)
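Taken together, these assertions pin down a small option surface for Path: exists, dir_okay, file_okay and allow_none. A minimal sketch that satisfies exactly the checks above (an assumption, not the real config.Path; the tilde and ".." normalisation exercised in Example No. 1 is omitted):

import os


class ConfigError(Exception):
    pass


class Path:
    def __init__(self, exists=True, dir_okay=True, file_okay=True, allow_none=True):
        self.exists = exists          # False: the value must not point at an existing path
        self.dir_okay = dir_okay      # False: existing directories are rejected
        self.file_okay = file_okay    # False: existing files are rejected
        self.allow_none = allow_none  # False: None is rejected

    def validate(self, value):
        if value is None:
            if not self.allow_none:
                raise ConfigError("None is not allowed")
            return None
        if os.path.isdir(value) and not (self.exists and self.dir_okay):
            raise ConfigError("directory not allowed: %s" % value)
        if os.path.isfile(value) and not (self.exists and self.file_okay):
            raise ConfigError("file not allowed: %s" % value)
        return value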
Example No. 25
# Configuration Variables
# ------------------------
GENERATE_DATASET = True
SORT_BY_AGE = False
TESTING = True
TEST_COUNT = 8  # Should be >= 4
# ------------------------

if TESTING:
    FEATURES_DATASET = FEATURES_DATASET + "_TEST"

new_sensor_paths = [
    Path(f"{DATASETS}/{FEATURES_DATASET}/{sensor}") for sensor in sensors
]

if not os.path.exists(DATASETS):
    print(
        f'\nWARNING: The path does not exist. Creating new directory...\n{DATASETS}\n'
    )
    os.mkdir(DATASETS)


def create_dataset_folder_structure():
    """
    Creates the folder structure for the new dataset.

    """
Example No. 26
    def __init__(self, config):
        self.config = config
        self.path = Path(config)
Example No. 27
    def __init__(self, args):
        self.args = args

        self.half = args.half

        self.prev_pred = 0.0
        self.bad_count = 0

        # Define Saver
        self.saver = Saver(args)
        self.saver.save_experiment_config()
        # Define Tensorboard Summary
        self.summary = TensorboardSummary(self.saver.experiment_dir)
        self.writer = self.summary.create_summary()

        # Define Dataloader
        kwargs = {'num_workers': args.workers, 'pin_memory': True}
        self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(
            args, **kwargs)

        # Define network
        model = DeepLab(num_classes=self.nclass,
                        backbone=args.backbone,
                        output_stride=args.out_stride,
                        sync_bn=args.sync_bn,
                        freeze_bn=args.freeze_bn)

        #train_params = [{'params': model.get_1x_lr_params(), 'lr': args.lr}, {'params': model.get_10x_lr_params(), 'lr': args.lr*10}]
        #optimizer = torch.optim.SGD(train_params, momentum=args.momentum, weight_decay=args.weight_decay, nesterov=args.nesterov)

        optimizer = torch.optim.SGD(params=model.parameters(),
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay,
                                    nesterov=args.nesterov)
        #optimizer = torch.optim.Adam(params=model.parameters(), lr=args.lr)

        # Define Criterion
        # whether to use class balanced weights
        if args.use_balanced_weights:
            classes_weights_path = os.path.join(
                Path.db_root_dir(args.dataset),
                args.dataset + '_classes_weights.npy')
            if os.path.isfile(classes_weights_path):
                weight = np.load(classes_weights_path)
            else:
                weight = calculate_weigths_labels(args.dataset,
                                                  self.train_loader,
                                                  self.nclass)
            weight = torch.from_numpy(weight.astype(np.float32))
        else:
            weight = None
        self.criterion = SegmentationLosses(
            weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)
        self.model, self.optimizer = model, optimizer

        # Define lr scheduler
        self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs,
                                      len(self.train_loader))
        # Using cuda
        if args.cuda:
            self.model = torch.nn.DataParallel(self.model,
                                               device_ids=self.args.gpu_ids)
            patch_replication_callback(self.model)
            self.model = self.model.cuda()

        # Resuming checkpoint
        self.best_pred = 0.0
        if args.resume is not None:
            if not os.path.isfile(args.resume):
                raise RuntimeError("=> no checkpoint found at '{}'".format(
                    args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            if args.cuda:
                self.model.module.load_state_dict(checkpoint['state_dict'])
            else:
                self.model.load_state_dict(checkpoint['state_dict'])
            if not args.ft:
                self.optimizer.load_state_dict(checkpoint['optimizer'])
            self.best_pred = checkpoint['best_pred']
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))

        # Clear start epoch if fine-tuning
        if args.ft:
            args.start_epoch = 0
Example No. 28
    def __init__(self):
        with open(Path.template_report()) as file:
            self.soup = BeautifulSoup(file, features="html.parser")
Example No. 29
class Loader(object):
    """Load blog entries into Neo4j."""

    def __init__(self, config, graph):
        #self.changelog = ChangeLog(config)
        self.path = Path(config)

        changelog_dir = self.path.get_working_etc()
        self.changelog = ChangeLog(changelog_dir)
        self.changelog.initialize(config)

        self.parser = Parser(config)
        self.graph = graph

    def changelog_exists(self):
        return self.changelog.exists()

    def update_entries(self):
        if self.changelog_exists():
            print "UPDATING CHANGED"
            self.update_changed_entries()
        else:
            print "UPDATING ALL"
            self.update_all_entries()

    def update_all_entries(self):
        for source_abspath in self.parser.get_all_files():
            self.update_entry(source_abspath)

    def update_changed_entries(self):
        update_count = 0

        data = self.changelog.data

        if data is None:
            return update_count

        last_updated = self.get_last_updated()

        # Data is an OrderedDict, most recent changes last
        for source_path in reversed(data):
            status, timestamp = data[source_path]
            if self.old_timestamp(timestamp, last_updated):
                break
            source_abspath = self.path.get_source_abspath(source_path)
            update_count += self.update_entry(source_abspath)

        return update_count

    def old_timestamp(self, timestamp, last_updated):
        # Timestamps with a time before the last_updated time 
        # were updated during the previous push
        return (timestamp <= last_updated)
        
    def update_entry(self, source_abspath):
        data = self.parser.get_data(source_abspath)
        fragment_abspath = self.path.get_fragment_abspath(source_abspath)
        if os.path.exists(fragment_abspath) is False:
            print "WARNING: Fragment Not Found", fragment_abspath
            return False
        # TODO: remove entry if fragment doesn't exist
        entry = self.graph.entries.save(data)
        return True

    def set_last_updated(self, last_updated):
        # Metadata methods are Neo4j-only right now
        self.graph.set_metadata("entries:last_updated", last_updated)

    def get_last_updated(self):
        # Metadata methods are Neo4j-only right now
        result = self.graph.get_metadata("entries:last_updated")
        last_updated = result.raw
        return last_updated
Example No. 30
    def __init__(self, config):
        #self.config = config
        self.path = Path(config)
        self.parser = Parser(config)
Example No. 31
    def _get_scatter_trace_values(pid, collumn):
        raw_csv = CSV.get_csv_values(Path.pid(pid), collumn)
        y = raw_csv[1::]
        x = [i for i in range(0, len(y))]
        title = raw_csv[0]
        return [title, x, y]
Example No. 32
# Performance metric to optimize the model for
SCORING = 'f1_weighted'
# Set to True if TESTING with the Python CONSOLE
TESTING = False
# If True, the dataset is normalized before training & testing
DATA_NORMALIZATION = True
# If True, only a selected portion of the entire dataset is used for training+testing (# of rows = row_count)
DATA_REDUCE = False
# If True, generate a .csv file for the feature ranking
GEN_RANKING_FILE = False
# If True, a plot will be generated for the # of features used vs performance metric
PLOT = False
# If True, trained model is exported to TRAINED_MODEL_PATH
EXPORT_MODEL = False

# Paths
# Directory name for new data set which contains the training/testing data for the classifier
PROCESSED_DATASET = "Processed_Dataset"
# Directory path for new data set which contains the training/testing data for the classifier
PROCESSED_DATASET_PATH = Path(f'{DATASETS}/{PROCESSED_DATASET}')
# loading in the actual dataset for the ML classifier
DATA_PATH = Path(f"{PROCESSED_DATASET_PATH}/ds_all.csv")
# Trained Model directory name
TRAINED_MODEL_DIR = 'Trained Models'
# Trained Model directory path
TRAINED_MODEL_PATH = Path(f'{ROOT}/{TRAINED_MODEL_DIR}')
# Trained Model name
TRAINED_MODEL_NAME = 'step_detection_model_test.pkl'
# Trained Normalizer name
TRAINED_NORMALIZER_NAME = 'step_detection_min_max_norm_test.pkl'
Example No. 33
class Foo(Configurable):
    path = Path(allow_none=False)
Example No. 34
class Parser(object):
    """Parse ReStructuredText source files."""

    def __init__(self, config):
        self.config = config
        self.path = Path(config)
        #self.source_dir = "%s/%s" % (config.project_dir, config.source_folder)

    def get_fragment(self, source_abspath):
        source = self.get_document_source(source_abspath)
        parts = self.get_document_parts(source) 
        return parts['fragment']

    def get_data(self, source_abspath):
        source = self.get_document_source(source_abspath)
        parts = self.get_document_parts(source) 

        data = dict()
        data['title'] = parts['title']
        data['subtitle'] = parts['subtitle']
        data['fragment'] = parts['fragment']
        
        # Extra metadata: docid, author, date, tags
        meta = self._get_metadata(source, source_abspath) 
        data.update(meta)  

        # Derived parts: slug, fragment_path, source_path 
        slug = self.get_slug(source_abspath)
        data['slug'] = slug
        data['fragment_path'] = self.path.get_fragment_path(source_abspath)
        data['source_path'] = self.path.get_source_path(source_abspath)

        return data

    def get_document_source(self, source_abspath):
        def_source = self.get_substitution_definitions()
        doc_source = self.read_source_file(source_abspath)
        source = "\n".join([def_source, doc_source])
        return source
                
    def get_document_parts(self, source):
        # http://docutils.sourceforge.net/docs/api/publisher.html#publish-parts-details
        writer_name = self.config.writer_name
        settings = dict(initial_header_level=2) # do we need this?
        options = dict(source=source, writer_name=writer_name, settings_overrides=settings)
        parts = docutils.core.publish_parts(**options)
        return parts
    
    def get_substitution_definitions(self):
        # Standard substitution definitions
        # http://docutils.sourceforge.net/docs/ref/rst/definitions.html
        module_abspath = os.path.abspath(__file__)
        module_dir = os.path.dirname(module_abspath)
        source = self.read_source_file("%s/etc/substitutions.rst" % module_dir)
        return source

    def read_source_file(self, file_path):
        fin = open(file_path, "r")
        source = fin.read().decode('utf-8')
        return source       

    def get_slug(self, source_abspath):
        start = self.path.get_source_dir()
        #relative_path = file_name.rpartition(source_dir)[-1].lstrip("/") 
        relative_path = os.path.relpath(source_abspath, start)
        slug = os.path.splitext(relative_path)[0]
        return slug

    def _get_metadata(self, source, source_abspath):
        doctree = docutils.core.publish_doctree(source)
        docinfo = doctree.traverse(docutils.nodes.docinfo)
        try:
            meta = self._process_standard_fields(docinfo)
            meta = self._process_custom_fields(meta)
        except IndexError:
            print "ERROR: Source file is missing data: %s" % source_abspath
            raise
        for key, value in meta.items():
            meta[key] = value.astext()
        return meta

    def _process_standard_fields(self, docinfo):
        # Standard fields: date, author, etc.
        meta = {}
        for node in docinfo[0].children:
            key = node.tagname.lower()
            value = node.children[0]
            meta[key] = value
        return meta

    def _process_custom_fields(self, meta):
        # http://repo.or.cz/w/wrigit.git/blob/f045e5e7766e767c0b56bcb7a1ba0582a6f4f176:/rst.py
        field = meta['field']
        meta['tags'] = field.parent.children[1]
        meta['docid'] = field.parent.parent.children[0].children[1]
        del meta['field']
        return meta
        
    def get_all_files(self):
        source_dir = self.path.get_source_dir()
        for root, dirs, files in os.walk(source_dir):
            for filename in files:
                # Ignore pattern: emacs autosave files. TODO: generalize this
                if fnmatch(filename, "*.rst") and not fnmatch(filename, "*.#*"):
                    source_abspath = os.path.join(root, filename)
                    yield source_abspath