Example #1
def main():
    plist = ProjectList.from_path('projects.yaml')
    print([p for p in plist.project_list if not p.desc])
    topic_map = plist.get_projects_by_type('topic')
    topic_toc_text = format_tag_toc(topic_map)
    projects_by_topic = format_all_categories(topic_map)

    plat_map = plist.get_projects_by_type('platform')
    plat_toc_text = format_tag_toc(plat_map)
    projects_by_plat = format_all_categories(plat_map)

    context = {
        'TOPIC_TOC': topic_toc_text,
        'TOPIC_TEXT': projects_by_topic,
        'PLATFORM_TOC': plat_toc_text,
        'PLATFORM_TEXT': projects_by_plat,
        'TOTAL_COUNT': len(plist.project_list)
    }

    for filename in iter_find_files(TEMPLATES_PATH, '*.tmpl.md'):
        tmpl_text = open(filename).read()
        target_filename = os.path.split(filename)[1].replace('.tmpl', '')
        output_text = tmpl_text.format(**context)
        with atomic_save(target_filename) as f:
            f.write(output_text.encode('utf8'))

    return

Example #2
def render(plist, pdir):
    "generate the list markdown from the yaml listing"
    topic_map = plist.get_projects_by_type('topic')
    topic_toc_text = format_tag_toc(topic_map)
    projects_by_topic = format_all_categories(topic_map)

    plat_map = plist.get_projects_by_type('platform')
    plat_toc_text = format_tag_toc(plat_map)
    projects_by_plat = format_all_categories(plat_map)

    context = {
        'TOPIC_TOC': topic_toc_text,
        'TOPIC_TEXT': projects_by_topic,
        'PLATFORM_TOC': plat_toc_text,
        'PLATFORM_TEXT': projects_by_plat,
        'TOTAL_COUNT': len(plist.project_list)
    }

    templates_path = pdir + '/templates/'
    if not os.path.isdir(templates_path):
        raise APACLIError('expected "templates" directory at %r' %
                          templates_path)

    for filename in iter_find_files(templates_path, '*.tmpl.md'):
        tmpl_text = open(filename).read()
        target_filename = os.path.split(filename)[1].replace('.tmpl', '')
        output_text = tmpl_text.format(**context)
        with atomic_save(pdir + '/' + target_filename) as f:
            f.write(output_text.encode('utf8'))

    return
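
All of these snippets use iter_find_files from boltons.fileutils, which walks a
directory tree and yields the paths that match one or more glob patterns (with
optional ignore patterns). A minimal, self-contained usage sketch; the 'docs'
directory and the patterns below are illustrative, not taken from the projects
on this page:

from boltons.fileutils import iter_find_files

# recursively list Markdown templates under ./docs (hypothetical path),
# skipping README.md; iter_find_files returns a lazy generator of paths
for path in iter_find_files('docs', patterns=['*.md'], ignored=['README.md']):
    print(path)
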
Example #3
    def make_pairs_dataset(self, path, n_hidden_messages, n_pairs):
        pairs = []
        files_by_speaker = defaultdict(list)
        unfiltered_wav_files = list(fileutils.iter_find_files(path, "*.wav"))
        wav_files = []
        for wav in unfiltered_wav_files:
            # filter out short files
            try:
                if soundfile.read(wav)[0].shape[0] > 3 * 8000:
                    wav_files.append(wav)
            except Exception:
                # skip files that soundfile can't read
                pass

        for wav in wav_files:
            speaker = int(wav.split('/')[-3])
            files_by_speaker[speaker].append(wav)

        for i in range(n_pairs):
            # random.choice needs a sequence, so materialize the dict keys
            speaker = random.choice(list(files_by_speaker))
            sampled_files = random.sample(files_by_speaker[speaker],
                                          1 + n_hidden_messages)
            carrier_file, hidden_message_files = sampled_files[0], sampled_files[1:]
            pairs.append((carrier_file, hidden_message_files))

        return pairs
Example #4
def _get_all_metric_mods(check_reqs=True):
    ret = []
    for metric_path in iter_find_files(METRICS_PATH,
                                       '*.py',
                                       ignored='__init__.py'):
        mod_name = os.path.splitext(os.path.split(metric_path)[-1])[0]
        metric_mod = imp.load_source(mod_name, metric_path)
        if not callable(getattr(metric_mod, 'collect', None)):
            print_err('skipping non-metric module at %r' % metric_path)
            continue
        if not check_reqs:
            ret.append(metric_mod)
            continue
        missing_env_vars = _check_required_env_vars(metric_mod)
        missing_cmds = _check_required_cmds(metric_mod)
        if missing_cmds:
            print_err(
                'omitting metric "%s" due to missing commands: %s (see installation instructions above)'
                % (metric_mod.__name__, ', '.join(missing_cmds)))
        elif missing_env_vars:
            print_err('omitting metric "%s" due to missing ENV variables: %s' %
                      (metric_mod.__name__, ', '.join(missing_env_vars)))
        else:
            ret.append(metric_mod)
    return ret
Example #5
def _iter_changed_files(entries_path, theme_path, config_path, interval=0.5):
    mtimes = {}
    while True:
        changed = []
        to_check = itertools.chain([config_path],
                                   iter_find_files(entries_path, ENTRY_PATS),
                                   iter_find_files(theme_path, '*'))
        for path in to_check:
            try:
                new_mtime = os.stat(path).st_mtime
            except OSError:
                continue
            old_mtime = mtimes.get(path)
            if not old_mtime or new_mtime > old_mtime:
                mtimes[path] = new_mtime
                changed.append(path)
        if changed:
            yield changed
        time.sleep(interval)
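
_iter_changed_files is an infinite polling loop: every interval seconds it
stats the config file plus every entry and theme file, and yields the batch of
paths whose mtime has advanced since the last check. A minimal consumer sketch,
with hypothetical paths and a hypothetical rebuild() callback:

# hypothetical usage: re-render whenever any watched file changes
for changed in _iter_changed_files('entries', 'theme', 'config.yaml'):
    print('detected changes:', changed)
    rebuild()  # hypothetical callback
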
Example #6
    def make_pairs_dataset(self, path, n_hidden_messages, n_pairs):
        pairs = []
        wav_files = list(fileutils.iter_find_files(path, "*.wav"))

        for i in range(n_pairs):
            sampled_files = random.sample(wav_files, 1 + n_hidden_messages)
            carrier_file, hidden_message_files = sampled_files[0], sampled_files[1:]
            pairs.append((carrier_file, hidden_message_files))
        return pairs
Example #8
    def load(self):
        self.last_load = time.time()
        self._load_custom_mod()
        self._call_custom_hook('pre_load')
        self.html_renderer = AshesEnv(paths=[self.theme_path])
        self.html_renderer.load_all()
        self.md_renderer = AshesEnv(paths=[self.theme_path],
                                    exts=['md'],
                                    keep_whitespace=False)
        self.md_renderer.autoescape_filter = ''
        self.md_renderer.load_all()

        entries_path = self.paths['entries_path']
        entry_paths = []
        for entry_path in iter_find_files(entries_path, ENTRY_PATS):
            entry_paths.append(entry_path)
        entry_paths.sort()
        for ep in entry_paths:
            with chlog.info('entry load') as rec:
                try:
                    entry = self._entry_type.from_path(ep)
                    rec['entry_title'] = entry.title
                    rec['entry_length'] = round(entry.get_reading_time(), 1)
                except IOError:
                    rec.exception('unopenable entry path: {}', ep)
                    continue
                except:
                    rec['entry_path'] = ep
                    rec.exception(
                        'entry {entry_path} load error: {exc_message}')
                    continue
                else:
                    rec.success('entry loaded:'
                                ' {entry_title} ({entry_length}m)')
            if entry.is_draft:
                self.draft_entries.append(entry)
            elif entry.is_special:
                self.special_entries.append(entry)
            else:
                self.entries.append(entry)

        # Sorting the EntryLists
        self.entries.sort()
        # sorting drafts/special pages doesn't do much
        self.draft_entries.sort(key=lambda e: os.path.getmtime(e.source_path))
        self.special_entries.sort()

        self._rebuild_tag_map()

        for i, entry in enumerate(self.entries, start=1):
            start_next = max(0, i - NEXT_ENTRY_COUNT)
            entry.next_entries = self.entries[start_next:i - 1][::-1]
            entry.prev_entries = self.entries[i:i + PREV_ENTRY_COUNT]

        self._call_custom_hook('post_load')
Example #9
    def load(self):
        self.last_load = time.time()
        self._load_custom_mod()
        self._call_custom_hook('pre_load')
        self.html_renderer = AshesEnv(paths=[self.theme_path])
        self.html_renderer.load_all()
        self.md_renderer = AshesEnv(paths=[self.theme_path],
                                    exts=['md'],
                                    keep_whitespace=False)
        self.md_renderer.autoescape_filter = ''
        self.md_renderer.load_all()

        entries_path = self.paths['entries_path']
        entry_paths = []
        for entry_path in iter_find_files(entries_path, ENTRY_PATS):
            entry_paths.append(entry_path)
        entry_paths.sort()
        for ep in entry_paths:
            with chlog.info('entry load') as rec:
                try:
                    entry = self._entry_type.from_path(ep)
                except IOError:
                    rec.exception('unopenable entry path: {}', ep)
                    continue
                except:
                    rec.exception('entry load error: {exc_message}')
                    continue
                else:
                    rec['entry_title'] = entry.title
                    rec['entry_length'] = round(entry.get_reading_time(), 1)
                    rec.success('entry loaded:'
                                ' {entry_title} ({entry_length}m)')
            if entry.is_draft:
                self.draft_entries.append(entry)
            elif entry.is_special:
                self.special_entries.append(entry)
            else:
                self.entries.append(entry)

        # Sorting the EntryLists
        self.entries.sort()
        # sorting drafts/special pages doesn't do much
        self.draft_entries.sort(key=lambda e: os.path.getmtime(e.source_path))
        self.special_entries.sort()

        self._rebuild_tag_map()

        for i, entry in enumerate(self.entries, start=1):
            start_next = max(0, i - NEXT_ENTRY_COUNT)
            entry.next_entries = self.entries[start_next:i - 1][::-1]
            entry.prev_entries = self.entries[i:i + PREV_ENTRY_COUNT]

        self._call_custom_hook('post_load')
Example #10
def inject_noise_folder(wav_folder, noise_levels, n_items):
    if isinstance(noise_levels, float):
        noise_levels = [noise_levels]
    trg_dir = join(wav_folder, 'out')
    os.makedirs(trg_dir, exist_ok=True)
    wavs = list(fileutils.iter_find_files(wav_folder, "*.wav"))
    for noise_level in noise_levels:
        for i in range(n_items):
            w1, w2 = random.sample(wavs, 2)
            inject_noise_sample(w1, w2,
                                join(trg_dir, f"{i}_{noise_level}_noise.wav"),
                                noise_level)
Example #11
    def _make_dataset(self):
        files = []
        wavs = list(iter_find_files(self.wav_path, "*.wav"))
        if self.hparams.devrun:
            wavs = wavs[:self.hparams.devrun_size]

        for wav in tqdm(wavs, desc="loading data into memory"):
            res = self.process_file(wav)
            if res is not None:
                files.append(res)

        return files
Example #12
    def getAllImagesFromFolder(self):
        """
        Return all PNG/JPG/JPEG images in the folder
            ./resources/exampleImages/
        :return: List of filepath-strings
        """
        filePath = propertyHolder.imageFolderDir
        fileGenerator = fileutils.iter_find_files(
            filePath, patterns=['*.png', '*.jpg', '*.jpeg'])
        fileList = []
        for file in fileGenerator:
            fileList.append(file)
        return fileList
Example #13
    def generate_stubs(self, path: Path) -> List[Tuple[Path, Path]]:
        """Generate Stub Files from a package.

        Args:
            path (Path): Path to package.

        Returns:
            List[Tuple[Path, Path]]: List of tuples containing
                 a path to the original file and stub, respectively.

        """
        py_files = fileutils.iter_find_files(str(path), patterns="*.py", ignored=self._ignore_stubs)
        stubs = [utils.generate_stub(f) for f in py_files]
        return stubs
Example #14
def main(args):
    try:
        parser = argparse.ArgumentParser(description='copy all wav files from all sub dirs to out_dir')
        parser.add_argument('--input_dir', type=str, help='Path to TextGrid dir', required=True)
        parser.add_argument('--output_dir', type=str, help='Path to output dir', required=True)
        args = parser.parse_args(args)

        assert os.path.exists(args.input_dir), f"Invalid path, couldn't find [{args.input_dir}]"
        assert os.path.exists(args.output_dir), f"Invalid path, couldn't find [{args.output_dir}]"

        wav_files = (list(fileutils.iter_find_files(args.input_dir, "*.wav"))
                     + list(fileutils.iter_find_files(args.input_dir, "*.WAV")))

        counter = 0
        files_dict = {}
        for file in wav_files:
            files_dict[counter] = file
            if os.path.exists(os.path.join(args.output_dir, f"{counter}.wav")):
                os.remove(os.path.join(args.output_dir, f"{counter}.wav"))
            copyfile(file, os.path.join(args.output_dir, f"{counter}.wav"))
            counter += 1

        print(f"Finished copying '*.wav' files to {args.output_dir}")
        with open(os.path.join(args.output_dir, files_dict_fname), 'w') as f:
            f.write(f"input_dir : {args.input_dir}\n")
            f.write(f"output_dir : {args.output_dir}\n")
            for k, v in files_dict.items():
                f.write(f"{k}:{v}\n")
        print(f"Finished writing the files dictionary to {os.path.join(args.output_dir, files_dict_fname)}")

    except Exception as e:
        print(f"Failed to process the data, error: {e}")
        exit(1)  # FAIL
Example #15
def show_recent_metrics(metrics_dir):
    "shows the most recent metrics collection"
    metrics_files = sorted(iter_find_files(metrics_dir, '*.jsonl'),
                           reverse=True)
    if not metrics_files:
        print_err('no recent metrics found at %s' % metrics_dir)
        return
    metrics_file = metrics_files[0]
    print('#  ' + os.path.basename(metrics_file) + '\n')
    for line in open(metrics_file):
        try:
            print(line, end='')
        except IOError:
            break
    return
Example #16
    def from_timestamp(cls, campaign, timestamp, full=True):
        strf_tmpl = STATE_FULL_PATH_TMPL if full else STATE_PATH_TMPL

        # this handles when a date object is passed in for timestamp
        # (instead of a datetime)
        strf_tmpl = strf_tmpl.replace('000000', '*')

        start_pattern = timestamp.strftime(strf_tmpl)
        dir_path = campaign.base_path + os.path.split(start_pattern)[0]
        file_paths = sorted(
            iter_find_files(dir_path,
                            os.path.split(start_pattern)[1]))
        try:
            first_path = file_paths[0]
        except IndexError:
            raise StateNotFound(
                'no state found for campaign %r at timestamp %s' %
                (campaign, timestamp))

        return cls.from_json_path(campaign, first_path, full=full)
Example #17
def get_state_filepaths(data_dir, full=True):
    pattern = STATE_FULL_FN_GLOB if full else STATE_FN_GLOB
    return sorted(iter_find_files(data_dir, pattern))
Example #18
def render(plist, pdir, pfile):
    "generate the list markdown from the yaml listing"
    normalize(pfile=pfile, plist=plist)
    topic_map = plist.get_projects_by_type('topic')
    topic_toc_text = format_tag_toc(topic_map)
    projects_by_topic = format_all_categories(topic_map)

    plat_map = plist.get_projects_by_type('platform')
    plat_toc_text = format_tag_toc(plat_map)
    projects_by_plat = format_all_categories(plat_map)

    context = {
        'TOPIC_TOC': topic_toc_text,
        'TOPIC_TEXT': projects_by_topic,
        'PLATFORM_TOC': plat_toc_text,
        'PLATFORM_TEXT': projects_by_plat,
        'TOTAL_COUNT': len(plist.project_list)
    }

    templates_path = pdir + '/templates/'
    if not os.path.isdir(templates_path):
        raise APACLIError('expected "templates" directory at %r' %
                          templates_path)

    for filename in iter_find_files(templates_path, '*.tmpl.md'):
        tmpl_text = open(filename).read()
        target_filename = os.path.split(filename)[1].replace('.tmpl', '')
        output_text = tmpl_text.format(**context)
        with atomic_save(pdir + '/' + target_filename) as f:
            f.write(output_text.encode('utf8'))

    feed_tmpl_path = templates_path + '/atom.xml'
    if os.path.exists(feed_tmpl_path):

        def _stderr_log_func(level, name, message):
            import sys
            sys.stderr.write('%s - %s - %s\n' % (level.upper(), name, message))
            sys.stderr.flush()

        ashes_env = AshesEnv([templates_path], log_func=_stderr_log_func)
        proj_dict_list = []
        for proj in plist.project_list:
            cur = proj.to_dict()
            cur['name_slug'] = proj.name_slug
            cur['date_added_utc'] = proj.date_added.isoformat() + 'Z'
            cur['urls'] = get_url_list(proj)
            proj_dict_list.append(cur)
        cur_dt = datetime.datetime.utcnow().replace(
            microsecond=0).isoformat() + 'Z'
        res = ashes_env.render('atom.xml', {
            'projects': sorted(proj_dict_list,
                               key=lambda x: x['date_added'],
                               reverse=True),
            'last_generated_utc': cur_dt,
        })
        with atomic_save(pdir + '/atom.xml') as f:
            f.write(res.encode('utf8'))

    return
Example #19
def export_metrics(plist,
                   earliest,
                   metrics_dir,
                   metrics=None,
                   output_path=None,
                   output_format=None,
                   _show_exportable=False):
    "export a csv with metrics collated from previous collect-metrics runs"
    metric_mods = all_metric_mods = _get_all_metric_mods(check_reqs=False)
    if metrics:
        metric_mods = [m for m in metric_mods if m.__name__ in metrics]
    if not metric_mods:
        print_err(
            'failed to collect data. no known metrics selected (available: %s)'
            % ', '.join([m.__name__ for m in all_metric_mods]))
        return

    metrics_map = {(m.__name__, p.name_slug): None
                   for m in all_metric_mods for p in plist.project_list}

    metrics_files = iter_find_files(metrics_dir, '*.jsonl')
    earliest_text = earliest.isoformat()
    files_to_search = []
    for metric_file in metrics_files:
        metric_base_fn = os.path.basename(os.path.splitext(metric_file)[0])
        _, run_dt_text, newest_dt_text, oldest_dt_text = metric_base_fn.split('__')
        if newest_dt_text < earliest_text:
            print('skipping', metric_file)
            continue
        files_to_search.append(metric_file)

        with open(metric_file) as f:
            # TODO: possible optimization when searching for a
            # specific project/metric. search for the project name
            # slug and metric name in the part of the line before the
            # result begins (the jsonl keys are carefully chosen to
            # sort nicely)
            for line_data in JSONLIterator(f):
                metric_name = line_data['metric_name']
                proj_slug = line_data['project']
                try:
                    cur_data = metrics_map[metric_name, proj_slug]
                except KeyError:
                    # not a tracked project/metric
                    continue
                if (cur_data is None
                        or cur_data['pull_date'] < line_data['pull_date']):
                    metrics_map[metric_name, proj_slug] = line_data

    possible_paths = IndexedSet()
    for (metric_name, proj_slug), data in metrics_map.items():
        if data is None:
            continue

        def _visit(path, key, value):
            if not isinstance(value, (list, dict)):
                possible_paths.add((metric_name, ) + path + (key, ))
            return True

        remap(data['result'], visit=_visit)

    # TODO: deal with missing metrics
    # TODO: output csv or something
    ''' --cols 'license.total,evcs.*, sloc.TOTAL_*  --cols-file

    if col.endswith('*'):
       pop the segment with the star, fetch up until that point, then fetch/flatten everything underneath
    '''
    possible_paths = sorted(possible_paths)
    path_texts = ['.'.join('%s' % s for s in path) for path in possible_paths]

    from pprint import pprint
    if _show_exportable:
        print('\n'.join(path_texts))
        print('Showing %s exportable columns.' % len(possible_paths))
        return

    # for each project, output project_name, ...cols..., pull_date
    cols = path_texts
    all_proj_dicts = []
    for project in plist.project_list:
        cur_proj_dict = {'name': project.name_slug}
        for col in cols:
            metric_mod_name, glom_path = col.split('.', 1)
            cur_result_dict = (metrics_map[metric_mod_name, project.name_slug]
                               or {'result': {}})['result']
            cur_proj_dict[col] = glom.glom(cur_result_dict, glom_path, default='')
            if cur_proj_dict[col] is None:
                cur_proj_dict[col] = ''
        all_proj_dicts.append(cur_proj_dict)

    all_cols = ['name'] + cols  # TODO: + ['pull_date'] (oldest of all the collated metrics?)

    with open('apatite_export.csv', 'w') as f:
        w = csv.DictWriter(f, all_cols)
        w.writeheader()
        for proj_dict in all_proj_dicts:
            w.writerow(proj_dict)

    print('exported %s columns for %s projects across %s metrics (%s)' %
          (len(all_cols), len(all_proj_dicts), len(metric_mods), ', '.join(
              sorted(m.__name__ for m in metric_mods))))

    return
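
The triple-quoted TODO inside export_metrics sketches, but does not implement,
expanding wildcard column specs such as 'sloc.TOTAL_*' against the collected
path_texts. One hypothetical way to do that expansion with fnmatch; the helper
name and the final call are illustrative and not part of apatite:

import fnmatch

def expand_col_specs(col_specs, path_texts):
    # expand specs like 'sloc.TOTAL_*' against the dotted metric paths;
    # specs without a wildcard pass through unchanged
    cols = []
    for spec in col_specs:
        if '*' in spec:
            cols.extend(p for p in path_texts if fnmatch.fnmatchcase(p, spec))
        else:
            cols.append(spec)
    return cols

# e.g. cols = expand_col_specs(['license.total', 'sloc.TOTAL_*'], path_texts)
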
Example #20
    def __init__(self, path):
        self.path = path
        self.data = list(iter_find_files(self.path, "*.wav"))
        super(WavPhnDataset, self).__init__()