class DirectoryLocator(Locator):
    def __init__(self, path, recurse=False):
        self.path = Path(path)
        self.recurse = recurse
        self._filemap = None

    @property
    def filemap(self):
        """dist -> ver -> path mapping"""
        if self._filemap is None:
            self._filemap = defaultdict(lambda: defaultdict(list))
            if self.recurse:
                files = self.path.rglob("*")
            else:
                files = self.path.glob("*")
            for f in files:
                filetype, dist, ver = parse_filename(f.name)
                self._filemap[dist][ver].append(str(f.resolve()))
        return self._filemap

    def distributions(self):
        return sorted(self.filemap.keys())

    def versions(self, distribution):
        return sorted(self.filemap[distribution.lower().replace('-', '_')].keys())

    def get(self, distribution, version):
        # Reformat the data...
        return self.filemap[distribution.lower().replace('-', '_')][version]
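# A minimal usage sketch for DirectoryLocator. It assumes the Locator base
# class and the parse_filename helper used above are importable; the
# wheelhouse path is hypothetical:
#
#     locator = DirectoryLocator("/var/wheelhouse", recurse=True)
#     for dist in locator.distributions():
#         print(dist, locator.versions(dist))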
def main():
    tables = {}
    if len(sys.argv) > 1:  # argv[0] is the script name, so real arguments start at argv[1]
        repos = [Path(d) for d in sys.argv[1:]]
    else:
        print("No arguments given, looking in repos/ folder")
        p = Path("./repos")
        # Look one and two levels deep; the second level is probably rarely needed
        repos = list(p.parent for p in p.glob("./*/.git"))
        repos += list(p.parent for p in p.glob("./*/*/.git") if p.parent not in repos)
        print("Found repos: {}".format([str(r) for r in repos]))
    for path in repos:
        repo_name, rows = generate_from_repo(str(path))
        tables[repo_name] = rows
    tables["total"] = merge_tables(tables)
    # Sort the tables by commit count, descending
    for key in tables:
        tables[key] = OrderedDict(
            sorted(tables[key].items(), key=lambda item: -item[1]['commits']))
    for name, rows in tables.items():
        print(name)
        table_print(rows)
        html = table2html(rows)
        save_table(name, html)
        print()
def update(generated_folder, destination_folder, global_conf, local_conf):
    """Update data from generated to final folder"""
    wrapper_files_or_dirs = merge_options(
        global_conf, local_conf, "wrapper_filesOrDirs") or []
    delete_files_or_dirs = merge_options(
        global_conf, local_conf, "delete_filesOrDirs") or []
    generated_relative_base_directory = \
        local_conf.get('generated_relative_base_directory') or \
        global_conf.get('generated_relative_base_directory')

    client_generated_path = Path(generated_folder)
    if generated_relative_base_directory:
        client_generated_path = next(
            client_generated_path.glob(generated_relative_base_directory))

    for wrapper_file_or_dir in wrapper_files_or_dirs:
        for file_path in Path(destination_folder).glob(wrapper_file_or_dir):
            relative_file_path = file_path.relative_to(destination_folder)
            file_path_dest = client_generated_path.joinpath(str(relative_file_path))
            file_path.replace(file_path_dest)

    for delete_file_or_dir in delete_files_or_dirs:
        for file_path in client_generated_path.glob(delete_file_or_dir):
            if file_path.is_file():
                file_path.unlink()
            else:
                shutil.rmtree(str(file_path))

    shutil.rmtree(destination_folder)
    client_generated_path.replace(destination_folder)
def dmclooper(p):
    cmosinit = {'firstrawind': p.cmos[0], 'lastrawind': p.cmos[1]}
    params = {'kineticsec': p.kineticsec, 'rotccw': p.rotccw,
              'transpose': p.transpose, 'flipud': p.flipud,
              'fliplr': p.fliplr, 'fire': p.fire, 'sensorloc': p.loc}

    infn = Path(p.infile).expanduser()
    if infn.is_file():
        flist = [infn]
    elif infn.is_dir():
        flist = sorted(infn.glob('*.DMCdata')) + sorted(infn.glob('*.dat'))
    else:
        raise ValueError('Not sure what {} is'.format(infn))

    N = len(flist)
    for i, f in enumerate(flist):
        ofn = dir2fn(p.output, f, '.h5')
        if ofn.is_file():
            print('\nskipping', ofn, f)
            continue
        print('\n file {} / {} {:.1f} % done with {}'.format(
            i, N, i / N * 100., flist[0].parent))

        rawImgData, rawind, finf = goRead(
            f, p.pix, p.bin, p.frames, p.ut1, p.kineticsec, p.startutc,
            cmosinit, p.verbose, ofn, p.headerbytes)
        # %% convert
        vid2h5(None, finf['ut1'], rawind, None, ofn, params, argv)
        # %% optional plot
        if p.movie:
            plots(rawImgData, rawind, finf)
def test_run_shell():
    p = Path(os.path.dirname(TMP_SHELL_FILE_PREFIX))
    if list(p.glob(os.path.basename(TMP_SHELL_FILE_PREFIX) + '*')):
        os.system('rm %s*' % TMP_SHELL_FILE_PREFIX)

    with mock.patch('os.unlink'):
        # run shell as command
        process = run_shell('echo "aaa"')
        out = process.stdout.readlines()
        assert out[0] == b'aaa\n'

        # run shell as command
        process = run_shell(''' echo 'aa"a' ''')
        out = process.stdout.readlines()
        assert out[0] == b'aa"a\n'
        assert list(p.glob(os.path.basename(TMP_SHELL_FILE_PREFIX) + '*')) == []

        # run shell as file
        process = run_shell('echo "aaa" %s echo "bbb"' % os.linesep)
        out = process.stdout.readlines()
        assert out[0] == b'aaa\n'
        assert out[1] == b'bbb\n'
        assert list(p.glob(os.path.basename(TMP_SHELL_FILE_PREFIX) + '*')) != []
        os.system('rm %s*' % TMP_SHELL_FILE_PREFIX)

        # run shell as file
        process = run_shell('''echo 'aa"a' ''', 'postgres')
        out = process.stdout.readlines()
        assert out[0] == b'aa"a\n'
        assert list(p.glob(os.path.basename(TMP_SHELL_FILE_PREFIX) + '*')) != []
        os.system('rm %s*' % TMP_SHELL_FILE_PREFIX)
def travis_build_package():
    """Assumed called on Travis, to prepare a package to be deployed.

    This method prints on stdout for Travis.
    Return is an obj to pass to sys.exit() directly.
    """
    travis_tag = os.environ.get('TRAVIS_TAG')
    if not travis_tag:
        print("TRAVIS_TAG environment variable is not present")
        return "TRAVIS_TAG environment variable is not present"
    try:
        name, version = travis_tag.split("_")
    except ValueError:
        print("TRAVIS_TAG is not '<package_name>_<version>' (tag is: {})".format(travis_tag))
        return "TRAVIS_TAG is not '<package_name>_<version>' (tag is: {})".format(travis_tag)
    try:
        version = Version(version)
    except InvalidVersion:
        print("Version must be a valid PEP440 version (version is: {})".format(version))
        return "Version must be a valid PEP440 version (version is: {})".format(version)

    abs_dist_path = Path(os.environ['TRAVIS_BUILD_DIR'], 'dist')
    create_package(name, str(abs_dist_path))

    print("Produced:\n{}".format(list(abs_dist_path.glob('*'))))

    pattern = "*{}*".format(version)
    packages = list(abs_dist_path.glob(pattern))
    if not packages:
        return "Package version does not match tag {}, abort".format(version)
    pypi_server = os.environ.get("PYPI_SERVER", "default PyPI server")
    print("Package created as expected and will be pushed to {}".format(pypi_server))
def vendor(ctx, vendor_dir, rewrite=True):
    log('Reinstalling vendored libraries')
    is_patched = vendor_dir.name == 'patched'
    requirements_file = vendor_dir.name
    # We use --no-deps because we want to ensure that all of our dependencies
    # are added to vendor.txt; this includes all dependencies recursively up
    # the chain.
    ctx.run(
        'pip install -t {0} -r {0}/{1}.txt --no-compile --no-deps'.format(
            str(vendor_dir),
            requirements_file,
        )
    )
    remove_all(vendor_dir.glob('*.dist-info'))
    remove_all(vendor_dir.glob('*.egg-info'))

    # Cleanup setuptools unneeded parts
    drop_dir(vendor_dir / 'bin')
    drop_dir(vendor_dir / 'tests')

    # Detect the vendored packages/modules
    vendored_libs = detect_vendored_libs(_get_vendor_dir(ctx))
    patched_libs = detect_vendored_libs(_get_patched_dir(ctx))
    log("Detected vendored libraries: %s" % ", ".join(vendored_libs))

    # Apply pre-patches
    log("Applying pre-patches...")
    patch_dir = Path(__file__).parent / 'patches' / vendor_dir.name
    if is_patched:
        for patch in patch_dir.glob('*.patch'):
            if not patch.name.startswith('_post'):
                apply_patch(ctx, patch)

    log("Removing scandir library files...")
    remove_all(vendor_dir.glob('*.so'))

    # Global import rewrites
    log('Renaming specified libs...')
    for item in vendor_dir.iterdir():
        if item.is_dir():
            if rewrite:
                log('Rewriting imports for %s...' % item)
                rewrite_imports(item, vendored_libs, vendor_dir)
            rename_if_needed(ctx, vendor_dir, item)
        elif item.name not in FILE_WHITE_LIST:
            if rewrite:
                rewrite_file_imports(item, vendored_libs, vendor_dir)
    write_backport_imports(ctx, vendor_dir)

    log('Applying post-patches...')
    patches = patch_dir.glob('*.patch' if not is_patched else '_post*.patch')
    for patch in patches:
        apply_patch(ctx, patch)

    if is_patched:
        piptools_vendor = vendor_dir / 'piptools' / '_vendored'
        if piptools_vendor.exists():
            drop_dir(piptools_vendor)
        msgpack = vendor_dir / 'notpip' / '_vendor' / 'msgpack'
        if msgpack.exists():
            remove_all(msgpack.glob('*.so'))
def get_file_list():
    training_path = Path(ctx.training_dir)
    if ctx.save_box_tiff:
        log.info("=== Saving box/tiff pairs for training data ===")
        yield from training_path.glob(f"{ctx.lang_code}*.box")
        yield from training_path.glob(f"{ctx.lang_code}*.tif")
    log.info("=== Moving lstmf files for training data ===")
    yield from training_path.glob(f"{ctx.lang_code}.*.lstmf")
def _strip_binary(self):
    strip_path = Path(self._dest_dir)
    files_for_strip = (list(strip_path.glob('libwx*.so.*')) +
                       list(strip_path.glob('wx.*so')))
    for fname in files_for_strip:
        print_info(u'Strip {}'.format(fname))
        if os.path.exists(str(fname)):
            local(u'strip -s -o "{fname}" "{fname}"'.format(fname=fname))
def compbin(dir1: Path, dir2: Path, pat: str):
    dir1 = Path(dir1).expanduser()
    dir2 = Path(dir2).expanduser()

    # glob() yields files in arbitrary order, so sort both sides to make
    # sure corresponding files are actually compared with each other
    fl1 = sorted(dir1.glob(pat))
    fl2 = sorted(dir2.glob(pat))

    for f, g in zip(fl1, fl2):
        if not filecmp.cmp(f, g, False):  # type: ignore
            print('difference:', f.name)
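# Hypothetical invocation of compbin, comparing the binary outputs of two
# runs (the directories and pattern are made up for illustration):
#
#     compbin(Path('~/run1'), Path('~/run2'), '*.fits')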
def post_install(base_path, app_path):
    ret_output = []
    ret_errors = []
    ret = 0
    test_cc = check_compiler(base_path)
    applib = os.path.join(app_path, 'applib')
    p = Path(applib)
    fl = p.glob('**/*.pyx')
    for pos in fl:
        # glob() already yields full paths rooted at `p`
        pyx_filename = pos.as_posix()
        if test_cc:
            c_filename = pyx_filename.replace('.pyx', '.c')
            (ret_code, output, err) = py_run(['-m', 'cython', pyx_filename])
            if ret_code:
                ret = ret_code
            if output:
                for line in output:
                    ret_output.append(line)
            if err:
                for line in err:
                    ret_errors.append(line)
            if os.path.exists(c_filename):
                (ret_code, output, err) = compile(base_path, c_filename, pyd=True)
                if ret_code:
                    ret = ret_code
                os.unlink(c_filename)
                if output:
                    for line in output:
                        ret_output.append(line)
                if err:
                    for line in err:
                        ret_errors.append(line)
        else:
            out_filename = pyx_filename.replace('.pyx', '.py')
            with open(pyx_filename, "rt") as f_in:
                with open(out_filename, "wt") as f_out:
                    f_out.write(f_in.read())

    if check_compiler(base_path):
        fl = p.glob('**/*.c')
        for pos in fl:
            c_filename = pos.as_posix()
            if os.path.exists(c_filename):
                (ret_code, output, err) = compile(base_path, c_filename, pyd=False)
                if ret_code:
                    ret = ret_code
                if output:
                    for line in output:
                        ret_output.append(line)
                if err:
                    for line in err:
                        ret_errors.append(line)

    return (ret, ret_output, ret_errors)
def _build_latexmk(self, cwd, latex_cwd):
    # These steps are copied from the Makefile generated by Sphinx >= 1.6
    # https://github.com/sphinx-doc/sphinx/blob/master/sphinx/texinputs/Makefile_t
    latex_path = Path(latex_cwd)
    images = []
    for extension in ('png', 'gif', 'jpg', 'jpeg'):
        images.extend(latex_path.glob(f'*.{extension}'))

    # FIXME: instead of checking by language here, what we want to check is
    # whether ``latex_engine`` is ``platex``
    pdfs = []
    if self.project.language == 'ja':
        # Japanese is the only language that requires this extra step;
        # most documentation singles it out, presumably because it mixes
        # kanji (Chinese characters) with its own symbols.
        pdfs = latex_path.glob('*.pdf')

    for image in itertools.chain(images, pdfs):
        self.run(
            'extractbb',
            image.name,
            cwd=latex_cwd,
            record=False,
        )

    rcfile = 'latexmkrc'
    if self.project.language == 'ja':
        rcfile = 'latexmkjarc'

    self.run(
        'cat',
        rcfile,
        cwd=latex_cwd,
    )

    cmd = self.run(
        'latexmk',
        '-r',
        rcfile,
        # FIXME: check for platex here as well
        '-pdfdvi' if self.project.language == 'ja' else '-pdf',
        '-dvi-',
        '-ps-',
        f'-jobname={self.project.slug}',
        warn_only=True,
        cwd=latex_cwd,
    )

    self.pdf_file_name = f'{self.project.slug}.pdf'
    return cmd.successful
def task_compile():
    working_directory = Path('.')
    # Path.glob returns an iterator so turn it into a list
    headers = list(working_directory.glob('*.h'))
    for source_file in working_directory.glob('*.c'):
        object_file = source_file.with_suffix('.o')
        yield {
            'name': object_file.name,
            'actions': [['cc', '-c', source_file]],
            'file_dep': [source_file] + headers,
            'targets': [object_file],
        }
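# doit discovers task_* functions and turns each yielded dict into a subtask
# keyed by 'name'. A hypothetical companion task in the same style, linking
# the objects produced above into a final binary (a sketch, assuming cc is
# on PATH and the .o files exist by the time the task runs):
def task_link():
    working_directory = Path('.')
    objects = [str(path) for path in working_directory.glob('*.o')]
    return {
        'actions': [['cc', '-o', 'program'] + objects],
        'file_dep': objects,
        'targets': ['program'],
    }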
def stateLogIter(tournamentDir, sessionType='sim', year=2017):
    '''
    Returns a generator of state logs extracted from a directory of
    compressed game logs. The 2016 tournament used a different archive
    naming scheme, so `year` selects the glob pattern (previously `year`
    was read from an undefined name; it is now an explicit parameter).
    '''
    path = Path(tournamentDir)
    if year == 2016:
        pattern = 'game-*-{}.tar.gz'.format(sessionType)
    else:
        pattern = 'game-*-{}-logs.tar.gz'.format(sessionType)
    return (extractLog(name, sessionType, 'state')
            for name in path.glob(pattern))
def index(request):
    """Returns a list of available paths."""
    p = Path(settings.DATA_ROOT)
    json_docs = p.glob('*.json')  # currently unused
    px_docs = p.glob('*.px')
    return JsonResponse({
        'pxdocs': [
            get_px(px_doc, sample=True, meta=True)
            for px_doc in px_docs
        ]
    })
def scan(pk3dir, basedir):
    """
    Scan pk3 files in a folder.

    Check for shader conflicts and build a report of texture usages.

    Args:
        pk3dir - Path to a directory containing pk3 files to scan
        basedir - Path to directory of a clean game installation (basewsw)
    """
    logger = logging.getLogger('scan')
    pk3path = Path(pk3dir)
    basepath = Path(basedir)

    if not pk3path.is_dir():
        logger.error('{} is not a valid directory'.format(pk3path))
        sys.exit(1)
    if not basepath.is_dir():
        logger.error('{} is not a valid directory'.format(basepath))
        sys.exit(1)

    # Build an index of base game files to check against
    basefiles = set()
    for pk3file in basepath.glob('*.pk3'):
        pk3zip = ZipFile(str(pk3file))
        for name in pk3zip.namelist():
            if name.endswith('/'):
                continue
            elif name.endswith('.shader'):
                basefiles.update(parse_shader(pk3zip.open(name)))
            else:
                basefiles.add(name)

    # Check if pk3s include the same files
    for pk3file in pk3path.glob('*.pk3'):
        try:
            pk3zip = ZipFile(str(pk3file))
        except BadZipfile:
            logger.error('error: {} is not a zipfile!'.format(pk3file))
            continue
        for name in pk3zip.namelist():
            if name in basefiles:
                logger.error('{} overwrites file {}'.format(pk3file, name))
            if name.endswith('.shader'):
                for texture in basefiles & parse_shader(pk3zip.open(name)):
                    logger.error('{} overwrites file {}'.format(pk3file, texture))
def _read_hdr_dir(self):
    """Read the header for basic information.

    Returns
    -------
    hdr : dict
        - 'erd': header of .erd file
        - 'stc': general part of .stc file
        - 'stamps': time stamp for each file

    Also, it adds the attribute
    _basename : Path
        the name of the files inside the directory
    """
    foldername = Path(self.filename)
    stc_file = foldername / (foldername.stem + '.stc')
    if stc_file.exists():
        self._filename = stc_file.with_suffix('')
    else:  # if the folder was renamed
        stc_file = list(foldername.glob('*.stc'))
        if len(stc_file) == 1:
            self._filename = foldername / stc_file[0].stem
        elif len(stc_file) == 0:
            raise FileNotFoundError('Could not find any .stc file.')
        else:
            raise OSError('Found too many .stc files: '
                          + '\n'.join(str(x) for x in stc_file))

    hdr = {}
    # use .erd because it has extra info, such as sampling freq
    # try to read any possible ERD (in case one or two ERD are missing)
    # don't read very first erd because creation_time is slightly off
    for erd_file in foldername.glob(self._filename.stem + '_*.erd'):
        try:
            hdr['erd'] = _read_hdr_file(erd_file)
            # we need this to look up stc
            hdr['erd'].update({'filename': erd_file.stem})
            break
        except (FileNotFoundError, PermissionError):
            pass

    stc = _read_stc(self._filename.with_suffix('.stc'))
    hdr['stc'], hdr['stamps'] = stc
    return hdr
class FileTemplateAdapter(TemplateAdapter):

    def __init__(self, n_dims, template_dir=None):
        self.n_dims = n_dims
        if template_dir is None:
            # try the user folder
            user_templates = p.expanduser(p.join('~', TEMPLATE_DINAME))
            if p.isdir(user_templates):
                template_dir = user_templates
            else:
                raise ValueError("No template dir provided and "
                                 "{} doesn't exist".format(user_templates))
        self.template_dir = Path(p.abspath(p.expanduser(template_dir)))
        print('templates: {}'.format(self.template_dir))

    def handle_old_templates(self, upgrade_templates=False):
        old_ids = [t.stem for t in
                   self.template_dir.glob('*' + FileExt.old_template)]
        if len(old_ids) > 0 and upgrade_templates:
            print("Converting {} old style templates".format(len(old_ids)))
            for lm_id in old_ids:
                fp = safe_join(str(self.template_dir),
                               lm_id + FileExt.old_template)
                convert_legacy_template(fp)
        elif len(old_ids) > 0:
            print((
                "\nWARNING: ignored {} old style '.txt' templates in '{}' "
                "({}).\n"
                "See https://github.com/menpo/landmarkerio-server#templates "
                "for more information. You can restart with the "
                "'--upgrade-templates' flag to convert them automatically "
                "(one time operation)\n"
            ).format(
                len(old_ids), self.template_dir,
                ", ".join(['{}.txt'.format(t) for t in old_ids])))

    def template_ids(self):
        return [t.stem for t in self.template_paths()]

    def template_paths(self):
        return self.template_dir.glob('*' + FileExt.template)

    def load_template(self, lm_id):
        fp = safe_join(str(self.template_dir), lm_id + FileExt.template)
        return load_template(fp, self.n_dims)
def collect_json(source_dir):
    ret = []
    source_path = Path(source_dir)
    for file_path in source_path.glob('*.json'):
        with file_path.open() as fp:
            ret.append(json.load(fp))
    return sorted(ret, key=lambda each: each['name'])
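# Hypothetical call: merge every JSON document in a fixtures folder into one
# list ordered by each document's 'name' key (the folder name is illustrative):
#
#     records = collect_json('fixtures')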
def test_all():
    cwd = Path(getcwd())
    all_omts = [p.as_posix() for p in cwd.glob('**/*.omt')]
    th = TallyHolder()

    if environ.get('TRAVIS'):
        if not environ.get('OMV_ENGINE'):
            tallies = [parse_omt(t) for t in all_omts]
        else:
            engine = environ.get('OMV_ENGINE').lower()
            tallies = [parse_omt(t) for t in all_omts
                       if load_yaml(t)['engine'].lower() == engine]
    else:
        tallies = [parse_omt(t) for t in all_omts]

    for t in tallies:
        th.add(t)

    results = [t.all_passed() for t in tallies]
    inform('')
    inform("%i test(s) run" % len(tallies),
           overline='-', underline='-', center=True)
    inform('')
    if all(results):
        inform("All tests passing!", underline='=', center=True)
    else:
        failed = [trim_path(t.omt) for t in tallies if not t.all_passed()]
        inform("Some test(s) failed: ", failed, underline='=')

    if is_verbose():
        print('\n' + th.summary() + '\n')

    assert all(results)
def main():
    requirements_dir = Path(__file__).parent / '..' / 'requirements'
    for requirement_file in requirements_dir.glob('*.txt'):
        print(requirement_file.name)
        with open(str(requirement_file), 'r') as f:
            for req in f:
                # Remove trailing and leading whitespace.
                req = req.strip()
                if not req:
                    # skip empty or whitespace-only lines
                    continue
                elif req.startswith('#'):
                    # skip comment lines
                    continue
                # Get the name of the package
                req = re.split('<|>|=|!|;', req)[0]
                try:
                    # Use pkg_resources to reliably get the version at
                    # install time by package name. pkg_resources needs the
                    # name of the package from pip, not the import name,
                    # e.g. req is 'scikit-learn', not 'sklearn'.
                    version = pkg_resources.get_distribution(req).version
                    print(req.rjust(20), version)
                except pkg_resources.DistributionNotFound:
                    print(req.rjust(20), 'is not installed')
def select_all_scripts(path_string, error_stream=sys.stderr):
    """Gets the list of the scripts inside the specified package.

    The package is specified by the path to the folder with scripts.
    The method seeks the scripts recursively. The result is a list of
    strings; each entry is a relative path to a script (including the
    file name).

    Args:
        path_string: a path string to the package whose scripts are selected.
        error_stream: a stream to write errors to.

    Returns:
        list of strings, each entry a path to a script (with file name)
        relative to the specified package path. Returns an empty list in
        case of an error.

    Raises:
        nothing.
    """
    scripts = []
    try:
        path_object = Path(path_string)
        for script in path_object.glob("**/*.py"):
            scripts.append(str(script.relative_to(path_string)))
    except (OSError, ValueError):
        safe_write(error_stream, "ioutils.select_all_scripts error.")
    return scripts
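# Hypothetical call, printing every .py file found under a package directory
# as a path relative to it:
#
#     for rel in select_all_scripts('mypackage'):
#         print(rel)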
def __init__(self, track_file, frame_dir):
    self.track_file = track_file
    self.tracks = None
    frames = Path(frame_dir)
    filelist = list(frames.glob('*.tif*'))
    self.frames = sorted(filelist, key=lambda x: int(x.stem))
def build_assets(config, logger=None):
    """Build native minecraft assets"""
    src_root = Path(config.assets.src) / config.version
    build_path = Path(config.assets.build) / config.version
    index_path = Path(config.assets.indexes)
    index_file_path = index_path / "{}.json".format(config.version)

    if not src_root.exists():
        logger.critical("Path {} doesn't exist!".format(src_root.resolve()))
        exit(1)
    if not index_path.exists():
        index_path.mkdir(parents=True)
    if build_path.exists():
        logger.info("Cleaned up old assets directory {}".format(build_path.resolve()))
        rmtree(str(build_path))
    build_path.mkdir(parents=True)

    raw_files = [path for path in src_root.glob("**/*") if path.is_file()]
    transformed = transform_files(src_root, build_path, raw_files)
    objects = {"objects": {item.to: {"hash": item.hash, "size": item.size}
                           for item in transformed}}

    with index_file_path.open(mode="w") as index_file:
        logger.info("Writing index to {}".format(index_file_path.resolve()))
        json.dump(objects, index_file, indent=2)
def from_config_dir(cls, stash, config_dir=None):
    config_path = Path(config_dir or '.')

    # *.auth
    try:
        auth_path = next(config_path.glob('*.auth'))
    except StopIteration:
        raise FileNotFoundError(
            'No .auth file found in {}'.format(config_path.absolute()))
    with auth_path.open() as auth_file:
        auth_data = auth_file.read().splitlines()

    # hashtags.list
    hashtags_path = config_path / 'hashtags.list'
    if not hashtags_path.is_file():
        raise FileNotFoundError(hashtags_path)
    with hashtags_path.open(encoding='utf-8') as hashtags_file:
        hashtags = hashtags_file.read().splitlines()
    search_terms = ['#' + hashtag for hashtag in hashtags]

    # filter.list
    filter_path = config_path / 'filter.list'
    if filter_path.is_file():
        with filter_path.open(encoding='utf-8') as filter_file:
            filters = filter_file.read().splitlines()
        filter_terms = ['#' + hashtag for hashtag in filters]
    else:
        filter_terms = []

    return cls(stash, auth_data, search_terms, filter_terms=filter_terms)
def generate_module(fullname, **kw):
    # type: (Text, Dict) -> ModuleType
    """
    :param fullname: dotted name of the module to generate.
    """
    module = imp.new_module(fullname)
    parent_module_name, name = fullname.rsplit(".", 1)
    parent_module = import_module(parent_module_name)
    directory = Path(parent_module.__file__).parent
    directory = directory / name
    assert directory.exists() and directory.is_dir()
    module.__path__ = [str(directory)]  # noqa

    for yml in directory.glob(u"*.yml"):
        logger.info("Loading: %s", yml)
        with yml.open("rt", encoding="utf-8") as f:
            gen = CodeGenerator(yaml_file=f, **kw)
            gen.init_vocabularies(module)
            gen.gen_model(module)
            gen.gen_form(module)

    return module
def load_images_from_directory_without_marking(
        images_path: str, max_size: int, scales: list,
        recurse: bool) -> list:
    """Loads all *.jpg, *.jpeg, *.png (and *.bmp) images from the given
    directory, without any annotations.

    This function is useful for preparing data on which to test the
    detector.

    Args:
        images_path: path to the folder containing the images
        max_size: maximum image side length for this dataset
        scales: list of scales
        recurse: search for images recursively in all subdirectories

    Returns:
        list: a list of ImageFileSampleCV objects
    """
    images_dir = Path(images_path)
    images_files = []
    for format in ['*.jpg', '*.jpeg', '*.png', '*.bmp']:
        if recurse:
            images_files += list(images_dir.rglob(format.lower()))
            images_files += list(images_dir.rglob(format.upper()))
        else:
            images_files += list(images_dir.glob(format.lower()))
            images_files += list(images_dir.glob(format.upper()))
    images_files = sorted(list(set(images_files)))
    return [ImageFileSampleCV(str(image_name), [], max_size, scales)
            for image_name in images_files]
async def handle_texture_directory(input_dir: Path,
                                   zip_target: zipfile.ZipFile,
                                   atlas: bool,
                                   atlas_name: str = None,
                                   atlas_command: typing.List[str] = None,
                                   atlas_out_dir: Path = None):
    if not atlas:
        # NOTE: Hardcoding for .png is probably bad,
        # but it prevents the animation XML files from being copied so...
        for pngfile in input_dir.glob("**/*.png"):
            texturepath = Path("Textures").joinpath(pngfile.name)
            zip_target.write(str(pngfile), str(texturepath))
            print(Fore.CYAN
                  + "Wrote {0} -> {1}".format(pngfile, texturepath)
                  + Style.RESET_ALL)
        return

    print(Fore.GREEN
          + "Generating atlas for {0}...".format(atlas_name)
          + Style.RESET_ALL)
    process = await asyncio.create_subprocess_exec(
        *atlas_command,
        "-n", atlas_name,
        "-o", str(atlas_out_dir),
        "-i", str(input_dir)
    )
    # Wait for the atlas tool to finish before reporting completion
    await process.wait()
    print(Fore.GREEN
          + "Done generating atlas for {0}.".format(atlas_name)
          + Style.RESET_ALL)
def export_csv(dir, overwrite_existing, template):
    dir = Path(dir)
    export = DocumentExport(UserTemplate.load(template) if template else DWC)
    for p in dir.glob('*' + InselectDocument.EXTENSION):
        try:
            debug_print('Loading [{0}]'.format(p))
            doc = InselectDocument.load(p)
            validation = export.validation_problems(doc)
            csv_path = export.csv_path(doc)
            if validation.any_problems:
                print(
                    'Not exporting metadata for [{0}] because there are '
                    'validation problems'.format(p)
                )
                for msg in format_validation_problems(validation):
                    print(msg)
            elif not overwrite_existing and csv_path.is_file():
                print('CSV file [{0}] exists - skipping'.format(csv_path))
            else:
                print('Writing CSV for [{0}]'.format(p))
                export.export_csv(doc)
        except KeyboardInterrupt:
            raise
        except Exception:
            print('Error saving CSV from [{0}]'.format(p))
            traceback.print_exc()
def filenames(folder, extension):
    if extension.startswith('*.'):
        ext = extension
    else:
        ext = '*.' + extension
    p = Path(folder)
    return [str(path.resolve()) for path in p.glob(ext)]
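# Example call (the folder name is illustrative); both 'py' and '*.py' are
# accepted thanks to the prefix check above:
#
#     filenames('src', 'py')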
import sys
import os
from datetime import datetime
from pathlib import Path

STREAM_IN = 'stream-IN'
STREAM_OUT = 'stream-OUT'

# We first delete all files from the STREAM_IN folder
# before starting spark streaming.
# This way, all files are new.
print("Deleting existing files in %s ..." % STREAM_IN)
p = Path('.') / STREAM_IN
for f in p.glob("*.ordtmp"):
    os.remove(f)
print("... done")

from pyspark import SparkContext, SparkConf
from pyspark.streaming import StreamingContext

sc = SparkContext("local[*]", "CountAndVolumePerBatch")
# Make sure warnings and errors observed by spark are printed.
sc.setLogLevel("WARN")
ssc = StreamingContext(sc, 5)  # generate a mini-batch every 5 seconds
filestream = ssc.textFileStream(STREAM_IN)  # monitor new files in folder stream-IN


def parseOrder(line):
    '''parses a single line in the orders file'''
class Operator():
    def __init__(self, path):
        self.path = Path(path)
        self.git_path = self.path / '.git'
        self.hooks_path = self.git_path / 'hooks'
        self.gg_path = self.git_path / 'gud'
        self.last_commit_path = self.gg_path / 'last_commit.txt'
        self.commits_path = self.gg_path / 'commits.csv'
        self.level_path = self.gg_path / 'current_level.txt'
        self.progress_path = self.gg_path / 'progress.json'
        try:
            self.repo = Repo(path)
        except InvalidGitRepositoryError:
            self.repo = None

    def add_file_to_index(self, filename):
        with open(self.path / filename, 'w+') as f:
            f.write("Hello, I'm an auto-generated file!")
        self.repo.index.add([filename])

    def add_and_commit(self, name, silent=True):
        commit_msg = "Commit " + name
        filename = name + '.txt'
        self.add_file_to_index(filename)
        commit = self.repo.index.commit(
            commit_msg, author=actor, committer=actor, skip_hooks=True)
        if not silent:
            print_info('Created file "{}"'.format(filename))
            mock_simulate('git add {}'.format(filename))
            mock_simulate('git commit -m "{}"'.format(commit_msg))
            print_info("New Commit: {}".format(commit.hexsha[:7]))
        return commit

    def clear_tree_and_index(self):
        for path in self.path.glob('*'):
            if path.is_file():
                path.unlink()
        # Remove all directories except .git
        for path in self.path.iterdir():
            if path != self.git_path:
                shutil.rmtree(path)
        # Easiest way to clear the index is to commit an empty directory
        self.repo.git.add(update=True)

    def shutoff_pager(self):
        self.repo.config_writer().set_value("core", "pager", '').release()

    def git_version(self):
        return Git(self.git_path).version_info

    def init_gg(self):
        # Init git if needed
        try:
            self.repo = Repo(self.path)
        except InvalidGitRepositoryError:
            self.repo = Repo.init(self.path)
        # Disable pager so "git gud status" can use the output easily
        self.shutoff_pager()
        if not self.gg_path.exists():
            self.gg_path.mkdir()
        # Git uses unix-like path separators
        python_exec = sys.executable.replace('\\', '/')
        for git_hook_name, module_hook_name, accepts_args in all_hooks:
            path = self.hooks_path / git_hook_name
            if accepts_args:
                forward_stdin = 'cat - | '
                passargs = ' "$@"'
            else:
                forward_stdin = ''
                passargs = ''
            with open(path, 'w+') as hook_file:
                hook_file.write(
                    "#!/bin/bash\n"
                    "{pipe}{python} -m gitgud.hooks.{hook_module}{args}\n"
                    "if [[ $? -ne 0 ]]\n"
                    "then\n"
                    "\t exit 1\n"
                    "fi\n".format(
                        pipe=forward_stdin,
                        python=python_exec,
                        hook_module=module_hook_name,
                        args=passargs))
            # Make the files executable
            mode = path.stat().st_mode
            mode |= (mode & 0o444) >> 2
            path.chmod(mode)
        with open(self.progress_path, 'w') as progress_file:
            json.dump({}, progress_file)

    def destroy_repo(self):
        # Clear all in installation directory
        if self.repo is not None:
            self.clear_tree_and_index()
        # Clear all in .git/ directory except .git/gud
        for path in self.git_path.iterdir():
            if path.is_file():
                path.unlink()
            elif path != self.gg_path:
                shutil.rmtree(path)
        self.repo = None

    def use_repo(self):
        if self.repo is None:
            self.repo = Repo.init(self.path)

    def commit(self, commit_message, parents, time_offset):
        committime = dt.datetime.now(dt.timezone.utc).astimezone() \
            .replace(microsecond=0)
        committime_offset = dt.timedelta(seconds=time_offset) + \
            committime.utcoffset()
        committime_rfc = email.utils.format_datetime(committime - committime_offset)
        commit_obj = self.repo.index.commit(
            commit_message,
            author=actor,
            committer=actor,
            author_date=committime_rfc,
            commit_date=committime_rfc,
            parent_commits=parents,
            skip_hooks=True)
        return commit_obj

    @normalize_commit_arg
    @lru_cache(maxsize=None)
    def get_commit_content(self, commit):
        commit_content = {}
        for item in commit.tree.traverse():
            if item.type == 'blob':
                item_content = item.data_stream.read().decode('utf-8')
                commit_content[item.path] = item_content
        return DirectoryContent(commit_content)

    def get_staging_content(self):
        content = {}
        for stage, entry_blob in self.repo.index.iter_blobs():
            if stage == 0:
                path = entry_blob.path
                content[path] = entry_blob.data_stream.read().decode("utf-8")
        return DirectoryContent(content)

    def get_working_directory_content(self):
        content = {}
        paths = set(self.path.rglob('*')) - set(self.path.glob('.git/**/*'))
        for path in paths:
            if path.is_file():
                data = path.read_bytes().decode("utf-8")
                path = str(path.relative_to(self.path).as_posix())
                content[path] = data
        return DirectoryContent(content)

    def normalize_state(self):
        # Make sure we're in a normal state
        try:
            self.repo.git.rebase('--abort')
        except GitCommandError:
            pass
        try:
            self.repo.git.bisect('reset')
        except GitCommandError:
            pass
        self.clear_tree_and_index()
        # Commit so we know we're not on an orphan branch
        self.repo.index.commit(
            "Placeholder commit\n\n"
            "This commit is used when initializing levels. "
            "If you see this, something must have gone wrong",
            parent_commits=[],
            skip_hooks=True)
        # Detach HEAD so we can delete branches
        self.repo.git.checkout(self.repo.head.commit)

    def reset_repo(self):
        self.normalize_state()
        branches = self.repo.branches
        for branch in branches:
            self.repo.delete_head(branch, force=True)
        self.repo.delete_tag(*self.repo.tags)
        for remote in self.repo.remotes:
            self.repo.delete_remote(remote)

    def create_tree(self, commits, head, details, level_dir):
        if not details:
            details = {}
        self.reset_repo()
        # if head is an orphan branch and there are commits, this method fails
        if not commits and head:
            self.repo.git.checkout('--orphan', head)
            return
        commit_objects = {}
        counter = len(commits)
        for name, parents, branches, tags in commits:
            # commit = (name, parents, branches, tags)
            parents = [commit_objects[parent] for parent in parents]
            if parents:
                self.repo.git.checkout(parents[0])
            if len(parents) >= 2:
                assert name[0] == 'M'
                int(name[1:])  # Fails if not a number
            if name in details and "message" in details[name]:
                message = details[name]["message"]
                if type(message) is list:
                    message = message[0] + '\n\n' + '\n'.join(message[1:])
            else:
                if len(parents) < 2:
                    message = "Commit " + name
                else:
                    message = "Merge " + name[1:]
            if name in details and "files" in details[name]:
                self.clear_tree_and_index()
                for path, content in details[name]["files"].items():
                    if type(content) is str:
                        shutil.copyfile(level_dir / content, path)
                    else:
                        with open(path, 'w') as f:
                            f.write('\n'.join(content))
                    self.repo.index.add([path])
            elif len(parents) >= 2:
                # Merge branches one by one
                for parent in parents[1:]:
                    merge_base = self.repo.merge_base(parents[0], parent)
                    self.repo.index.merge_tree(parent, base=merge_base)
            elif name in details and ('add-files' in details[name]
                                      or 'remove-files' in details[name]):
                level_files = set()
                if 'add-files' in details[name]:
                    for path in details[name]['add-files']:
                        assert path not in level_files
                        level_files.add(path)
                if 'remove-files' in details[name]:
                    for path in details[name]['remove-files']:
                        assert path not in level_files
                        assert Path(path).exists()
                        level_files.add(path)
                if 'add-files' in details[name]:
                    for path, content in details[name]['add-files'].items():
                        if type(content) is str:
                            shutil.copyfile(level_dir / content, path)
                        else:
                            with open(path, 'w') as f:
                                f.write('\n'.join(content))
                        self.repo.index.add([path])
                if 'remove-files' in details[name]:
                    for path in details[name]['remove-files']:
                        Path(path).unlink()
                        self.repo.index.remove([path])
            else:
                self.add_file_to_index(name + '.txt')
            commit_obj = self.commit(message, parents, counter)
            commit_objects[name] = commit_obj
            self.track_commit(name, commit_obj.hexsha)
            for branch in branches:
                self.repo.create_head(branch, self.repo.head.commit)
            for tag in tags:
                self.repo.create_tag(tag, self.repo.head.commit)
            counter = counter - 1
        head_is_commit = True
        for branch in self.repo.branches:
            if branch.name == head:
                branch.checkout()
                head_is_commit = False
        if head_is_commit:
            self.repo.git.checkout(commit_objects[head])

    def get_current_tree(self):
        # Return a json object with the same structure as in level_json
        repo = self.repo
        tree = {
            'branches': {},  # Ex: 'branch_name': {'target': 'commit_id', 'id': 'branch_name'}
            'tags': {},      # Ex: 'tag_name': {'target': 'commit_id', 'id': 'tag_name'}
            'commits': {},   # Ex: '2': {'parents': ['1'], 'id': '1'}
            'HEAD': {}       # 'target': 'branch_name', 'id': 'HEAD'
        }
        commits = set()
        visited = set()
        for branch in repo.branches:
            commits.add(branch.commit)
            commit_hash = branch.commit.hexsha
            tree['branches'][branch.name] = {
                "target": commit_hash,
                "id": branch.name
            }
        for tag in repo.tags:
            commits.add(tag.commit)
            commit_hash = tag.commit.hexsha
            tree['tags'][tag.name] = {'target': commit_hash, 'id': tag.name}
        while len(commits) > 0:
            cur_commit = commits.pop()
            if cur_commit not in visited:
                for parent in cur_commit.parents:
                    commits.add(parent)
                visited.add(cur_commit)
        while len(visited) > 0:
            cur_commit = visited.pop()
            commit_hash = cur_commit.hexsha
            parents = []
            for parent in cur_commit.parents:
                parents.append(parent.hexsha)
            tree['commits'][commit_hash] = {
                'parents': parents,
                'id': commit_hash
            }
        if repo.head.is_detached:
            target = repo.commit('HEAD').hexsha
        else:
            target = repo.head.ref.name
        tree['HEAD'] = {'target': target, 'id': 'HEAD'}
        return tree

    def read_progress_file(self):
        with open(self.progress_path) as progress_file:
            return json.load(progress_file)

    def update_progress_file(self, data):
        progress_data = self.read_progress_file()
        progress_data.update(data)
        with open(self.progress_path, 'w') as progress_file:
            json.dump(progress_data, progress_file)

    def get_level_progress(self, level):
        progress_data = self.read_progress_file()
        if level.skill.name in progress_data:
            skill_progress = progress_data[level.skill.name]
            if level.name in skill_progress:
                return skill_progress[level.name]
        return 'unvisited'

    def mark_level(self, level, status):
        progress_data = self.read_progress_file()
        hierarchy = ["unvisited", "visited", "partial", "complete"]
        current_progress = self.get_level_progress(level)
        if hierarchy.index(status) > hierarchy.index(current_progress):
            if level.skill.name not in progress_data:
                progress_data[level.skill.name] = {}
            progress_data[level.skill.name].update({level.name: status})
            self.update_progress_file(progress_data)

    def update_level_completion(self):
        try:
            level = self.get_level()
        except InitializationError:
            # Can't update if the level doesn't exist
            return
        if level._test():
            level.mark_complete()

    def read_level_file(self):
        with open(self.level_path) as level_file:
            return level_file.read()

    def write_level(self, level):
        with open(self.level_path, 'w') as skill_file:
            skill_file.write(' '.join([level.skill.name, level.name]))

    def get_level_identifier(self):
        return self.read_level_file().split()

    def get_level(self):
        skill_name, level_name = self.get_level_identifier()
        try:
            return skills.all_skills[skill_name][level_name]
        except KeyError:
            raise InitializationError(
                f"Cannot find data for level: {skill_name} {level_name}")

    def get_last_commit(self):
        with open(self.last_commit_path) as last_commit_file:
            return last_commit_file.read()

    def write_last_commit(self, name):
        with open(self.last_commit_path, 'w+') as last_commit_file:
            last_commit_file.write(name)

    def clear_tracked_commits(self):
        with open(self.commits_path, 'w'):
            pass

    def track_rebase(self, original_hash, rebase_hash):
        rebase_name = None
        with open(self.commits_path, 'r') as commit_file:
            reader = csv.reader(commit_file)
            for name, commit_hash in reader:
                if commit_hash == original_hash:
                    rebase_name = name + "'"
                    break
        if rebase_name is not None:
            self.track_commit(rebase_name, rebase_hash)
        else:
            raise KeyError('Original hash not found')

    def track_commit(self, name, commit_hash):
        with open(self.commits_path, 'a') as commit_file:
            commit_file.write(','.join([name, commit_hash]))
            commit_file.write('\n')

    def get_known_commits(self):
        known_commits = {}
        with open(self.commits_path, 'r') as commit_file:
            reader = csv.reader(commit_file)
            for name, commit_hash in reader:
                known_commits[commit_hash] = name
        return known_commits

    def get_branches_by_commit(self):
        tree = self.get_current_tree()
        referred_by = {}
        for branch_name in tree['branches']:
            target = tree['branches'][branch_name]['target']
            if target not in referred_by:
                referred_by[target] = [branch_name]
            else:
                referred_by[target].append(branch_name)
        return referred_by

    def get_diffs(self, known_commits):
        diffs = {}
        for commit_hash, commit_name in known_commits.items():
            if commit_name == '1':
                diff = self.repo.git.diff(
                    '4b825dc642cb6eb9a060e54bf8d69288fbee4904', commit_hash)
                anti_diff = self.repo.git.diff(
                    commit_hash, '4b825dc642cb6eb9a060e54bf8d69288fbee4904')
            else:
                diff = self.repo.git.diff(commit_hash + '~', commit_hash)
                anti_diff = self.repo.git.diff(commit_hash, commit_hash + '~')
            diffs[diff] = commit_name + "'"
            diffs[anti_diff] = commit_name + '-'
        return diffs

    def get_copy_mapping(self, non_merges, known_commits):
        diffs = self.get_diffs(known_commits)
        mapping = {}
        for commit_hash in non_merges:
            if commit_hash in known_commits:
                continue
            diff = self.repo.git.diff(commit_hash + '~', commit_hash)
            if diff in diffs:
                mapping[commit_hash] = diffs[diff]
        return mapping

    def get_all_commits(self, sort_commits=True):
        all_commits = []
        for head in self.repo.heads:
            for commit in self.repo.iter_commits(head, reverse=True):
                if commit not in all_commits:
                    all_commits.append(commit)
        if sort_commits:
            all_commits.sort(key=lambda commit: commit.committed_date)
        return all_commits

    def get_commits(self):
        try:
            return list(self.repo.iter_commits('HEAD', reverse=True))
        except GitCommandError:
            return []

    def branch_has_merges(self, branch=None):
        try:
            if branch is None:
                commit = self.repo.head.commit
            elif isinstance(branch, str):
                commit = self.repo.commit(branch)
            else:
                commit = branch.commit
        except ValueError:
            # Orphan branch
            return False
        while commit:
            if len(commit.parents) == 1:
                commit = commit.parents[0]
            elif len(commit.parents) == 0:
                commit = None
            else:
                return True
        return False
import sys
from functools import lru_cache
from pathlib import Path
from typing import Union

from ...core import Flight, Traffic

_current_dir = Path(__file__).parent
__all__ = list(f.stem[:-5] for f in _current_dir.glob("**/*.json.gz"))


@lru_cache()
def get_flight(filename: str, directory: Path) -> Union[Flight, Traffic]:
    flight: Union[None, Flight, Traffic] = Traffic.from_file(
        directory / f"{filename}.json.gz", dtype={"icao24": str}
    )
    if flight is None:
        raise RuntimeError(f"File {filename}.json.gz not found in {directory}")
    icao24 = set(flight.data.icao24)
    if len(icao24) == 1:
        # easier way to cast...
        flight = Flight(flight.data)
    return flight.assign(
        timestamp=lambda df: df.timestamp.dt.tz_localize("utc")
    )


def get_sample(module, name: str):
    if sys.version_info >= (3, 7):
        return getattr(module, name)
    path = Path(module.__file__).parent
def find_one_filetype(path: Path, filename: str, filetypes: List[str]) -> Path:
    """Find the first file matching one of the given filetypes."""
    for file in path.glob(f"**/{filename}.*"):
        if file.suffix in filetypes:
            return file
    raise ConfigurationFileError(f"{path!s}/{filename}.({filetypes}) does not exist!")
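# A hedged usage sketch; ConfigurationFileError and the search directory are
# assumed from the surrounding project:
#
#     config_file = find_one_filetype(Path('config'), 'settings', ['.yaml', '.yml'])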
class DirectoryAgent:
    def __init__(self, data_dir, n_components, *,
                 data_spec=None, x_lim=None, component_dir=None,
                 output_dir=None, header=0, file_ordering=None,
                 file_limit=None, figsize=None, **kwargs):
        """
        Class for building a trained model and classifying a directory.
        Classification can be accomplished on the fly, or once using the
        spin() method.

        Parameters
        ----------
        data_dir: pathlike
            Directory containing data to be classified
        n_components: int
            Number of components for NMF
        data_spec: basestring
            String specification for the glob() method when searching
            data_dir. Default behavior is to include all files. If writing
            temporary files, it is important to include a final file spec
            such as '*.xy'.
        x_lim: tuple
            Size-two tuple for bounding the Xs to a region of interest
        component_dir: pathlike
            Directory containing initial components.
        header: int
            Number of header lines in each file
        training_output_dir: pathlike
            Output directory of training containing checkpoints for loading
        path_to_model: pathlike
            Path to model to load in full (not presently implemented)
        file_ordering: function
            Function for sorting file paths, used as the key argument of
            sorted()
        file_limit: int
            Maximum number of files to consider
        figsize: tuple
            Two-integer tuple for the matplotlib figsize. Keep in mind all
            plots appear in a row.
        **kwargs:
            Keyword arguments to pass to companion.nmf.decomposition
        """
        self.dir = Path(data_dir).expanduser()
        self.component_dir = Path(component_dir).expanduser()
        self.n_components = n_components
        if data_spec is None:
            self.path_spec = "*"
        else:
            self.path_spec = data_spec
        self.output_dir = output_dir
        self.paths = []
        self.Ys = []
        self.Xs = []
        self.initial_components = []
        self.limit = file_limit
        self.x_lim = x_lim
        self.header = header
        self.fig = plt.figure(figsize=figsize)
        self.decomposition_args = kwargs
        try:
            self.fig.canvas.manager.show()
        except Exception:
            display.display(self.fig)
        if file_ordering is None:
            self.file_ordering = lambda x: x
        else:
            self.file_ordering = file_ordering

    def __len__(self):
        return len(self.paths)

    def path_list(self):
        return list(self.dir.glob(self.path_spec))

    def load_files(self, paths):
        xs = []
        ys = []
        paths = sorted(paths)
        for idx, path in enumerate(paths):
            if not (self.limit is None) and idx >= self.limit:
                break
            _x, _y = np.loadtxt(path, comments="#", skiprows=self.header).T
            xs.append(_x)
            ys.append(_y)
        return xs, ys

    def update_plot(self):
        if len(self) < 2:
            return
        idxs = [
            x for x, y in sorted(enumerate(self.paths),
                                 key=lambda x: self.file_ordering(x[1]))
        ]
        Xs = np.array(self.Xs)
        Ys = np.array(self.Ys)
        sub_X, sub_Y, alphas, components = decomposition(
            Xs,
            Ys,
            q_range=self.x_lim,
            n_components=self.n_components,
            normalize=True,
            initial_components=self.initial_components,
            fix_components=[True for _ in range(len(self.initial_components))],
            **self.decomposition_args)
        self.fig.clf()
        axes = self.fig.subplots(1, self.n_components + 2)
        example_plot(
            sub_X,
            sub_Y,
            alphas,
            axes=axes[:-1],
            sax=axes[-2],
            components=components,
            comax=axes[-1],
            alt_ordinate=np.array(idxs),
            summary_fig=True,
        )
        self.fig.patch.set_facecolor("white")
        self.fig.canvas.draw_idle()
        self.fig.canvas.flush_events()
        display.clear_output(wait=True)
        display.display(self.fig)

    def spin(self, sleep_delay=60, verbose=False, timeout=0):
        """
        Starts the spin to read new files and append their classifications
        to the output and internal dictionary.

        If a single pass of the data directory is required, use a short or
        negative timeout time. This can be run as a multiprocessing.Process
        target with a Manager to retain the output list if being run
        dynamically.

        Parameters
        ----------
        sleep_delay: float
            Number of seconds to wait before checking the directory for
            new files
        verbose: bool
            Print classifications to screen
        timeout: float
            Time to wait before the spin stops. Default is infinite spin.
            If a single pass is required, use a negative value.

        Returns
        -------
        self.classifications: dictionary of file basenames and classifications
        """
        start_time = time()
        while True:
            if len(self.path_list()) != len(self):
                self.fig.clf()
                for path in self.path_list():
                    if path.name not in self.paths:
                        self.paths.append(path.name)
                        xs, ys = self.load_files([path])
                        self.Xs.extend(xs)
                        self.Ys.extend(ys)
                _, self.initial_components = self.load_files(
                    list(self.component_dir.glob(self.path_spec)))
                self.update_plot()
            if timeout and time() - start_time > timeout:
                break
            sleep(sleep_delay)
        return np.array(self.Xs), np.array(self.Ys)
def list_files(dir, glob):
    dir = Path(dir)
    entries = dir.glob(glob)
    files = [f for f in entries if f.is_file()]
    return files
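# Example: collect all Markdown files one level deep (the arguments are
# illustrative):
#
#     docs = list_files('docs', '*.md')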
from pathlib import Path

from click.testing import CliRunner
import pytest

from clea.__main__ import main

TESTS_DIRECTORY = Path(__file__).parent


@pytest.mark.parametrize("xml_file_path", TESTS_DIRECTORY.glob("xml/*"))
def test_clea_cli_from_valid_files(xml_file_path, monkeypatch):
    xml_file_name = str(xml_file_path.relative_to(TESTS_DIRECTORY))
    json_file_path = TESTS_DIRECTORY / f"json/{xml_file_path.stem}.json"
    with open(json_file_path, "rb") as json_file:
        expected_result = json_file.read()
    monkeypatch.chdir(TESTS_DIRECTORY)
    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(main, [xml_file_name])
    assert result.exit_code == 0
    assert result.stdout_bytes == expected_result
    assert result.stderr_bytes == b""
class Tptp:
    """Representing the TPTP problem set"""

    def __init__(self, tptpdir=None):
        if (tptpdir is not None
                and Path(tptpdir).is_dir()
                and (Path(tptpdir) / 'Problems').is_dir()
                and (Path(tptpdir) / 'Axioms').is_dir()):
            self.tptpdir = Path(tptpdir)
        else:
            self.tptpdir = Path(os.environ['TPTP'])
        try:
            with (self.tptpdir / TPTP_PY_CACHE_FILENAME).open(mode='rb') as j:
                data = pickle.load(j)
            logging.debug(f"Loaded cache from {TPTP_PY_CACHE_FILENAME}")
            self.problems = data['problems']
            self.axioms = data['axioms']
        except EnvironmentError:
            self.problems = self.tptpdir.glob("Problems/*/*.p")
            self.problems = [
                TptpProblem(group=p.parent.stem, name=p.stem, file=p,
                            tptp=tptpdir)
                for p in self.problems
            ]
            self.problems = {p.name: p for p in self.problems}
            self.axioms = self.tptpdir.glob("Axioms/**/*.ax")
            self.axioms = [
                TptpAxiom(
                    # name='/'.join(a.parts[1:-1] + (a.stem,)),
                    name=a.stem,
                    file=a,
                    tptp=tptpdir)
                for a in self.axioms
            ]
            self.axioms = {a.name: a for a in self.axioms}
            data = dict(problems=self.problems, axioms=self.axioms)
            with (self.tptpdir / TPTP_PY_CACHE_FILENAME).open(mode='wb') as j:
                pickle.dump(data, j, protocol=PICKLE_PROTOCOL)
            logging.debug(f"Saved cache to {TPTP_PY_CACHE_FILENAME}")

    def parse_meta(self, problem):
        problem = Path(problem.file)
        with problem.open() as f:
            line = f.readline()
            if not line.startswith('%-'):
                raise TptpMetaParseException(
                    'File does not begin with comment block')
            metadata = {}
            lastkey = None
            while True:
                line = f.readline().strip()
                if line and line.startswith('%-'):
                    break
                if not line:
                    continue
                if not line.startswith('%'):
                    raise TptpMetaParseException(
                        'Comment block is malformed: {}, {}'.format(
                            problem, line))
                line = line.lstrip('%')
                if line and line[0] == ' ':
                    line = line[1:]
                if line and not line.startswith(' '):
                    try:
                        lastkey, val = [
                            i.strip() for i in line.split(':', maxsplit=1)
                        ]
                    except ValueError:
                        print(problem, line)
                        raise
                    metadata.setdefault(lastkey, []).append(val)
                else:
                    line = line.strip()
                    if line and line[0] == ':':
                        line = line[1:].strip()
                    metadata[lastkey].append(line.strip())
        # Flatten metadata (needed?)
        for k, v in metadata.items():
            if len(v) == 1:
                metadata[k] = v[0]
        return metadata

    def get_problems(self, metamatch=None):
        if not metamatch:
            yield from self.problems.values()
            return
        metamatch = {k: re.compile(v) for k, v in metamatch.items()}
        for problem in self.problems.values():
            matched = True
            for key, rx in metamatch.items():
                metadata = self.parse_meta(problem)
                if key not in metadata:
                    matched = False
                    break
                text = str(metadata[key])
                if rx.fullmatch(text) is None:
                    matched = False
                    break
            if matched:
                yield problem

    def find_by_filename(self, problem_name):
        problem_name = Path(problem_name)
        problem_type = list(problem_name.parents)[-2].name
        try:
            if problem_type == 'Axioms':
                return self.axioms[problem_name.stem]
            elif problem_type == 'Problems':
                return self.problems[problem_name.stem]
        except KeyError:
            pass  # Exception will be raised below
        raise TptpException(f"Cannot find {problem_name}")
def _patch_pkgconfig(self, file: pathlib.Path):
    # Note: despite its name, `file` is the directory whose .pc files
    # are globbed and patched.
    for pcfile in file.glob("*.pc"):
        self.logger.info("Patching {}".format(pcfile))
        self._patch_textfile(pcfile,
                             "prefix=/home/qt/work/install",
                             'prefix={}'.format(str(self.prefix)))
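# Hypothetical call during a post-install patching step, rewriting the build
# prefix recorded in pkg-config files under an install tree (the path is
# illustrative):
#
#     self._patch_pkgconfig(self.prefix / 'lib' / 'pkgconfig')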
def analyze(
        self,
        detector,  # fer.FER instance
        display: bool = False,
        output: str = "csv",
        frequency: Optional[int] = None,
        max_results: int = None,
        save_fps: Optional[bool] = None,
        video_id: Optional[str] = None,
        save_frames: bool = True,
        save_video: bool = True,
        annotate_frames: bool = True,
        zip_images: bool = True):
    """Recognize facial expressions in video using `detector`."""
    data = []
    if frequency is None:
        frequency = 1
    else:
        frequency = int(frequency)

    results_nr = 0

    # Open video
    assert self.cap.open(self.filepath), "Video capture not opening"
    self.__emotions = detector._get_labels().items()
    self.cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
    pos_frames = self.cap.get(cv2.CAP_PROP_POS_FRAMES)
    assert int(pos_frames) == 0, "Video not at index 0"

    frameCount = 0
    height, width = (
        int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
        int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
    )
    fps = self.cap.get(cv2.CAP_PROP_FPS)
    length = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
    assert fps and length, "File {} not loaded".format(self.filepath)

    if save_fps is not None:
        frequency = fps // save_fps
        logging.info("Saving every {} frames".format(frequency))

    logging.info("{:.2f} fps, {} frames, {:.2f} seconds".format(
        fps, length, length / fps))

    capture_duration = 1000 / fps

    if save_frames:
        os.makedirs(self.outdir, exist_ok=True)
        logging.info(f"Making directories at {self.outdir}")
    root, ext = os.path.splitext(os.path.basename(self.filepath))
    outfile = os.path.join(self.outdir, f"{root}_output{ext}")

    if save_video:
        videowriter = self._save_video(outfile, fps, width, height)

    while self.cap.isOpened():
        start_time = time.time()
        ret, frame = self.cap.read()
        if not ret:  # end of video
            break
        if frameCount % frequency != 0:
            frameCount += 1
            continue

        padded_frame = detector.pad(frame)
        try:
            # Get faces with emotions
            faces = detector.detect_emotions(padded_frame)
        except Exception as e:
            logging.error(e)
            break

        # Save images to `self.outdir`
        imgpath = os.path.join(
            self.outdir, (video_id or root) + str(frameCount) + ".jpg")
        if save_frames and not annotate_frames:
            cv2.imwrite(imgpath, frame)

        if display or save_video or annotate_frames:
            assert isinstance(faces, list), type(faces)
            for face in faces:
                bounding_box = face["box"]
                emotions = face["emotions"]
                cv2.rectangle(
                    frame,
                    (bounding_box[0] - 40, bounding_box[1] - 40),
                    (
                        bounding_box[0] - 40 + bounding_box[2],
                        bounding_box[1] - 40 + bounding_box[3],
                    ),
                    (0, 155, 255),
                    2,
                )
                for idx, (emotion, score) in enumerate(emotions.items()):
                    color = (211, 211, 211) if score < 0.01 else (0, 255, 0)
                    emotion_score = "{}: {}".format(
                        emotion,
                        "{:.2f}".format(score) if score > 0.01 else "")
                    cv2.putText(
                        frame,
                        emotion_score,
                        (
                            bounding_box[0] - 40,
                            bounding_box[1] - 40 + bounding_box[3] + 30
                            + idx * 15,
                        ),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.5,
                        color,
                        1,
                        cv2.LINE_AA,
                    )
            if display:
                cv2.imshow("Video", frame)
            if save_frames and annotate_frames:
                cv2.imwrite(imgpath, frame)
            if save_video:
                videowriter.write(frame)
            results_nr += 1

        if display or save_video:
            remaining_duration = max(
                1, int((time.time() - start_time) * 1000 - capture_duration))
            if cv2.waitKey(remaining_duration) & 0xFF == ord("q"):
                break
        else:
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break

        if zip_images:
            print("Starting to Zip")
            outdir = Path(self.outdir)
            zip_dir = outdir / 'images.zip'
            images = sorted(list(outdir.glob("*.jpg")))
            total = len(images)
            i = 0
            with ZipFile(zip_dir, 'w') as zip:
                for file in images:
                    zip.write(file, arcname=file.name)
                    os.remove(file)
                    i += 1
                    if i % 50 == 0:
                        print(f"Compressing: {i*100 // total}%")
            print("Zip has finished")

        frameCount += 1
        if faces:
            data.append(faces)
        if max_results and results_nr > max_results:
            break

    self.cap.release()
    if save_video:
        # Guard on save_video: the videowriter only exists when it was
        # created above (releasing on `display` alone would crash)
        videowriter.release()
        logging.info("Completed analysis: saved to {}".format(
            self.tempfile or outfile))
        if self.tempfile:
            os.replace(self.tempfile, outfile)

    if output == "csv":
        return self.to_csv(data)
    elif output == "pandas":
        return self.to_pandas(data)
    else:
        raise NotImplementedError(f"{output} is not supported")
    return data
"""Chapter 2, recipe 9."""
from pathlib import Path
import shutil
import os


def version1(source_file_path, target_file_path):
    shutil.copy(str(source_file_path), str(target_file_path))


def version2(source_file_path, target_file_path):
    try:
        shutil.copy(str(source_file_path), str(target_file_path))
    except FileNotFoundError:
        os.makedirs(str(target_file_path.parent))
        shutil.copy(str(source_file_path), str(target_file_path))
    except OSError as ex:
        print(ex)


if __name__ == "__main__":
    source_path = Path(
        os.path.expanduser('~/Documents/Writing/Python Cookbook/source'))
    target_path = Path(os.path.expanduser('~/Dropbox/B05442/demo/'))
    for source_file_path in source_path.glob('*/*.rst'):
        source_file_detail = source_file_path.relative_to(source_path)
        target_file_path = target_path / source_file_detail
        version2(source_file_path, target_file_path)
def filter_files(files: Union[List[str], FileFilter],
                 directory: Path) -> List[str]:
    if callable(files):
        return list(x.name for x in filter(files, directory.glob("*")))
    else:
        return files
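# filter_files accepts either an explicit list of names or a predicate over
# Path objects; a hypothetical predicate selecting Python sources:
#
#     py_only = filter_files(lambda p: p.suffix == '.py', Path('src'))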
def run(edges=None, prizes=None, output_file=None, w=None, b=None, g=None,
        noise=None, noisy_edges=None, random_terminals=None, dummy_mode=None,
        seed=None):
    """
    Run Omics Integrator 2 in the Docker image with the provided parameters.
    Only the .tsv output file is retained and then renamed.
    All other output files are deleted.
    @param output_file: the name of the output file, which will overwrite any
        existing file with this name
    """
    if edges is None or prizes is None or output_file is None:
        raise ValueError('Required Omics Integrator 2 arguments are missing')

    # Initialize a Docker client using environment variables
    client = docker.from_env()
    work_dir = Path(__file__).parent.parent.absolute()

    edge_file = Path(edges)
    prize_file = Path(prizes)
    out_dir = Path(output_file).parent
    # Omics Integrator 2 requires that the output directory exist
    Path(work_dir, out_dir).mkdir(parents=True, exist_ok=True)

    command = ['OmicsIntegrator', '-e', edge_file.as_posix(),
               '-p', prize_file.as_posix(),
               '-o', out_dir.as_posix(), '--filename', 'oi2']

    # Add optional arguments
    if w is not None:
        command.extend(['-w', str(w)])
    if b is not None:
        command.extend(['-b', str(b)])
    if g is not None:
        command.extend(['-g', str(g)])
    if noise is not None:
        command.extend(['-noise', str(noise)])
    if noisy_edges is not None:
        command.extend(['--noisy_edges', str(noisy_edges)])
    if random_terminals is not None:
        command.extend(['--random_terminals', str(random_terminals)])
    if dummy_mode is not None:
        # This argument does not follow the other naming conventions
        command.extend(['--dummyMode', str(dummy_mode)])
    if seed is not None:
        command.extend(['--seed', str(seed)])

    print('Running Omics Integrator 2 with arguments: {}'.format(' '.join(command)), flush=True)

    # Don't perform this step on systems where permissions aren't an issue,
    # like Windows
    need_chown = True
    try:
        uid = os.getuid()
    except AttributeError:
        need_chown = False

    try:
        out = client.containers.run(
            'reedcompbio/omics-integrator-2',
            command,
            stderr=True,
            volumes={prepare_path_docker(work_dir): {'bind': '/OmicsIntegrator2', 'mode': 'rw'}},
            working_dir='/OmicsIntegrator2')
        if need_chown:
            # This command changes the ownership of output files so we don't
            # get a permissions error when snakemake tries to touch the files
            chown_command = " ".join(["chown", str(uid), out_dir.as_posix() + "/oi2*"])
            out_chown = client.containers.run(
                'reedcompbio/omics-integrator-2',
                chown_command,
                stderr=True,
                volumes={prepare_path_docker(work_dir): {'bind': '/OmicsIntegrator2', 'mode': 'rw'}},
                working_dir='/OmicsIntegrator2')
        print(out.decode('utf-8'))
    finally:
        # Not sure whether this is needed
        client.close()

    # TODO do we want to retain other output files?
    # TODO if deleting other output files, write them all to a tmp directory
    # and copy the desired output file instead of using glob to delete files
    # from the actual output directory
    # Rename the primary output file to match the desired output filename
    Path(output_file).unlink(missing_ok=True)
    output_tsv = Path(out_dir, 'oi2.tsv')
    output_tsv.rename(output_file)
    # Remove the other output files
    for oi2_output in out_dir.glob('*.html'):
        oi2_output.unlink(missing_ok=True)
def test(data, weights=None, batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.6, # for NMS save_json=False, single_cls=False, augment=False, verbose=False, model=None, dataloader=None, save_dir=Path(''), # for saving images save_txt=False, # for auto-labelling save_hybrid=False, # for hybrid auto-labelling save_conf=False, # save auto-label confidences plots=True, log_imgs=0): # number of logged images # Initialize/load model and set device training = model is not None if training: # called by train.py device = next(model.parameters()).device # get model device else: # called directly set_logging() device = select_device(opt.device, batch_size=batch_size) # Directories save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir # Load model model = attempt_load(weights, map_location=device) # load FP32 model imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99 # if device.type != 'cpu' and torch.cuda.device_count() > 1: # model = nn.DataParallel(model) # Half half = device.type != 'cpu' # half precision only supported on CUDA if half: model.half() # Configure model.eval() is_coco = data.endswith('coco.yaml') # is COCO dataset with open(data) as f: data = yaml.load(f, Loader=yaml.FullLoader) # model dict check_dataset(data) # check nc = 1 if single_cls else int(data['nc']) # number of classes iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95 niou = iouv.numel() # Logging log_imgs, wandb = min(log_imgs, 100), None # ceil try: import wandb # Weights & Biases except ImportError: log_imgs = 0 # Dataloader if not training: img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img) if device.type != 'cpu' else None # run once path = data['test'] if opt.task == 'test' else data['val'] # path to val/test images dataloader = create_dataloader(path, imgsz, batch_size, model.stride.max(), opt, pad=0.5, rect=True, prefix=colorstr('test: ' if opt.task == 'test' else 'val: '))[0] seen = 0 confusion_matrix = ConfusionMatrix(nc=nc) names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)} coco91class = coco80_to_coco91_class() s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95') p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
loss = torch.zeros(3, device=device) jdict, stats, ap, ap_class, wandb_images = [], [], [], [], [] for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): img = img.to(device, non_blocking=True) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 targets = targets.to(device) nb, _, height, width = img.shape # batch size, channels, height, width with torch.no_grad(): # Run model t = time_synchronized() inf_out, train_out = model(img, augment=augment) # inference and training outputs t0 += time_synchronized() - t # Compute loss if training: loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3] # box, obj, cls # Run NMS targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device) # to pixels lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling t = time_synchronized() output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, labels=lb) t1 += time_synchronized() - t # Statistics per image for si, pred in enumerate(output): labels = targets[targets[:, 0] == si, 1:] nl = len(labels) tcls = labels[:, 0].tolist() if nl else [] # target class path = Path(paths[si]) seen += 1 if len(pred) == 0: if nl: stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls)) continue # Predictions predn = pred.clone() scale_coords(img[si].shape[1:], predn[:, :4], shapes[si][0], shapes[si][1]) # native-space pred # Append to text file if save_txt: gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]] # normalization gain whwh for *xyxy, conf, cls in predn.tolist(): xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format with open(save_dir / 'labels' / (path.stem + '.txt'), 'a') as f: f.write(('%g ' * len(line)).rstrip() % line + '\n') # W&B logging if plots and len(wandb_images) < log_imgs: box_data = [{"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]}, "class_id": int(cls), "box_caption": "%s %.3f" % (names[cls], conf), "scores": {"class_score": conf}, "domain": "pixel"} for *xyxy, conf, cls in pred.tolist()] boxes = {"predictions": {"box_data": box_data, "class_labels": names}} # inference-space wandb_images.append(wandb.Image(img[si], boxes=boxes, caption=path.name)) # Append to pycocotools JSON dictionary if save_json: # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ... 
image_id = int(path.stem) if path.stem.isnumeric() else path.stem box = xyxy2xywh(predn[:, :4]) # xywh box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner for p, b in zip(pred.tolist(), box.tolist()): jdict.append({'image_id': image_id, 'category_id': coco91class[int(p[5])] if is_coco else int(p[5]), 'bbox': [round(x, 3) for x in b], 'score': round(p[4], 5)}) # Assign all predictions as incorrect correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device) if nl: detected = [] # target indices tcls_tensor = labels[:, 0] # target boxes tbox = xywh2xyxy(labels[:, 1:5]) scale_coords(img[si].shape[1:], tbox, shapes[si][0], shapes[si][1]) # native-space labels if plots: confusion_matrix.process_batch(pred, torch.cat((labels[:, 0:1], tbox), 1)) # Per target class for cls in torch.unique(tcls_tensor): ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1) # prediction indices pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1) # target indices # Search for detections if pi.shape[0]: # Prediction to target ious ious, i = box_iou(predn[pi, :4], tbox[ti]).max(1) # best ious, indices # Append detections detected_set = set() for j in (ious > iouv[0]).nonzero(as_tuple=False): d = ti[i[j]] # detected target if d.item() not in detected_set: detected_set.add(d.item()) detected.append(d) correct[pi[j]] = ious[j] > iouv # iou_thres is 1xn if len(detected) == nl: # all targets already located in image break # Append statistics (correct, conf, pcls, tcls) stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls)) # Plot images if plots and batch_i < 3: f = save_dir / f'test_batch{batch_i}_labels.jpg' # labels Thread(target=plot_images, args=(img, targets, paths, f, names), daemon=True).start() f = save_dir / f'test_batch{batch_i}_pred.jpg' # predictions Thread(target=plot_images, args=(img, output_to_target(output), paths, f, names), daemon=True).start() # Compute statistics stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy if len(stats) and stats[0].any(): p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names) p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1) # [P, R, mAP@.5, mAP@.5:.95] mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean() nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class else: nt = torch.zeros(1) # Print results pf = '%20s' + '%12.3g' * 6 # print format print(pf % ('all', seen, nt.sum(), mp, mr, map50, map)) # Print results per class if (verbose or (nc <= 20 and not training)) and nc > 1 and len(stats): for i, c in enumerate(ap_class): print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i])) # Print speeds t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size) # tuple if not training: print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t) # Plots if plots: confusion_matrix.plot(save_dir=save_dir, names=list(names.values())) if wandb and wandb.run: wandb.log({"Images": wandb_images}) wandb.log({"Validation": [wandb.Image(str(f), caption=f.name) for f in sorted(save_dir.glob('test*.jpg'))]}) # Save JSON if save_json and len(jdict): w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else '' # weights anno_json = '../coco/annotations/instances_val2017.json' # annotations json pred_json = str(save_dir / f"{w}_predictions.json") # predictions json print('\nEvaluating pycocotools mAP... saving %s...'
% pred_json) with open(pred_json, 'w') as f: json.dump(jdict, f) try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb from pycocotools.coco import COCO from pycocotools.cocoeval import COCOeval anno = COCO(anno_json) # init annotations api pred = anno.loadRes(pred_json) # init predictions api eval = COCOeval(anno, pred, 'bbox') if is_coco: eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files] # image IDs to evaluate eval.evaluate() eval.accumulate() eval.summarize() map, map50 = eval.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5) except Exception as e: print(f'pycocotools unable to run: {e}') # Return results if not training: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' print(f"Results saved to {save_dir}{s}") model.float() # for training maps = np.zeros(nc) + map for i, c in enumerate(ap_class): maps[c] = ap[i] return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
def train(hyp, opt, device, tb_writer=None, wandb=None): logger.info(f'Hyperparameters {hyp}') save_dir, epochs, batch_size, total_batch_size, weights, rank = \ Path(opt.save_dir), opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank # Directories wdir = save_dir / 'weights' wdir.mkdir(parents=True, exist_ok=True) # make dir last = wdir / 'last.pt' best = wdir / 'best.pt' results_file = save_dir / 'results.txt' # Save run settings with open(save_dir / 'hyp.yaml', 'w') as f: yaml.dump(hyp, f, sort_keys=False) with open(save_dir / 'opt.yaml', 'w') as f: yaml.dump(vars(opt), f, sort_keys=False) # Configure plots = not opt.evolve # create plots cuda = device.type != 'cpu' init_seeds(2 + rank) with open(opt.data) as f: data_dict = yaml.load(f, Loader=yaml.FullLoader) # data dict with torch_distributed_zero_first(rank): check_dataset(data_dict) # check train_path = data_dict['train'] test_path = data_dict['val'] nc = 1 if opt.single_cls else int(data_dict['nc']) # number of classes names = ['item'] if opt.single_cls and len( data_dict['names']) != 1 else data_dict['names'] # class names assert len(names) == nc, '%g names found for nc=%g dataset in %s' % ( len(names), nc, opt.data) # check # Model pretrained = weights.endswith('.pt') if pretrained: with torch_distributed_zero_first(rank): attempt_download(weights, tag='v4.0') # download if not found locally ckpt = torch.load(weights, map_location=device) # load checkpoint if hyp.get('anchors'): ckpt['model'].yaml['anchors'] = round( hyp['anchors']) # force autoanchor model = Model(opt.cfg or ckpt['model'].yaml, ch=3, nc=nc).to(device) # create exclude = ['anchor'] if opt.cfg or hyp.get('anchors') else [ ] # exclude keys state_dict = ckpt['model'].float().state_dict() # to FP32 state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude) # intersect model.load_state_dict(state_dict, strict=False) # load logger.info( 'Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weights)) # report else: model = Model(opt.cfg, ch=3, nc=nc).to(device) # create if opt.qat: kqat.fuse_model(model, inplace=True) qcfg, qcfg8 = kqat.get_default_qconfig(8, True) model.qconfig = qcfg8 kqat.quant_model(model, mapping=kqat.kneron_qat_default, inplace=True) fb = kqat.FreezeSch(opt.freeze_sch, kqat.FreezeKneron(decay=0.1, collect=False)) else: fb = None # Freeze freeze = [] # parameter names to freeze (full or partial) for k, v in model.named_parameters(): v.requires_grad = True # train all layers if any(x in k for x in freeze): print('freezing %s' % k) v.requires_grad = False # Optimizer nbs = 64 # nominal batch size accumulate = max(round(nbs / total_batch_size), 1) # accumulate loss before optimizing hyp['weight_decay'] *= total_batch_size * accumulate / nbs # scale weight_decay logger.info(f"Scaled weight_decay = {hyp['weight_decay']}") pg0, pg1, pg2 = [], [], [] # optimizer parameter groups p = [] for k, v in model.named_modules(): if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter): pg2.append(v.bias) # biases if isinstance(v, nn.BatchNorm2d): pg0.append(v.weight) # no decay elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): pg1.append(v.weight) # apply decay if hasattr(v, 'radix'): p.append(v.radix) if opt.adam: optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum else: optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) optimizer.add_param_group({ 'params': pg1, 'weight_decay': 
hyp['weight_decay'] }) # add pg1 with weight_decay optimizer.add_param_group({'params': pg2}) # add pg2 (biases) if opt.qat: optimizer.add_param_group({'params': p, 'lr': hyp['lr0'] * 10, 'weight_decay': 0.}) # add QAT radix params with boosted lr and no weight decay logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0))) del pg0, pg1, pg2 # Scheduler https://arxiv.org/pdf/1812.01187.pdf # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR # lf = one_cycle(1, hyp['lrf'], epochs) # cosine 1->hyp['lrf'] # scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs) import timm scheduler, num_epochs = timm.create_scheduler(opt, optimizer) # Logging if rank in [-1, 0] and wandb and wandb.run is None: opt.hyp = hyp # add hyperparameters wandb_run = wandb.init(config=opt, resume="allow", project='YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem, name=save_dir.stem, id=ckpt.get('wandb_id') if 'ckpt' in locals() else None) loggers = {'wandb': wandb} # loggers dict # Resume start_epoch, best_fitness = 0, 0.0 if pretrained: # Optimizer if ckpt['optimizer'] is not None: optimizer.load_state_dict(ckpt['optimizer']) best_fitness = ckpt['best_fitness'] # Results if ckpt.get('training_results') is not None: with open(results_file, 'w') as file: file.write(ckpt['training_results']) # write results.txt # Epochs start_epoch = ckpt['epoch'] + 1 if opt.resume: assert start_epoch > 0, '%s training to %g epochs is finished, nothing to resume.' % (weights, epochs) if epochs < start_epoch: logger.info('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' % (weights, ckpt['epoch'], epochs)) epochs += ckpt['epoch'] # finetune additional epochs del ckpt, state_dict # Image sizes gs = int(model.stride.max()) # grid size (max stride) nl = model.model[-1].nl # number of detection layers (used for scaling hyp['obj']) imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size] # verify imgsz are gs-multiples # DP mode if cuda and rank == -1 and torch.cuda.device_count() > 1: model = torch.nn.DataParallel(model) # SyncBatchNorm if opt.sync_bn and cuda and rank != -1: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device) logger.info('Using SyncBatchNorm()') # EMA ema = ModelEMA(model) if rank in [-1, 0] else None # DDP mode if cuda and rank != -1: model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank) # Trainloader dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt, hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, rank=rank, world_size=opt.world_size, workers=opt.workers, image_weights=opt.image_weights, quad=opt.quad) mlc = np.concatenate(dataset.labels, 0)[:, 0].max() # max label class nb = len(dataloader) # number of batches assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (mlc, nc, opt.data, nc - 1) # Process 0 if rank in [-1, 0]: ema.updates = start_epoch * nb // accumulate # set EMA updates testloader = create_dataloader(test_path, imgsz_test, total_batch_size, gs, opt, # testloader hyp=hyp, cache=opt.cache_images and not opt.notest, rect=True, rank=-1, world_size=opt.world_size, workers=opt.workers, pad=0.5)[0] if not opt.resume: labels = np.concatenate(dataset.labels, 0) c = torch.tensor(labels[:, 0]) # classes # cf = torch.bincount(c.long(), minlength=nc) + 1.
# frequency # model._initialize_biases(cf.to(device)) if plots: plot_labels(labels, save_dir, loggers) if tb_writer: tb_writer.add_histogram('classes', c, 0) # Anchors if not opt.noautoanchor: check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) # Model parameters hyp['cls'] *= nc / 80. # scale hyp['cls'] to class count hyp['obj'] *= imgsz**2 / 640.**2 * 3. / nl # scale hyp['obj'] to image size and output layers model.nc = nc # attach number of classes to model model.hyp = hyp # attach hyperparameters to model model.gr = 1.0 # iou loss ratio (obj_loss = 1.0 or iou) model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc # attach class weights model.names = names # Start training t0 = time.time() nw = max(round(hyp['warmup_epochs'] * nb), 1000) # number of warmup iterations, max(3 epochs, 1k iterations) # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training maps = np.zeros(nc) # mAP per class results = (0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5:.95, val_loss(box, obj, cls) scheduler.last_epoch = start_epoch - 1 # do not move scaler = amp.GradScaler(enabled=cuda) logger.info('Image sizes %g train, %g test\n' 'Using %g dataloader workers\nLogging results to %s\n' 'Starting training for %g epochs...' % (imgsz, imgsz_test, dataloader.num_workers, save_dir, epochs)) if scheduler is not None and start_epoch > 0: scheduler.step(start_epoch) for epoch in range(start_epoch, num_epochs): # epoch ------------------------------------------------------------------ model.train() if fb: fb.trigger(model, epoch) # Update image weights (optional) if opt.image_weights: # Generate indices if rank in [-1, 0]: cw = model.class_weights.cpu().numpy() * (1 - maps)**2 / nc # class weights iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx # Broadcast if DDP if rank != -1: indices = (torch.tensor(dataset.indices) if rank == 0 else torch.zeros(dataset.n)).int() dist.broadcast(indices, 0) if rank != 0: dataset.indices = indices.cpu().numpy() # Update mosaic border # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs) # dataset.mosaic_border = [b - imgsz, -b] # height, width borders mloss = torch.zeros(4, device=device) # mean losses if rank != -1: dataloader.sampler.set_epoch(epoch) pbar = enumerate(dataloader) logger.info(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'total', 'targets', 'img_size')) if rank in [-1, 0]: pbar = tqdm(pbar, total=nb) # progress bar optimizer.zero_grad() for i, (imgs, targets, paths, _) in pbar: # batch ------------------------------------------------------------- ni = i + nb * epoch # number integrated batches (since train start) imgs = imgs.to(device, non_blocking=True).float() / 255.0 # uint8 to float32, 0-255 to 0.0-1.0 # Warmup if ni <= nw: xi = [0, nw] # x interp # model.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou) accumulate = max(1, np.interp(ni, xi, [1, nbs / total_batch_size]).round()) for j, x in enumerate(optimizer.param_groups): # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * scheduler.get_epoch_values(epoch)[1]]) if 'momentum' in x: x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']]) # Multi-scale if opt.multi_scale: sz = random.randrange(imgsz *
0.5, imgsz * 1.5 + gs) // gs * gs # size sf = sz / max(imgs.shape[2:]) # scale factor if sf != 1: ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple) imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) # Forward kqat.initialize(model, imgs) with amp.autocast(enabled=cuda): pred = model(imgs) # forward loss, loss_items = compute_loss(pred, targets.to(device), model) # loss scaled by batch_size if rank != -1: loss *= opt.world_size # gradient averaged between devices in DDP mode if opt.quad: loss *= 4. # Backward scaler.scale(loss).backward() # Optimize if ni % accumulate == 0: scaler.step(optimizer) # optimizer.step scaler.update() if fb: fb.collect_batch(model, optimizer) # trigger update at some point if i != 0: fb.trigger_batch(model, epoch, i) optimizer.zero_grad() if ema: ema.update(model) # Print if rank in [-1, 0]: mloss = (mloss * i + loss_items) / (i + 1) # update mean losses mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0) # (GB) s = ('%10s' * 2 + '%10.4g' * 6) % ('%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1]) pbar.set_description(s) # Plot if plots and ni < 3: f = save_dir / f'train_batch{ni}.jpg' # filename Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start() # if tb_writer: # tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch) # tb_writer.add_graph(model, imgs) # add model to tensorboard elif plots and ni == 3 and wandb: wandb.log({"Mosaics": [wandb.Image(str(x), caption=x.name) for x in save_dir.glob('train*.jpg')]}) # end batch ------------------------------------------------------------------------------------------------ # end epoch ---------------------------------------------------------------------------------------------------- # Scheduler lr = [x['lr'] for x in optimizer.param_groups] # for tensorboard scheduler.step(epoch + 1) # DDP process 0 or single-GPU if rank in [-1, 0]: training_save = save_dir / "{}".format(epoch) training_save.mkdir(parents=True, exist_ok=True) # make dir # mAP if ema: ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride', 'class_weights']) final_epoch = epoch + 1 == epochs if not opt.notest or final_epoch: # Calculate mAP results, maps, times = test.test(opt.data, batch_size=total_batch_size, imgsz=imgsz_test, model=ema.ema, single_cls=opt.single_cls, dataloader=testloader, save_dir=training_save, plots=plots and final_epoch, log_imgs=opt.log_imgs if wandb else 0, enable_half=False) # Write with open(results_file, 'a') as f: f.write(s + '%10.4g' * 7 % results + '\n') # P, R, mAP@.5, mAP@.5:.95, val_loss(box, obj, cls) if len(opt.name) and opt.bucket: os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name)) # Log tags = ['train/box_loss', 'train/obj_loss', 'train/cls_loss', # train loss 'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', 'val/box_loss', 'val/obj_loss', 'val/cls_loss', # val loss 'x/lr0', 'x/lr1', 'x/lr2'] # params for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags): if tb_writer: tb_writer.add_scalar(tag, x, epoch) # tensorboard if wandb: wandb.log({tag: x}) # W&B # Update best mAP fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5:.95] if fi > best_fitness: best_fitness = fi # Save model save = (not opt.nosave) or (final_epoch and not opt.evolve) if
save: with open(results_file, 'r') as f: # create checkpoint ckpt = { 'epoch': epoch, 'best_fitness': best_fitness, 'training_results': f.read(), 'model': ema.ema, 'optimizer': None if final_epoch else optimizer.state_dict(), 'wandb_id': wandb_run.id if wandb else None } # Save last, best and delete torch.save(ckpt, training_save / "last.pt") if best_fitness == fi: torch.save(ckpt, best) del ckpt # end epoch ---------------------------------------------------------------------------------------------------- # end training if rank in [-1, 0]: # Strip optimizers final = best if best.exists() else last # final model for f in [last, best]: if f.exists(): strip_optimizer(f) # strip optimizers if opt.bucket: os.system(f'gsutil cp {final} gs://{opt.bucket}/weights') # upload # Plots if plots: plot_results(save_dir=save_dir) # save as results.png if wandb: files = [ 'results.png', 'precision_recall_curve.png', 'confusion_matrix.png' ] wandb.log({ "Results": [ wandb.Image(str(save_dir / f), caption=f) for f in files if (save_dir / f).exists() ] }) if opt.log_artifacts: wandb.log_artifact(artifact_or_path=str(final), type='model', name=save_dir.stem) # Test best.pt logger.info('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600)) if opt.data.endswith('coco.yaml') and nc == 80: # if COCO for conf, iou, save_json in ([0.25, 0.45, False], [0.001, 0.65, True]): # speed, mAP tests results, _, _ = test.test(opt.data, batch_size=total_batch_size, imgsz=imgsz_test, conf_thres=conf, iou_thres=iou, model=attempt_load(final, device).half(), single_cls=opt.single_cls, dataloader=testloader, save_dir=save_dir, save_json=save_json, plots=False) else: dist.destroy_process_group() wandb.run.finish() if wandb and wandb.run else None torch.cuda.empty_cache() return results
import contextlib import os import runpy import sys import pytest if sys.version_info >= (3, 4): from pathlib import Path else: # python2.7 compat from _pytest.pathlib import Path examples_dir = Path(__file__, '../../doc/source/examples').resolve() examples = sorted(examples_dir.glob('*.py')) @contextlib.contextmanager def cd(where_to): """ Temporarily change the working directory. Restore the current working dir after exiting the context. """ curr = Path.cwd() try: os.chdir(str(where_to)) yield finally: os.chdir(str(curr))
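A hedged sketch of how these fixtures are typically wired up: parametrize a test over the collected example scripts and execute each one from the examples directory so relative paths inside the scripts resolve (the test name and run_name are assumptions, not part of the snippet above):

@pytest.mark.parametrize('example', examples, ids=lambda p: p.name)
def test_example(example):
    # Run the script as if it were executed directly, from its own directory
    with cd(examples_dir):
        runpy.run_path(str(example), run_name='__main__')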
import re import pdftotext import pandas as pd from pathlib import Path df = pd.DataFrame(columns=['Progress Note', 'Diagnosis', 'Lab Order', 'Medication', 'Label']) path = Path('../1/') files = path.glob('*.pdf') for file_ in files: label = file_.stem[-1:] with open(file_, 'rb') as f: pdf = pdftotext.PDF(f) text = '\n\n'.join(pdf) with open(f'./output/{file_.stem}.text', 'w') as f: f.write(text) lines = [] lab = [] progress = [] medication = [] diagnosis = [] with open(f'./output/{file_.stem}.text') as f: for line in f.readlines(): lines.append(str(line).strip()) lines = [sub.replace('*** End ***', '') for sub in lines] lines = list(filter(lambda x: not re.match(r'[0-9]{2}[\-,:][0-9]{2}[\-,:][0-9]{2}', x), lines)) # raw string avoids the invalid '\-' escape warning for (i, line) in enumerate(lines): sentence = str(line).strip() if sentence.startswith('UHID'): lines[i] = ''
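The re.match filter above drops lines that begin with a two-digit triple, such as a date or a timestamp; a quick sanity check of the pattern:

assert re.match(r'[0-9]{2}[\-,:][0-9]{2}[\-,:][0-9]{2}', '10:30:15')       # matched, so dropped
assert not re.match(r'[0-9]{2}[\-,:][0-9]{2}[\-,:][0-9]{2}', 'UHID 1234')  # kept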
def check_udev_rules(): """Make sure the udev rules look good. """ ok = True udev_dir = Path("/etc/udev/rules.d/") desired_rules = { 'atmel-dfu': { _udev_rule("03EB", "2FEF"), # ATmega16U2 _udev_rule("03EB", "2FF0"), # ATmega32U2 _udev_rule("03EB", "2FF3"), # ATmega16U4 _udev_rule("03EB", "2FF4"), # ATmega32U4 _udev_rule("03EB", "2FF9"), # AT90USB64 _udev_rule("03EB", "2FFB") # AT90USB128 }, 'kiibohd': {_udev_rule("1C11", "B007")}, 'stm32': { _udev_rule("1EAF", "0003"), # STM32duino _udev_rule("0483", "DF11") # STM32 DFU }, 'bootloadhid': {_udev_rule("16C0", "05DF")}, 'usbasploader': {_udev_rule("16C0", "05DC")}, 'massdrop': {_udev_rule("03EB", "6124")}, 'caterina': { # Spark Fun Electronics _udev_rule("1B4F", "9203", 'ENV{ID_MM_DEVICE_IGNORE}="1"'), # Pro Micro 3V3/8MHz _udev_rule("1B4F", "9205", 'ENV{ID_MM_DEVICE_IGNORE}="1"'), # Pro Micro 5V/16MHz _udev_rule("1B4F", "9207", 'ENV{ID_MM_DEVICE_IGNORE}="1"'), # LilyPad 3V3/8MHz (and some Pro Micro clones) # Pololu Electronics _udev_rule("1FFB", "0101", 'ENV{ID_MM_DEVICE_IGNORE}="1"'), # A-Star 32U4 # Arduino SA _udev_rule("2341", "0036", 'ENV{ID_MM_DEVICE_IGNORE}="1"'), # Leonardo _udev_rule("2341", "0037", 'ENV{ID_MM_DEVICE_IGNORE}="1"'), # Micro # Adafruit Industries LLC _udev_rule("239A", "000C", 'ENV{ID_MM_DEVICE_IGNORE}="1"'), # Feather 32U4 _udev_rule("239A", "000D", 'ENV{ID_MM_DEVICE_IGNORE}="1"'), # ItsyBitsy 32U4 3V3/8MHz _udev_rule("239A", "000E", 'ENV{ID_MM_DEVICE_IGNORE}="1"'), # ItsyBitsy 32U4 5V/16MHz # dog hunter AG _udev_rule("2A03", "0036", 'ENV{ID_MM_DEVICE_IGNORE}="1"'), # Leonardo _udev_rule("2A03", "0037", 'ENV{ID_MM_DEVICE_IGNORE}="1"') # Micro } } # These rules are no longer recommended, only use them to check for their presence. deprecated_rules = { 'atmel-dfu': {_deprecated_udev_rule("03eb", "2ff4"), _deprecated_udev_rule("03eb", "2ffb"), _deprecated_udev_rule("03eb", "2ff0")}, 'kiibohd': {_deprecated_udev_rule("1c11")}, 'stm32': {_deprecated_udev_rule("1eaf", "0003"), _deprecated_udev_rule("0483", "df11")}, 'bootloadhid': {_deprecated_udev_rule("16c0", "05df")}, 'caterina': {'ATTRS{idVendor}=="2a03", ENV{ID_MM_DEVICE_IGNORE}="1"', 'ATTRS{idVendor}=="2341", ENV{ID_MM_DEVICE_IGNORE}="1"'}, 'tmk': {_deprecated_udev_rule("feed")} } if udev_dir.exists(): udev_rules = [rule_file for rule_file in udev_dir.glob('*.rules')] current_rules = set() # Collect all rules from the config files for rule_file in udev_rules: for line in rule_file.read_text().split('\n'): line = line.strip() if not line.startswith("#") and len(line): current_rules.add(line) # Check if the desired rules are among the currently present rules for bootloader, rules in desired_rules.items(): # For caterina, check if ModemManager is running if bootloader == "caterina": if check_modem_manager(): ok = False cli.log.warn("{bg_yellow}Detected ModemManager without the necessary udev rules. Please either disable it or set the appropriate udev rules if you are using a Pro Micro.") if not rules.issubset(current_rules): deprecated_rule = deprecated_rules.get(bootloader) if deprecated_rule and deprecated_rule.issubset(current_rules): cli.log.warn("{bg_yellow}Found old, deprecated udev rules for '%s' boards. The new rules on https://docs.qmk.fm/#/faq_build?id=linux-udev-rules offer better security with the same functionality.", bootloader) else: cli.log.warn("{bg_yellow}Missing udev rules for '%s' boards. See https://docs.qmk.fm/#/faq_build?id=linux-udev-rules for more details.", bootloader) return ok
def count_wkw_files(mag_path: Path) -> int: return len(list(mag_path.glob("**/x*.wkw")))
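Per the pathlib documentation, a leading "**/" component matches the directory itself and all subdirectories, so the call above can be written equivalently with rglob:

def count_wkw_files_rglob(mag_path: Path) -> int:
    # rglob(pattern) is shorthand for glob("**/" + pattern)
    return len(list(mag_path.rglob("x*.wkw")))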
# Note: strict=True needs Python >= v3.6 # print(in_) out = Path(args.outdir).resolve() # does not exist yet, so no strict=True if args.force: out.mkdir(exist_ok=True) else: out.mkdir() with open(args.config, 'r') as file: config = json.load(file) model = GenomeModel(args.models, mode='core', norm='l2') genomes = list(in_.glob('[!.]*.fasta')) # glob's [!...] negates the set; skip hidden files (e.g. the OS index on a Mac) print('Collecting embedding for each genome ...') names, m = [], [] for g in genomes: name = g.name.replace('.fasta', '') try: v = model.embedding[name] names.append(name) m.append(v) except KeyError: continue # names, m = zip(*[(k, v) for k, v in model.embedding.items()]) percent = int(100 * len(m) / len(genomes)) if genomes else 0 print(f'Found {len(m)} embeddings for {len(genomes)} genomes ({percent}%)')
for val in range(2,13): z_val = "{0:02d}".format(val) URL = 'https://www.jma.go.jp/jp/radnowc/imgs/nowcast/205/'+delay_fmt+'-'+z_val+'.png' requestImage = requests.get(URL) print(str(val*5) + " min-ahead forecast precipitation map downloaded and saved") with open('/home/a2011529/AreaBroadcast/public/ForecastImage/'+delay_fmt+'-'+z_val+'.png','wb') as f: f.write(requestImage.content) # GIF image folder handling islist = bool(os.listdir('/home/a2011529/AreaBroadcast/public/NowcastGifImage/')) if islist: p = Path("/home/a2011529/AreaBroadcast/public/NowcastGifImage/") files = list(p.glob("*")) file_updates = {file_path: os.stat(file_path).st_mtime for file_path in files} newest_file_path = max(file_updates, key=file_updates.get) print("Latest file path:") print(newest_file_path) # Extract the sequence number from the latest file strNewst = str(newest_file_path) splitNewst = strNewst[52:55] print(splitNewst) latestNumber = int(splitNewst) number = latestNumber + 1 strNumber = str(number) number_z = strNumber.zfill(3) # Write
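Slicing str(newest_file_path)[52:55] assumes one fixed absolute path length; a hedged, more robust sketch (assuming file names end in a three-digit counter) reads the number from the stem instead:

import re
m = re.search(r'(\d{3})$', newest_file_path.stem)
if m:
    number_z = str(int(m.group(1)) + 1).zfill(3)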
import yaml from pathlib import Path import re # admux = r'^ucsr[0-9]?[a-z]{1}$' regs_path = Path('../../avr-registers/registers/') # f.stem already drops the '.yml' suffix; the old rstrip('.yml') stripped any trailing '.', 'y', 'm', 'l' characters (e.g. 'attiny' -> 'attin') mcus = (f.stem for f in regs_path.glob('*.yml')) def load(mcu): regs_file = regs_path / '{}.yml'.format(mcu) with regs_file.open(mode='r') as fobj: yield from yaml.load(fobj, Loader=yaml.FullLoader) regs = set() for m in mcus: for reg in load(m): if reg['name'].startswith('PCMSK'): regs.add(reg['name']) print(regs)
import pandas as pd # needed for pd.read_csv below from rdkit import Chem from rdkit.Chem import Descriptors from rdkit.Chem import AllChem from chembl_structure_pipeline import standardizer # get parent path of file from pathlib import Path dir_path = Path(__file__).parent.absolute() all_df = pd.read_csv(dir_path / "../covid_submissions_all_info.csv") # get all csvs from folders received_csv_files = [ f for f in dir_path.glob("**/*.csv") if "all_received_mols.csv" not in str(f) ] smiles_dict = {} for csv_file in received_csv_files: if "xchem" in str(csv_file): try: received_df = pd.read_csv(csv_file) # received_df["SMILES"] = received_df["SMILES"].apply( # lambda x: Chem.MolToSmiles( # Chem.MolFromSmiles( # Chem.MolToSmiles( # standardizer.standardize_mol( # standardizer.get_parent_mol(Chem.MolFromSmiles(x))[ # 0
class PickleShareDB(collections_abc.MutableMapping): """ The main 'connection' object for PickleShare database """ def __init__(self, root): """ Return a db object that will manage the specified directory""" if not isinstance(root, string_types): root = str(root) root = os.path.abspath(os.path.expanduser(root)) self.root = Path(root) if not self.root.is_dir(): # catching the exception is necessary if multiple processes are concurrently trying to create a folder # exist_ok keyword argument of mkdir does the same but only from Python 3.5 try: self.root.mkdir(parents=True) except OSError as e: if e.errno != errno.EEXIST: raise # cache has { 'key' : (obj, orig_mod_time) } self.cache = {} def __getitem__(self, key): """ db['key'] reading """ fil = self.root / key try: mtime = (fil.stat()[stat.ST_MTIME]) except OSError: raise KeyError(key) if fil in self.cache and mtime == self.cache[fil][1]: return self.cache[fil][0] try: # The cached item has expired, need to read with fil.open("rb") as f: obj = pickle.loads(f.read()) except Exception: # any unpickling failure is reported as a missing key raise KeyError(key) self.cache[fil] = (obj, mtime) return obj def __setitem__(self, key, value): """ db['key'] = 5 """ fil = self.root / key parent = fil.parent if parent and not parent.is_dir(): parent.mkdir(parents=True) # We specify protocol 2, so that we can mostly go between Python 2 # and Python 3. We can upgrade to protocol 3 when Python 2 is obsolete. with fil.open('wb') as f: pickle.dump(value, f, protocol=2) try: self.cache[fil] = (value, fil.stat().st_mtime) except OSError as e: if e.errno != errno.ENOENT: raise def hset(self, hashroot, key, value): """ hashed set """ hroot = self.root / hashroot if not hroot.is_dir(): hroot.mkdir() hfile = hroot / gethashfile(key) d = self.get(hfile, {}) d.update({key: value}) self[hfile] = d def hget(self, hashroot, key, default=_sentinel, fast_only=True): """ hashed get """ hroot = self.root / hashroot hfile = hroot / gethashfile(key) d = self.get(hfile, _sentinel) # print "got dict",d,"from",hfile if d is _sentinel: if fast_only: if default is _sentinel: raise KeyError(key) return default # slow mode ok, works even after hcompress() d = self.hdict(hashroot) return d.get(key, default) def hdict(self, hashroot): """ Get all data contained in hashed category 'hashroot' as dict """ hfiles = self.keys(hashroot + "/*") hfiles.sort() last = len(hfiles) and hfiles[-1] or '' if last.endswith('xx'): # print "using xx" hfiles = [last] + hfiles[:-1] all = {} for f in hfiles: # print "using",f try: all.update(self[f]) except KeyError: print("Corrupt", f, "deleted - hset is not threadsafe!") del self[f] self.uncache(f) return all def hcompress(self, hashroot): """ Compress category 'hashroot', so hset is fast again hget will fail if fast_only is True for compressed items (that were hset before hcompress).
""" hfiles = self.keys(hashroot + "/*") all = {} for f in hfiles: # print "using",f all.update(self[f]) self.uncache(f) self[hashroot + '/xx'] = all for f in hfiles: p = self.root / f if p.name == 'xx': continue p.unlink() def __delitem__(self, key): """ del db["key"] """ fil = self.root / key self.cache.pop(fil, None) try: fil.unlink() except OSError: # notfound and permission denied are ok - we # lost, the other process wins the conflict pass def _normalized(self, p): """ Make a key suitable for user's eyes """ return str(p.relative_to(self.root)).replace('\\', '/') def keys(self, globpat=None): """ All keys in DB, or all keys matching a glob""" if globpat is None: files = self.root.rglob('*') else: files = self.root.glob(globpat) return [self._normalized(p) for p in files if p.is_file()] def __iter__(self): return iter(self.keys()) def __len__(self): return len(self.keys()) def uncache(self, *items): """ Removes all, or specified items from cache Use this after reading a large amount of large objects to free up memory, when you won't be needing the objects for a while. """ if not items: self.cache = {} for it in items: self.cache.pop(it, None) def waitget(self, key, maxwaittime=60): """ Wait (poll) for a key to get a value Will wait for `maxwaittime` seconds before raising a KeyError. The call exits normally if the `key` field in db gets a value within the timeout period. Use this for synchronizing different processes or for ensuring that an unfortunately timed "db['key'] = newvalue" operation in another process (which causes all 'get' operation to cause a KeyError for the duration of pickling) won't screw up your program logic. """ wtimes = [0.2] * 3 + [0.5] * 2 + [1] tries = 0 waited = 0 while 1: try: val = self[key] return val except KeyError: pass if waited > maxwaittime: raise KeyError(key) time.sleep(wtimes[tries]) waited += wtimes[tries] if tries < len(wtimes) - 1: tries += 1 def getlink(self, folder): """ Get a convenient link for accessing items """ return PickleShareLink(self, folder) def __repr__(self): return "PickleShareDB('%s')" % self.root
def rawmapper(rawfolder, outfolder: Path=Path(), sessions: tuple=(), rename: bool=False, dicomfield: tuple=('PatientComments',), wildcard: str='*', subprefix: str='sub-', sesprefix: str='ses-', dryrun: bool=False) -> None: """ :param rawfolder: The root folder-name of the sub/ses/data/file tree containing the source data files :param outfolder: The name of the folder where the mapping-file is saved (default = sourcefolder) :param sessions: Space separated list of selected sub-#/ses-# names / folders to be processed. Otherwise all sessions in the bidsfolder will be selected :param rename: Flag for renaming the sub-subid folders to sub-dicomfield :param dicomfield: The names of the dicomfields that are mapped (/ renamed to sub-dcmval/ses-dcmval) :param wildcard: The Unix style pathname pattern expansion that is used by glob to select the series from which the dicomfield is being mapped :param subprefix: The prefix common for all source subject-folders :param sesprefix: The prefix common for all source session-folders :param dryrun: Flag for dry-running renaming the sub-subid folders :return: Nothing """ # Input checking rawfolder = Path(rawfolder).resolve() if not outfolder: outfolder = rawfolder print(f"Outfolder: {outfolder}") outfolder = Path(outfolder).resolve() # Write the header of the mapper logfile mapperfile = outfolder/f"rawmapper_{'_'.join(dicomfield)}.tsv" if not dryrun: if rename and not mapperfile.is_file(): # Write the header once with mapperfile.open('w') as fid: fid.write('subid\tsesid\tnewsubid\tnewsesid\n') else: # Write the header once with mapperfile.open('w') as fid: fid.write('subid\tsesid\tseriesname\t{}\n'.format('\t'.join(dicomfield))) # Map the sessions in the sourcefolder if not sessions: sessions = list(rawfolder.glob(f"{subprefix}*/{sesprefix}*")) if not sessions: sessions = list(rawfolder.glob(f"{subprefix}*")) # Try without session-subfolders else: sessions = [sessionitem for session in sessions for sessionitem in rawfolder.glob(session)] sessions = [session for session in sessions if session.is_dir()] # Loop over the selected sessions in the sourcefolder for session in sessions: # Get the subject and session identifiers from the sub/ses session folder subid, sesid = bids.get_subid_sesid(session/'dum.my', subprefix=subprefix, sesprefix=sesprefix) subid = subid.replace('sub-', subprefix) sesid = sesid.replace('ses-', sesprefix) # Parse the new subject and session identifiers from the dicomfield series = bids.lsdirs(session, wildcard) if not series: series = '' dcmval = '' else: series = series[0] # NB: Assumes the first folder contains a dicom file and that all folders give the same info dcmval = '' for dcmfield in dicomfield: dcmval = dcmval + '/' + str(bids.get_dicomfield(dcmfield, bids.get_dicomfile(series))) dcmval = dcmval[1:] # Rename the session subfolder in the sourcefolder and print & save this info if rename: # Get the new subid and sesid if not dcmval or dcmval=='None': warnings.warn(f"Skipping renaming because the dicom-field was empty for: {session}") continue else: if '/' in dcmval: # Allow for different sub/ses delimiters that could be entered at the console (i.e. 
in PatientComments) delim = '/' elif '\\' in dcmval: delim = '\\' else: delim = '\r\n' newsubsesid = [val for val in dcmval.split(delim) if val] # Skip empty lines / entries newsubid = subprefix + bids.cleanup_value(re.sub(f'^{subprefix}', '', newsubsesid[0])) if newsubid==subprefix or newsubid==subprefix+'None': newsubid = subid warnings.warn(f"Could not rename {subid} because the dicom-field was empty for: {session}") if len(newsubsesid)==1: newsesid = sesid elif len(newsubsesid)==2: newsesid = sesprefix + bids.cleanup_value(re.sub(f'^{sesprefix}', '', newsubsesid[1])) else: warnings.warn(f"Skipping renaming of {session} because the dicom-field '{dcmval}' could not be parsed into [subid, sesid]") continue if newsesid==sesprefix or newsesid==sesprefix+'None': newsesid = sesid warnings.warn(f"Could not rename {sesid} because the dicom-field was empty for: {session}") # Save the dicomfield / sub-ses mapping in the mapper logfile and rename the session subfolder (but skip if it already exists) newsession = rawfolder/newsubid/newsesid print(f"{session} -> {newsession}") if newsession == session: continue if newsession.is_dir(): warnings.warn(f"{newsession} already exists, skipping renaming of {session}") elif not dryrun: with mapperfile.open('a') as fid: fid.write(f"{subid}\t{sesid}\t{newsubid}\t{newsesid}\n") if sesid and newsesid != sesid: (rawfolder/subid/sesid).rename(rawfolder/subid/newsesid) if newsubid != subid: (rawfolder/subid).rename(rawfolder/newsubid) # Print & save the dicom values in the mapper logfile else: print('{}/{}/{}\t-> {}'.format(subid, sesid, series.name, '\t'.join(dcmval.split('/')))) if not dryrun: with mapperfile.open('a') as fid: fid.write('{}\t{}\t{}\t{}\n'.format(subid, sesid, series.name, '\t'.join(dcmval.split('/'))))
# Arguments root = Path("./") # Fixed parameters typ = "linear" p = 200 # only needed for logistic setting def sortf(x): return float(x.stem.split("_")[-1]) if typ.startswith("log"): files = root.glob(f"./MNARlog/Simulation*p{p}*.pkl") Marfiles = root.glob(f"./MARlog/MARSimulation*p{p}*.pkl") else: files = root.glob(f"./MNARlinear/Simulation*p{p}*.pkl") Marfiles = root.glob(f"./MARlinear/MARSimulation*p{p}*.pkl") files = list(files) files.sort(key=sortf) files = np.array(files) #[[0, 1, 2, 4, 6]] Marfiles = list(Marfiles) Marfiles.sort(key=sortf) Berrs = [] Terrs = [] MarBerrs = [] MarTerrs = [] xlist = []
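For reference, sortf assumes pickle stems that end in an underscore-separated number, e.g.:

sortf(Path('./MNARlinear/Simulation_p200_0.3.pkl'))  # -> 0.3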
def check_udev_rules(): """Make sure the udev rules look good. """ rc = CheckStatus.OK udev_dir = Path("/etc/udev/rules.d/") desired_rules = { 'atmel-dfu': { _udev_rule("03eb", "2fef"), # ATmega16U2 _udev_rule("03eb", "2ff0"), # ATmega32U2 _udev_rule("03eb", "2ff3"), # ATmega16U4 _udev_rule("03eb", "2ff4"), # ATmega32U4 _udev_rule("03eb", "2ff9"), # AT90USB64 _udev_rule("03eb", "2ffb") # AT90USB128 }, 'kiibohd': {_udev_rule("1c11", "b007")}, 'stm32': { _udev_rule("1eaf", "0003"), # STM32duino _udev_rule("0483", "df11") # STM32 DFU }, 'bootloadhid': {_udev_rule("16c0", "05df")}, 'usbasploader': {_udev_rule("16c0", "05dc")}, 'massdrop': {_udev_rule("03eb", "6124", 'ENV{ID_MM_DEVICE_IGNORE}="1"')}, 'caterina': { # Spark Fun Electronics _udev_rule("1b4f", "9203", 'ENV{ID_MM_DEVICE_IGNORE}="1"'), # Pro Micro 3V3/8MHz _udev_rule("1b4f", "9205", 'ENV{ID_MM_DEVICE_IGNORE}="1"'), # Pro Micro 5V/16MHz _udev_rule("1b4f", "9207", 'ENV{ID_MM_DEVICE_IGNORE}="1"'), # LilyPad 3V3/8MHz (and some Pro Micro clones) # Pololu Electronics _udev_rule("1ffb", "0101", 'ENV{ID_MM_DEVICE_IGNORE}="1"'), # A-Star 32U4 # Arduino SA _udev_rule("2341", "0036", 'ENV{ID_MM_DEVICE_IGNORE}="1"'), # Leonardo _udev_rule("2341", "0037", 'ENV{ID_MM_DEVICE_IGNORE}="1"'), # Micro # Adafruit Industries LLC _udev_rule("239a", "000c", 'ENV{ID_MM_DEVICE_IGNORE}="1"'), # Feather 32U4 _udev_rule("239a", "000d", 'ENV{ID_MM_DEVICE_IGNORE}="1"'), # ItsyBitsy 32U4 3V3/8MHz _udev_rule("239a", "000e", 'ENV{ID_MM_DEVICE_IGNORE}="1"'), # ItsyBitsy 32U4 5V/16MHz # dog hunter AG _udev_rule("2a03", "0036", 'ENV{ID_MM_DEVICE_IGNORE}="1"'), # Leonardo _udev_rule("2a03", "0037", 'ENV{ID_MM_DEVICE_IGNORE}="1"') # Micro } } # These rules are no longer recommended, only use them to check for their presence. deprecated_rules = { 'atmel-dfu': {_deprecated_udev_rule("03eb", "2ff4"), _deprecated_udev_rule("03eb", "2ffb"), _deprecated_udev_rule("03eb", "2ff0")}, 'kiibohd': {_deprecated_udev_rule("1c11")}, 'stm32': {_deprecated_udev_rule("1eaf", "0003"), _deprecated_udev_rule("0483", "df11")}, 'bootloadhid': {_deprecated_udev_rule("16c0", "05df")}, 'caterina': {'ATTRS{idVendor}=="2a03", ENV{ID_MM_DEVICE_IGNORE}="1"', 'ATTRS{idVendor}=="2341", ENV{ID_MM_DEVICE_IGNORE}="1"'}, 'tmk': {_deprecated_udev_rule("feed")} } if udev_dir.exists(): udev_rules = [rule_file for rule_file in udev_dir.glob('*.rules')] current_rules = set() # Collect all rules from the config files for rule_file in udev_rules: for line in rule_file.read_text().split('\n'): line = line.strip() if not line.startswith("#") and len(line): current_rules.add(line) # Check if the desired rules are among the currently present rules for bootloader, rules in desired_rules.items(): if not rules.issubset(current_rules): deprecated_rule = deprecated_rules.get(bootloader) if deprecated_rule and deprecated_rule.issubset(current_rules): cli.log.warning("{fg_yellow}Found old, deprecated udev rules for '%s' boards. The new rules on https://docs.qmk.fm/#/faq_build?id=linux-udev-rules offer better security with the same functionality.", bootloader) else: # For caterina, check if ModemManager is running if bootloader == "caterina": if check_modem_manager(): rc = CheckStatus.WARNING cli.log.warning("{fg_yellow}Detected ModemManager without the necessary udev rules. Please either disable it or set the appropriate udev rules if you are using a Pro Micro.") rc = CheckStatus.WARNING cli.log.warning("{fg_yellow}Missing or outdated udev rules for '%s' boards.
Run 'sudo cp %s/util/udev/50-qmk.rules /etc/udev/rules.d/'.", bootloader, QMK_FIRMWARE) else: cli.log.warning("{fg_yellow}'%s' does not exist. Skipping udev rule checking...", udev_dir) return rc
def test_load_and_save_file(self): run_id = uuid.uuid4() save_path = Path(self.test_dir.name) / str(run_id) save_path.mkdir() datapath_object = MLObject() datapath_object.set_type('0.0.1', MLSchemaTypes.DATAPATH) datapath_object.run_id = run_id datapath_object.step_id = uuid.uuid4() datapath_object.run_date = datetime.datetime.now() datapath_object.data_store = None # This is an intentional bug # This is an intentional bug (Should be AWS_BLOB) datapath_object.storage_connection_type = 'AWS_BLOB_OBJECT' datapath_object.connection.endpoint = None # Another intentional bug datapath_object.connection.access_key_id = 'AKIAIOSFODNN7EXAMPLE' datapath_object.connection.secret_access_key = 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY' response, errors = datapath_object.save(save_path) self.assertFalse(response) self.assertTrue(len(errors) == 3) self.assertTrue(len(list(Path(save_path).glob('*'))) == 0) datapath_object.storage_connection_type = 'AWS_BLOB' response, errors = datapath_object.save(save_path) self.assertFalse(response) self.assertTrue(len(errors) == 2) self.assertTrue(len(list(Path(save_path).glob('*'))) == 0) datapath_object.connection.endpoint = 'http://s3.amazon.com/BUCKET' response, errors = datapath_object.save(save_path) self.assertFalse(response) self.assertTrue(len(errors) == 1) self.assertTrue(len(list(Path(save_path).glob('*'))) == 0) datapath_object.data_store = 'BUCKET NAME' response, errors = datapath_object.save(save_path) self.assertTrue(response) self.assertTrue(len(errors) == 0) path = Path(save_path) all_files = list(path.glob('*')) self.assertTrue(len(all_files) == 1) ml_object, errors = MLObject.create_object_from_file(all_files[0]) self.assertTrue(len(ml_object) == 13) self.assertTrue(len(errors) == 0) self.assertTrue(datapath_object.data_store == ml_object.data_store) self.assertTrue(datapath_object.storage_connection_type == ml_object.storage_connection_type) self.assertTrue(datapath_object.connection.endpoint == ml_object.connection.endpoint)
def psn_qa_results(path): """Create qa results from a PsN qa run :param path: Path to PsN qa run directory :return: A :class:`QAResults` object """ path = Path(path) original_model = Model(path / 'linearize_run' / 'scm_dir1' / 'derivatives.mod') base_path = list(path.glob('*_linbase.mod'))[0] base_model = Model(base_path) fullblock_path = path / 'modelfit_run' / 'fullblock.mod' if fullblock_path.is_file(): fullblock_model = Model(fullblock_path) else: fullblock_model = None boxcox_path = path / 'modelfit_run' / 'boxcox.mod' if boxcox_path.is_file(): boxcox_model = Model(boxcox_path) else: boxcox_model = None tdist_path = path / 'modelfit_run' / 'tdist.mod' if tdist_path.is_file(): tdist_model = Model(tdist_path) else: tdist_model = None addetas_path = path / 'add_etas_run' / 'add_etas_linbase.mod' if addetas_path.is_file(): addetas_model = Model(addetas_path) else: addetas_model = None iov_path = path / 'modelfit_run' / 'iov.mod' if iov_path.is_file(): iov_model = Model(iov_path) else: iov_model = None frem_path = path / 'frem_run' / 'results.json' if frem_path.is_file(): frem_res = read_results(frem_path) else: frem_res = None cdd_path = path / 'cdd_run' / 'results.json' if cdd_path.is_file(): cdd_res = read_results(cdd_path) else: cdd_res = None scm_path = path / 'scm_run' / 'results.json' if scm_path.is_file(): scm_res = read_results(scm_path) else: scm_res = None simeval_path = path / 'simeval_run' / 'results.json' if simeval_path.is_file(): simeval_res = read_results(simeval_path) else: simeval_res = None args = psn_helpers.options_from_command(psn_helpers.psn_command(path)) if 'add_etas' not in args: etas_added_to = None else: etas_added_to = args['add_etas'].split(',') idv = args.get('resmod_idv', 'TIME') resmod_idv_path = path / f'resmod_{idv}' / 'results.json' if resmod_idv_path.is_file(): resmod_idv_res = read_results(resmod_idv_path) else: resmod_idv_res = None res = calculate_results( original_model, base_model, fullblock_model=fullblock_model, boxcox_model=boxcox_model, tdist_model=tdist_model, add_etas_model=addetas_model, iov_model=iov_model, etas_added_to=etas_added_to, frem_results=frem_res, cdd_results=cdd_res, scm_results=scm_res, simeval_results=simeval_res, resmod_idv_results=resmod_idv_res, ) bias = read_results_summary(path) res.structural_bias = bias return res
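The repeated load-if-present blocks above could be collapsed with a small helper; a hedged sketch (the helper name is hypothetical):

def _model_if_exists(model_path):
    # Return a Model when the PsN run produced the file, else None
    return Model(model_path) if model_path.is_file() else None

fullblock_model = _model_if_exists(path / 'modelfit_run' / 'fullblock.mod')
boxcox_model = _model_if_exists(path / 'modelfit_run' / 'boxcox.mod')
tdist_model = _model_if_exists(path / 'modelfit_run' / 'tdist.mod')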
def main(): global args global cols global genz parser = argparse.ArgumentParser() parser.add_argument('-k', '--keyboard', action='store_true', help='break to interactive keyboard at certain points') parser.add_argument('-c', '--cclass', action='store', default=None, nargs='+', help='select only components with this component class (cclass)') parser.add_argument('-f', '--fabric', action='store', default=None, nargs='+', help='select only components with this fabric number') parser.add_argument('-g', '--gcid', action='store', default=None, nargs='+', help='select only components with this GCID') parser.add_argument('-s', '--serial', action='store', default=None, nargs='+', help='select only components with this serial number') parser.add_argument('-u', '--cuuid', action='store', default=None, nargs='+', help='select only components with this class UUID (cuuid)') parser.add_argument('-v', '--verbosity', action='count', default=0, help='increase output verbosity') parser.add_argument('-F', '--fake-root', action='store', help='fake root directory') parser.add_argument('-G', '--genz-version', choices=['1.1'], default='1.1', help='Gen-Z spec version of Control Space structures') parser.add_argument('-P', '--post_mortem', action='store_true', help='enter debugger on uncaught exception') parser.add_argument('-S', '--struct', action='store', help='input file representing a single control structure') parser.add_argument('operations', type=str, nargs='+', help='the read/write operations to perform') args = parser.parse_args() if args.verbosity > 5: print('Gen-Z version = {}'.format(args.genz_version)) genz = import_module('genz.genz_{}'.format(args.genz_version.replace('.', '_'))) map = genz.ControlStructureMap() if args.keyboard: set_trace() if args.struct: fpath = Path(args.struct) struct = get_struct(fpath, map, verbosity=args.verbosity) print(struct) return try: match_fabrics = [] if args.fabric is None else [int(f, base=10) for f in args.fabric] except ValueError: print('invalid fabric number: {}'.format(args.fabric)) exit(1) try: match_gcids = [] if args.gcid is None else [GCID(str=g) for g in args.gcid] except ValueError: print('invalid GCID: {}'.format(args.gcid)) exit(1) try: match_serials = [] if args.serial is None else [int(s, base=0) for s in args.serial] except ValueError: print('invalid serial number: {}'.format(args.serial)) exit(1) try: match_cuuids = [] if args.cuuid is None else [UUID(u) for u in args.cuuid] except ValueError: print('invalid class uuid: {}'.format(args.cuuid)) exit(1) try: # Revisit: allow cclass names match_cclasses = [] if args.cclass is None else [int(c, base=0) for c in args.cclass] except ValueError: print('invalid cclass number: {}'.format(args.cclass)) exit(1) ops = [Operation(op) for op in args.operations] if args.fake_root is not None: sys_devices = Path(args.fake_root) / 'sys/devices' else: sys_devices = Path('/sys/devices') dev_fabrics = sys_devices.glob('genz*') # locally-visible Gen-Z devices genz_fabrics = Path('/sys/bus/genz/fabrics') # fabric components (FM-only) all_comps = {} for fab in dev_fabrics: comps = {} fabnum = component_num(fab) bridges = fab.glob('bridge*') # local bridges for br_path in bridges: brnum = component_num(br_path) br = Comp(fabnum, br_path, map=map, name='bridge{}'.format(brnum), verbosity=args.verbosity) if args.keyboard: set_trace() selected = br.check_selected(args, match_cuuids, match_serials, match_fabrics, match_gcids, match_cclasses) if selected: comps[br.cuuid_sn] = br # save br for later processing # end for br_path 
fab_comps = genz_fabrics.glob('fabric{}/*:*/*:*:*'.format(fabnum)) # FM-visible components for comp_path in fab_comps: comp = Comp(fabnum, comp_path, map=map, verbosity=args.verbosity) if args.keyboard: set_trace() selected = comp.check_selected(args, match_cuuids, match_serials, match_fabrics, match_gcids, match_cclasses) if selected: if not comp.cuuid_sn in comps.keys(): comps[comp.cuuid_sn] = comp # save comp for later processing if args.verbosity < 1: continue # end for comp os_comps = fab.glob('*:*/*:*:*') # other OS-visible Gen-Z devices for comp_path in os_comps: comp = Comp(fabnum, comp_path, verbosity=args.verbosity) if args.keyboard: set_trace() selected = comp.check_selected(args, match_cuuids, match_serials, match_fabrics, match_gcids, match_cclasses) if not selected: continue # end for comp_path if args.keyboard: set_trace() # now we actually process things for comp in sorted(comps.values()): if comp.cuuid_sn not in all_comps.keys(): drs = comp.set_comp(ops) # Revisit: how does user name a DR component? #for dr in drs: # _ = dr.ls_comp(ignore_dr=False) # end for comp all_comps |= comps # end for fab if args.keyboard: set_trace() return