def create_wordtranscriptions_wsj(scp_files, wsj_dirs, word_transcriptions):
    transcriptions = {}
    delete_pattern = re.compile(r"(?<!\\)[()!:]")
    for dot_file in itertools.chain(
            glob.iglob(os.path.join(wsj_dirs[0], 'transcrp', 'dots') + '/*/*/*.dot'),
            glob.iglob(wsj_dirs[0] + '/si_et_*/*/*.dot'),
            glob.iglob(os.path.join(wsj_dirs[1], 'trans', 'wsj1') + '/*/*/*.dot')):
        for line in open(dot_file):
            parts = line.split()
            transcription = [re.sub(delete_pattern, '',
                                    trans.lower().lstrip('(-~').rstrip(')~').replace('*', ''))
                             for trans in parts[:-1] if trans != '.']
            utt_id = parts[-1][1:9].lower()
            transcriptions[utt_id] = transcription
    with open(word_transcriptions, 'w') as transcriptions_file:
        print >> transcriptions_file, "#!MLF!#"
        for scp_file in scp_files:
            for line in open(scp_file):
                name = os.path.splitext(os.path.basename(line.rstrip()))[0]
                if name not in transcriptions:
                    sys.exit("No transcription found for %s" % name)
                print >> transcriptions_file, '"*/%s.mfc"' % name
                print >> transcriptions_file, '<s>'
                for word in transcriptions[name]:
                    if not word.startswith('[') and not word.startswith('<') and not word.endswith('-'):
                        if word.startswith('"'):
                            print >> transcriptions_file, "\\%s" % word
                        elif len(word) > 0:
                            print >> transcriptions_file, word
                print >> transcriptions_file, '</s>'
                print >> transcriptions_file, '.'
def extended_iglob(pattern):
    if '{' in pattern:
        m = re.match('(.*){([^}]+)}(.*)', pattern)
        if m:
            before, switch, after = m.groups()
            for case in switch.split(','):
                for path in extended_iglob(before + case + after):
                    yield path
            return
    if '**/' in pattern:
        seen = set()
        first, rest = pattern.split('**/', 1)
        if first:
            first = iglob(first + '/')
        else:
            first = ['']
        for root in first:
            for path in extended_iglob(join_path(root, rest)):
                if path not in seen:
                    seen.add(path)
                    yield path
            for path in extended_iglob(join_path(root, '*', '**/' + rest)):
                if path not in seen:
                    seen.add(path)
                    yield path
    else:
        for path in iglob(pattern):
            yield path
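# A minimal usage sketch of the extended_iglob() generator above. It assumes the
# surrounding module provides `re`, `iglob` and `join_path`; here those names are
# filled in with the obvious standard-library equivalents, which may differ from
# the original module's own helpers.
import os
import re
from glob import iglob

join_path = os.path.join  # assumed equivalent of the module's join_path helper

# Brace alternation expands into two ordinary globs, and '**/' matches at any depth.
for path in extended_iglob('src/{pkg1,pkg2}/**/*.pyx'):
    print(path)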
def add_test_methods(test_class):
    ignored = set(glob.iglob(ignore_glob))

    for filename in glob.iglob(os.path.join(basedir, tests_glob)):
        if filename in ignored:
            continue

        validating, _ = os.path.splitext(os.path.basename(filename))
        id = itertools.count(1)

        with open(filename) as test_file:
            for case in json.load(test_file):
                for test in case["tests"]:
                    name = "test_%s_%s_%s" % (
                        validating,
                        next(id),
                        re.sub(r"[\W ]+", "_", test["description"]),
                    )
                    assert not hasattr(test_class, name), name

                    test_case = make_case(
                        data=test["data"],
                        schema=case["schema"],
                        valid=test["valid"],
                        name=name,
                    )
                    test_case = maybe_skip(skip, test_case, case)
                    setattr(test_class, name, test_case)
    return test_class
def create_wordtranscriptions_dsp_eng(scp_files, dsp_eng_dir, word_transcriptions): transcriptions = {} for file in itertools.chain(glob.iglob(dsp_eng_dir + '/*/*.txt'),glob.iglob(dsp_eng_dir + '/*/*/*.txt')): id = os.path.splitext(os.path.basename(file))[0].replace('_','') trans = [] for line in open(file): nline = line.replace('.', '').replace(',', '').lower() trans.extend(nline.split()) transcriptions[id] = trans with open(word_transcriptions, 'w') as transcriptions_file: print >> transcriptions_file, "#!MLF!#" for scp_file in scp_files: for line in open(scp_file): name = os.path.splitext(os.path.basename(line.rstrip()))[0] n_name = name.replace('_', '') if not transcriptions.has_key(name): sys.exit("No transcription found for %s" % name) print >> transcriptions_file, '"*/%s.mfc"' % n_name print >> transcriptions_file, '<s>' for word in transcriptions[name]: if not word.startswith('[') and not word.startswith('<') and not word.endswith('-'): if word.startswith('"'): print >> transcriptions_file, "\%s" % word elif len(word) > 0: print >> transcriptions_file, word print >> transcriptions_file, '</s>' print >> transcriptions_file, '.'
def organize_files(files):
    # change into the folder holding the data
    chdir(files.pathout)
    folder = "cmwp_idea_pole_figures"
    call(["mkdir", folder])
    for datafile in iglob("*.mtex"):
        if existe(folder + datafile):
            print("Error moving .mtex file: %s already exists in destination" % datafile)
        else:
            move(datafile, folder)
    folder = "cmwp_idea_files"
    call(["mkdir", folder])
    for datafile in iglob(files.input_file + "*"):
        if existe(folder + datafile):
            print("Error moving files into %s: %s already exists in destination" % (folder, datafile))
        else:
            move(datafile, folder)
    # change into the folder holding all the fit results
    results = files.results_folder + files.pathout
    chdir(results)
    folder = "cmwp_idea_fit_files"
    call(["mkdir", folder])
    for datafile in iglob(files.input_file + "*"):
        if existe(folder + datafile):
            print("Error moving files into %s: %s already exists in destination" % (folder, datafile))
        else:
            move(datafile, folder)
def iter_paths(prefix=None, from_dir=None, only_dirs=False): if prefix: start_at = os.path.expandvars(os.path.expanduser(prefix)) # TODO: implement env var completion. if not prefix.startswith(('%', '$', '~')): start_at = os.path.join(from_dir, prefix) start_at = os.path.expandvars(os.path.expanduser(start_at)) prefix_split = os.path.split(prefix) prefix_len = len(prefix_split[1]) if ('/' in prefix and not prefix_split[0]): prefix_len = 0 for path in glob.iglob(start_at + '*'): if not only_dirs or os.path.isdir(path): suffix = ('/' if os.path.isdir(path) else '') item = os.path.split(path)[1] yield prefix + (item + suffix)[prefix_len:] else: prefix = from_dir start_at = os.path.expandvars(os.path.expanduser(prefix)) stuff = glob.iglob(start_at + "*") for path in glob.iglob(start_at + '*'): if not only_dirs or os.path.isdir(path): yield path[len(start_at):] + ('' if not os.path.isdir(path) else '/')
def gen_tar(rundir, db, instids, module, cname, solvers, user="******", verbose=False): prepdir = '.tmp_{0}'.format(rundir) if not os.path.exists(prepdir): os.makedirs(prepdir) else: raise IOError("File preparation directory {0} already exists".format( prepdir)) max_time = 60 * 60 * 5 # 5 hours remotehome = batlab_base_dir_template.format(user=user) nfiles = _gen_files(prepdir, os.path.basename(db), instids, module, cname, solvers, remotehome, max_time=max_time, verbose=verbose) subfiles = glob.iglob(os.path.join(prepdir, '*.sub')) shfiles = glob.iglob(os.path.join(prepdir, '*.sh')) nfiles += 1 # add db if verbose: print("tarring {0} files".format(nfiles)) tarname = "{0}.tar.gz".format(rundir) with tarfile.open(tarname, 'w:gz') as tar: tar.add(db, arcname="{0}/{1}".format(rundir, os.path.basename(db))) for f in subfiles: basename = os.path.basename(f) tar.add(f, arcname="{0}/{1}".format(rundir, basename)) for f in shfiles: basename = os.path.basename(f) tar.add(f, arcname="{0}/{1}".format(rundir, basename)) shutil.rmtree(prepdir) return tarname
def check_all_input_data(case): ############################################################################### success = check_input_data(case=case, download=True) expect(success, "Failed to download input data") get_refcase = case.get_value("GET_REFCASE") run_type = case.get_value("RUN_TYPE") continue_run = case.get_value("CONTINUE_RUN") # We do not fully populate the inputdata directory on every # machine and do not expect every user to download the 3TB+ of # data in our inputdata repository. This code checks for the # existence of inputdata in the local inputdata directory and # attempts to download data from the server if it's needed and # missing. if get_refcase and run_type != "startup" and not continue_run: din_loc_root = case.get_value("DIN_LOC_ROOT") run_refdate = case.get_value("RUN_REFDATE") run_refcase = case.get_value("RUN_REFCASE") run_refdir = case.get_value("RUN_REFDIR") rundir = case.get_value("RUNDIR") refdir = os.path.join(din_loc_root, run_refdir, run_refcase, run_refdate) expect(os.path.isdir(refdir), """ ***************************************************************** prestage ERROR: $refdir is not on local disk obtain this data from the svn input data repository > mkdir -p %s > cd %s > cd .. > svn export --force https://svn-ccsm-inputdata.cgd.ucar.edu/trunk/inputdata/%s or set GET_REFCASE to FALSE in env_run.xml and prestage the restart data to $RUNDIR manually *****************************************************************""" % (refdir, refdir, refdir)) logger.info(" - Prestaging REFCASE (%s) to %s" % (refdir, rundir)) # prestage the reference case's files. if (not os.path.exists(rundir)): logger.debug("Creating run directory: %s"%rundir) os.makedirs(rundir) for rcfile in glob.iglob(os.path.join(refdir,"*%s*"%run_refcase)): logger.debug("Staging file %s"%rcfile) rcbaseline = os.path.basename(rcfile) if not os.path.exists("%s/%s" % (rundir, rcbaseline)): os.symlink(rcfile, "%s/%s" % ((rundir, rcbaseline))) # copy the refcases' rpointer files to the run directory for rpointerfile in glob.iglob(os.path.join("%s","*rpointer*") % (refdir)): logger.debug("Copy rpointer %s"%rpointerfile) shutil.copy(rpointerfile, rundir) for cam2file in glob.iglob(os.path.join("%s","*.cam2.*") % rundir): camfile = cam2file.replace("cam2", "cam") os.symlink(cam2file, camfile)
def run(self, input): jobdir = input['CreateJobDirectory']['JobDir'] jobs = input['MonitorJobs']['LSFJobIDs'] job_status = input['MonitorJobs']['LSFJobStatus'] actions = {'FilesToCompress':{'Files':[]},'FilesToClean':{'Files':[]}} actions['FilesToClean']['Files'].append(input['ExpandConfig']['ExpandedFullCFG']) if input.has_key('RunTestEvents'): actions['FilesToClean']['Files'].append(input['RunTestEvents']['TestCFG']) for rt in glob.iglob('%s/*.root' % jobdir): actions['FilesToClean']['Files'].append(rt) for pyc in glob.iglob('%s/*.pyc' % jobdir): actions['FilesToClean']['Files'].append(pyc) for j in jobs: status = job_status[j] if os.path.exists(status) and not status.endswith('.gz'): actions['FilesToCompress']['Files'].append(status) compress = GZipFiles(self.dataset,self.user,self.options) compressed = compress.run(actions) clean = CleanFiles(self.dataset,self.user,self.options) removed = clean.run(actions) return {'Cleaned':removed,'Compressed':compressed}
def windows_package(args): pkgfile = 'windows_package.7z' pkgdir = os.path.abspath('windows_package') logging.info("Packaging libraries and headers in package: %s", pkgfile) j = os.path.join pkgdir_lib = os.path.abspath(j(pkgdir, 'lib')) with remember_cwd(): os.chdir(args.output) logging.info("Looking for static libraries and dlls in: \"%s", os.getcwd()) libs = list(glob.iglob('**/*.lib', recursive=True)) dlls = list(glob.iglob('**/*.dll', recursive=True)) os.makedirs(pkgdir_lib, exist_ok=True) for lib in libs: logging.info("packing lib: %s", lib) shutil.copy(lib, pkgdir_lib) for dll in dlls: logging.info("packing dll: %s", dll) shutil.copy(dll, pkgdir_lib) os.chdir(get_mxnet_root()) logging.info('packing python bindings') copy_tree('python', j(pkgdir, 'python')) logging.info('packing headers') copy_tree('include', j(pkgdir, 'include')) logging.info("Compressing package: %s", pkgfile) check_call(['7z', 'a', pkgfile, pkgdir])
def import_vectormap(fname, layer=''): """Import a vector layer into GRASS. Uses grass_safe to convert filename into a layer name if none is provided. NOTE: snap and min_area values are hardcoded and may not be appropriate for all projects @output: layer(str): layer name TODO: not sure why making this function so complicated. This is a legacy. """ if not layer: layer = grass_safe(fname) # remove temporary previously projected shape files for f in iglob('proj.*'): os.remove(f) proj = grass.read_command('g.proj', flags='wf') check_call(['ogr2ogr', '-t_srs', proj, 'proj.shp', fname]) clean_fields('proj.shp') if grass.run_command('v.in.ogr', flags='w', dsn='proj.shp', snap='0.01', output=layer, overwrite=True, quiet=True): raise RuntimeError('unable to import vectormap ' + fname) for f in iglob('proj.*'): os.remove(f) return layer
def extended_iglob(pattern): if "{" in pattern: m = re.match("(.*){([^}]+)}(.*)", pattern) if m: before, switch, after = m.groups() for case in switch.split(","): for path in extended_iglob(before + case + after): yield path return if "**/" in pattern: seen = set() first, rest = pattern.split("**/", 1) if first: first = iglob(first + "/") else: first = [""] for root in first: for path in extended_iglob(join_path(root, rest)): if path not in seen: seen.add(path) yield path for path in extended_iglob(join_path(root, "*", "**/" + rest)): if path not in seen: seen.add(path) yield path else: for path in iglob(pattern): yield path
def main():
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('directory')
    args = arg_parser.parse_args()

    filenames = itertools.chain(
        glob.iglob(args.directory + '/*.gz'),
        glob.iglob(args.directory + '/*/*.gz'),
    )

    for filename in filenames:
        with gzip.GzipFile(filename, mode='rb') as in_file:
            for line in in_file:
                class_, id_ = line.decode('utf8').strip().split('|', 1)

                if class_ == 'Album':
                    if id_.startswith('fotopedia'):
                        locale, name = id_.split('-', 2)[1:]
                        name = urllib.parse.quote(name, encoding='utf8',
                                                  safe='/!$&()*+,:;=@[]~\'')
                        print('wiki:{0}:{1}'.format(locale, name))
                    else:
                        print('album:{0}'.format(id_))
                elif class_ == 'MUser':
                    print('user:{0}'.format(id_))
                elif class_ == 'Story':
                    print('story:{0}'.format(id_))
                elif class_ == 'Picture':
                    print('photo:{0}'.format(id_))
                else:
                    raise Exception('Unknown type {0} {1}'.format(class_, id_))
def run(self): common.print_verbose("Running " + self.name + " action") exit_code = 0 for test_dir in glob.iglob('**/test', recursive=True): original_working_directory = os.getcwd() run_directory = os.path.join(original_working_directory, str(test_dir)) common.print_info("Running tests in " + str(run_directory)) common.print_verbose("Changing directory to " + str(run_directory)) os.chdir(run_directory) tests = [] for filename in glob.iglob('**/*.py', recursive=True): tests.append(filename) command = ['/usr/local/bin/python3', '-m', 'unittest'] command.extend(tests) subprocess_exit_code, output = common.run_command(command) if subprocess_exit_code != common.SUCCESS: exit_code = common.FAILED common.print_verbose(output) common.continue_if_failed(subprocess_exit_code, output) common.print_verbose("Changing directory to " + str(original_working_directory)) os.chdir(original_working_directory) return exit_code, ""
def _config_files(dointeractive=False): from os.path import exists, expanduser, expandvars, dirname, join from glob import iglob from os import environ # pattern to distinguish files to run only in interactive mode. # these files are loaded by the pylada-ipython extension itself. pattern = "*.py" if not dointeractive else "ipy_*.py" # dictionary with stuff we want defined when reading config files. global_dict = {"pyladamodules": __all__} local_dict = {} # first configuration files installed with pylada. for filename in iglob(join(join(dirname(__file__), "config"), pattern)): if dointeractive == False and filename[:4] == 'ipy_': continue execfile(filename, global_dict, local_dict) # then configuration files installed in a global config directory. if "PYLADA_CONFIG_DIR" in environ: for directory in environ["PYLADA_CONFIG_DIR"].split(':'): for filename in iglob(join(directory, pattern)): if dointeractive == False and filename[:4] == 'ipy_': continue execfile(filename, global_dict, local_dict) # then user configuration file. if exists(expandvars(expanduser('~/.pylada'))): execfile(expandvars(expanduser('~/.pylada')), global_dict, local_dict) return local_dict
def cleanup_logs(htab):
    # pdb.set_trace()
    # hose slurmctld and slurmdbd logs
    logger.info('cd logdir -> %s' % (htab['logdir']))
    os.chdir(htab['logdir'])
    for f in glob.iglob('*'):
        try:
            os.unlink(f)
        except:
            pass
    # hose slurmd logs
    slogdir = '%s/log' % (htab['logdir'])
    logger.info('cd logdir -> %s' % (slogdir))
    os.chdir(slogdir)
    for f in glob.iglob('*'):
        try:
            os.unlink(f)
        except:
            pass
    # hose the spool
    shutil.rmtree(htab['spooldir'])
    os.mkdir(htab['spooldir'])
def compare_dl_files(fileExtension): # Need to handle gbk in a unique manner as the date will always modify the file slightly so a diff will return false if fileExtension == 'gbk': newestFile = min(glob.iglob('/Users/jmatsumura/Downloads/VAC1_test2.annotation.*.'+fileExtension), key=os.path.getctime) my_cmd = ['diff', '/Users/jmatsumura/mana_dumps/VAC1_test2.annotation.20160329.gbk'] + [newestFile] with open('/Users/jmatsumura/mana_dumps/gbk_diff.txt', "w") as outfile: subprocess.call(my_cmd, stdout=outfile) result = "OK" if os.stat("/Users/jmatsumura/mana_dumps/gbk_diff.txt").st_size < 300 else "FAILED" # Similar to the previous, handle by file size differences. elif fileExtension == 'GO_annotation.txt': newestFile = min(glob.iglob('/Users/jmatsumura/Downloads/VAC1_test2_'+fileExtension), key=os.path.getctime) my_cmd = ['diff', '/Users/jmatsumura/mana_dumps/VAC1_test2_GO_annotation.txt'] + [newestFile] with open('/Users/jmatsumura/mana_dumps/GO_diff.txt', "w") as outfile: subprocess.call(my_cmd, stdout=outfile) f_size = os.stat("/Users/jmatsumura/mana_dumps/GO_diff.txt").st_size result = "OK" if ((f_size > 2200000) and (f_size < 2900000)) else "FAILED" elif fileExtension == 'tbl' or fileExtension == 'gff3': newestFile = min(glob.iglob('/Users/jmatsumura/Downloads/VAC1_test2.annotation.*.'+fileExtension), key=os.path.getctime) result = "OK" if filecmp.cmp('/Users/jmatsumura/mana_dumps/VAC1_test2.annotation.20160329.'+fileExtension, newestFile) else "FAILED" elif fileExtension == 'sigp': newestFile = min(glob.iglob('/Users/jmatsumura/Downloads/sigp4.1_VAC.transcript.9803630972.1_pred.txt'), key=os.path.getctime) result = "OK" if filecmp.cmp('/Users/jmatsumura/mana_dumps/sigpOut.txt', newestFile) else "FAILED" else: newestFile = min(glob.iglob('/Users/jmatsumura/Downloads/VAC1_test2_'+fileExtension), key=os.path.getctime) result = "OK" if filecmp.cmp('/Users/jmatsumura/mana_dumps/VAC1_test2_'+fileExtension, newestFile) else "FAILED" return result
def add_data_files(config): """ Function adding all necessary data files. """ # Add all test data files for data_folder in glob.iglob(os.path.join( SETUP_DIRECTORY, "obspy", "*", "tests", "data")): path = os.path.join(*data_folder.split(os.path.sep)[-4:]) config.add_data_dir(path) # Add all data files for data_folder in glob.iglob(os.path.join( SETUP_DIRECTORY, "obspy", "*", "data")): path = os.path.join(*data_folder.split(os.path.sep)[-3:]) config.add_data_dir(path) # Add all docs files for data_folder in glob.iglob(os.path.join( SETUP_DIRECTORY, "obspy", "*", "docs")): path = os.path.join(*data_folder.split(os.path.sep)[-3:]) config.add_data_dir(path) # image directories config.add_data_dir(os.path.join("obspy", "core", "tests", "images")) config.add_data_dir(os.path.join("obspy", "imaging", "tests", "images")) config.add_data_dir(os.path.join("obspy", "segy", "tests", "images")) # Add the taup models. config.add_data_dir(os.path.join("obspy", "taup", "tables")) # Adding the Flinn-Engdahl names files config.add_data_dir(os.path.join("obspy", "core", "util", "geodetics", "data")) # Adding the version information file config.add_data_files(os.path.join("obspy", "RELEASE-VERSION"))
def cron():
    """
    Periodically fetches oldest pcap file to extract messages from.
    """
    while True:
        time.sleep(1)

        try:
            oldest = min(glob.iglob("{}/*.pcap".format(SETTINGS['pcap_dir'])))
        except ValueError as err:
            logging.warning(err)
            continue

        latest = max(glob.iglob("{}/*.pcap".format(SETTINGS['pcap_dir'])))
        if oldest == latest:
            continue

        tmp = oldest
        dump = tmp.replace(".pcap", ".pcap_")

        try:
            os.rename(tmp, dump)  # Mark file as being read
        except OSError as err:
            logging.warning(err)
            continue

        logging.info("Loading: %s", dump)

        start = time.time()
        cache = Cache(filepath=dump)
        cache.cache_messages()
        end = time.time()
        elapsed = end - start

        logging.info("Dump: %s (%d messages)", dump, cache.count)
        logging.info("Elapsed: %d", elapsed)

        os.remove(dump)
def pushData(self,path,run_name,msg): print 'In pushData!' print path print run_name msg.setText('Updating database records for '+run_name+'\nThis could take a while...') for unzipthis in glob.iglob(os.path.join(path,'*.gz')): f_in = gzip.open(os.path.join(path,unzipthis),'rb') f_out = open(os.path.join(path,unzipthis[:-3]),'wb') f_out.writelines(f_in) f_in.close() f_out.close() os.remove(os.path.join(path,unzipthis)) ret = pushRunData(path,self) if ret: # there was a problem print 'Warning: Could not update',run_name,'records in database.' for ex in ['*.activations','*.test','*.weights']: for zipthis in glob.iglob(os.path.join(path,ex)): print 'zipping',zipthis f_in = open(os.path.join(path,zipthis),'rb') f_out = gzip.open(os.path.join(path,(zipthis+'.gz')),'wb') f_out.writelines(f_in) f_out.close() f_in.close() os.remove(os.path.join(path,zipthis)) return ret
def generate_report(opts): """ Report is generated from .html files, and it's a .html file itself. Two major parts: bug reports (coming from 'report-*.html' files) and crash reports (coming from 'failures' directory content). Each parts are tables (or multiple tables) with rows. To reduce the memory footprint of the report generation, these tables are generated before the final report. Those called fragments (because they are fragments). The 'assembly_report' write the final report. Copy stylesheet(s) and javascript file(s) are also part of this method. """ out_dir = opts['out_dir'] result = 0 pool = multiprocessing.Pool(1 if opts['sequential'] else None) bug_generator = pool.imap_unordered( scan_bug, glob.iglob(os.path.join(out_dir, '*.html'))) crash_generator = pool.imap_unordered( scan_crash, glob.iglob(os.path.join(out_dir, 'failures', '*.info.txt'))) fragment = lambda fun, x: fun(x, out_dir, opts['prefix']) with fragment(bug_fragment, bug_generator) as bugs: with fragment(crash_fragment, crash_generator) as crashes: result = bugs.count + crashes.count if result > 0: assembly_report(opts, bugs, crashes) copy_resource_files(out_dir) pool.close() pool.join() return result
def _cleandatafile(): ''' delete all data files older than xx days.''' vanaf = time.time() - (botsglobal.ini.getint('settings','maxdays',30) * 3600 * 24) frompath = botslib.join(botsglobal.ini.get('directories','data','botssys/data'),'*') for filename in glob.iglob(frompath): statinfo = os.stat(filename) if not stat.S_ISDIR(statinfo.st_mode): try: os.remove(filename) #remove files - should be no files in root of data dir except: botsglobal.logger.exception(_('Cleanup could not remove file')) elif statinfo.st_mtime > vanaf : continue #directory is newer than maxdays, which is also true for the data files in it. Skip it. else: #check files in dir and remove all older than maxdays frompath2 = botslib.join(filename,'*') emptydir = True #track check if directory is empty after loop (should directory itself be deleted/) for filename2 in glob.iglob(frompath2): statinfo2 = os.stat(filename2) if statinfo2.st_mtime > vanaf or stat.S_ISDIR(statinfo2.st_mode): #check files in dir and remove all older than maxdays emptydir = False else: try: os.remove(filename2) except: botsglobal.logger.exception(_('Cleanup could not remove file')) if emptydir: try: os.rmdir(filename) except: botsglobal.logger.exception(_('Cleanup could not remove directory'))
def build_zlib(wd, conf): version = '1.2.8' extract_dir = os.path.join(wd, 'v' + version) archive = extract_dir + '.tar.gz' url = 'https://github.com/madler/zlib/archive/v%s.tar.gz' % version download_source_archive(url, archive, extract_dir) repodir = os.path.join(extract_dir, 'zlib-' + version) workflow = [ (conf.cmake_cmd([]), {'cwd': repodir}), (conf.make + ['clean'], {'cwd': repodir}), (conf.make, {'cwd': repodir}), ] # ZLib seems to build debug and release at once def maybe_dstname(lib): name, ext = os.path.splitext(lib) is_debug = name.endswith('d') if is_debug and conf.debug: return name[:-1] + ext elif not is_debug and not conf.debug: return lib if conf.execute_tasks(workflow): for f in glob.iglob(os.path.join(repodir, '*.h')): shutil.copy(f, os.path.join(conf.installdir, 'include')) for f in glob.iglob(os.path.join(repodir, '*.lib')): dst = maybe_dstname(os.path.basename(f)) if dst: shutil.copy(f, os.path.join(conf.installdir, 'lib', dst)) for f in glob.iglob(os.path.join(repodir, '*.dll')): dst = maybe_dstname(os.path.basename(f)) if dst: shutil.copy(f, os.path.join(conf.installdir, 'lib', dst)) return True
def copy_files(dir_to_scan, dir_to_copy, extension, recursive, delete_org=False):
    import os, glob, shutil
    if not recursive:
        # single pass over the matches; the original wrapped this in a
        # "while i < 10000" loop that re-copied the same files repeatedly
        for org_path in glob.iglob(os.path.join(dir_to_scan, extension)):
            file_name = os.path.basename(org_path)
            shutil.copyfile(org_path, os.path.join(dir_to_copy, file_name))
            if delete_org:
                os.remove(org_path)
    else:
        for path, dirs, files in os.walk(dir_to_scan):
            for d in dirs:
                for f in glob.iglob(os.path.join(path, d, extension)):
                    shutil.copyfile(f, os.path.join(dir_to_copy, os.path.basename(f)))
                    if delete_org:
                        os.remove(f)
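# A shorter sketch of the same copy-and-optionally-delete idea, relying on glob's
# recursive '**' support (Python 3.5+) instead of os.walk. The directory names and
# the '*.txt' pattern are illustrative only, not taken from the original code.
import glob
import os
import shutil

def copy_matching(src_root, dst_dir, pattern='*.txt', delete_org=False):
    for org_path in glob.iglob(os.path.join(src_root, '**', pattern), recursive=True):
        shutil.copyfile(org_path, os.path.join(dst_dir, os.path.basename(org_path)))
        if delete_org:
            os.remove(org_path)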
def main(): # Write a part to put image directories into "groups" source_dirs = [ '/home/sbraden/400mpp_15x15_clm_wac/mare/', '/home/sbraden/400mpp_15x15_clm_wac/pyro/', '/home/sbraden/400mpp_15x15_clm_wac/imps/', '/home/sbraden/400mpp_15x15_clm_wac/mare_immature/' ] for directory in source_dirs: print directory groupname = os.path.split(os.path.dirname(directory))[1] print groupname # read in LROC WAC images wac_img_list = iglob(directory+'*_wac.cub') # read in Clementine images clm_img_list = iglob(directory+'*_clm.cub') make_cloud_plot(wac_img_list, colorloop.next(), groupname) fontP = FontProperties() fontP.set_size('small') #plt.legend(loc='upper left', fancybox=True, prop=fontP, scatterpoints=1) #plt.axis([0.70, 0.86, 0.90, 1.15],fontsize=14) plt.axis([0.60, 0.90, 0.90, 1.20],fontsize=14) plt.axes().set_aspect('equal') # THIS next line does not get called: plt.margins(0.20) # 4% add "padding" to the data limits before they're autoscaled plt.xlabel('WAC 320/415 nm', fontsize=14) plt.ylabel('CLM 950/750 nm', fontsize=14) plt.savefig('lunar_roi_cloud_plot.png', dpi=300) plt.close()
def main(): # It's fragile to rely on the location of this script to find the top-level # source directory. top_level_directory = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))) vsprops_directory = os.path.join(top_level_directory, 'WebKitLibraries', 'win', 'tools', 'vsprops') vsprops_files = glob.glob(os.path.join(vsprops_directory, '*.vsprops')) assert len(vsprops_files), "Couldn't find any .vsprops files in %s" % vsprops_directory newest_vsprops_time = max(map(os.path.getmtime, vsprops_files)) # Delete any manifest-related files because Visual Studio isn't smart # enough to figure out that it might need to rebuild them. obj_directory = os.path.join(os.environ['CONFIGURATIONBUILDDIR'], 'obj') for manifest_file in glob.iglob(os.path.join(obj_directory, '*', '*.manifest*')): delete_if_older_than(manifest_file, newest_vsprops_time) # Delete any precompiled headers because Visual Studio isn't smart enough # to figure out that it might need to rebuild them, even if we touch # wtf/Platform.h below. for precompiled_header in glob.iglob(os.path.join(obj_directory, '*', '*.pch')): delete_if_older_than(precompiled_header, newest_vsprops_time) # Touch wtf/Platform.h so all files will be recompiled. This is necessary # to pick up changes to preprocessor macros (e.g., ENABLE_*). wtf_platform_h = os.path.join(top_level_directory, 'Source', 'JavaScriptCore', 'wtf', 'Platform.h') if os.path.getmtime(wtf_platform_h) < newest_vsprops_time: print 'Touching wtf/Platform.h' os.utime(wtf_platform_h, None)
def do1vol(vol): time_begin=time.time() print(now()) coll_xml=os.environ['COLLDIR']+'/'+vol+'.xml' fo=codecs.open(coll_xml, 'w', 'utf8') fo.write("<collection>\n") print(vol,'phase-1') my_mkdir(phase1dir+'/'+vol) for p in glob.iglob(data_dir+'/'+vol+'/*.xml'): phase1(vol,p) print(vol, 'phase-2 cbetap4top5.xsl') my_mkdir(phase2dir+'/'+vol) for p in glob.iglob(phase1dir+'/'+vol+'/*.xml'): f=os.path.basename(p) fo.write('<doc href="%s/%s/%s"/>\n' % (phase1dir,vol,f)) phase2(vol,p) fo.write("</collection>\n") fo.close() gen_gaiji(vol) print(vol, 'phase-3 p5-pp.xsl') my_mkdir(phase3dir+'/'+vol) for p in glob.iglob(phase2dir+'/'+vol+'/*.xml'): phase3(vol,p) print(vol, 'phase-4') my_mkdir(phase4dir+'/'+vol[:1]) my_mkdir(phase4dir+'/'+vol[:1]+'/'+vol) for p in glob.iglob(phase3dir+'/'+vol+'/*.xml'): phase4(vol,p) print(vol, 'validate') for p in glob.iglob(phase4dir+'/'+vol[:1]+'/'+vol+'/*.xml'): validate(p) s=spend_time(time.time()-time_begin) print(vol, s) log.write(vol+' '+s+'\n')
def main(): try: os.mkdir(os.path.join(workdir,outdir)) except FileExistsError: print('Output directory already exists.') for notebookjson in glob.iglob(os.path.join(workdir,laverndir,dirnotebooks,'*.json')): #import json file as dict with open(notebookjson,'r') as filestr: notebookdict = json.load(filestr) #create dir for notebook nbdirname = clean_dirname(notebookdict['name'].lower()) outpath = os.path.join(workdir,outdir,nbdirname) try: os.mkdir(outpath) except FileExistsError: print('Notebook output directory already exists.') #process each notebook to own dir for notejson in glob.iglob(os.path.join(workdir,laverndir,dirnotes,'*.json')): with open(notejson,'r') as notestr: notedict = json.load(notestr) #if note belongs to notebook if notedict['notebookId'] == notebookdict['id']: notename = clean_filename(notedict['title'].lower(), 'md') notepath = os.path.join(outpath,notename) #create file, add title header and content with open(notepath,'w') as notemd: notemd.write('#{}\n\n{}'.format(notedict['title'],notedict['content'])) print('Done!')
def getResources():
    resources = []
    for fileName in glob.iglob("*.rsf"):
        resources.append(fileName)
    for fileName in glob.iglob("*.rgf"):
        resources.append(fileName)
    return resources
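# An equivalent variant of getResources() above, chaining the two patterns with
# itertools instead of two append loops; a sketch only, assumed to return the same
# file names in the same order.
import glob
import itertools

def get_resources():
    return list(itertools.chain(glob.iglob("*.rsf"), glob.iglob("*.rgf")))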
def load_strains(path, gbk_present, folders_dict): """ load input strains in strain_list """ if gbk_present: glob_item='.gbk' gbk_path=folders_dict['gbk_path'] glob_list=glob.glob('%s*%s'%(path,glob_item)) if len(glob_list)!=0: harmonize_filename(path,glob_list) strain_list= [i.split('/')[-1].split(glob_item)[0] for i in glob.iglob('%s*%s'%(path,glob_item))] ## move gbk files in folder input_GenBank command_organize_gbk_input=''.join(['mv ',path,'*gbk ',gbk_path]) os.system(command_organize_gbk_input) else: gbk_glob=glob.iglob('%s*%s'%(gbk_path,glob_item)) strain_list= [i.split('/')[-1].split(glob_item)[0] for i in gbk_glob] else: glob_item='.faa' glob_list=glob.glob('%s*%s'%(path,glob_item)) if len(glob_list)!=0: harmonize_filename(path,glob_list) strain_list=[i.split('/')[-1].split(glob_item)[0] for i in glob.iglob('%s*%s'%(path,glob_item))] else: protein_glob=glob.iglob('%s*%s'%(folders_dict['protein_path'],glob_item)) strain_list= [i.split('/')[-1].split(glob_item)[0] for i in protein_glob] command_organize_aa_input= 'mv %s*.faa %s'%(path,folders_dict['protein_path']) command_organize_nuc_input='mv %s*.fna %s'%(path,folders_dict['nucleotide_path']) os.system(command_organize_nuc_input) os.system(command_organize_aa_input) write_pickle('%s%s'%(path,'strain_list.cpk'), strain_list)
plt.plot(data, p, color=nextpow2_color, linestyle=nextpow2_style, linewidth=curve_width) legends.append('Next power of 2') #plt.title('CDF of anonymity in the dataset '+input_file) plt.title('') plt.ylabel('Percentile') plt.xlabel('Anonymity set size') plt.legend(legends, loc='lower right') plt.xscale('log') #plt.yscale('log') plt.grid(color=grid_color, linestyle=grid_style, linewidth=grid_width) plt.tight_layout() dataset = input_file.replace('./', '').replace('.sizes', '') plt.savefig('fig4-' + str(i) + '-' + dataset + '-anonymity-cdf.eps') i = 1 plt.grid(color=grid_color, linestyle=grid_style, linewidth=grid_width) for f in glob.iglob('./*.sizes'): dataset = f.replace('./', '').replace('.sizes', '') print("Plotting", 'fig4-' + str(i) + '-' + dataset + '-anonymity-cdf.eps') createFigDatasetAnonymityCDF(i, f) i += 1
# standard libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pydataset import data
import seaborn as sns

import os
os.listdir()
os.listdir('E:/data/analytics')

import glob
print(glob.glob('.'))

# print file names
for file_name in glob.iglob('E:/data/analytics/*.txt', recursive=True):
    print(file_name)

from glob import glob
filenames = glob('E:/data/analytics/mv*.txt')
filenames

dataframes1 = [pd.read_csv(f) for f in filenames]
dataframes1

# 2nd method
filenames2 = filenames
dataframes2 = []
for f in filenames2:
    dataframes2.append(pd.read_csv(f))
dataframes2
def file2name(file): return os.path.splitext(os.path.basename(file))[0] def css2names(source): css = tinycss.make_parser().parse_stylesheet(source) return set(token.value[1:] for rule in css.rules for container in rule.selector if isinstance(container, tinycss.token_data.ContainerToken) for token in container.content if token.type == 'STRING') # resolve configurations outfiles = collections.defaultdict(dict) for fname in glob.iglob(CONFIG['input']['images']): for pattern, config in sorted(GROUP_CONFIG.items(), key=lambda item: '*' in item[0]): if fnmatch.fnmatch(file2name(fname), pattern): outfile = config.get('fname', CONFIG['fname']) outfiles[outfile].setdefault(fname, copy.deepcopy(CONFIG)).update(config) break else: outfiles[CONFIG['fname']][fname] = copy.deepcopy(CONFIG) # remove old build artifacts if os.path.exists(CONFIG['outdir']): shutil.rmtree(CONFIG['outdir']) os.mkdir(CONFIG['outdir'])
import glob

result = glob.iglob('../*.py')
print result  # <generator object iglob at 0x02587648>

for file in result:
    print file
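# The same demonstration in Python 3 syntax, kept as a minimal sketch: iglob still
# returns a lazy generator, so it prints as a generator object until it is iterated.
import glob

result = glob.iglob('../*.py')
print(result)  # e.g. <generator object ...>

for file_name in result:
    print(file_name)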
# Rename all files with one extension to another using the glob module in Python
# (the original note mentions .c to .cpp; the code below renames .csv to .txt).
import glob, os

# print(glob.glob('*.py'))

folder = '/home/cisco/PycharmProjects/samp_proj1/day_021118'

# for file in os.listdir(folder):
#     infiles = os.path.join(folder, file)
#     print(infiles)
#     newfiles = infiles.replace('.csv', '.txt')
#     output = os.rename(infiles, newfiles)

for filename in glob.iglob(os.path.join(folder, '*.csv')):
    os.rename(filename, filename[:-4] + '.txt')

for file in glob.iglob('/home/cisco/PycharmProjects/samp_proj1/day_011118/*'):
    print("see the list", file)
if pre: # Split a.xml into a1.xml and a2.xml splitter.a_splitter(quiet=quiet) # Sort <p>s in a2.xml to make them match Bonetti's sorting. # Produce file a2-sorted.xml sort_a2.sort_a2(quiet=quiet) # Split bonetti.xml and a2-sorted.xml for JuxtaCommons collation splitter.second_half_splitter('bonetti', '2-alfa', '2-bravo', '2-charlie') splitter.second_half_splitter('a2-sorted', '2-alfa', '2-bravo', '2-charlie') # entitize.py for f in iglob('%s*.xml' % (xmlpath)): # base = f.split('/')[-1].split('.')[0] entitize.entitize(f, quiet=quiet) # simplify_markup_for_collation.py / class msTree edition_list = [ 'g', 'a1', 'bonetti-2-alfa', 'bonetti-2-bravo', 'bonetti-2-charlie', 'a2-sorted-2-alfa', 'a2-sorted-2-bravo', 'a2-sorted-2-charlie', 'o' ] for edition in edition_list: mytree = simplify_markup_for_collation.msTree(edition, quiet=quiet) if edition == 'a1': mytree.reduce_layers_to_alph_only() for tag_to_strip in [ 'interp', 'abbr', 'surplus', 'note', 'milestone', 'link', 'anchor'
def find_docs(dir, ext="pdf"): out = [] for filename in glob.iglob(dir + f'/*.{ext}', recursive=True): out.append(filename) return out
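# An equivalent sketch of find_docs() above using pathlib's rglob(), which recurses
# without an explicit '**' pattern; behaviour is assumed to match the glob version.
from pathlib import Path

def find_docs_pathlib(dir, ext="pdf"):
    return [str(p) for p in Path(dir).rglob(f'*.{ext}')]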
from glob import iglob from os.path import isfile from copy import deepcopy from collections import namedtuple Frame = namedtuple('Frame', ('bitmask', 'offsets')) def chunk(s, n): return [s[i:i + n] for i in range(0, len(s), n)] # Process each Pokémon's animation data for front_name in iglob('gfx/pokemon/*/front.2bpp'): mon_name = front_name[len('gfx/pokemon/'):-len('/front.2bpp')] print(mon_name) # Get associated file names bitmask_name = 'gfx/pokemon/%s/bitmask.asm' % mon_name assert isfile(bitmask_name), 'no bitmask.asm for %s' % mon_name frames_name = 'gfx/pokemon/%s/frames.asm' % mon_name assert isfile(frames_name), 'no frames.asm for %s' % mon_name base_name = 'data/pokemon/base_stats/%s.asm' % mon_name if not isfile(base_name): base_name = 'data/pokemon/base_stats/%s.asm' % mon_name.split('_',
def __init__(self, glob_command, out_dir, n_progress): """ Given a glob command describing where all the Log.final.out files are from STAR, return a pd.DataFrame with each sample (id) as its own column. @param glob_command: A string that will be passed to glob @param ids_function: A function (could be an anonymous function like a lambda) that specifies how to get the sample ID from the filename. Could also be a list of IDs, but they must be in the exact order as in the directories, which is why a function can be easier. Example: >>> glob_command = '/Users/olga/workspace-git/single_cell/analysis/mapping_stats/*.Log.final.out' >>> mapping_stats = self.log_final_out(glob_command, ... lambda x: '_'.join(x.split('/')[-1].split('_')[:2])) """ out_dir = out_dir.rstrip('/') out_dir = os.path.abspath(os.path.expanduser(out_dir)) if n_progress < 1: raise ValueError('"n_progress" must be 1 or greater') # Make the directory if it's not there already out_dir = os.path.abspath(os.path.expanduser(out_dir)) try: os.mkdir(out_dir) except OSError: pass series = [] filenames = iglob(glob_command) n_files = sum(1 for i in filenames) sys.stdout.write("Reading {} of STAR's *Log.final.out " "files ...\n".format(n_files)) # re-initialize iterator filenames = iglob(glob_command) for i, filename in enumerate(filenames): s = pd.read_table(filename, header=None, index_col=0, squeeze=True) s.index = s.index.map( lambda x: x.rstrip(' |').rstrip(':').rstrip().lstrip()) converted = [ self.maybe_convert_to_float(x.strip('%')) if type(x) != float else x for x in s ] sample_id = os.path.basename(filename).split('.')[0] series.append(pd.Series(converted, index=s.index, name=sample_id)) if (i + 1) % n_progress == 0: sys.stdout.write("\t{}/{} files read\n".format(i + 1, n_files)) sys.stdout.write("\tDone.\n") sys.stdout.write("Merging STAR outputs into a single dataframe...\n") mapping_stats = pd.concat(series, axis=1) sys.stdout.write("\tDone.\n") sys.stdout.write("Adding percentages of splicing events ...\n") # Turn all the number of splicing events into percentages for # statistical testing number_splicing_event_names = [ 'Number of splices: Annotated (sjdb)', 'Number of splices: GT/AG', 'Number of splices: GC/AG', 'Number of splices: AT/AC', 'Number of splices: Non-canonical' ] percent_splicing_event_names = [ x.replace('Number of', '%') for x in number_splicing_event_names ] total_splicing_events = mapping_stats.ix[ 'Number of splices: Total', :].replace(0, np.nan).values.astype(float) pieces = [] for num_events in zip(number_splicing_event_names): pieces.append(100.0 * mapping_stats.ix[num_events, :].values \ / total_splicing_events) pieces = [ np.reshape(piece, len(mapping_stats.columns)) for piece in pieces ] percent_splicing = pd.DataFrame(pieces, index=percent_splicing_event_names, columns=mapping_stats.columns) df = pd.concat((mapping_stats, percent_splicing)).T.sort_index() sys.stdout.write("\tDone.\n") csv = '{}/mapping_stats.csv'.format(out_dir) sys.stdout.write("Writing mapping stats ...\n") df.to_csv(csv) sys.stdout.write("\tWrote {}\n".format(csv))
def formatfiles(file_filter, skip_up_to_date):
    for f in glob.iglob(searchpath):
        if file_filter is None or file_filter in f:
            formatfile(f, nav, skip_up_to_date)
def main(): input_file = next(glob.iglob(os.path.join(GIRDER_WORKER_DIR, 'input.*'))) cmd = [FFMPEG, '-i', input_file] sys.stdout.write(' '.join(('RUN:', repr(cmd)))) sys.stdout.write('\n') sys.stdout.flush() proc = subprocess.Popen(cmd, stderr=subprocess.PIPE, stdout=subprocess.DEVNULL, universal_newlines=True) meta = {'audio': {}, 'video': {}} for line in proc.stderr: if RE_VIDEO_INFO.match(line) and not meta['video'].get('width'): for part in line.split(',')[1:]: if 'x' in part and not meta['video'].get('width'): meta['video']['width'] = int( part.split('x')[0].split()[-1].strip()) meta['video']['height'] = int( part.strip().split('x')[1].split()[0].strip()) elif ' fps' in part and not meta['video'].get('frameRate'): meta['video']['frameRate'] = float( part.split(' fps')[0].strip()) elif ' kb/s' in part and not meta['video'].get('bitRate'): meta['video']['bitRate'] = float( part.split(' kb/s')[0].strip()) if RE_AUDIO_INFO.match(line) and not meta['audio'].get('bitRate'): for part in line.split(',')[1:]: if ' kb/s' in part and not meta['audio'].get('bitRate'): meta['audio']['bitRate'] = float( part.split(' kb/s')[0].strip()) elif ' Hz' in part and not meta['audio'].get('sampleRate'): meta['audio']['sampleRate'] = (float( part.split(' Hz')[0].strip())) if RE_DURATION_INFO.match(line) and not meta.get('duration'): meta['duration'] = duration_parse( line.split("Duration:", 1)[1].split(",")[0]) proc.stderr.close() check_exit_code([proc.wait(), 0][1], cmd) cmd.extend(['-vcodec', 'copy', '-an', '-f', 'null', 'null']) sys.stdout.write(' '.join(('RUN:', repr(cmd)))) sys.stdout.write('\n') sys.stdout.flush() proc = subprocess.Popen(cmd, stderr=subprocess.PIPE, stdout=subprocess.DEVNULL, universal_newlines=True) calcdur = None calcframe = None for line in proc.stderr: if line.startswith('frame=') and ' time=' in line: calcframe = int(line.split("frame=")[1].split()[0]) calcdur = duration_parse(line.split(" time=")[1].split()[0]) fps = 1 if calcdur: meta['duration'] = calcdur meta['video']['frameCount'] = calcframe fps = float(calcframe) / calcdur meta['video']['frameRate'] = fps fps = int(fps + 0.5) proc.stderr.close() check_exit_code([proc.wait(), 0][1], cmd) # TODO(opadron): work out multiple quality versions (-crf 30). cmd = [ FFMPEG, '-i', input_file, '-t', '00:00:03', '-vf', 'scale=640x480', '-quality', 'good', '-speed', '0', '-c:v', 'libvpx-vp9', '-crf', '5', '-b:v', '100k', '-g', str(fps), '-c:a', 'libopus', os.path.join(GIRDER_WORKER_DIR, 'source.webm') ] sys.stdout.write(' '.join(('RUN:', repr(cmd)))) sys.stdout.write('\n') sys.stdout.flush() meta_dump = json.dumps(meta, indent=2) with open(os.path.join(GIRDER_WORKER_DIR, '.girder_progress'), 'w') as prog: proc = subprocess.Popen(cmd, stderr=subprocess.PIPE, universal_newlines=True) total = int(calcframe + len(meta_dump)) for line in proc.stderr: if calcdur: m = RE_PROGRESS_INFO.match(line) if m: progress_update = { 'message': 'transcoding video...', 'total': total, 'current': int(m.group(1).strip()) } json.dump(progress_update, prog) prog.flush() sys.stderr.write(line) sys.stderr.flush() proc.stderr.close() check_exit_code([proc.wait(), 0][1], cmd) progress_update = { 'message': 'writing metadata...', 'total': total, 'current': total } json.dump(progress_update, prog) prog.flush() with open(os.path.join(GIRDER_WORKER_DIR, 'meta.json'), 'w') as f: f.write(meta_dump)
def main(args): img_fn_array = [] if args.image: img_obj = {} img_obj["img"] = args.image img_obj["out"] = args.out img_fn_array.append(img_obj) if args.dir: normpath = os.path.normpath("/".join([args.dir, '**', '*'])) for img_fn in glob.iglob(normpath, recursive=True): if os.path.isfile(img_fn) and True in [ ext in img_fn for ext in [".nrrd", ".nii", ".nii.gz", ".mhd", ".dcm", ".DCM"] ]: img_obj = {} img_obj["img"] = img_fn img_obj["out"] = os.path.normpath("/".join([ args.out, os.path.splitext( os.path.splitext(os.path.basename(img_fn))[0])[0] ])) img_fn_array.append(img_obj) for img_obj in img_fn_array: image = img_obj["img"] out = img_obj["out"] print("Reading:", image) if not os.path.exists(out): os.makedirs(out) # else: # shutil.rmtree(out) # os.makedirs(out) ImageType = itk.Image[itk.US, 3] img_read = itk.ImageFileReader[ImageType].New(FileName=image) img_read.Update() img = img_read.GetOutput() label = itk.ConnectedComponentImageFilter[ImageType, ImageType].New() label.SetInput(img) label.Update() labelStatisticsImageFilter = itk.LabelStatisticsImageFilter[ ImageType, ImageType].New() labelStatisticsImageFilter.SetLabelInput(label.GetOutput()) labelStatisticsImageFilter.SetInput(img) labelStatisticsImageFilter.Update() NbreOfLabel = len(labelStatisticsImageFilter.GetValidLabelValues()) for i in range(1, NbreOfLabel): extractImageFilter = itk.ExtractImageFilter[ImageType, ImageType].New() extractImageFilter.SetExtractionRegion( labelStatisticsImageFilter.GetRegion(i)) extractImageFilter.SetInput(img) extractImageFilter.Update() extractImageFilter = extractImageFilter.GetOutput() vector = itk.Size[3] Size = vector() Size[0] = extractImageFilter.GetLargestPossibleRegion().GetSize( )[0] * 2 Size[1] = extractImageFilter.GetLargestPossibleRegion().GetSize( )[1] * 2 Size[2] = extractImageFilter.GetLargestPossibleRegion().GetSize( )[2] * 2 vector = itk.Vector[itk.D, 3] Spacing = vector() Spacing[0] = extractImageFilter.GetSpacing()[0] / 2.0 Spacing[1] = extractImageFilter.GetSpacing()[1] / 2.0 Spacing[2] = extractImageFilter.GetSpacing()[2] / 2.0 vector = itk.Index[3] Index = vector() Index[0] = extractImageFilter.GetLargestPossibleRegion().GetIndex( )[0] * 2 Index[1] = extractImageFilter.GetLargestPossibleRegion().GetIndex( )[1] * 2 Index[2] = extractImageFilter.GetLargestPossibleRegion().GetIndex( )[2] * 2 TransformType = itk.IdentityTransform[itk.D, 3].New() resample = itk.ResampleImageFilter[ImageType, ImageType].New() resample.SetInput(extractImageFilter) resample.SetSize(Size) resample.SetOutputSpacing(Spacing) resample.SetOutputDirection(extractImageFilter.GetDirection()) resample.SetOutputOrigin(extractImageFilter.GetOrigin()) resample.SetOutputStartIndex(Index) resample.SetTransform(TransformType) resample.UpdateOutputInformation() resample.Update() writer = itk.ImageFileWriter[ImageType].New() outputFilename = out + "/" + os.path.basename(out) + "_" + str( i) + ".nii.gz" print("Writing:", outputFilename) writer.SetFileName(outputFilename) writer.SetInput(resample.GetOutput()) writer.Update()
def run_script(self): config = self.get_config() #If we want debugMode, keep the debug logging, otherwise leave this at INFO level if 'debugMode' in config: debugMode = config['debugMode'].lower() if debugMode == "true" or debugMode == "t": logging.getLogger().setLevel(logging.DEBUG) logger.info("LookupWatcher begin run") headers = {'Authorization': 'Splunk %s' % config['session_key']} #Verify=false is hardcoded to workaround local SSL issues url = 'https://localhost:8089/services/shcluster/captain/info?output_mode=json' res = requests.get(url, headers=headers, verify=False) if (res.status_code == 503): logger.debug( "Non-shcluster / standalone instance, safe to run on this node" ) elif (res.status_code != requests.codes.ok): logger.fatal( "unable to determine if this is a search head cluster or not, this is a bug, URL=%s statuscode=%s reason=%s, response=\"%s\"" % (url, res.status_code, res.reason, res.text)) print( "Fatal error, unable to determine if this is a search head cluster or not, refer to the logs" ) sys.exit(-1) elif (res.status_code == 200): #We're in a search head cluster, but are we the captain? json_dict = json.loads(res.text) if json_dict[ 'origin'] != "https://localhost:8089/services/shcluster/captain/info": logger.info("Not on the captain, exiting now") return else: logger.info("On the captain node, running") #At this point we are either on the captain or a standalone server so safe to continue splunk_home = os.environ['SPLUNK_HOME'] ostype = platform.system() #Find the lookup directories app_lookup_dirlist = glob.iglob(splunk_home + '/etc/apps/**/lookups') for lookup_dir in app_lookup_dirlist: logger.debug("Working with directory=%s" % (lookup_dir)) files = [ f for f in os.listdir(lookup_dir) if os.path.isfile(lookup_dir + '/' + f) ] for afile in files: mtime = os.path.getmtime(lookup_dir + '/' + afile) size = os.path.getsize(lookup_dir + '/' + afile) #minor tweak to reduce the amount of information logged if ostype == "Windows": lookup_loc = lookup_dir[lookup_dir.find("/apps\\") + 1:] else: lookup_loc = lookup_dir[lookup_dir.find("/apps/") + 1:] logger.debug('dir=%s file=%s mtime=%s size=%s lookup_loc=%s' % (lookup_dir, afile, mtime, size, lookup_loc)) if ostype == "Windows": print('lookup=%s\\%s mtime=%s size=%s' % (lookup_loc, afile, mtime, size)) else: print('lookup=%s/%s mtime=%s size=%s' % (lookup_loc, afile, mtime, size)) user_lookup_dirlist = glob.iglob(splunk_home + '/etc/users/**/**/lookups') for lookup_dir in user_lookup_dirlist: logger.debug("Working with directory=%s" % (lookup_dir)) files = [ f for f in os.listdir(lookup_dir) if os.path.isfile(lookup_dir + '/' + f) ] for afile in files: mtime = os.path.getmtime(lookup_dir + '/' + afile) size = os.path.getsize(lookup_dir + '/' + afile) #minor tweak to reduce the amount of information logged if ostype == "Windows": lookup_loc = lookup_dir[lookup_dir.find("/users\\") + 1:] else: lookup_loc = lookup_dir[lookup_dir.find("/users/") + 1:] logger.debug('dir=%s file=%s mtime=%s size=%s lookup_loc=%s' % (lookup_dir, afile, mtime, size, lookup_loc)) if ostype == "Windows": print('lookup=%s\\%s mtime=%s size=%s' % (lookup_loc, afile, mtime, size)) else: print('lookup=%s/%s mtime=%s size=%s' % (lookup_loc, afile, mtime, size)) logger.info("LookupWatcher end run")
# The imports and the opening of image2feature() are reconstructed here; the
# original fragment begins inside the function, so the cv.imread() line is an
# assumption about how the image is loaded.
import glob
import cv2 as cv
import numpy as np
from skimage.feature import local_binary_pattern


def image2feature(filename):
    img = cv.imread(filename)
    img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    lbp = local_binary_pattern(img, 8 * 3, 3, method='uniform')
    x = np.unique(lbp.ravel(), return_counts=True)
    feature = x[1] / sum(x[1])
    return feature


def distance(x, y):
    return np.linalg.norm(x - y)


query = 'Faces/Test/15.png'
queryFeature = image2feature(query)
menorDistancia = 99999999
mostSimilar = ""
for filename in glob.iglob('Faces/Train/*.png'):
    databaseFeature = image2feature(filename)
    distancia = distance(queryFeature, databaseFeature)
    if distancia < menorDistancia:
        menorDistancia = distancia
        mostSimilar = filename

img = cv.imread(mostSimilar, 0)
query = cv.imread(query, 0)
cv.imshow("AOw", img)
cv.imshow("AO", query)
cv.waitKey(0)
cv.destroyAllWindows()
def mk_lst(glb): return map(lambda s: 'file:'+s, iglob(p + glb))
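# A minimal usage sketch of mk_lst() above. 'p' is assumed to be a module-level
# directory prefix ending in a path separator; the value below is illustrative.
# In Python 3, map() is lazy, so the result is wrapped in list() to materialise
# the 'file:' URLs.
from glob import iglob

p = '/var/spool/uploads/'
urls = list(mk_lst('*.csv'))
print(urls)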
def check_formatting(my_path): """ purpose: Iterate through every forecast file and metadatadata file and perform validation checks if haven't already. link: https://github.com/epiforecasts/covid19-forecast-hub-europe/wiki/Check-submission params: * my_path: string path to folder where forecasts are """ df = pd.read_csv('code/validation/validated_files.csv') previous_checked = list(df['file_path']) files_in_repository = [] output_errors = {} meta_output_errors = {} existing_metadata_name = collections.defaultdict(list) existing_metadata_abbr = collections.defaultdict(list) errors_exist = False # Keep track of errors metadata_validation_cache = {} # Iterate through processed csvs for path in glob.iglob(my_path + "**/**/", recursive=False): model_name = None model_abbr = None # Check metadata file is_metadata_error, metadata_error_output = check_for_metadata( path, cache=metadata_validation_cache) # Check metadata names and abbreviations for duplicates if not is_metadata_error: model_name, model_abbr = get_metadata_model(path) # Add checked model_name and model_abbr to list to keep track of duplicates if model_name is not None: existing_metadata_name[model_name].append(path) if model_abbr is not None: existing_metadata_abbr[model_abbr].append(path) # Output metadata errors if is_metadata_error: meta_output_errors[path] = metadata_error_output # Get filepath forecast_file_path = os.path.basename(os.path.dirname(path)) # Iterate through forecast files to validate format for filepath in glob.iglob(path + "*.csv", recursive=False): files_in_repository += [filepath] # Check if file has been edited since last checked if filepath not in previous_checked: # Validate forecast file name = forecast file path is_filename_error, filename_error_output = validate_forecast_file_name( filepath, forecast_file_path) # Validate forecast file formatting is_error, forecast_error_output = validate_forecast_file( filepath) # validate predictions if not is_error: is_error, forecast_error_output = validate_forecast_values( filepath) # Validate forecast file date = forecast_date column is_date_error, forecast_date_output = filename_match_forecast_date( filepath) # Add to previously checked files output_error_text = compile_output_errors( filepath, is_filename_error, filename_error_output, is_error, forecast_error_output, is_date_error, forecast_date_output) if output_error_text != []: output_errors[filepath] = output_error_text # Add validated file to locally_validated_files.csv if len(output_errors) == 0: current_time = datetime.now() df = df.append( { 'file_path': filepath, 'validation_date': current_time }, ignore_index=True) # Output duplicate model name or abbreviation metadata errors output_errors = output_duplicate_models(existing_metadata_abbr, output_errors) output_errors = output_duplicate_models(existing_metadata_name, output_errors) # Update the locally_validated_files.csv update_checked_files(df, previous_checked, files_in_repository) # Error if necessary and print to console print_output_errors(meta_output_errors, prefix='metadata') print_output_errors(output_errors, prefix='data') print('Using validation code v%g.' % metadata_version) if len(meta_output_errors) + len(output_errors) > 0: sys.exit("\n ERRORS FOUND EXITING BUILD...")
def scale_mri(subject_from, subject_to, scale, overwrite=False, subjects_dir=None, skip_fiducials=False, labels=True, annot=False): """Create a scaled copy of an MRI subject. Parameters ---------- subject_from : str Name of the subject providing the MRI. subject_to : str New subject name for which to save the scaled MRI. scale : float | array_like, shape = (3,) The scaling factor (one or 3 parameters). overwrite : bool If an MRI already exists for subject_to, overwrite it. subjects_dir : None | str Override the SUBJECTS_DIR environment variable. skip_fiducials : bool Do not scale the MRI fiducials. If False (default), an IOError will be raised if no fiducials file can be found. labels : bool Also scale all labels (default True). annot : bool Copy ``*.annot`` files to the new location (default False). See Also -------- scale_labels : add labels to a scaled MRI scale_source_space : add a source space to a scaled MRI """ subjects_dir = get_subjects_dir(subjects_dir, raise_error=True) paths = _find_mri_paths(subject_from, skip_fiducials, subjects_dir) scale = np.asarray(scale) # make sure we have an empty target directory dest = subject_dirname.format(subject=subject_to, subjects_dir=subjects_dir) if os.path.exists(dest): if overwrite: shutil.rmtree(dest) else: raise IOError("Subject directory for %s already exists: %r" % (subject_to, dest)) # create empty directory structure for dirname in paths['dirs']: dir_ = dirname.format(subject=subject_to, subjects_dir=subjects_dir) os.makedirs(dir_) # save MRI scaling parameters fname = os.path.join(dest, 'MRI scaling parameters.cfg') _write_mri_config(fname, subject_from, subject_to, scale) # surf files [in mm] for fname in paths['surf']: src = fname.format(subject=subject_from, subjects_dir=subjects_dir) src = os.path.realpath(src) dest = fname.format(subject=subject_to, subjects_dir=subjects_dir) pts, tri = read_surface(src) write_surface(dest, pts * scale, tri) # BEM files [in m] for bem_name in paths['bem']: scale_bem(subject_to, bem_name, subject_from, scale, subjects_dir) # fiducials [in m] for fname in paths['fid']: src = fname.format(subject=subject_from, subjects_dir=subjects_dir) src = os.path.realpath(src) pts, cframe = read_fiducials(src) for pt in pts: pt['r'] = pt['r'] * scale dest = fname.format(subject=subject_to, subjects_dir=subjects_dir) write_fiducials(dest, pts, cframe) # duplicate files for fname in paths['duplicate']: src = fname.format(subject=subject_from, subjects_dir=subjects_dir) dest = fname.format(subject=subject_to, subjects_dir=subjects_dir) shutil.copyfile(src, dest) # source spaces for fname in paths['src']: src_name = os.path.basename(fname) scale_source_space(subject_to, src_name, subject_from, scale, subjects_dir) # labels [in m] os.mkdir(os.path.join(subjects_dir, subject_to, 'label')) if labels: scale_labels(subject_to, subject_from=subject_from, scale=scale, subjects_dir=subjects_dir) # copy *.annot files (they don't contain scale-dependent information) if annot: src_pattern = os.path.join(subjects_dir, subject_from, 'label', '*.annot') dst_dir = os.path.join(subjects_dir, subject_to, 'label') for src_file in iglob(src_pattern): shutil.copy(src_file, dst_dir)
def main(args): logger = logging.getLogger(__name__) merge_cfg_from_file(args.cfg) cfg.NUM_GPUS = 1 args.weights = cache_url(args.weights, cfg.DOWNLOAD_CACHE) assert_and_infer_cfg(cache_urls=False) assert not cfg.MODEL.RPN_ONLY, \ 'RPN models are not supported' assert not cfg.TEST.PRECOMPUTED_PROPOSALS, \ 'Models that require precomputed proposals are not supported' model = infer_engine.initialize_model_from_cfg(args.weights) dummy_wider_dataset = wider_datasets.get_wider_dataset() INFER_BOX_ALPHA = 0.3 INFER_THRESH = 0.4 INFER_KP_THRESH = 2 if "model_iter" in args.weights: # MODEL_ITER = str(re.match(r"(.*)model_iter(.*)\.pkl", args.weights).group(2)) MODEL_ITER = str( re.match(r"(.*)model_iter(.*)\.pkl", args.weights).group(2)) else: MODEL_ITER = "180000" logger.info("Model Iter: {}".format(MODEL_ITER)) if args.test == "T": submit_mode = "test" elif args.test == "V": submit_mode = "val" elif args.test == "Tr": submit_mode = "train" elif args.test == "TN": submit_mode = "test_new" elif args.test == "OUT": submit_mode = "clip_out" elif args.test == "DEV": submit_mode = "val_dev" else: submit_mode = "default" submit_result = [] result_file_name = 'detectron_{}_result_{}_{}_' \ 'NMS_{}_SOFT_NMS_{}_RPN_NMS_THRESH_{}_PRE_NMS_{}_' \ 'POST_NMS_{}_BBOX_AUG_{}_' \ 'Thresh_{}_BoxNumber.txt'.format( submit_mode, args.model_name, MODEL_ITER, cfg.TEST.NMS, cfg.TEST.SOFT_NMS.ENABLED, cfg.TEST.RPN_NMS_THRESH, cfg.TEST.RPN_PRE_NMS_TOP_N, cfg.TEST.RPN_POST_NMS_TOP_N, cfg.TEST.BBOX_AUG.ENABLED, INFER_THRESH) if os.path.isdir(args.im_or_folder): im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext) else: im_list = [args.im_or_folder] for i, im_name in enumerate(im_list): out_name = os.path.join( args.output_dir, '{}'.format(os.path.basename(im_name) + '.pdf')) logger.info('Processing {} -> {}'.format(im_name, out_name)) im = cv2.imread(im_name) timers = defaultdict(Timer) t = time.time() with c2_utils.NamedCudaScope(0): cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all( model, im, None, timers=timers) logger.info('Inference time: {:.3f}s'.format(time.time() - t)) for k, v in timers.items(): logger.info(' | {}: {:.3f}s'.format(k, v.average_time)) if i == 0: logger.info( ' \ Note: inference on the first image will be slower than the ' 'rest (caches and auto-tuning need to warm up)') result = vis_utils.vis_one_image_bbox_2_class( im[:, :, ::-1], # BGR -> RGB for visualization im_name, args.output_dir, cls_boxes, cls_segms, cls_keyps, dataset=dummy_wider_dataset, box_alpha=INFER_BOX_ALPHA, show_class=False, thresh=INFER_THRESH, kp_thresh=INFER_KP_THRESH) if result: submit_result.extend(result) logger.info('Image {}.'.format(i)) # Write file with open(result_file_name, 'wb') as result_file: for item in submit_result: result_file.write("%s\n" % item) logger.info( 'The result file has been written in {}'.format(result_file_name))
def play_project(in_viewport): global scripts_mtime wrd = bpy.data.worlds['Arm'] # Store area if armutils.with_krom() and in_viewport and bpy.context.area != None and bpy.context.area.type == 'VIEW_3D': state.play_area = bpy.context.area state.target = runtime_to_target(in_viewport) # Build data build_project(is_play=True, in_viewport=in_viewport, target=state.target) state.in_viewport = in_viewport khajs_path = get_khajs_path(in_viewport, state.target) if wrd.arm_recompile or \ wrd.arm_recompile_trigger or \ not wrd.arm_cache_compiler or \ not wrd.arm_cache_shaders or \ not os.path.isfile(khajs_path) or \ state.last_target != state.target or \ state.last_in_viewport != state.in_viewport: wrd.arm_recompile = True wrd.arm_recompile_trigger = False state.last_target = state.target state.last_in_viewport = state.in_viewport # Trait sources modified script_path = armutils.get_fp() + '/Sources/' + wrd.arm_project_package if os.path.isdir(script_path): for fn in glob.iglob(os.path.join(script_path, '**', '*.hx'), recursive=True): mtime = os.path.getmtime(fn) if scripts_mtime < mtime: scripts_mtime = mtime wrd.arm_recompile = True # New compile required - traits or materials changed if wrd.arm_recompile or state.target == 'native': # Unable to live-patch, stop player if state.krom_running: bpy.ops.arm.space_stop('EXEC_DEFAULT') # play_project(in_viewport=True) # Restart return mode = 'play' if state.target == 'native': state.compileproc = compile_project(target_name='--run') elif state.target == 'krom': if in_viewport: mode = 'play_viewport' state.compileproc = compile_project(target_name='krom') else: # Electron, Browser w, h = armutils.get_render_resolution(armutils.get_active_scene()) write_data.write_electronjs(w, h) write_data.write_indexhtml(w, h) state.compileproc = compile_project(target_name='html5') threading.Timer(0.1, watch_compile, [mode]).start() else: # kha.js up to date compile_project(target_name=state.target, patch=True)
def _find_mri_paths(subject, skip_fiducials, subjects_dir): """Find all files of an mri relevant for source transformation. Parameters ---------- subject : str Name of the mri subject. skip_fiducials : bool Do not scale the MRI fiducials. If False, an IOError will be raised if no fiducials file can be found. subjects_dir : None | str Override the SUBJECTS_DIR environment variable (sys.environ['SUBJECTS_DIR']) Returns ------- paths : dict Dictionary whose keys are relevant file type names (str), and whose values are lists of paths. """ subjects_dir = get_subjects_dir(subjects_dir, raise_error=True) paths = {} # directories to create paths['dirs'] = [bem_dirname, surf_dirname] # surf/ files paths['surf'] = surf = [] surf_fname = os.path.join(surf_dirname, '{name}') surf_names = ('inflated', 'sphere', 'sphere.reg', 'white', 'orig', 'orig_avg', 'inflated_avg', 'inflated_pre', 'pial', 'pial_avg', 'smoothwm', 'white_avg', 'sphere.reg.avg') if os.getenv('_MNE_FEW_SURFACES', '') == 'true': # for testing surf_names = surf_names[:4] for surf_name in surf_names: for hemi in ('lh.', 'rh.'): name = hemi + surf_name path = surf_fname.format(subjects_dir=subjects_dir, subject=subject, name=name) if os.path.exists(path): surf.append(pformat(surf_fname, name=name)) # BEM files paths['bem'] = bem = [] path = head_bem_fname.format(subjects_dir=subjects_dir, subject=subject) if os.path.exists(path): bem.append('head') bem_pattern = pformat(bem_fname, subjects_dir=subjects_dir, subject=subject, name='*-bem') re_pattern = pformat(bem_fname, subjects_dir=subjects_dir, subject=subject, name='(.+)').replace('\\', '\\\\') for path in iglob(bem_pattern): match = re.match(re_pattern, path) name = match.group(1) bem.append(name) # fiducials if skip_fiducials: paths['fid'] = [] else: paths['fid'] = _find_fiducials_files(subject, subjects_dir) # check that we found at least one if len(paths['fid']) == 0: raise IOError("No fiducials file found for %s. The fiducials " "file should be named " "{subject}/bem/{subject}-fiducials.fif. In " "order to scale an MRI without fiducials set " "skip_fiducials=True." % subject) # duplicate curvature files paths['duplicate'] = dup = [] path = os.path.join(surf_dirname, '{name}') for name in ['lh.curv', 'rh.curv']: fname = pformat(path, name=name) dup.append(fname) # check presence of required files for ftype in ['surf', 'duplicate']: for fname in paths[ftype]: path = fname.format(subjects_dir=subjects_dir, subject=subject) path = os.path.realpath(path) if not os.path.exists(path): raise IOError("Required file not found: %r" % path) # find source space files paths['src'] = src = [] bem_dir = bem_dirname.format(subjects_dir=subjects_dir, subject=subject) fnames = fnmatch.filter(os.listdir(bem_dir), '*-src.fif') prefix = subject + '-' for fname in fnames: if fname.startswith(prefix): fname = "{subject}-%s" % fname[len(prefix):] path = os.path.join(bem_dirname, fname) src.append(path) return paths
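The docstring above describes the dictionary returned by _find_mri_paths; a short hedged sketch of inspecting that result follows. The subject name and subjects_dir are placeholders, and the keys listed are the ones the function populates.

# Hedged sketch: report how many entries _find_mri_paths found per file group.
# 'sample' and the subjects_dir path are placeholders.
paths = _find_mri_paths('sample', skip_fiducials=True,
                        subjects_dir='/path/to/subjects_dir')
for key in ('dirs', 'surf', 'bem', 'fid', 'duplicate', 'src'):
    print('%s: %d entries' % (key, len(paths[key])))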
import glob, os # Current directory current_dir = os.path.dirname(os.path.abspath(__file__)) # Absolute path to the data directory whose image paths are written into train.txt and test.txt path_data = '/home/ubuntu/only_cancers/data/' # Percentage of images to be used for the test set percentage_test = 30 # Create and/or truncate train.txt and test.txt file_train = open('/home/ubuntu/only_cancers/data/train.txt', 'w') file_test = open('/home/ubuntu/only_cancers/data/test.txt', 'w') # Populate train.txt and test.txt: every index_test-th image goes to the test set counter = 1 index_test = round(100 / percentage_test) for pathAndFilename in glob.iglob(os.path.join(current_dir, "*.jpg")): title, ext = os.path.splitext(os.path.basename(pathAndFilename)) if counter == index_test: counter = 1 file_test.write(path_data + title + '.jpg' + "\n") else: file_train.write(path_data + title + '.jpg' + "\n") counter = counter + 1
''' Run this script to remove EXIF metadata and clear the 'Corrupt EXIF data' warnings seen when training on ImageNet. If piexif raises an error on a file, do the following: 1. apt-get update 2. apt-get install imagemagick 3. convert the offending png file to jpg. Refer to https://discuss.pytorch.org/t/corrupt-exif-data-messages-when-training-imagenet/17313 ''' import glob import piexif nfiles = 0 for filename in glob.iglob('/home/osilab/dataset/ILSVRC2015/ILSVRC2015/Data/CLS-LOC/train/**/*.JPEG', recursive=True): nfiles += 1 print("About to process file %d, which is %s." % (nfiles, filename)) piexif.remove(filename)
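The note above recommends converting an offending png to jpg with imagemagick; as a hedged alternative sketch of that same step in Python (a swapped-in approach, not part of the original script), Pillow can re-encode the file in place. The path below is a placeholder.

# Hedged Pillow-based alternative to the imagemagick conversion suggested above;
# the path is a placeholder and this step is not part of the original script.
from PIL import Image

bad_file = '/path/to/offending_image.JPEG'  # placeholder: a PNG saved under a .JPEG name
Image.open(bad_file).convert('RGB').save(bad_file, 'JPEG')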
def create_charts(path): #read the latest file latest_file = sorted(glob.iglob(path + '/*'), key=os.path.getmtime)[-1] df_data = pd.DataFrame() #get date based on file name date = latest_file[len(path):-4] status_file = pd.ExcelFile(latest_file, sort=True) stat_file = pd.read_csv('processing/data/IPOstatus/stat.csv') #status data for sheet in status_file.sheet_names[1:]: data = status_file.parse(sheet, header=[2], index_col=0, skipfooter=1) new_columns = [ data.columns[i - 1] + "二" if data.columns[i].find("Unnamed") >= 0 else data.columns[i] for i in range(len(data.columns)) ] data.columns = new_columns data['date'] = date data['板块'] = sheet df_data = df_data.append(data, ignore_index=True) province = transform(df_data['注册地'].tolist())['省'] df_data['省'] = [x[:-1] if len(x) == 3 else x[0:2] for x in province.values] df_data.replace('', np.nan, inplace=True) df_data['省'].fillna(df_data['注册地'], inplace=True) # print(df_data['省'].value_counts().tolist()) # print(df_data['省'].value_counts().index.tolist()) #stat data #stat_file.drop(columns='waiting',inplace=True) #stat_file.rename(columns={"date": "日期", "total": "受理企业总数","passed":"已过会","queue":"待审企业","failed":"中止审查企业"},inplace = True) latest_stat = stat_file.iloc[-1] date_stat = stat_file['date'] total_stat = stat_file['total'] diff_stat = stat_file['total'] - stat_file['total'].shift(1) passed_stat = list(stat_file['passed']) queue_stat = list(stat_file['queue']) failed_stat = list(stat_file['failed']) ################################################################################## page = Page() style = Style(width=1100, height=600) value = df_data['省'].value_counts().tolist() attr = df_data['省'].value_counts().index.tolist() data = [(name, val) for (name, val) in zip(attr, value)] chart = Map("IPO申报企业分布图", "摸鱼科技", title_pos='center', **style.init_style) chart.add( "", attr, value, maptype='china', is_visualmap=True, is_label_show=True, visual_text_color='#000', tooltip_formatter=geo_formatter, # 重点在这里,将函数直接传递为参数。 label_emphasis_textsize=15, label_emphasis_pos='right', ) page.add(chart) # bar_diff = Bar("") bar_diff.add("受理企业总数", date_stat, total_stat) bar_diff.add("增长(减少)企业数", date_stat, diff_stat, legend_pos="15%") bar_stat = Bar("申报企业情况", "摸鱼科技") bar_stat.add("已过会", date_stat, passed_stat, is_stack=True) bar_stat.add("待审企业", date_stat, queue_stat, is_stack=True) bar_stat.add("中止审查企业", date_stat, failed_stat, is_stack=True, legend_pos="60%") chart = Grid(width=WIDTH) chart.add(bar_stat, grid_left="60%") chart.add(bar_diff, grid_right="60%") page.add(chart) # v1 = df_data['所属行业'].value_counts().tolist() attr = df_data['所属行业'].value_counts().index.tolist() pie = Pie("所属行业分布", "摸鱼科技", title_pos="center", **style.init_style) pie.add("", attr, v1, radius=[45, 55], center=[50, 50], legend_pos="85%", legend_orient='vertical') page.add(pie) # total_counts = df_data['板块'].count() chart = Pie('申报企业所占板块的比例', "申报企业总数: " + str(total_counts), title_pos='center', **style.init_style) for exchange, counts, position in zip(df_data['板块'].unique(), df_data['板块'].value_counts(), range(1, 4)): chart.add("", [exchange, ""], [counts, total_counts - counts], center=[25 * position, 30], radius=[28, 34], label_pos='center', is_label_show=True, label_text_color=None, legend_top="center") page.add(chart) # attr1 = [ attr.replace("(特殊普通合伙)", "").replace('(特殊普通合伙)', '').replace('(特殊普通合伙)', '') for attr in df_data['会计师事务所'].unique().tolist() ] attr2 = df_data['保荐机构'].unique().tolist() v1 = df_data['会计师事务所'].value_counts().tolist() v2 = 
df_data['保荐机构'].value_counts().tolist() #chart_accountants chart_accountants = Bar("会计师事务所 - 统计图", "摸鱼科技", title_pos="center", **style.init_style) chart_accountants.add("会计师事务所", attr1, v1, legend_pos="75%", mark_point=["max", "min"], is_datazoom_show=True, datazoom_range=[0, 40], datazoom_type='both', xaxis_interval=0, xaxis_rotate=30, yaxis_rotate=30) chart = Grid(width=WIDTH) chart.add(chart_accountants, grid_bottom="30%") page.add(chart) #chart_sponsor chart_sponsor = Bar("保荐机构 - 统计图", "摸鱼科技", title_pos="center", **style.init_style) chart_sponsor.add("保荐机构", attr2, v2, legend_pos="75%", mark_point=["max", "min"], is_datazoom_show=True, datazoom_range=[0, 40], datazoom_type='both', xaxis_interval=0, xaxis_rotate=30, yaxis_rotate=30, yaxis_margin=50) chart = Grid(width=WIDTH) chart.add(chart_sponsor, grid_bottom="30%") page.add(chart) return page
def FindScores(src_path, args): """Given a search path, find scores and return a DataFrame object. Args: src_path: Search path pattern. args: parsed arguments. Returns: A DataFrame object. """ # Get scores. scores = [] for score_filepath in glob.iglob(src_path): # Extract score descriptor fields from the path. (config_name, capture_name, render_name, echo_simulator_name, test_data_gen_name, test_data_gen_params, score_name) = _GetScoreDescriptors(score_filepath) # Ignore the score if required. if _ExcludeScore(config_name, capture_name, render_name, echo_simulator_name, test_data_gen_name, score_name, args): logging.info('ignored score: %s %s %s %s %s %s', config_name, capture_name, render_name, echo_simulator_name, test_data_gen_name, score_name) continue # Read metadata and score. metadata = data_access.Metadata.LoadAudioTestDataPaths( os.path.split(score_filepath)[0]) score = data_access.ScoreFile.Load(score_filepath) # Add a score with its descriptor fields. scores.append(( metadata['clean_capture_input_filepath'], metadata['echo_free_capture_filepath'], metadata['echo_filepath'], metadata['render_filepath'], metadata['capture_filepath'], metadata['apm_output_filepath'], metadata['apm_reference_filepath'], config_name, capture_name, render_name, echo_simulator_name, test_data_gen_name, test_data_gen_params, score_name, score, )) return pd.DataFrame(data=scores, columns=( 'clean_capture_input_filepath', 'echo_free_capture_filepath', 'echo_filepath', 'render_filepath', 'capture_filepath', 'apm_output_filepath', 'apm_reference_filepath', 'apm_config', 'capture', 'render', 'echo_simulator', 'test_data_gen', 'test_data_gen_params', 'eval_score_name', 'score', ))
def __init__(self, path, img_size=416, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, cache_images=False, single_cls=False, pad=0.0): try: path = str(Path(path)) # os-agnostic parent = str(Path(path).parent) + os.sep if os.path.isfile(path): # file with open(path, 'r') as f: f = f.read().splitlines() f = [ x.replace('./', parent) if x.startswith('./') else x for x in f ] # local to global path elif os.path.isdir(path): # folder f = glob.iglob(path + os.sep + '*.*') else: raise Exception('%s does not exist' % path) self.img_files = [ x.replace('/', os.sep) for x in f if os.path.splitext(x)[-1].lower() in img_formats ] except: raise Exception('Error loading data from %s. See %s' % (path, help_url)) n = len(self.img_files) assert n > 0, 'No images found in %s. See %s' % (path, help_url) bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index nb = bi[-1] + 1 # number of batches self.n = n # number of images self.batch = bi # batch index of image self.img_size = img_size self.augment = augment self.hyp = hyp self.image_weights = image_weights self.rect = False if image_weights else rect self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training) self.label_files = [re.sub(".png", ".txt", x) for x in self.img_files] # Read image shapes (wh) sp = path.replace('.txt', '') + '.shapes' # shapefile path try: with open(sp, 'r') as f: # read existing shapefile s = [x.split() for x in f.read().splitlines()] assert len(s) == n, 'Shapefile out of sync' except: s = [ exif_size(Image.open(f)) for f in tqdm(self.img_files, desc='Reading image shapes') ] np.savetxt(sp, s, fmt='%g') # overwrites existing (if any) self.shapes = np.array(s, dtype=np.float64) # Rectangular Training https://github.com/ultralytics/yolov3/issues/232 if self.rect: # Sort by aspect ratio s = self.shapes # wh ar = s[:, 1] / s[:, 0] # aspect ratio irect = ar.argsort() self.img_files = [self.img_files[i] for i in irect] self.label_files = [self.label_files[i] for i in irect] self.shapes = s[irect] # wh ar = ar[irect] # Set training image shapes shapes = [[1, 1]] * nb for i in range(nb): ari = ar[bi == i] mini, maxi = ari.min(), ari.max() if maxi < 1: shapes[i] = [maxi, 1] elif mini > 1: shapes[i] = [1, 1 / mini] self.batch_shapes = np.ceil( np.array(shapes) * img_size / 32. 
+ pad).astype(np.int) * 32 # Cache labels self.imgs = [None] * n self.labels = [np.zeros((0, 5), dtype=np.float32)] * n create_datasubset, extract_bounding_boxes, labels_loaded = False, False, False nm, nf, ne, ns, nd = 0, 0, 0, 0, 0 # number missing, found, empty, datasubset, duplicate np_labels_path = str(Path( self.label_files[0]).parent) + '.npy' # saved labels in *.npy file if os.path.isfile(np_labels_path): s = np_labels_path # print string x = np.load(np_labels_path, allow_pickle=True) if len(x) == n: self.labels = x labels_loaded = True else: s = path.replace('images', 'labels') pbar = tqdm(self.label_files) for i, file in enumerate(pbar): if labels_loaded: l = self.labels[i] # np.savetxt(file, l, '%g') # save *.txt from *.npy file else: try: with open(file, 'r') as f: l = np.array( [x.split(",") for x in f.read().splitlines()], dtype=np.float32) except: nm += 1 # print('missing labels for image %s' % self.img_files[i]) # file missing continue if l.shape[0]: assert l.shape[1] == 5, '> 5 label columns: %s' % file assert (l >= 0).all(), 'negative labels: %s' % file assert (l[:, 1:] <= 1).all( ), 'non-normalized or out of bounds coordinate labels: %s' % file if np.unique(l, axis=0).shape[0] < l.shape[0]: # duplicate rows nd += 1 # print('WARNING: duplicate rows in %s' % self.label_files[i]) # duplicate rows if single_cls: l[:, 0] = 0 # force dataset into single-class mode self.labels[i] = l nf += 1 # file found # Create subdataset (a smaller dataset) if create_datasubset and ns < 1E4: if ns == 0: create_folder(path='./datasubset') os.makedirs('./datasubset/images') exclude_classes = 43 if exclude_classes not in l[:, 0]: ns += 1 # shutil.copy(src=self.img_files[i], dst='./datasubset/images/') # copy image with open('./datasubset/images.txt', 'a') as f: f.write(self.img_files[i] + '\n') # Extract object detection boxes for a second stage classifier if extract_bounding_boxes: p = Path(self.img_files[i]) img = cv2.imread(str(p)) h, w = img.shape[:2] for j, x in enumerate(l): f = '%s%sclassifier%s%g_%g_%s' % ( p.parent.parent, os.sep, os.sep, x[0], j, p.name) if not os.path.exists(Path(f).parent): os.makedirs( Path(f).parent) # make new output folder b = x[1:] * [w, h, w, h] # box b[2:] = b[2:].max() # rectangle to square b[2:] = b[2:] * 1.3 + 30 # pad b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int) b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image b[[1, 3]] = np.clip(b[[1, 3]], 0, h) assert cv2.imwrite(f, img[ b[1]:b[3], b[0]:b[2]]), 'Failure extracting classifier boxes' else: ne += 1 # print('empty labels for image %s' % self.img_files[i]) # file empty # os.system("rm '%s' '%s'" % (self.img_files[i], self.label_files[i])) # remove pbar.desc = 'Caching labels %s (%g found, %g missing, %g empty, %g duplicate, for %g images)' % ( s, nf, nm, ne, nd, n) assert nf > 0 or n == 20288, 'No labels found in %s. 
See %s' % ( os.path.dirname(file) + os.sep, help_url) if not labels_loaded and n > 1000: print('Saving labels to %s for faster future loading' % np_labels_path) np.save(np_labels_path, self.labels) # save for next time # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM) if cache_images: # if training gb = 0 # Gigabytes of cached images pbar = tqdm(range(len(self.img_files)), desc='Caching images') self.img_hw0, self.img_hw = [None] * n, [None] * n for i in pbar: # max 10k images self.imgs[i], self.img_hw0[i], self.img_hw[i] = load_image( self, i) # img, hw_original, hw_resized gb += self.imgs[i].nbytes pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9) # Detect corrupted images https://medium.com/joelthchao/programmatically-detect-corrupted-image-8c1b2006c3d3 detect_corrupted_images = False if detect_corrupted_images: from skimage import io # conda install -c conda-forge scikit-image for file in tqdm(self.img_files, desc='Detecting corrupted images'): try: _ = io.imread(file) except: print('Corrupted image detected: %s' % file)
def main(args): if len(args) < 2: print "Usage: %s <project_directory> [sdk_verison]" % os.path.basename( args[0]) sys.exit(1) # What needs to be done in order to perform a "true" export? # --- # Wipe the build dir # Migrate resources # Migrate tiapp.xml (required for scripts) # Generate project from template # Populate Info.plist # Compile/migrate i18n # Migrate scripts for compiling JSS files (and i18n) # Modify xcodeproj build steps to call the JSS compiler # Then... Share and Enjoy. project_dir = os.path.abspath(args[1]) build_dir = os.path.join(project_dir, 'build', 'iphone') titanium_local = os.path.join(build_dir, 'titanium') if len(args) == 3: version = args[2] sdk_dir = find_sdk(version) else: sdk_dir = os.path.abspath(os.path.dirname(template_dir)) version = os.path.basename(sdk_dir) tiappxml = os.path.join(project_dir, 'tiapp.xml') tiapp = TiAppXML(tiappxml) app_id = tiapp.properties['id'] app_name = tiapp.properties['name'] if app_id is None or app_name is None: info("Your tiapp.xml is malformed - please specify an app name and id") sys.exit(1) # Clean build dir (if it exists), error otherwise (no iphone support) info("Cleaning build...") if os.path.exists(build_dir): for f in os.listdir(build_dir): path = os.path.join(build_dir, f) if os.path.isfile(path): os.remove(path) else: shutil.rmtree(path) else: info("Your project is not configured to be built for iphone.") exit(1) # Migrate Resources info("Migrating resources...") project_resources = os.path.join(project_dir, 'Resources') resources_dir = os.path.join(build_dir, 'Resources') shutil.copytree(project_resources, resources_dir) # Migrate platform/iphone contents into Resources. info("Migrating platform/iphone to Resources...") project_platform = os.path.join(project_dir, 'platform', 'iphone') if os.path.isdir(project_platform): contents = os.listdir(project_platform) for file in contents: path = os.path.join(project_platform, file) if os.path.isdir(path): shutil.copytree(path, os.path.join(resources_dir, file)) else: shutil.copy(path, os.path.join(resources_dir, file)) # Migrate tiapp.xml info("Migrating tiapp.xml...") shutil.copy(tiappxml, build_dir) ti = TiAppXML(tiappxml) # target the requested value if provided min_ver = 4.0 if 'min-ios-ver' in ti.ios: min_ver = float(ti.ios['min-ios-ver']) if min_ver < 4.0: print "[INFO] Minimum iOS version %s is lower than 4.0: Using 4.0 as minimum" % min_ver min_ver = 4.0 # Generate project stuff from the template info("Generating project from Titanium template...") project = Projector(app_name, version, template_dir, project_dir, app_id, min_ver) project.create(template_dir, build_dir) # Because the debugger.plist is built as part of the required # resources, we need to autogen an empty one debug_plist = os.path.join(resources_dir, 'debugger.plist') force_xcode = write_debugger_plist(None, None, None, None, template_dir, debug_plist) # Populate Info.plist applogo = None info("Populating Info.plist...") plist_out = os.path.join(build_dir, 'Info.plist') create_info_plist(tiapp, template_dir, project_dir, plist_out) applogo = tiapp.generate_infoplist(plist_out, app_id, 'iphone', project_dir, None) # Run the compiler to autogenerate .m files info("Copying classes, creating generated .m files...") compiler = Compiler(project_dir, app_id, app_name, 'export') compiler.compileProject(silent=True) #... But we still have to nuke the stuff that gets built that we don't want # to bundle. 
ios_build = os.path.join(build_dir, 'build') if os.path.isdir(ios_build): shutil.rmtree(os.path.join(build_dir, 'build')) # Install applogo/splash/etc. info("Copying icons and splash...") install_logo(tiapp, applogo, project_dir, template_dir, resources_dir) install_defaults(project_dir, template_dir, resources_dir) # Get Modules detector = ModuleDetector(project_dir) missing_modules, modules = detector.find_app_modules( tiapp, 'iphone', 'development') if len(missing_modules) != 0: for module in missing_modules: info("MISSING MODULE: %s ... Project will not build correctly" % module['id']) info("Terminating export: Please fix your modules.") sys.exit(1) module_search_path, module_asset_dirs = locate_modules( modules, project_dir, resources_dir, info) lib_dir = os.path.join(build_dir, 'lib') if not os.path.exists(lib_dir): os.makedirs(lib_dir) if len(module_search_path) > 0: info("Copying modules...") for module in module_search_path: module_name, module_path = module info("\t%s..." % module_name) shutil.copy(os.path.join(module_path, module_name), lib_dir) module[1] = os.path.join(lib_dir, module_name) info("Copying module metadata...") metadata_dir = os.path.join(build_dir, 'metadata') for module in modules: module_metadata = os.path.join(module.path, 'metadata.json') if os.path.exists(module_metadata): if not os.path.exists(metadata_dir): os.makedirs(metadata_dir) target = os.path.join(metadata_dir, "%s.json" % module.manifest.moduleid) shutil.copyfile(module_metadata, target) # Note: The module link information has to be added to # the xcodeproj after it's created. # We also have to mod the module_search_path to reference # the local 'lib' directory instead of the original # module install location info("Linking modules...") local_modules = [] for module in module_search_path: name = module[0] newpath = os.path.join('lib', name) local_modules.append([name, newpath]) link_modules(local_modules, app_name, build_dir, relative=True) # Copy libraries info("Copying libraries...") iphone_dir = os.path.join(sdk_dir, 'iphone') for lib in glob.iglob(os.path.join(iphone_dir, 'lib*')): info("\t%s..." % lib) shutil.copy(lib, lib_dir) # Process i18n files info("Processing i18n...") locale_compiler = LocaleCompiler(app_name, project_dir, 'ios', 'development', resources_dir) locale_compiler.compile() # Migrate compile scripts info("Copying custom Titanium compiler scripts...") shutil.copytree(os.path.join(sdk_dir, 'common'), titanium_local) shutil.copy(os.path.join(sdk_dir, 'tiapp.py'), titanium_local) iphone_script_dir = os.path.join(titanium_local, 'iphone') os.mkdir(iphone_script_dir) shutil.copy(os.path.join(sdk_dir, 'iphone', 'compiler.py'), iphone_script_dir) shutil.copy(os.path.join(sdk_dir, 'iphone', 'tools.py'), iphone_script_dir) shutil.copy(os.path.join(sdk_dir, 'iphone', 'run.py'), iphone_script_dir) shutil.copy(os.path.join(sdk_dir, 'iphone', 'csspacker.py'), iphone_script_dir) shutil.copy(os.path.join(sdk_dir, 'iphone', 'jspacker.py'), iphone_script_dir) shutil.copy(os.path.join(sdk_dir, 'iphone', 'titanium_prep'), iphone_script_dir) # Add compilation to the build script in project info("Modifying pre-compile stage...") xcodeproj = os.path.join(build_dir, '%s.xcodeproj' % app_name, 'project.pbxproj') contents = codecs.open(xcodeproj, 'r', encoding='utf-8').read() css_compiler = os.path.join('titanium', 'css', 'csscompiler.py') ti_compiler = os.path.join('titanium', 'iphone', 'compiler.py') script = """%s . ios Resources %s . 
export-build $TARGETED_DEVICE_FAMILY $SDKROOT %s""" % ( css_compiler, ti_compiler, version) contents = fix_xcode_script(contents, "Pre-Compile", script) # write our new project f = codecs.open(xcodeproj, 'w', encoding='utf-8') f.write(contents) f.close() info("Finished! Share and Enjoy.")
import numpy as np import matplotlib.pyplot as plt import pdb import glob #Plots the power spectrum of these things fig, ax = plt.subplots() #linear, z = 0 for filename in glob.iglob('/Users/penafiel/JPL/CCL-master/data_files/*.dat'): data = np.loadtxt('%s' %filename, skiprows=1) k = data[:,0] P_lin = data[:,1] ax.plot(k,P_lin) ax.set_xlabel('$k$') ax.set_ylabel('$P(k)$') ax.set_title('Matter power spectrum P(k) at redshift z = 0, linear, CCL') plt.xscale('log') plt.yscale('log') fig.savefig('ccl_Pk_z0_lin.png', format='png') print 'Done with 1' plt.clf() fig,ax = plt.subplots() #nonlin, z = 0 for filename in glob.iglob('/Users/penafiel/JPL/CCL-master/data_files/*.dat'): data = np.loadtxt('%s' %filename, skiprows=1) k = data[:,0] P_nl = data[:,2] ax.plot(k,P_nl)
def __test_replays(BASE_DIR): import logging ROOT_DIR = os.path.dirname(BASE_DIR) OUTPUT_DIR = os.path.join(ROOT_DIR, 'output') logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger(__name__) MOVE_WORKING = True DEBUGGING = True success = 0 failure = 0 create_dir(OUTPUT_DIR) for filepath in glob.iglob(ROOT_DIR + '/**/*.replay', recursive=True): logger.info('decompiling %s', filepath) base_file = os.path.basename(filepath) json_path = os.path.join( OUTPUT_DIR, 'replays/decompiled/{}'.format( base_file.replace(".replay", ".json"))) proto_path = os.path.join( OUTPUT_DIR, 'replays/protos/{}'.format(base_file.replace(".replay", ".pts"))) pandas_path = os.path.join( OUTPUT_DIR, 'replays/pandas/{}'.format(base_file.replace(".replay", ".gzip"))) create_dir(os.path.dirname(json_path)) create_dir(os.path.dirname(proto_path)) create_dir(os.path.dirname(pandas_path)) if DEBUGGING: try: analysis_manager = analyze_replay_file( filepath, json_path, sanity_check=SanityChecker(), controls=ControlsCreator()) with open(os.path.join(OUTPUT_DIR, 'game.json'), 'w') as f: f.write(MessageToJson(analysis_manager.protobuf_game)) except subprocess.CalledProcessError as e: traceback.print_exc() else: try: analysis_manager = analyze_replay_file(filepath, json_path) with open(proto_path, 'wb') as fo: analysis_manager.write_proto_out_to_file(fo) with gzip.open(pandas_path, 'wb') as fo: analysis_manager.write_pandas_out_to_file(fo) if MOVE_WORKING: shutil.move(filepath, os.path.join('replays', 'working', filepath)) success += 1 except Exception as e: traceback.print_exc() failure += 1 if not DEBUGGING: if float(success + failure) == 0: print("NO REPLAYS WERE RUN.") print("Need files in: " + BASE_DIR) ratio = success / float(success + failure) print('success ratio:', ratio)
def load_synthetic(root, valid=0.25, test=0.25, train_patients=None, cells=None): try: with open(root + "/Xall.pkl", "rb") as f: Xall = pickle.load(f) with open(root + "/state.pkl", "rb") as f: state = pickle.load(f) except FileNotFoundError: Xall = [] state = [] for dirname in sorted(glob.iglob(root + "/*")): if os.path.isdir(dirname) and os.path.basename(dirname) != "Test": if os.path.basename(dirname) == "regression": X = [] for filename in tqdm.tqdm( sorted(glob.iglob(dirname + "/*.npz"))): x = load_counts(filename) value_filename = os.path.join( os.path.dirname(filename), "value_" + os.path.splitext(os.path.basename(filename))[0] + ".npy") y = np.load(value_filename).item() X.append((x[0], y, x[1])) state.append(os.path.basename(dirname)) Xall.append(X) else: X = [] for filename in tqdm.tqdm( sorted(glob.iglob(dirname + "/*.npz"))): X.append(load_counts(filename)) X = list(map(lambda x: (x[0], len(state), x[1]), X)) state.append(os.path.basename(dirname)) Xall.append(X) with open(root + "/Xall.pkl", "wb") as f: pickle.dump(Xall, f) with open(root + "/state.pkl", "wb") as f: pickle.dump(state, f) for X in Xall: random.shuffle(X) if isinstance(valid, float) and isinstance(test, float): Xvalid = [y for x in Xall for y in x[:round(len(x) * valid)]] Xtest = [ y for x in Xall for y in x[round(len(x) * valid):(round(len(x) * valid) + round(len(x) * test))] ] Xtrain = [ y for x in Xall for y in x[(round(len(x) * valid) + round(len(x) * test)):] ] elif isinstance(valid, int) and isinstance(test, int): Xvalid = [y for x in Xall for y in x[:valid]] Xtest = [y for x in Xall for y in x[valid:(valid + test)]] Xtrain = [y for x in Xall for y in x[(valid + test):]] else: raise TypeError() random.shuffle(Xtrain) random.shuffle(Xvalid) random.shuffle(Xtest) if train_patients is not None: Xtrain = Xtrain[:train_patients] if cells is not None: for i in range(len(Xtrain)): if Xtrain[i][0].shape[0] > cells: Xtrain[i] = list(Xtrain[i]) ind = np.random.choice(Xtrain[i][0].shape[0], cells, replace=False) Xtrain[i][0] = Xtrain[i][0][ind, :] Xtrain[i][2] = Xtrain[i][2][ind] Xtrain[i] = tuple(Xtrain[i]) for i in range(len(Xvalid)): if Xvalid[i][0].shape[0] > cells: Xvalid[i] = list(Xvalid[i]) ind = np.random.choice(Xvalid[i][0].shape[0], cells, replace=False) Xvalid[i][0] = Xvalid[i][0][ind, :] Xvalid[i][2] = Xvalid[i][2][ind] Xvalid[i] = tuple(Xvalid[i]) for i in range(len(Xtest)): if Xtest[i][0].shape[0] > cells: Xtest[i] = list(Xtest[i]) ind = np.random.choice(Xtest[i][0].shape[0], cells, replace=False) Xtest[i][0] = Xtest[i][0][ind, :] Xtest[i][2] = Xtest[i][2][ind] Xtest[i] = tuple(Xtest[i]) return Xtrain, Xvalid, Xtest, state
import xml.etree.ElementTree as ET import glob from pathlib import Path import os # scale all image dimensions and bounding-box coordinates down by this factor SCALE_FACTOR = 8 if __name__ == "__main__": path = os.path.dirname(os.path.abspath(__file__)) xml_folder = path + '/source_image_annotation/27092018_pallet_moving_L_annotations/' xml_folder_new = path + '/source_image_annotation/27092018_pallet_moving_L_annotations_scaled_darkflow/' for filepath in glob.iglob(xml_folder + '*.xml'): filename = filepath.split('/')[-1] tree = ET.parse(xml_folder + filename) root = tree.getroot() # use integer division so the scaled annotation values stay integers for elem in root.iter('width'): elem.text = str(int(elem.text) // SCALE_FACTOR) for elem in root.iter('height'): elem.text = str(int(elem.text) // SCALE_FACTOR) for elem in root.iter('xmin'): elem.text = str(int(elem.text) // SCALE_FACTOR) for elem in root.iter('ymin'):