def _InstrumentExecutables(self):
    build_dir = self._build_dir
    work_dir = self._work_dir
    _LOGGER.info('Build dir "%s".', build_dir)

    # Make a copy of all unittest executables, DLLs, PDBs and test_data in
    # the build directory.
    for pattern in ('*_unittests.exe', '*.dll', '*.pdb', 'test_data'):
        files = glob.glob(os.path.join(build_dir, pattern))
        for path in files:
            _LOGGER.info('Copying "%s" to "%s".', path, work_dir)
            if os.path.isdir(path):
                # If the source file is a directory, do a recursive copy.
                dst = os.path.join(work_dir, os.path.basename(path))
                shutil.copytree(path, dst)
            else:
                shutil.copy(path, work_dir)

    # Instrument all EXEs in the work dir.
    for exe in glob.glob(os.path.join(work_dir, '*.exe')):
        self._InstrumentOneFile(exe)

    # And the DLLs we've specified.
    for dll in _DLLS_TO_INSTRUMENT:
        self._InstrumentOneFile(os.path.join(work_dir, dll))
def get_cached_file(self, file_path, file_glob, required=False, use_first=False, **addl_args):
    if file_glob is None:
        return None

    if hasattr(file_glob, '__iter__'):
        found_files = []
        for curr_glob in file_glob:
            curr_files = glob.glob(os.path.join(file_path, curr_glob))
            if len(curr_files) > 0:
                found_files = curr_files
                break
    else:
        found_files = glob.glob(os.path.join(file_path, file_glob))

    if len(found_files) == 0:
        if required:
            raise OSError('Could not find at path: "%s" any files matching glob: "%s"' % (file_path, file_glob))
        else:
            return None
    elif len(found_files) > 1 and not use_first:
        raise OSError('Found too many files at path: "%s" with glob: "%s", found: %s' % (file_path, file_glob, found_files))

    if found_files[0] in self.file_cache:
        file_obj = self.file_cache[found_files[0]]
    else:
        file_obj = OCO_Matrix(found_files[0], **addl_args)
        self.file_cache[found_files[0]] = file_obj

    return file_obj
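# A minimal standalone sketch (not part of the original class) of the
# multi-glob fallback used in get_cached_file above: when an iterable of
# patterns is given, the first pattern that matches anything wins. The
# helper name `first_matching_glob` is hypothetical.
import glob
import os


def first_matching_glob(file_path, patterns):
    """Return the matches of the first pattern that hits, or an empty list."""
    for pattern in patterns:
        matches = glob.glob(os.path.join(file_path, pattern))
        if matches:
            return matches
    return []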
def test_onset_functions():
    # Load in all files in the same order
    ref_files = sorted(glob.glob(REF_GLOB))
    est_files = sorted(glob.glob(EST_GLOB))
    sco_files = sorted(glob.glob(SCORES_GLOB))

    assert len(ref_files) == len(est_files) == len(sco_files) > 0

    # Unit tests
    for metric in [mir_eval.onset.f_measure]:
        yield (__unit_test_onset_function, metric)
    # Regression tests
    for ref_f, est_f, sco_f in zip(ref_files, est_files, sco_files):
        with open(sco_f, 'r') as f:
            expected_scores = json.load(f)
        # Load in an example onset annotation
        reference_onsets = mir_eval.io.load_events(ref_f)
        # Load in an example onset tracker output
        estimated_onsets = mir_eval.io.load_events(est_f)
        # Compute scores
        scores = mir_eval.onset.evaluate(reference_onsets, estimated_onsets)
        # Compare them
        for metric in scores:
            # This is a simple hack to make nosetest's messages more useful
            yield (__check_score, sco_f, metric, scores[metric],
                   expected_scores[metric])
def test_stage_package_gets_cached(self):
    self.run_snapcraft(["pull", "oneflatwithstagepackages"], "dump")

    # Verify the 'hello' deb package was cached.
    cache_dir = os.path.join(
        xdg.BaseDirectory.xdg_cache_home, "snapcraft", "stage-packages", "apt"
    )
    archive_dir = os.path.join("var", "cache", "apt", "archives")
    cached = glob.glob(os.path.join(cache_dir, "*", archive_dir, "hello*"))
    self.assertThat(cached, HasLength(1))
    cached_deb = cached[0]

    staged = glob.glob(
        os.path.join(
            "parts", "oneflatwithstagepackages", "ubuntu", "download", "hello*"
        )
    )
    self.assertThat(staged, HasLength(1))
    staged_deb = staged[0]

    # Verify that the staged and cached debs are the same file (hard
    # linked) by comparing inodes.
    cached_deb_inode = os.stat(cached_deb).st_ino
    self.assertThat(cached_deb_inode, Equals(os.stat(staged_deb).st_ino))

    # Now clean the part and pull again.
    self.run_snapcraft("clean", "dump")
    self.run_snapcraft(["pull", "oneflatwithstagepackages"], "dump")

    # Verify that the staged deb is _still_ the same one from the cache.
    self.assertThat(cached_deb_inode, Equals(os.stat(staged_deb).st_ino))
def annotate(self, conf, subject):
    subject = dict(subject)

    lib_jars = list()
    for lib_dir in subject['lib_dirs']:
        lib_jars += glob.glob(os.path.join(lib_dir, '*.jar'))
        lib_jars += glob.glob(os.path.join(lib_dir, '*.aar'))
    subject['lib_jars'] = lib_jars

    subject['soot_classpath'] = ':'.join((
        ':'.join(subject['class_dirs']),
        ':'.join(subject['lib_jars']),
        subject['classpath'],
    ))

    targets = list()
    if 'target' in subject:
        targets.append(subject['target'])
    else:
        targets.extend(subject['targets'])

    subject['jpdg_cmd'] = [
        'java',
        '-Xmx8g',
        '-jar', self.jpdg_jar,
        '-c', subject['soot_classpath'],
        '-l', 'op',
    ]
    for t in targets:
        subject['jpdg_cmd'] += ['-d', t]
    for ex_dir in subject['exclude_pkgs']:
        subject['jpdg_cmd'] += ['-e', ex_dir]

    return subject
def main(args):
    # Default invocation will verify the golden files are unchanged.
    failed = 0
    if not args:
        args = ['--wnone', '--diff', '--test', '--dstroot=.']

    ParseOptions(args)

    idldir = os.path.split(sys.argv[0])[0]
    idldir = os.path.join(idldir, 'test_cgen', '*.idl')
    filenames = glob.glob(idldir)
    ast = ParseFiles(filenames)
    if hgen.GenerateRelease(ast, 'M14', {}):
        print "Golden file for M14 failed."
        failed = 1
    else:
        print "Golden file for M14 passed."

    idldir = os.path.split(sys.argv[0])[0]
    idldir = os.path.join(idldir, 'test_cgen_range', '*.idl')
    filenames = glob.glob(idldir)
    ast = ParseFiles(filenames)
    if hgen.GenerateRange(ast, ['M13', 'M14', 'M15', 'M16', 'M17'], {}):
        print "Golden file for M13-M17 failed."
        failed = 1
    else:
        print "Golden file for M13-M17 passed."

    return failed
def _file_configs_paths(osname, agentConfig):
    """ Retrieve all the file configs and return their paths
    """
    try:
        confd_path = get_confd_path(osname)
        all_file_configs = glob.glob(os.path.join(confd_path, '*.yaml'))
        all_default_configs = glob.glob(os.path.join(confd_path, '*.yaml.default'))
    except PathNotFound as e:
        log.error("No conf.d folder found at '%s' or in the directory where the Agent is currently deployed.\n" % e.args[0])
        sys.exit(3)

    if all_default_configs:
        current_configs = set([_conf_path_to_check_name(conf) for conf in all_file_configs])
        for default_config in all_default_configs:
            if not _conf_path_to_check_name(default_config) in current_configs:
                all_file_configs.append(default_config)

    # Compatibility code for the Nagios checks if it's still configured
    # in datadog.conf
    # FIXME: 6.x, should be removed
    if not any('nagios' in config for config in itertools.chain(*all_file_configs)):
        # check if it's configured in datadog.conf the old way
        if any([nagios_key in agentConfig for nagios_key in NAGIOS_OLD_CONF_KEYS]):
            all_file_configs.append('deprecated/nagios')

    return all_file_configs
def get_batches_fn(batch_size):
    """
    Create batches of training data
    :param batch_size: Batch Size
    :return: Batches of training data
    """
    image_paths = glob(os.path.join(data_folder, 'image_2', '*.png'))
    label_paths = {
        re.sub(r'_(lane|road)_', '_', os.path.basename(path)): path
        for path in glob(os.path.join(data_folder, 'gt_image_2', '*_road_*.png'))}
    background_color = np.array([255, 0, 0])

    random.shuffle(image_paths)
    for batch_i in range(0, len(image_paths), batch_size):
        images = []
        gt_images = []
        for image_file in image_paths[batch_i:batch_i + batch_size]:
            gt_image_file = label_paths[os.path.basename(image_file)]

            image = scipy.misc.imresize(scipy.misc.imread(image_file), image_shape)
            gt_image = scipy.misc.imresize(scipy.misc.imread(gt_image_file), image_shape)

            gt_bg = np.all(gt_image == background_color, axis=2)
            gt_bg = gt_bg.reshape(*gt_bg.shape, 1)
            gt_image = np.concatenate((gt_bg, np.invert(gt_bg)), axis=2)

            images.append(image)
            gt_images.append(gt_image)

        yield np.array(images), np.array(gt_images)
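# A small standalone illustration (filenames are made-up examples) of the
# label_paths lookup built above: ground-truth files such as
# 'um_road_000000.png' are keyed by the corresponding camera image name,
# so os.path.basename(image_file) can be used directly as the dictionary key.
import re

gt_names = ['um_road_000000.png', 'umm_lane_000042.png']
label_keys = {re.sub(r'_(lane|road)_', '_', name): name for name in gt_names}
# label_keys == {'um_000000.png': 'um_road_000000.png',
#                'umm_000042.png': 'umm_lane_000042.png'}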
def process_all_input(self, projectname, path, queue,
                      increment_preprocessed_func, increment_processed_func, cancelled_func):
    dir_re = re.compile(".*c(\d+)$")
    input_re = re.compile("^datain_(\d+)\..*")

    listing = filter(lambda x: os.path.isdir(x) and dir_re.match(x),
                     glob.glob(path + os.sep + "*"))

    if len(listing) == 0:
        raise PluginError("no chromosome directories to process in %s" % path)

    for dir in listing:
        chromo = dir_re.match(dir).group(1)
        inputfiles = glob.glob(dir + os.sep + 'datain_*')

        for f in inputfiles:
            if cancelled_func():
                return

            dirname, filename = os.path.split(f)

            m = input_re.match(filename)
            if not m:
                continue

            fragid = m.group(1)

            if os.path.exists(dirname + os.sep + ("gh_%s.out" % fragid)):
                increment_processed_func()
                continue

            input = map(lambda x: dirname + os.sep + (x % (fragid, chromo)),
                        ['datain_%s.%s', 'map_%s.%s', 'pedin_%s.%s', 'setup_%s.%s'])
            output = dirname + os.sep + ("gh_%s.out" % fragid)

            tmp = (input, output)
            queue.put(tmp)

            increment_preprocessed_func()
def get_tests_info(input_dir, msg_dir, prefix, suffix):
    """get python input examples and output messages

    We use following conventions for input files and messages:
    for different inputs:
        test for python >= x.y  ->  input = <name>_pyxy.py
        test for python <  x.y  ->  input = <name>_py_xy.py
    for one input and different messages:
        message for python >= x.y  ->  message = <name>_pyxy.txt
        lower versions             ->  message with highest num
    """
    result = []
    for fname in glob(join(input_dir, prefix + '*' + suffix)):
        infile = basename(fname)
        fbase = splitext(infile)[0]
        # filter input files :
        pyrestr = fbase.rsplit('_py', 1)[-1]  # like _26 or 26
        if pyrestr.isdigit():  # '24', '25'...
            if SYS_VERS_STR < pyrestr:
                continue
        if pyrestr.startswith('_') and pyrestr[1:].isdigit():
            # skip test for higher python versions
            if SYS_VERS_STR >= pyrestr[1:]:
                continue
        messages = glob(join(msg_dir, fbase + '*.txt'))
        # the last one will be without ext, i.e. for all or upper versions:
        if messages:
            for outfile in sorted(messages, reverse=True):
                py_rest = outfile.rsplit('_py', 1)[-1][:-4]
                if py_rest.isdigit() and SYS_VERS_STR >= py_rest:
                    break
        else:
            outfile = None
        result.append((infile, outfile))
    return result
def deploy(self, file, contextroot=None, deploymentorder=100, libraries=[]):
    files = glob.glob(file)
    if len(files) != 1:
        abort("Exactly one file must match " + file)
    cmd = self.asadminCommand + " " + "deploy"
    if self.version >= 4:
        cmd = cmd + " --deploymentorder " + str(deploymentorder)
    if contextroot:
        cmd = cmd + " --contextroot " + contextroot
    if libraries:
        libstring = ""
        for library in libraries:
            path = os.path.join(self.lib_path, library)
            libs = glob.glob(path)
            if len(libs) != 1:
                abort("Exactly one library must match " + path)
            libadd = os.path.basename(libs[0])
            if libstring:
                libstring += "," + libadd
            else:
                libstring = "--libraries " + libadd
        cmd = cmd + " " + libstring
    cmd = cmd + " " + files[0]
    if self.verbosity:
        print "\nexecute: " + cmd
    out, err, rc = self.execute(cmd)
    if self.verbosity > 1:
        if out:
            print out
        if err:
            for line in err.splitlines():
                line = line.strip()
                if line:
                    if line.startswith("PER01"):
                        continue
                    print line
def w2p_pack_plugin(filename, path, plugin_name):
    """Packs the given plugin into a w2p file.
    Will match files at::

        <path>/*/plugin_[name].*
        <path>/*/plugin_[name]/*
    """
    filename = abspath(filename)
    path = abspath(path)
    if not filename.endswith('web2py.plugin.%s.w2p' % plugin_name):
        raise Exception("Not a web2py plugin name")
    plugin_tarball = tarfile.open(filename, 'w:gz')
    try:
        app_dir = path
        while app_dir[-1] == '/':
            app_dir = app_dir[:-1]
        files1 = glob.glob(
            os.path.join(app_dir, '*/plugin_%s.*' % plugin_name))
        files2 = glob.glob(
            os.path.join(app_dir, '*/plugin_%s/*' % plugin_name))
        for file in files1 + files2:
            plugin_tarball.add(file, arcname=file[len(app_dir) + 1:])
    finally:
        plugin_tarball.close()
def init_notebooks(self):
    """Construct the list of notebooks.

    If notebooks are passed on the command-line,
    they override notebooks specified in config files.
    Glob each notebook to replace notebook patterns with filenames.
    """
    # Specifying notebooks on the command-line overrides (rather than adds)
    # the notebook list
    if self.extra_args:
        patterns = self.extra_args
    else:
        patterns = self.notebooks

    # Use glob to replace all the notebook patterns with filenames.
    filenames = []
    for pattern in patterns:
        # Use glob to find matching filenames.  Allow the user to convert
        # notebooks without having to type the extension.
        globbed_files = glob.glob(pattern)
        globbed_files.extend(glob.glob(pattern + '.ipynb'))
        if not globbed_files:
            self.log.warn("pattern %r matched no files", pattern)

        for filename in globbed_files:
            if not filename in filenames:
                filenames.append(filename)
    self.notebooks = filenames
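# A minimal free-function sketch (an assumption, not the original API) of the
# same expansion step used in init_notebooks above: each pattern is globbed
# as given and again with an '.ipynb' suffix, and duplicates are dropped
# while preserving order.
import glob


def expand_notebook_patterns(patterns):
    filenames = []
    for pattern in patterns:
        matched = glob.glob(pattern) + glob.glob(pattern + '.ipynb')
        for filename in matched:
            if filename not in filenames:
                filenames.append(filename)
    return filenames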
def config():
    # spark-default.conf
    spark_defaults_tmp_location = os.path.join(tmp_dir, "spark-defaults.conf")
    spark_default_final_location = os.path.join(spark_home, "conf")
    with open(spark_defaults_tmp_location, 'a') as spark_defaults:
        spark_defaults.write("spark.eventLog.enabled true\n")
        spark_defaults.write("spark.eventLog.dir {0}\n".format(spark_evlogs))
    subprocess.check_call(["/bin/mv", spark_defaults_tmp_location, spark_default_final_location])

    # bashrc file
    with open("/home/hadoop/.bashrc", "a") as bashrc:
        bashrc.write("export SCALA_HOME={0}".format(scala_home))

    # spark-env.sh
    spark_env_tmp_location = os.path.join(tmp_dir, "spark-env.sh")
    spark_env_final_location = os.path.join(spark_home, "conf")

    files = glob.glob("{0}/{1}/share/*/*/*/hadoop-*lzo.jar".format(hadoop_apps, hadoop_version))
    if len(files) < 1:
        files = glob.glob("{0}/{1}/share/*/*/*/hadoop-*lzo-*.jar".format(hadoop_apps, hadoop_version))
    if len(files) < 1:
        print "lzo not found inside {0}/{1}/share/".format(hadoop_apps, hadoop_version)
    else:
        lzo_jar = files[0]

    #subprocess.check_call(["/bin/mkdir","-p",spark_log_dir])
    subprocess.call(["/bin/mkdir", "-p", spark_log_dir])

    with open(spark_env_tmp_location, 'a') as spark_env:
        spark_env.write("export SPARK_DAEMON_JAVA_OPTS=\"-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps\"\n")
        spark_env.write("export SPARK_LOCAL_DIRS={0}\n".format(local_dir))
        spark_env.write("export SPARK_LOG_DIR={0}\n".format(spark_log_dir))
        spark_env.write("export SPARK_CLASSPATH=\"{0}/emr/*:{1}/emrfs/*:{2}/share/hadoop/common/lib/*:{3}:/home/hadoop/hive/conf/*\"\n".format(spark_classpath, spark_classpath, hadoop_home, lzo_jar))

    subprocess.check_call(["mv", spark_env_tmp_location, spark_env_final_location])
def lookup_copyright_notice(self, ufo_folder):
    current_path = ufo_folder
    try:
        contents = open(os.path.join(ufo_folder, 'fontinfo.plist')).read()
        copyright = self.grep_copyright_notice(contents)
        if copyright:
            return copyright
    except (IOError, OSError):
        pass

    while os.path.realpath(self.operator.path) != current_path:
        # look for all text files inside folder
        # read contents from them and compare with copyright notice
        # pattern
        files = glob.glob(os.path.join(current_path, '*.txt'))
        files += glob.glob(os.path.join(current_path, '*.ttx'))
        for filename in files:
            with open(os.path.join(current_path, filename)) as fp:
                match = COPYRIGHT_REGEX.search(fp.read())
                if not match:
                    continue
                return match.group(0).strip(',\r\n')
        current_path = os.path.join(current_path, '..')  # go up
        current_path = os.path.realpath(current_path)
    return
def copy_build_directories_vs(dist_build, build_dir):
    """Copy the build/visual-studio directories to the distribution directory.
    """
    buildfiles = __astyle_dir + "/build/"
    # copy solution files
    vsdir = '/' + build_dir + '/'
    dist_astyle_vs20xx = dist_build + vsdir
    os.mkdir(dist_astyle_vs20xx)
    slnfiles = glob.glob(buildfiles + vsdir + "*.sln")
    for sln in slnfiles:
        shutil.copy(sln, dist_astyle_vs20xx)

    # build project directories
    for projdir in ("/AStyle/", "/AStyle Dll/", "/AStyle Java/", "/AStyle Lib/"):
        dist_astyle_proj = dist_astyle_vs20xx[:-1] + projdir
        os.mkdir(dist_astyle_proj)

        # copy project files
        projfiles = glob.glob(buildfiles + vsdir[:-1] + projdir + "*.*proj")
        files_copied = 0
        for proj in projfiles:
            files_copied += 1
            shutil.copy(proj, dist_astyle_proj)
        if vsdir[1:-1] >= "vs2010":
            filtfiles = glob.glob(buildfiles + vsdir[:-1] + projdir + "*.*.filters")
            for filter_in in filtfiles:
                files_copied += 1
                shutil.copy(filter_in, dist_astyle_proj)

        # verify number of files copied
        if files_copied != 2:
            libastyle.system_exit("Error in number of build files copied: " + str(files_copied))
def strip_symbols():
    bin_dir = os.path.join(conf[CONF_BUILDDIR], 'pack', 'bin')
    ignored = []

    def do_strip(fn):
        run('strip "%s"' % fn)
        info('stripping: %s' % fn)

    def should_ignore(path):
        '''Do not strip python.dll and msvc*.dll '''
        name = os.path.basename(path).lower()
        return name.startswith('python') or name.startswith('msvc')

    for dll in glob.glob(os.path.join(bin_dir, '*.dll')):
        if should_ignore(dll):
            ignored.append(dll)
        else:
            do_strip(dll)

    for exe in glob.glob(os.path.join(bin_dir, '*.exe')):
        do_strip(exe)

    info('----------------------------')
    info('ignored:')
    for name in ignored:
        info('>> %s' % name)
def complete(self, txt):
    """ Returns the next completion for txt, or None if there is no
        completion.
    """
    if not self.lookup:
        self.lookup = []
        if txt == "" or txt[0] not in "~/":
            txt = "~/" + txt
        path = os.path.expanduser(txt)
        if os.path.isdir(path):
            files = glob.glob(os.path.join(path, "*"))
            prefix = txt
        else:
            files = glob.glob(path + "*")
            prefix = os.path.dirname(txt)
            prefix = prefix.rstrip("/") or "/"
        for f in files:
            display = os.path.join(prefix, os.path.basename(f))
            if os.path.isdir(f):
                display += "/"
            self.lookup.append((display, f))
        self.lookup.sort()
        self.offset = -1
        self.lookup.append((txt, txt))
    self.offset += 1
    if self.offset >= len(self.lookup):
        self.offset = 0
    ret = self.lookup[self.offset]
    self.thisfinal = ret[1]
    return ret[0]
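# A stripped-down sketch (hypothetical helper, not part of the original class)
# of the glob-based candidate lookup in complete() above: a directory is
# listed with '<path>/*', anything else is completed with a trailing '*'.
import glob
import os


def completion_candidates(txt):
    path = os.path.expanduser(txt)
    if os.path.isdir(path):
        return sorted(glob.glob(os.path.join(path, "*")))
    return sorted(glob.glob(path + "*"))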
def generate_rst():
    """generate chX.rst in current working directory"""
    cwd = os.getcwd()
    demo_dir = os.path.join(cwd, 'demos')
    chapters = os.listdir(demo_dir)
    for chapter in chapters:
        if not os.path.isdir(os.path.join(demo_dir, chapter)):
            continue

        reg_py = os.path.join(demo_dir, chapter, '*.py')
        scripts = glob.glob(reg_py)
        rst_file = chapter + '.rst'
        rst_file = os.path.join(demo_dir, chapter, rst_file)
        with open(rst_file, 'w') as f:
            f.write(chapter)
            f.write('\n========================================\n')
            for script in scripts:
                script_name = os.path.basename(script)
                f.write('\n' + script_name[:-3])
                f.write('\n----------------------------------------\n')
                reg_png = os.path.join(demo_dir, chapter, script_name[:-3] + '*.png')
                for img in glob.glob(reg_png):
                    img_name = os.path.basename(img)
                    f.write(".. image:: " + img_name + "\n")
                f.write(".. literalinclude:: " + script_name + "\n")
def install_new_policy(projdir, tmpdir):
    """ Copies the polkit policy files. """
    files = glob.glob(projdir + '/data/*.policy') + \
        glob.glob(projdir + '/modules/*/data/*.policy')
    return _copy_files(files, '/usr/share/polkit-1/actions/', tmpdir)
def main():
    files = glob.glob("./scans/*.jpg")
    files += glob.glob("./scans/*.jpeg")

    for f in files:
        reset_stats()
        print "Processing: " + f.split("/")[len(f.split("/")) - 1]
        schedule = Schedule()
        schedule.load_data()

        if schedule.get_has_schedule():
            scan_image(f, schedule)

            print "Sheet ok? ",

            while True:
                cv2.imshow("image", cv2.resize(img, (446, 578)))
                cv2.moveWindow("image", 0, 0)
                # user_in = raw_input()
                key = cv2.waitKey(-1)

                if key == ord("y"):
                    print "Sheet ok... Dumping data"
                    dump_stats()
                    os.remove(f)
                    break
                elif key == ord("n"):
                    print "Marking to redo"
                    #os.rename(f, "./scans/redo/" + f.split("/")[len(f.split("/")) - 1])
                    break
                elif key == ord("q"):
                    exit(0)
                else:
                    continue

            cv2.destroyAllWindows()
        else:
            print "Unable to load schedule... Aborting"
def getPurchaseReport():
    purchaseFiles = []
    for purchases in glob.glob('purchases/purchase*'):
        purchaseFiles.append(purchases)

    for purchases in glob.glob('purchases/Item*'):
        itemlist = purchases

    item_dict = {}
    with open(itemlist, 'r') as itemfile:
        all_lines = itemfile.readlines()
        for i in range(2, len(all_lines)):
            item = all_lines[i].split()
            item_dict.update({item[0]: item[1]})
    #print item_dict

    report_dict = {}
    for i in range(0, len(purchaseFiles)):
        with open(purchaseFiles[i], 'r') as purchasefile:
            item_total = 0
            purchase_list = purchasefile.readlines()
            #print purchaseFiles[i][19:22]
            for j in range(2, len(purchase_list)):
                purchase = purchase_list[j].split()
                #print float(item_dict[purchase[0]][1:])
                #print float(purchase[1])
                item_total = float(item_dict[purchase[0]][1:]) * float(purchase[1]) + item_total
            #print item_total
            report_dict.update({int(purchaseFiles[i][21]): "{0:.2f}".format(item_total)})
            #print purchaseFiles[i][19:22]

    print report_dict
    return report_dict
def test_devtool_reset_all(self):
    # Check preconditions
    workspacedir = os.path.join(self.builddir, 'workspace')
    self.assertTrue(not os.path.exists(workspacedir), 'This test cannot be run with a workspace directory under the build directory')
    tempdir = tempfile.mkdtemp(prefix='devtoolqa')
    self.track_for_cleanup(tempdir)
    self.track_for_cleanup(workspacedir)
    self.add_command_to_tearDown('bitbake-layers remove-layer */workspace')
    testrecipe1 = 'mdadm'
    testrecipe2 = 'cronie'
    result = runCmd('devtool modify -x %s %s' % (testrecipe1, os.path.join(tempdir, testrecipe1)))
    result = runCmd('devtool modify -x %s %s' % (testrecipe2, os.path.join(tempdir, testrecipe2)))
    result = runCmd('devtool build %s' % testrecipe1)
    result = runCmd('devtool build %s' % testrecipe2)
    stampprefix1 = get_bb_var('STAMP', testrecipe1)
    self.assertTrue(stampprefix1, 'Unable to get STAMP value for recipe %s' % testrecipe1)
    stampprefix2 = get_bb_var('STAMP', testrecipe2)
    self.assertTrue(stampprefix2, 'Unable to get STAMP value for recipe %s' % testrecipe2)
    result = runCmd('devtool reset -a')
    self.assertIn(testrecipe1, result.output)
    self.assertIn(testrecipe2, result.output)
    result = runCmd('devtool status')
    self.assertNotIn(testrecipe1, result.output)
    self.assertNotIn(testrecipe2, result.output)
    matches1 = glob.glob(stampprefix1 + '*')
    self.assertFalse(matches1, 'Stamp files exist for recipe %s that should have been cleaned' % testrecipe1)
    matches2 = glob.glob(stampprefix2 + '*')
    self.assertFalse(matches2, 'Stamp files exist for recipe %s that should have been cleaned' % testrecipe2)
def findPR650(ports=None):
    """DEPRECATED (as of v.1.60.01). Use :func:`psychopy.hardware.findPhotometer()`
    instead, which finds a wider range of devices
    """
    logging.error("DEPRECATED (as of v.1.60.01). Use psychopy.hardware.findPhotometer() "
                  "instead, which finds a wider range of devices")

    if ports is None:
        if sys.platform == 'darwin':
            ports = []
            # try some known entries in /dev/tty. used by keyspan
            ports.extend(glob.glob('/dev/tty.USA*'))  # keyspan twin adapter is usually USA28X13P1.1
            ports.extend(glob.glob('/dev/tty.Key*'))  # some are Keyspan.1 or Keyserial.1
            ports.extend(glob.glob('/dev/tty.modem*'))  # some are Keyspan.1 or Keyserial.1
            if len(ports) == 0:
                logging.error("couldn't find likely serial port in /dev/tty.* Check for "
                              "serial port name manually, check drivers installed etc...")
        elif sys.platform == 'win32':
            ports = range(11)
    elif type(ports) in [int, float]:
        ports = [ports]  # so that we can iterate

    pr650 = None
    logging.info('scanning serial ports...\n\t')
    logging.console.flush()
    for thisPort in ports:
        logging.info(str(thisPort))
        logging.console.flush()
        pr650 = Photometer(port=thisPort, meterType="PR650", verbose=False)
        if pr650.OK:
            logging.info(' ...OK\n')
            logging.console.flush()
            break
        else:
            pr650 = None
            logging.info('...Nope!\n\t')
            logging.console.flush()
    return pr650
def write_csv_files(self, overwrite=False):
    self.extract_images()
    for setn in ('train', 'val'):
        img_dir = os.path.join(self.out_dir, setn)
        csvfile = getattr(self, setn + '_file')
        neon_logger.display("Getting %s file list" % (setn))
        if os.path.exists(csvfile) and not overwrite:
            neon_logger.display("File %s exists, not overwriting" % (csvfile))
            continue
        flines = []

        subdirs = glob(os.path.join(img_dir, '*'))
        for subdir in subdirs:
            subdir_label = os.path.basename(subdir)  # This is the int label
            files = glob(os.path.join(subdir, self.file_pattern))
            flines += [(filename, subdir_label) for filename in files]

        if setn == 'train':
            np.random.seed(0)
            np.random.shuffle(flines)

        with gzip.open(csvfile, 'wb') as f:
            f.write('filename,l_id\n')
            for tup in flines:
                f.write('{},{}\n'.format(*tup))
def repackage_archive_zip_to_pmc_zip(self, doi_id):
    "repackage the zip file in the TMP_DIR to a PMC zip format"
    # unzip contents
    zip_input_dir = os.path.join(self.get_tmp_dir(), self.TMP_DIR)
    zip_extracted_dir = os.path.join(self.get_tmp_dir(), self.JUNK_DIR)
    zip_renamed_files_dir = os.path.join(self.get_tmp_dir(), self.RENAME_DIR)
    pmc_zip_output_dir = os.path.join(self.get_tmp_dir(), self.INPUT_DIR)
    archive_zip_name = glob.glob(zip_input_dir + "/*.zip")[0]
    with zipfile.ZipFile(archive_zip_name, 'r') as myzip:
        myzip.extractall(zip_extracted_dir)
    # rename the files and profile the files
    file_name_map = article_processing.rename_files_remove_version_number(
        files_dir=zip_extracted_dir,
        output_dir=zip_renamed_files_dir
    )
    if self.logger:
        self.logger.info("FTPArticle running %s workflow for article %s, file_name_map" %
                         (self.workflow, self.doi_id))
        self.logger.info(file_name_map)
    # convert the XML
    article_xml_file = glob.glob(zip_renamed_files_dir + "/*.xml")[0]
    article_processing.convert_xml(xml_file=article_xml_file,
                                   file_name_map=file_name_map)
    # rezip the files into PMC zip format
    soup = parser.parse_document(article_xml_file)
    volume = parser.volume(soup)
    pmc_zip_file_name = article_processing.new_pmc_zip_filename(self.journal, volume, doi_id)
    with zipfile.ZipFile(os.path.join(pmc_zip_output_dir, pmc_zip_file_name), 'w',
                         zipfile.ZIP_DEFLATED, allowZip64=True) as new_zipfile:
        dirfiles = article_processing.file_list(zip_renamed_files_dir)
        for df in dirfiles:
            filename = df.split(os.sep)[-1]
            new_zipfile.write(df, filename)
    return True
def _load_imdb(self):
    dir_path = os.path.join(config.data_path, self.folder)
    data_path = os.path.join(dir_path, self.which_set)

    pos_path = os.path.join(data_path, 'pos')
    neg_path = os.path.join(data_path, 'neg')

    files = glob.glob(pos_path + '/*.txt')
    pos_strings = [open(f, 'r').read() for f in files]
    pos_labels = np.ones(len(files))

    files = glob.glob(neg_path + '/*.txt')
    neg_strings = [open(f, 'r').read() for f in files]
    neg_labels = np.zeros(len(files))

    targets = np.hstack((pos_labels, neg_labels))
    targets = numpy.array(targets, dtype='int32').reshape((-1, 1))
    features = np.array(pos_strings + neg_strings)

    #n = 25000 / 2
    #features = features[n-1000:n+1000]
    #targets = targets[n-1000:n+1000]

    self.num_examples = len(features)

    if self.sorted == True:
        index = np.vectorize(len)(features).argsort()
        features = features[index]
        targets = targets[index]
    return (features, targets)
def setRepositoryRevisions(self):
    # expand possible environment variables in paths
    if isinstance(self._args.repo_scan_base_dirs, basestring):
        self._args.repo_scan_base_dirs = [self._args.repo_scan_base_dirs]
    self._args.repo_scan_base_dirs = [os.path.expandvars(repoScanBaseDir) for repoScanBaseDir in self._args.repo_scan_base_dirs]

    # construct possible scan paths
    subDirWildcards = ["*/" * level for level in range(self._args.repo_scan_depth + 1)]
    scanDirWildcards = [os.path.join(repoScanBaseDir, subDirWildcard) for repoScanBaseDir in self._args.repo_scan_base_dirs for subDirWildcard in subDirWildcards]

    # globbing and filter for directories
    scanDirs = tools.flattenList([glob.glob(scanDirWildcard) for scanDirWildcard in scanDirWildcards])
    scanDirs = [scanDir for scanDir in scanDirs if os.path.isdir(scanDir)]

    # key: directory to check type of repository
    # value: command to extract the revision
    repoVersionCommands = {
        ".git": "git rev-parse HEAD",
        ".svn": "svn info"  # | grep Revision | awk '{print $2}'"
    }

    # loop over dirs and revision control systems and write revisions to the config dict
    for repoDir, currentRevisionCommand in repoVersionCommands.items():
        repoScanDirs = tools.flattenList([glob.glob(os.path.join(scanDir, repoDir)) for scanDir in scanDirs])
        repoScanDirs = [os.path.abspath(os.path.join(repoScanDir, "..")) for repoScanDir in repoScanDirs]

        for repoScanDir in repoScanDirs:
            popenCout, popenCerr = subprocess.Popen(currentRevisionCommand.split(), stdout=subprocess.PIPE, cwd=repoScanDir).communicate()
            self._config[repoScanDir] = popenCout.replace("\n", "")
def cleanupFiles():
    # First get rid of modified files
    for l in ["l1", "l2", "l3"]:
        arcpy.Delete_management(l)

    for f in glob.glob("C:\\Arctmp\\*"):
        try:
            shutil.rmtree(f)
        except:
            print "UNABLE TO REMOVE:", f
    # Now remove the old directory
    for i in xrange(0, 1000000):
        new_workspace = "C:\\Arctmp\\workspace." + str(i)
        if not os.path.exists(new_workspace):
            break
    print "TESTING USING WORKSPACE", new_workspace
    # Now move in fresh copies
    shutil.copytree("C:\\Arcbase", new_workspace)
    print "CONTENTS:"
    arcpy.env.workspace = new_workspace
    for f in sorted(glob.glob(arcpy.env.workspace + "\\*.shp")):
        print f
    for f in sorted(glob.glob(arcpy.env.workspace + "\\*.lyr")):
        print f
    for f in sorted(glob.glob(arcpy.env.workspace + "\\*.gdb")):
        print f
def from_directory(cls, dirname):
    print "Loading from directory: ", dirname
    filenames = glob.glob(dirname + "/*.fits") + glob.glob(dirname + "/*.fits.gz")
    print 'got %d files' % len(filenames)
    cat_name = dirname.strip(os.path.sep).split(os.path.sep)[-1]
    cat = cls.from_multiple_fits(filenames, cat_name)
    return cat