def check_dir(self, path):
    """Recursively run check_file on every file found under *path*.

    Returns the negation of ``self.failed_paths`` (true when no checked
    path was recorded as failed).
    """
    def _visit(unused, folder, entries):
        # os.path.walk visitor: check every entry found in this directory.
        for entry in entries:
            self.check_file(os.path.join(folder, entry))

    walk(path, _visit, None)
    return not self.failed_paths
def _get_issues(self):
    # Locate every journal issue belonging to the unpacked Elsevier
    # packages and record its path in self.found_issues.
    for name in self.files_list:
        # Each package "<name>.<ext>" is unpacked into a directory named
        # after the part of the file name before the first dot.
        dataset_link = join(self.path_unpacked, name.split(".")[0], "dataset.xml")
        try:
            dataset_xml = parse(dataset_link)
        except Exception, err:  # Python 2 syntax; `err` itself is unused
            register_exception(alert_admin=True, prefix="Elsevier error reading dataset.xml file.")
            self.logger.error("Error reading dataset.xml file: %s" % (dataset_link,))
            print >> sys.stdout, "\nError reading dataset.xml file: %s" % (dataset_link,)
            continue
        journal_issues = dataset_xml.getElementsByTagName("journal-issue")
        if journal_issues:
            # dataset.xml lists the issues explicitly: resolve each issue's
            # pathname relative to the unpacked package directory.
            for journal_issue in journal_issues:
                filename = xml_to_text(journal_issue.getElementsByTagName("ml")[0].getElementsByTagName("pathname")[0])
                self.logger.info("Found issue %s in %s." % (filename, name))
                pathname = join(self.path_unpacked, name.split(".")[0], filename)
                self.found_issues.append(pathname)
        else:
            # No explicit listing: fall back to scanning the unpacked tree
            # for issue.xml files (os.path.walk visitor).
            def visit(arg, dirname, names):
                if "issue.xml" in names:
                    self.found_issues.append(join(dirname, "issue.xml"))
            walk(join(self.path_unpacked, name.split(".")[0]), visit, None)
def __init__(self, session, args = None):
    # Skin-setup screen: lists every skin found under self.root and lets
    # the user preview/activate one.
    Screen.__init__(self, session)
    Screen.setTitle(self, _("Skin setup"))
    self.skinlist = []      # candidate skins; filled by the self.find visitor
    self.previewPath = ""   # preview image path for the current selection
    # os.path.walk visitor self.find populates self.skinlist.
    path.walk(self.root, self.find, "")
    self["key_red"] = StaticText(_("Close"))
    self["key_green"] = StaticText(_("Save"))
    self["introduction"] = StaticText(_("Press OK to activate the selected skin."))
    self["SkinList"] = MenuList(self.skinlist)
    self["Preview"] = Pixmap()
    # Sorting after MenuList creation still works: MenuList holds a
    # reference to the same list object.
    self.skinlist.sort()
    self["actions"] = NumberActionMap(["SetupActions", "DirectionActions", "TimerEditActions", "ColorActions"],
    {
        "ok": self.ok,
        "cancel": self.close,
        "red": self.close,
        "green": self.ok,
        "up": self.up,
        "down": self.down,
        "left": self.left,
        "right": self.right,
        # NOTE(review): action key "log" bound to self.info — other
        # variants of this screen use the key "info"; confirm intended.
        "log": self.info,
    }, -1)
    self.onLayoutFinish.append(self.layoutFinished)
def dir_list(dir, filter_list=(), filter_excludes=True):
    """Returns a list of files in 'dir' and its subdirs."""
    global cache_dir
    import fnmatch
    # Serve repeated listings of the same directory from the cache.
    if dir in cache_dir:
        return cache_dir[dir]

    def gather(bag, where, names):
        # os.path.walk visitor: record the full path of every entry.
        for n in names:
            bag.append(path.join(where, n))

    def excluded(entry):
        """True when entry must be dropped from the final listing."""
        if path.isdir(entry):
            return True  # directories never appear in the result
        if filter_list:
            rel = entry[len(dir):]  # pattern shouldn't match home dir
            for pattern in filter_list:
                if fnmatch.fnmatch(rel, pattern):
                    return filter_excludes
            return not filter_excludes

    file_list = []
    filter_list = filter_list or get_config_log_filter()
    # Get a list of log files, including those in subdirs.
    path.walk(dir, gather, file_list)
    file_list = [entry for entry in file_list if not excluded(entry)]
    cache_dir[dir] = file_list
    return file_list
def _scan(self, file_object):
    # Compile every YARA rule found under self._yara_rules_dir and match
    # them against file_object's full content; returns a list of results.
    import yara
    scan_results = []
    full_yara_sig_paths = []
    if path.exists(self._yara_rules_dir):
        # os.path.walk visitor collects the full path of each rule file.
        path.walk(self._yara_rules_dir, self._scan_with_yara_sig, full_yara_sig_paths)
    # One namespace per rule file, derived from its path.
    yara_namespace_dict = dict([(self._make_namespace_from_path(rule_path),rule_path) for rule_path in full_yara_sig_paths])
    rules = yara.compile(filepaths = yara_namespace_dict)
    result = None
    try:
        # Attempt to match the file to one or more rules
        result = rules.match(data=file_object.all_content)
    #todo narrow exception and print a bad file
    except:  # NOTE(review): bare except silently hides match errors
        pass
    if result:
        scan_results.append(result)
    else:
        # NOTE(review): raised whenever no rule matched, not only when the
        # rules directory is missing — the exception name looks misleading;
        # confirm the intended semantics.
        raise YaraRuleDirectoryNotFound()
    return scan_results
def runOneOrMany(options):
    """ Runs our main transformer with each of the input files. """
    infile, outfile = options.inputfile, options.outputfile
    # Directory mode: infile names a directory (and is not an already-open
    # Python 2 `file` object), so transform every .java file beneath it.
    if infile and not isinstance(infile, file) and path.isdir(infile):
        if outfile and not isinstance(outfile, file) and not path.isdir(outfile):
            warning('Must specify output directory or stdout when using input directory.')
            return 2
        def walker(arg, dirname, files):
            # os.path.walk visitor: transform every .java file in dirname.
            for name in [name for name in files if name.endswith('.java')]:
                fullname = path.join(dirname, name)
                # options is mutated per input file before each transform.
                options.inputfile = fullname
                info('opening %s', fullname)
                if outfile and outfile != '-' and not isinstance(outfile, file):
                    # Mirror the input path under the output directory,
                    # swapping the .java suffix for .py.
                    full = path.abspath(path.join(outfile, fullname))
                    head, tail = path.split(full)
                    tail = path.splitext(tail)[0] + '.py'
                    if not path.exists(head):
                        makedirs(head)
                    options.outputfile = path.join(head, tail)
                runTransform(options)
        path.walk(infile, walker, None)
        return 0
    else:
        # Single-input mode: delegate directly.
        return runTransform(options)
def _changed(self):
    """Return 1 when the watched directory changed since the last check.

    Compares both the directory mtime and the full (sorted) file list
    against the values cached in ``_v_last_read`` / ``_v_last_filelist``,
    updating the cache on change.
    """
    mtime = 0
    filelist = []
    try:
        fp = expandpath(self.filepath)
        mtime = stat(fp)[8]
        # some Windows directories don't change mtime
        # when a file in them changes :-(
        # So keep a list of files as well, and see if that
        # changes
        path.walk(fp, _walker, filelist)
        filelist.sort()
    # BUG FIX: narrowed bare `except:` — it also swallowed SystemExit and
    # KeyboardInterrupt; errors are still logged as before.
    except Exception:
        from zLOG import LOG, ERROR
        import sys
        LOG('DirectoryView', ERROR,
            'Error checking for directory modification',
            error=sys.exc_info())
    if mtime != self._v_last_read or filelist != self._v_last_filelist:
        self._v_last_read = mtime
        self._v_last_filelist = filelist
        return 1
    return 0
def run(self, db_host, db_user, db_pass, db_db):
    # First pass: run the ant "dock" target once for every "src" directory
    # in the source tree.
    def dir_visitor(arg, directory, files):
        if directory.endswith("src"):
            # NOTE(review): the `"-Ddb_user="******"` sequences below look
            # like credential-redacted source and are not valid Python as
            # written; the originals were presumably
            # "-Ddb_user=" + db_user, "-Ddb_pass=" + db_pass. Confirm
            # against upstream history before running.
            dock_command = __ant_command__ + ["dock", "-Dsource=" + directory, "-Dsource_name=" + self.source_name, "-Dsource_package=" + self.source_package, "-Dsource_url=" + self.source_url, "-Ddb_host=" + db_host, "-Ddb_user="******"-Ddb_pass="******"-Ddb_db=" + db_db]
            print(" ".join(dock_command))
            call(dock_command)
    walk(self.source_path, dir_visitor, None)
    # Now, call source for each source file.
    def dir_visitor(arg, directory, files):
        # Second pass (rebinds dir_visitor): run the ant "source" target
        # for every .java file.
        for f in files:
            if f.endswith("java"):
                # NOTE(review): same redacted "******" tokens as above.
                source_command = __ant_command__ + ["source", "-Dsource=" + directory + "/" + f, "-Dsource_name=" + self.source_name, "-Dsource_package=" + self.source_package, "-Dsource_url=" + self.source_url, "-Ddb_host=" + db_host, "-Ddb_user="******"-Ddb_pass="******"-Ddb_db=" + db_db]
                print(" ".join(source_command))
                call(source_command)
    walk(self.source_path, dir_visitor, None)
def path_checksum(paths):
    """
    Recursively calculates a checksum representing the contents of all files
    found with a sequence of file and/or directory paths.

    credits for this function : David Moss
    url: http://code.activestate.com/recipes/576973-getting-the-sha-1-or-md5-hash-of-a-directory/
    """
    if not hasattr(paths, '__iter__'):
        raise TypeError('sequence or iterable expected not %r!' % type(paths))

    def _update_checksum(checksum, dirname, filenames):
        # os.path.walk-style visitor: feed the bytes of every regular file
        # in `filenames` (sorted, for a deterministic digest) into checksum.
        for filename in sorted(filenames):
            path = path_join(dirname, filename)
            if isfile(path):
                fh = open(path, 'rb')
                while 1:
                    buf = fh.read(4096)
                    if not buf:
                        break
                    checksum.update(buf)
                fh.close()

    chksum = sha1()
    # Sort the normalized inputs so the digest is order-independent.
    for path in sorted([normpath(f) for f in paths]):
        if path_exists(path):
            if isdir(path):
                walk(path, _update_checksum, chksum)
            elif isfile(path):
                # BUG FIX: the visitor expects a *list* of names; passing
                # the bare basename string made it iterate over the string's
                # characters, so single-file paths were silently skipped.
                _update_checksum(chksum, dirname(path), [basename(path)])
    return chksum.hexdigest()
def path_checksum(paths):
    """
    Recursively calculates a checksum representing the contents of all files
    found with a sequence of file and/or directory paths.
    """
    if not hasattr(paths, '__iter__'):
        raise TypeError('sequence or iterable expected not %r!' % type(paths))

    def _update_checksum(checksum, dirname, filenames):
        # os.path.walk-style visitor: hash the bytes of every regular file
        # in `filenames` (sorted, for a deterministic digest).
        for filename in sorted(filenames):
            path = path_join(dirname, filename)
            if isfile(path):
                # BUG FIX: removed a stray Python-2 debug `print path`
                # statement that polluted stdout on every hashed file.
                fh = open(path, 'rb')
                while 1:
                    buf = fh.read(4096)
                    if not buf:
                        break
                    checksum.update(buf)
                fh.close()

    chksum = hashlib.sha1()
    for path in sorted([normpath(f) for f in paths]):
        if path_exists(path):
            if isdir(path):
                walk(path, _update_checksum, chksum)
            elif isfile(path):
                # BUG FIX: the visitor expects a *list* of names; passing
                # the bare basename string made it iterate over the string's
                # characters, so single-file paths were silently skipped.
                _update_checksum(chksum, dirname(path), [basename(path)])
    return chksum.hexdigest()
def handle(self, *args, **options): self.directory = options.get('dir', None) if not self.directory: print "Please specify full path to directory with the --dir argument" return self.runArchiveUpdate() walk(self.directory, self.processDir, None)
def __init__(self, session, args=None):
    # Skin selector screen: lists skins found under self.root and shows a
    # preview of the highlighted one.
    Screen.__init__(self, session)
    self.skinlist = []      # filled by the self.find walk visitor below
    self.previewPath = ""   # preview image path for the current selection
    path.walk(self.root, self.find, "")
    self["key_red"] = StaticText(_("Close"))
    self["introduction"] = StaticText(
        _("Press OK to activate the selected skin."))
    self.skinlist.sort()
    self["SkinList"] = MenuList(self.skinlist)
    self["Preview"] = Pixmap()
    self["actions"] = NumberActionMap(
        ["WizardActions", "InputActions", "EPGSelectActions"],
        {
            "ok": self.ok,
            "back": self.close,
            "red": self.close,
            "up": self.up,
            "down": self.down,
            "left": self.left,
            "right": self.right,
            "info": self.info,
        }, -1)
    self.onLayoutFinish.append(self.layoutFinished)
def get_algorithm_classes(package=ALGORITHM_PACKAGE):
    """
    Read all classes located in the algorithm package.

    Assumes there is a package directory on workdir level containing ONLY
    modules whose classes inherit from the abstract class Algorithm.
    Multiple classes per module are allowed; nested packages are not.

    Args:
        package (str): the name of the algorithm package.
    Returns (List): a list of classes.
    """
    module_names = set()
    if path.isdir(package):
        # os.path.walk visitor fills module_names with module identifiers.
        path.walk(package, __visit_algorithms__, module_names)
    classes = []
    for module_name in module_names:
        dotted = package + "." + module_name
        module = import_module(dotted)
        if not inspect.ismodule(module):
            Log.log("Module " + dotted + " could not be loaded")
            continue
        # Collect every class except the abstract base itself.
        classes.extend(obj for name, obj in inspect.getmembers(module)
                       if inspect.isclass(obj) and name != 'Algorithm')
    return classes
def runOneOrMany(options):
    """ Runs our main transformer with each of the input files. """
    infile, outfile = options.inputfile, options.outputfile
    # Directory mode: infile names a directory (and is not an already-open
    # Python 2 `file` object), so transform every .java file beneath it.
    if infile and not isinstance(infile, file) and path.isdir(infile):
        if outfile and not isinstance(outfile, file) and not path.isdir(outfile):
            warning(
                'Must specify output directory or stdout when using input directory.'
            )
            return 2

        def walker(arg, dirname, files):
            # os.path.walk visitor: transform every .java file in dirname.
            for name in [name for name in files if name.endswith('.java')]:
                fullname = path.join(dirname, name)
                # options is mutated per input file before each transform.
                options.inputfile = fullname
                info('opening %s', fullname)
                if outfile and outfile != '-' and not isinstance(
                        outfile, file):
                    # Mirror the input path under the output directory,
                    # swapping the .java suffix for .py.
                    full = path.abspath(path.join(outfile, fullname))
                    head, tail = path.split(full)
                    tail = path.splitext(tail)[0] + '.py'
                    if not path.exists(head):
                        makedirs(head)
                    options.outputfile = path.join(head, tail)
                runTransform(options)

        path.walk(infile, walker, None)
        return 0
    else:
        # Single-input mode: delegate directly.
        return runTransform(options)
def build_inputs(self): root_path = path.join(self._path_to_root, self._sample_name) root_path = path.expanduser(root_path) sample_dirs = [] path.walk(root_path, get_leaf_dirs, sample_dirs) proc_groups = {} for dir in sample_dirs: dirroot, name = path.split(dir) chanroot, procname = path.split(dirroot) dontcare, channame = path.split(chanroot) if channame not in proc_groups: proc_groups[channame] = {} if procname not in proc_groups[channame]: proc_groups[channame][procname] = [] isMod = [True] if (path.exists('%s.root' % dir)): path.walk(dir, needs_update, isMod) if isMod[0]: print 'Creating input file %s.root' % name command = 'hadd -v 0 -f %s.root %s/*.root' % (dir, dir) subprocess.call(command, shell=True) proc_groups[channame][procname].append('%s.root' % dir) self._proc_groups = proc_groups
def export(from_dir, to_dir,
           blacklist=BASE_BLACKLIST, ignore_ext=IGNORED_EXTENSIONS,
           verbose=0):
    """make a mirror of `from_dir` in `to_dir`, omitting directories and
    files listed in the black list or ending with one of the given
    extensions

    :type from_dir: str
    :param from_dir: directory to export

    :type to_dir: str
    :param to_dir: destination directory

    :type blacklist: list or tuple
    :param blacklist: list of files or directories to ignore, default to
        the content of `BASE_BLACKLIST`

    :type ignore_ext: list or tuple
    :param ignore_ext: list of extensions to ignore, default to the content
        of `IGNORED_EXTENSIONS`

    :type verbose: bool
    :param verbose: flag indicating wether information about exported files
        should be printed to stderr, default to False
    """
    def make_mirror(_, directory, fnames):
        """walk handler"""
        # Prune blacklisted entries in place so walk never descends them.
        for skipped in blacklist:
            try:
                fnames.remove(skipped)
            except ValueError:
                continue
        for filename in fnames:
            # don't include binary files
            if filename.endswith(tuple(ignore_ext)):
                continue
            src = join(directory, filename)
            dest = to_dir + src[len(from_dir):]
            if verbose:
                print(src, '->', dest, file=sys.stderr)
            if isdir(src):
                if not exists(dest):
                    mkdir(dest)
            else:
                if exists(dest):
                    remove(dest)
                shutil.copy2(src, dest)

    try:
        mkdir(to_dir)
    except OSError:
        # Destination already exists; mirroring into it is fine.
        pass
    walk(from_dir, make_mirror, None)
def main():
    """Parse the command line and copy the FSS tree from the source Plone
    site to the destination site via processFSS.

    Exits through parser.error on bad arguments or unreachable sites.
    """
    global srcsite, dstsite
    usage = "usage: %prog [options] <fss> <src URL> <dst URL>"
    parser = optparse.OptionParser(usage=usage)
    # BUG FIX: removed a stray no-op `parser.add_option` attribute access
    # (the method was referenced without being called).
    parser.add_option("-u", "--user", default="admin", help="Plone user")
    # NOTE(review): hard-coded default password in source; consider reading
    # it from the environment or prompting instead.
    parser.add_option("-p", "--password", default="leam4z",
                      help="Plone user's password")
    (opts, args) = parser.parse_args()
    if len(args) != 3:
        parser.error("the URL and the fss path are required")
    else:
        fss, src, dst = args

    srcsite = LEAMsite(src, user=opts.user, passwd=opts.password)
    if srcsite.error:
        parser.error("the URL '%s' is not a valid site" % src)
    dstsite = LEAMsite(dst, user=opts.user, passwd=opts.password)
    if dstsite.error:
        parser.error("the URL '%s' is not a valid site" % dst)

    # os.path.walk visitor processFSS receives (fss, src, dst) as its arg.
    walk(fss, processFSS, (fss, src, dst))
def _crawl_elsevier_and_find_issue_xml(self):
    """
    Information about the current volume, issue, etc. is available
    in a file called issue.xml that is available in a higher directory.
    """
    self._found_issues = []
    if not self.path and not self.package_name:
        for issue in self.conn._get_issues():
            # BUG FIX: `issue.rstrip('/issue.xml')` strips any trailing run
            # of the characters "/isue.xml" (a character *set*, not a
            # suffix), mangling directory names ending in one of them.
            # Strip the literal suffix instead.
            if issue.endswith('/issue.xml'):
                dirname = issue[:-len('/issue.xml')]
            else:
                dirname = issue
            try:
                self._normalize_issue_dir_with_dtd(dirname)
                self._found_issues.append(dirname)
            except Exception as err:
                register_exception()
                print("ERROR: can't normalize %s: %s" % (dirname, err))
    else:
        def visit(dummy, dirname, names):
            # os.path.walk visitor: any directory containing an issue.xml
            # is an issue directory.
            if "issue.xml" in names:
                try:
                    self._normalize_issue_dir_with_dtd(dirname)
                    self._found_issues.append(dirname)
                except Exception as err:
                    register_exception()
                    print("ERROR: can't normalize %s: %s" % (dirname, err))
        walk(self.path, visit, None)
def find_data_files(srcdir, destdir, *wildcards, **kw):
    """
    get a list of all files under the srcdir matching wildcards,
    returned in a format to be used for install_data
    """
    def walk_helper(arg, dirname, files):
        # Skip anything inside Subversion metadata directories.
        if '.svn' in dirname:
            return
        lst, patterns, converter, dest = arg
        matched = []
        for pattern in patterns:
            pattern_path = os.path.normpath(os.path.join(dirname, pattern))
            for entry in files:
                entry_path = os.path.normpath(os.path.join(dirname, entry))
                if fnmatch.fnmatch(entry_path, pattern_path) \
                        and not os.path.isdir(entry_path):
                    matched.append(entry_path)
        if matched:
            # Rewrite the source prefix into the destination prefix.
            lst.append((converter.sub(dest, dirname), matched))

    file_list = []
    recursive = kw.get('recursive', True)
    converter = re.compile('^({0})'.format(srcdir))
    walker_arg = (file_list, wildcards, converter, destdir)
    if recursive:
        walk(srcdir, walk_helper, walker_arg)
    else:
        walk_helper(walker_arg, srcdir,
                    [os.path.basename(f)
                     for f in glob.glob(os.path.join(srcdir, '*'))])
    return file_list
def _crawl_elsevier_and_find_main_xml(self):
    """
    A package contains several subdirectory corresponding to each article.
    An article is actually identified by the existence of a main.pdf and
    a main.xml in a given directory.
    """
    self.found_articles = []
    if not self.path and not self.package_name:
        for doc in self.conn.found_articles:
            # BUG FIX: `rstrip('/main.xml')` strips any trailing run of the
            # characters "/main.xl" (a character *set*, not a suffix),
            # mangling directory names ending in one of them. Strip the
            # literal suffix instead.
            xml_path = doc['xml']
            if xml_path.endswith('/main.xml'):
                dirname = xml_path[:-len('/main.xml')]
            else:
                dirname = xml_path
            try:
                self._normalize_article_dir_with_dtd(dirname)
                self.found_articles.append(dirname)
            except Exception as err:
                register_exception()
                print("ERROR: can't normalize %s: %s" % (dirname, err))
    else:
        def visit(dummy, dirname, names):
            # os.path.walk visitor: an article dir holds both main.xml
            # and main.pdf.
            if "main.xml" in names and "main.pdf" in names:
                try:
                    self._normalize_article_dir_with_dtd(dirname)
                    self.found_articles.append(dirname)
                except Exception as err:
                    register_exception()
                    print("ERROR: can't normalize %s: %s" % (dirname, err))
        walk(self.path, visit, None)
def _scan(self, file_object):
    # Compile every YARA rule found under self._yara_rules_dir and match
    # them against file_object's full content; returns a list of results.
    import yara
    scan_results = []
    full_yara_sig_paths = []
    if path.exists(self._yara_rules_dir):
        # os.path.walk visitor collects the full path of each rule file.
        path.walk(self._yara_rules_dir, self._scan_with_yara_sig,
                  full_yara_sig_paths)
    # One namespace per rule file, derived from its path.
    yara_namespace_dict = dict([
        (self._make_namespace_from_path(rule_path), rule_path)
        for rule_path in full_yara_sig_paths
    ])
    rules = yara.compile(filepaths=yara_namespace_dict)
    result = None
    try:
        # Attempt to match the file to one or more rules
        result = rules.match(data=file_object.all_content)
    #todo narrow exception and print a bad file
    except:  # NOTE(review): bare except silently hides match errors
        pass
    if result:
        scan_results.append(result)
    else:
        # NOTE(review): raised whenever no rule matched, not only when the
        # rules directory is missing — the exception name looks misleading;
        # confirm the intended semantics.
        raise YaraRuleDirectoryNotFound()
    return scan_results
def _crawl_springer_and_find_main_xml(self): """ A package contains several subdirectory corresponding to each article. An article is actually identified by the existence of a main.pdf and a main.xml in a given directory. """ self.found_articles = [] def visit(arg, dirname, names): files = [filename for filename in names if "nlm.xml" in filename] if not files: files = [filename for filename in names if ".xml.scoap" in filename] if files: try: # self._normalize_article_dir_with_dtd(dirname) self.found_articles.append(dirname) except Exception as err: register_exception() print "ERROR: can't normalize %s: %s" % (dirname, err) if hasattr(self, 'path_unpacked'): for path in self.path_unpacked: walk(path, visit, None) elif self.path: walk(self.path, visit, None) else: self.logger.info("Nothing to do.")
def load_data(basepath):
    """Walk *basepath* and parse every .txt file found into a record of
    [categories, id, title, description]; returns the list of records."""
    records = []

    def load_file(fname):
        # File layout: categories line, numeric id, title, description.
        with codecs.open(fname, encoding='utf-8') as fp:
            lines = fix_chars(fp.read()).splitlines()
        if len(lines) > 3:
            cats = [c.strip() for c in lines[0].split(",")]
            records.append([cats, int(lines[1].strip()), lines[2], lines[3]])
        else:
            warn("Invalid input file: {0}. Lines: {1}. Skipping."
                 .format(fname, len(lines)))

    def visitor(arg, dirname, fnames):
        # os.path.walk callback: load every .txt file in this directory.
        for fname in fnames:
            if fname.endswith(".txt"):
                load_file(pathjoin(dirname, fname))

    walk(basepath, visitor, None)
    return records
def build_inputs(self): root_path = path.join(self._path_to_root,self._sample_name) root_path = path.expanduser(root_path) sample_dirs = [] path.walk(root_path,get_leaf_dirs,sample_dirs) proc_groups = {} for dir in sample_dirs: dirroot,name = path.split(dir) chanroot,procname = path.split(dirroot) dontcare,channame = path.split(chanroot) if channame not in proc_groups: proc_groups[channame] = {} if procname not in proc_groups[channame]: proc_groups[channame][procname] = [] isMod = [True] if(path.exists('%s.root'%dir)): path.walk(dir,needs_update,isMod) if isMod[0]: print 'Creating input file %s.root'%name command = 'hadd -v 0 -f %s.root %s/*.root'%(dir,dir) subprocess.call(command,shell=True) proc_groups[channame][procname].append('%s.root'%dir) self._proc_groups = proc_groups
def dir_list(dir, filter_list=(), filter_excludes=True):
    """Returns a list of files in 'dir' and its subdirs."""
    global cache_dir
    import fnmatch
    if dir in cache_dir:
        # Previously listed: reuse the cached result.
        return cache_dir[dir]

    def collect(acc, where, names):
        # os.path.walk visitor: store the full path of every entry.
        acc.extend(path.join(where, n) for n in names)

    def dropped(candidate):
        """True when candidate must not appear in the final listing."""
        if path.isdir(candidate):
            return True  # only files are returned
        if filter_list:
            rel = candidate[len(dir):]  # pattern shouldn't match home dir
            hit = any(fnmatch.fnmatch(rel, pat) for pat in filter_list)
            if hit:
                return filter_excludes
            return not filter_excludes

    file_list = []
    filter_list = filter_list or get_config_log_filter()
    # Get a list of log files, including those in subdirs.
    path.walk(dir, collect, file_list)
    file_list = [f for f in file_list if not dropped(f)]
    cache_dir[dir] = file_list
    return file_list
def scan_store(name, dry=False):
    # Scan one watched store: hash every file under its path via SQLhash
    # and return a dict of run statistics. `dry` skips the actual walk.
    stats = dict(fnew=0, fdiffers=0, ftotal=0, fskipped=0, size_new=0, size_skipped=0)
    if name not in ICENV['WATCHED']:
        raise IOError('store does not exist!')
    path = ICENV['WATCHED'][name]
    stats['path'] = path
    # One log file per run: <store>-<algorithm>-<timestamp>.log.
    logger = init_logger(join(LOG_DIR, '{}-{}-{}.log'.format(name, ICENV['ALGORITHM'], now())))
    sql_hasher = SQLhash(dbfile(name, ICENV), ICENV['ALGORITHM'])
    tstart = time()
    try:
        if not dry:
            # rescan is the walk visitor; it updates `stats` in place.
            walk(path, rescan, (ICENV, logger, sql_hasher, stats))
    except KeyboardInterrupt:
        # Ctrl-C stops the scan but still reports partial statistics.
        logger.debug('caught KeyboardInterrupt; stop!')
    except Exception as err:
        logger.debug('undefined error: {}'.format(err))
        raise err
    tstop = time()
    stats['size'] = sql_hasher.size()
    stats['tdiff'] = tstop - tstart
    # MB/s; `or 1` guards against division by zero on near-instant runs.
    stats['speed'] = (stats['size_new'] / pow(1024, 2)) / (stats['tdiff'] or 1)
    stats['line'] = 79 * '-'  # separator used by the report output
    stats['algorithm'] = ICENV['ALGORITHM']
    stats['label'] = name
    stats['filecount'] = sql_hasher.length()
    stats['runtime'] = round(tstop - tstart, 5)
    stats['size_sum'] = grab_unit(stats['size'])
    stats['took'] = ftime(stats['tdiff'])
    # Humanize the byte counters for display.
    for key in ['size_new', 'size_skipped']:
        stats[key] = grab_unit(stats[key])
    return stats
def __init__(self, session, args = None):
    # Skin selector screen; uses the class-level skin layout explicitly.
    self.skin = SkinSelector.skin
    Screen.__init__(self, session)
    self.skinlist = []      # filled by the self.find walk visitor below
    self.previewPath = ""   # preview image path for the current selection
    path.walk(self.root, self.find, "")
    self.skinlist.sort()
    self["SkinList"] = MenuList(self.skinlist)
    self["Preview"] = Pixmap()
    self["actions"] = NumberActionMap(["WizardActions", "InputActions", "EPGSelectActions"],
    {
        "ok": self.ok,
        "back": self.close,
        "up": self.up,
        "down": self.down,
        "left": self.left,
        "right": self.right,
        "info": self.info,
    }, -1)
    self.onLayoutFinish.append(self.layoutFinished)
def path_checksum(paths):
    """
    Recursively calculates a checksum representing the contents of all files
    found with a sequence of file and/or directory paths.
    http://code.activestate.com/recipes/576973-getting-the-sha-1-or-md5-hash-of-a-directory/
    """
    if not hasattr(paths, '__iter__'):
        # BUG FIX: removed `self.failed_builds_counter += 1` — this is a
        # module-level function, so `self` was undefined and the line
        # raised NameError instead of the intended TypeError.
        raise TypeError('sequence or iterable expected not %r!' % type(paths))

    def _update_checksum(checksum, dirname, filenames):
        """ Update the checksum for a file """
        for filename in sorted(filenames):
            path = path_join(dirname, filename)
            if isfile(path):
                file_handler = open(path, 'rb')
                while 1:
                    buf = file_handler.read(4096)
                    if not buf:
                        break
                    checksum.update(buf)
                file_handler.close()

    chksum = hashlib.sha1()
    for path in sorted([normpath(f) for f in paths]):
        if path_exists(path):
            if isdir(path):
                walk(path, _update_checksum, chksum)
            elif isfile(path):
                # BUG FIX: the visitor expects a *list* of names; passing
                # the bare basename string made it iterate over the string's
                # characters, so single-file paths were silently skipped.
                _update_checksum(chksum, dirname(path), [basename(path)])
    return chksum.hexdigest()
def __init__(self, session, args = None):
    # Skin selector screen with explanatory labels and a preview pane.
    Screen.__init__(self, session)
    self.skinlist = []      # filled by the self.find walk visitor below
    self.previewPath = ""   # preview image path for the current selection
    path.walk(self.root, self.find, "")
    self.skinlist.sort()
    self["SkinList"] = MenuList(self.skinlist)
    self["Preview"] = Pixmap()
    self["lab1"] = Label(_("Select skin:"))
    self["lab2"] = Label(_("Preview:"))
    self["lab3"] = Label(_("Select your skin and press OK to activate the selected skin."))
    self["actions"] = NumberActionMap(["WizardActions", "InputActions", "EPGSelectActions"],
    {
        "ok": self.ok,
        "back": self.close,
        "up": self.up,
        "down": self.down,
        "left": self.left,
        "right": self.right,
    }, -1)
    self.onLayoutFinish.append(self.layoutFinished)
def scanAudiobooks(self):
    # Scan self.mediaPath for audiobook folders that are not yet present
    # in library.db, insert them, and index their files.
    self._book_files = []
    self._book_cover = ''
    db = sqlite3.connect('library.db')
    c = db.cursor()
    for book in listdir(self.mediaPath):
        if path.isdir(path.join(self.mediaPath,book)):
            print 'Found: ', book
            c.execute("SELECT * FROM audiobook where foldername=?", (book,))
            if len(c.fetchall()) == 0:
                # Unknown book: reset the per-book scan state that the
                # walker callback fills in.
                self._book_files = []
                self._book_cover = ''
                self.length = 0
                c.execute("INSERT INTO audiobook (title, foldername) values (?,?)",(book, book,))
                rowid = c.lastrowid
                print 'Inserted book: %s with ID %i' % (book, rowid)
                # NOTE(review): the third walk argument is the *builtin*
                # `id` function — `rowid` was presumably intended; confirm
                # what scanAudiobooks_walker expects as its arg.
                path.walk(path.abspath(path.join(self.mediaPath,book)),self.scanAudiobooks_walker,id)
                firstfile = True  # NOTE(review): assigned but never read
                i=1
                # Insert the files in path order, numbering them from 1.
                for bookfile in sorted(self._book_files, key=itemgetter('filepath')):
                    values = (rowid,bookfile['title'],bookfile['filepath'],bookfile['length'],i,)
                    c.execute("INSERT INTO audiobookfiles (book,title,filepath,length,number) values (?,?,?,?,?)", values)
                    i = i+1
                # NOTE(review): raises IndexError when the walker found no
                # files (self._book_files empty) — confirm intended.
                c.execute("Update audiobook SET title=?, author=?, cover=?, length=?, progress_part=? where id=?", (self._book_files[0]['booktitle'],self._book_files[0]['author'],self._book_cover,self.length,1,rowid,))
                db.commit()
    db.close()
def __init__(self, session, args=None):
    # Skin selector screen; live preview updates on selection change.
    Screen.__init__(self, session)
    self.skinlist = []      # filled by the self.find walk visitor below
    self.previewPath = ""   # preview image path for the current selection
    path.walk(self.root, self.find, "")
    self["key_red"] = StaticText(_("Close"))
    self["introduction"] = StaticText(
        _("Press OK to activate the selected skin."))
    self.skinlist.sort()
    self["SkinList"] = MenuList(self.skinlist)
    # Refresh the preview whenever the highlighted entry changes.
    self["SkinList"].onSelectionChanged.append(self.loadPreview)
    self["Preview"] = Pixmap()
    self["actions"] = NumberActionMap(
        ["EPGSelectActions", "OkCancelActions"],
        {
            "ok": self.ok,
            "cancel": self.close,
            "red": self.close,
            "info": self.info,
        })
    self.onLayoutFinish.append(self.layoutFinished)
def stk_checksum(compare=False):
    """Recursively calculates a checksum representing the contents of all
    python files from STK.

    :param compare: compares stk's python files checksum against saved one
    :return: True if equal, False if not
    """
    def _feed(digest, folder, names):
        """walk visitor: hash every .py file of this folder into digest."""
        for name in sorted(names):
            candidate = join(folder, name)
            if isfile(candidate) and candidate.endswith('.py'):
                with open(candidate, 'rb') as handle:
                    while 1:
                        chunk = handle.read(4096)
                        if not chunk:
                            break
                        digest.update(chunk)

    digest = sha1()
    here = dirname(abspath(__file__))
    walk(here, _feed, digest)
    if compare:
        # Compare against the digest stored alongside the sources.
        with open(join(here, 'stk_hash.sha1'), 'rb') as saved_file:
            return saved_file.read() == digest.hexdigest()
    return digest.hexdigest()
def _crawl_oxford_and_find_main_xml(self):
    """
    A package contains several subdirectory corresponding to each article.
    An article is actually identified by the existence of a main.pdf and
    a main.xml in a given directory.
    """
    self.found_articles = []

    def visit(arg, dirname, names):
        # Collect the full path of every XML file in this directory.
        xml_names = [entry for entry in names if ".xml" in entry]
        if not xml_names:
            return
        try:
            for entry in xml_names:
                self.found_articles.append(join(dirname, entry))
        except Exception as err:
            register_exception()
            print("ERROR: can't normalize %s: %s" % (dirname, err),
                  file=sys.stderr)

    if hasattr(self, 'path_unpacked'):
        walk(self.path_unpacked, visit, None)
    elif self.path:
        walk(self.path, visit, None)
    else:
        self.logger.info("Nothing to do.")
def path_checksum(self, paths):
    """
    Recursively calculates a checksum representing the contents of all files
    found with a sequence of file and/or directory paths.
    """
    if not hasattr(paths, '__iter__'):
        raise TypeError('sequence or iterable expected not %r!' % type(paths))

    def _update_checksum(checksum, dirname, filenames):
        # os.path.walk-style visitor: hash the bytes of every regular file
        # in `filenames` (sorted, for a deterministic digest).
        for filename in sorted(filenames):
            path = path_join(dirname, filename)
            if isfile(path):
                fh = open(path, 'rb')
                while 1:
                    buf = fh.read(4096)
                    if not buf :
                        break
                    checksum.update(buf)
                fh.close()

    chksum = hashlib.sha1()
    for path in sorted([normpath(f) for f in paths]):
        if path_exists(path):
            if isdir(path):
                walk(path, _update_checksum, chksum)
            elif isfile(path):
                # BUG FIX: the visitor expects a *list* of names; passing
                # the bare basename string made it iterate over the string's
                # characters, so single-file paths were silently skipped.
                _update_checksum(chksum, dirname(path), [basename(path)])
    return chksum.hexdigest()
def _changed(self):
    """Return 1 when the watched directory changed since the last check.

    Compares both the directory mtime and the full (sorted) file list
    against the cached ``_v_last_read`` / ``_v_last_filelist`` values,
    updating the cache on change.
    """
    mtime=0
    filelist=[]
    try:
        fp = expandpath(self.filepath)
        mtime = stat(fp)[8]
        # some Windows directories don't change mtime
        # when a file is added to or deleted from them :-(
        # So keep a list of files as well, and see if that
        # changes
        path.walk(fp,_walker,filelist)
        filelist.sort()
    # BUG FIX: narrowed bare `except:` — it also swallowed SystemExit and
    # KeyboardInterrupt; errors are still logged as before.
    except Exception:
        LOG('DirectoryView', ERROR,
            'Error checking for directory modification',
            error=exc_info())
    if mtime != self._v_last_read or filelist != self._v_last_filelist:
        self._v_last_read = mtime
        self._v_last_filelist = filelist
        return 1
    return 0
def main():
    """Add custom labels to the TCGA feature matrix stored in MongoDB.

    Parses command-line options, collects general and tumor-specific FFN
    custom files under --root via findFNFfiles, and applies their labels
    to the feature_matrix collection.

    Raises ValueError when no general custom file is found.
    """
    parser = argparse.ArgumentParser(description="Utility to add custom labels to TCGA feature matrix in MongoDB")
    parser.add_argument("--host", required=True, help="MongoDB host name")
    parser.add_argument("--port", required=True, type=int, help="MongoDB port")
    parser.add_argument("--db", required=True, help="Database name")
    parser.add_argument("--tumor", required=True, help="Tumor type")
    parser.add_argument("--root", required=True, help="Root path to search for FFN custom files")
    parser.add_argument("--dir", default="aux", help="directory to look for FFN custom files")
    parser.add_argument("--loglevel", default="INFO", help="Logging Level")
    args = parser.parse_args()
    configure_logging(args.loglevel.upper())
    logging.info('starting add custom labels to feature matrix:\n\t%s' % (args))

    # findFNFfiles (walk visitor) fills these two lists on args.
    args.topfiles = []
    args.tumorfiles = []
    # BUG FIX: removed a stray no-op `path.walk` expression statement that
    # preceded this call.
    path.walk(args.root, findFNFfiles, args)
    logging.info('%s %s', args.topfiles, args.tumorfiles)

    conn = pymongo.Connection(args.host, args.port)
    collection = conn[args.db]["feature_matrix"]
    if 0 == len(args.topfiles):
        raise ValueError('did not find a general custom file')
    else:
        for topfile in args.topfiles:
            addLabels(collection, topfile)
    if 0 == len(args.tumorfiles):
        logging.info('did not find a tumor type custom file')
    else:
        for tumorfile in args.tumorfiles:
            addLabels(collection, tumorfile)
    conn.close()
    logging.info('finished add custom labels to feature matrix')
def load_data(basepath):
    """Collect [categories, id, title, description] records from every
    .txt file found anywhere under *basepath*."""
    collected = []

    def parse_file(filepath):
        # File layout: categories line, numeric id, title, description.
        with codecs.open(filepath, encoding='utf-8') as handle:
            content = fix_chars(handle.read()).splitlines()
        if len(content) <= 3:
            warn("Invalid input file: {0}. Lines: {1}. Skipping.".format(
                filepath, len(content)))
            return
        categories = [part.strip() for part in content[0].split(",")]
        collected.append(
            [categories, int(content[1].strip()), content[2], content[3]])

    def on_dir(arg, dirname, fnames):
        # os.path.walk callback: parse every .txt file in this directory.
        for entry in fnames:
            if entry.endswith(".txt"):
                parse_file(pathjoin(dirname, entry))

    walk(basepath, on_dir, None)
    return collected
def find_data_files(srcdir, destdir, *wildcards, **kw):
    """
    get a list of all files under the srcdir matching wildcards,
    returned in a format to be used for install_data
    """
    def walk_helper(arg, dirname, files):
        # Never collect anything under Subversion metadata directories.
        if '.svn' in dirname:
            return
        lst, patterns, converter, dest = arg
        hits = []
        for pattern in patterns:
            full_pattern = os.path.normpath(os.path.join(dirname, pattern))
            for entry in files:
                full_entry = os.path.normpath(os.path.join(dirname, entry))
                if fnmatch.fnmatch(full_entry, full_pattern) \
                        and not os.path.isdir(full_entry):
                    hits.append(full_entry)
        if hits:
            # Map the source directory prefix onto the destination prefix.
            lst.append((converter.sub(dest, dirname), hits))

    file_list = []
    recursive = kw.get('recursive', True)
    converter = re.compile('^({0})'.format(srcdir))
    state = (file_list, wildcards, converter, destdir)
    if recursive:
        walk(srcdir, walk_helper, state)
    else:
        top_entries = [os.path.basename(f)
                       for f in glob.glob(os.path.join(srcdir, '*'))]
        walk_helper(state, srcdir, top_entries)
    return file_list
def _changed(self):
    """Return 1 when the watched directory changed since the last check.

    Compares the directory mtime — and, on win32, the full sorted file
    list as well — against the cached ``_v_last_read`` /
    ``_v_last_filelist`` values, updating the cache on change.
    """
    mtime = 0
    filelist = []
    try:
        mtime = stat(self._filepath)[8]
        if platform == 'win32':
            # some Windows directories don't change mtime
            # when a file is added to or deleted from them :-(
            # So keep a list of files as well, and see if that
            # changes
            path.walk(self._filepath, self._walker, filelist)
            filelist.sort()
    # BUG FIX: narrowed bare `except:` — it also swallowed SystemExit and
    # KeyboardInterrupt; errors are still logged as before.
    except Exception:
        LOG('DirectoryView', ERROR,
            'Error checking for directory modification',
            error=exc_info())
    if mtime != self._v_last_read or filelist != self._v_last_filelist:
        self._v_last_read = mtime
        self._v_last_filelist = filelist
        return 1
    return 0
def __init__(self, session, args=None):
    # Skin selector screen; uses the class-level skin layout explicitly.
    self.skin = MCS_SkinSelector.skin
    Screen.__init__(self, session)
    self.skinlist = []      # filled by the self.find walk visitor below
    self.previewPath = ""   # preview image path for the current selection
    path.walk(self.root, self.find, "")
    self.skinlist.sort()
    self["SkinList"] = MenuList(self.skinlist)
    self["Preview"] = Pixmap()
    self["actions"] = NumberActionMap(
        ["WizardActions", "InputActions", "EPGSelectActions"],
        {
            "ok": self.ok,
            "back": self.close,
            "up": self.up,
            "down": self.down,
            "left": self.left,
            "right": self.right
        }, -1)
    self.onLayoutFinish.append(self.layoutFinished)
def __init__(self, session, args=None):
    # LCD clock selector screen: lists clocks found under self.root and
    # shows a preview of the highlighted one.
    Screen.__init__(self, session)
    self.clocklist = []     # filled by the self.find walk visitor below
    self.previewPath = ""   # preview image path for the current selection
    path.walk(self.root, self.find, "")
    self.clocklist.sort()
    self["ClockList"] = MenuList(self.clocklist)
    self["Preview"] = Pixmap()
    self["lab1"] = Label(_("Select LCD clock:"))
    self["lab2"] = Label(_("Preview:"))
    self["lab3"] = Label(
        _("Select your LCD clock and press OK to activate the selected clock."
          ))
    self["actions"] = NumberActionMap(
        ["WizardActions", "InputActions", "EPGSelectActions"],
        {
            "ok": self.ok,
            "back": self.close,
            "up": self.up,
            "down": self.down,
            "left": self.left,
            "right": self.right,
        }, -1)
    self.onLayoutFinish.append(self.layoutFinished)
def __init__(self, session, args=None):
    """Skin selection screen: collect available skins and present them for activation."""
    Screen.__init__(self, session)
    self.skinlist = []
    self.previewPath = ""
    # self.find is the walk visitor that appends entries to self.skinlist
    path.walk(self.root, self.find, "")
    self["key_red"] = StaticText(_("Close"))
    self["introduction"] = StaticText(_("Press OK to activate the selected skin."))
    self.skinlist.sort()
    self["SkinList"] = MenuList(self.skinlist)
    self["Preview"] = Pixmap()
    bindings = {
        "ok": self.ok,
        "back": self.close,
        "red": self.close,
        "up": self.up,
        "down": self.down,
        "left": self.left,
        "right": self.right,
        "info": self.info,
    }
    self["actions"] = NumberActionMap(
        ["WizardActions", "InputActions", "EPGSelectActions"], bindings, -1)
    self.onLayoutFinish.append(self.layoutFinished)
def write_zip_file(context, abs_path, rel_path): """ Write a file given by abs_path if it is a file If a directory, pass on to path.walk """ zip_file, work_dir, verbose = context def walk_zip_directory(zip_file, dirname, fnames, verbose=verbose, root=work_dir): """ Directory walker to recursively zip folders """ for fl in fnames: abs_path = path.join(dirname, fl) if path.isdir(abs_path): continue # extract path relative to working dir pfx = path.commonprefix([root, abs_path]) rel_path = abs_path[len(pfx):].lstrip("/") if verbose: print "Writing", rel_path zip_file.write(abs_path, rel_path) if path.isdir(abs_path): path.walk(abs_path, walk_zip_directory, zip_file) else: zip_file.write(abs_path, rel_path)
def find(directory, exts, exclude=False, blacklist=STD_BLACKLIST):
    """recursivly find files ending with the given extensions from the directory

    :type directory: str
    :param directory: directory where the search should start

    :type exts: basestring or list or tuple
    :param exts: extensions or lists or extensions to search

    :type exclude: boolean
    :param exclude:
      if this argument is True, returning files NOT ending with the given
      extensions

    :type blacklist: list or tuple
    :param blacklist:
      optional list of files or directory to ignore, default to the value of
      `logilab.common.STD_BLACKLIST`

    :rtype: list
    :return:
      the list of all matching files
    """
    # DOC FIX: the exclude description was mislabelled ':param exts:'.
    # Normalise exts to a tuple so str.endswith can test all of them at once.
    if isinstance(exts, basestring):
        exts = (exts,)
    else:
        exts = tuple(exts)
    if exclude:
        def match(filename, exts):
            # keep files NOT ending with any of the extensions
            return not filename.endswith(exts)
    else:
        def match(filename, exts):
            return filename.endswith(exts)

    def func(files, directory, fnames):
        """walk handler"""
        # remove files/directories in the black list in place so walk()
        # does not descend into them
        for norecurs in blacklist:
            try:
                fnames.remove(norecurs)
            except ValueError:
                continue
        for filename in fnames:
            src = join(directory, filename)
            if isdir(src):
                continue
            if match(filename, exts):
                files.append(src)

    files = []
    walk(directory, func, files)
    return files
def configure(self, other=(), enable=(), disable=(), with_=(), without=()):
    """Configure a CMake build tree for this package.

    Sanitizes every CMakeLists.txt under the work dir (commenting out SET()s
    of blacklisted variables via sed), writes a compiler-override rules file,
    then runs cmake in a fresh build directory with the assembled arguments.

    other           -- extra raw cmake arguments appended verbatim
    enable/disable  -- toggles emitted as -DENABLE_<x>=ON / OFF
    with_/without   -- toggles emitted as -DWITH_<x>=ON / OFF
    """
    self.builddir = P.join(self.workdir, 'build')

    def remove_danger(files, dirname, fnames):
        # walk visitor: collect the path of every CMakeLists.txt in the tree
        files.extend([P.join(dirname,f) for f in fnames if f == 'CMakeLists.txt'])

    files = []
    P.walk(self.workdir, remove_danger, files)
    cmd = ['sed', '-ibak']
    # strip out vars we must control from every CMakeLists.txt
    for var in self.BLACKLIST_VARS:
        cmd.append('-e')
        cmd.append('s/^[[:space:]]*[sS][eE][tT][[:space:]]*([[:space:]]*%s.*)/#BINARY BUILDER IGNORE /g' % var)
    cmd.extend(files)
    self.helper(*cmd)
    # force our toolchain through a CMAKE_USER_MAKE_RULES_OVERRIDE file
    build_rules = P.join(self.env['BUILD_DIR'], 'my_rules.cmake')
    with file(build_rules, 'w') as f:
        print('SET (CMAKE_C_COMPILER "%s" CACHE FILEPATH "C compiler" FORCE)' % (findfile(self.env['CC'], self.env['PATH'])), file=f)
        print('SET (CMAKE_C_COMPILE_OBJECT "<CMAKE_C_COMPILER> <DEFINES> %s <FLAGS> -o <OBJECT> -c <SOURCE>" CACHE STRING "C compile command" FORCE)' % (self.env.get('CPPFLAGS', '')), file=f)
        print('SET (CMAKE_CXX_COMPILER "%s" CACHE FILEPATH "C++ compiler" FORCE)' % (findfile(self.env['CXX'], self.env['PATH'])), file=f)
        print('SET (CMAKE_Fortran_COMPILER "%s" CACHE FILEPATH "Fortran compiler" FORCE)' % (findfile(self.env['F77'], self.env['PATH'])), file=f)
        print('SET (CMAKE_CXX_COMPILE_OBJECT "<CMAKE_CXX_COMPILER> <DEFINES> %s <FLAGS> -o <OBJECT> -c <SOURCE>" CACHE STRING "C++ compile command" FORCE)' % (self.env.get('CPPFLAGS', '')), file=f)
    cmd = ['cmake']
    args = [
        '-DCMAKE_INSTALL_PREFIX=%(INSTALL_DIR)s' % self.env,
        '-DCMAKE_BUILD_TYPE=MyBuild',
        '-DCMAKE_USER_MAKE_RULES_OVERRIDE=%s' % build_rules,
        '-DCMAKE_SKIP_RPATH=YES',
        '-DCMAKE_INSTALL_DO_STRIP=OFF',
    ]
    # macOS needs explicit arch/sysroot/deployment-target settings
    if self.arch.os == 'osx':
        args.append('-DCMAKE_OSX_ARCHITECTURES=%s' % self.env['OSX_ARCH'])
        args.append('-DCMAKE_OSX_SYSROOT=%s' % self.env['OSX_SYSROOT'])
        args.append('-DCMAKE_OSX_DEPLOYMENT_TARGET=%s' % self.env['OSX_TARGET'])
    for arg in disable:
        args.append('-DENABLE_%s=OFF' % arg)
    for arg in enable:
        args.append('-DENABLE_%s=ON' % arg)
    for arg in without:
        args.append('-DWITH_%s=OFF' % arg)
    for arg in with_:
        args.append('-DWITH_%s=ON' % arg)
    args.extend([
        '-DCMAKE_PREFIX_PATH=%(INSTALL_DIR)s;%(NOINSTALL_DIR)s' % self.env,
        '-DLIB_POSTFIX=',
    ])
    # NOTE(review): list comprehension used purely for its append side effect
    [args.append(arg) for arg in other]
    os.mkdir(self.builddir)
    cmd = cmd + args + [self.workdir]
    self.helper(*cmd, cwd=self.builddir)
def export(from_dir, to_dir, blacklist=BASE_BLACKLIST,
           ignore_ext=IGNORED_EXTENSIONS, verbose=0):
    """make a mirror of `from_dir` in `to_dir`, omitting directories and
    files listed in the black list or ending with one of the given
    extensions

    :type from_dir: str
    :param from_dir: directory to export

    :type to_dir: str
    :param to_dir: destination directory

    :type blacklist: list or tuple
    :param blacklist: list of files or directories to ignore, default to
    the content of `BASE_BLACKLIST`

    :type ignore_ext: list or tuple
    :param ignore_ext: list of extensions to ignore, default to the content
    of `IGNORED_EXTENSIONS`

    :type verbose: bool
    :param verbose: flag indicating wether information about exported files
    should be printed to stderr, default to False
    """
    def make_mirror(_, directory, fnames):
        """walk handler"""
        # prune blacklisted names in place so walk() skips those subtrees
        for skipped in blacklist:
            try:
                fnames.remove(skipped)
            except ValueError:
                continue
        for filename in fnames:
            # don't include binary files
            if any(filename.endswith(ext) for ext in ignore_ext):
                continue
            src = join(directory, filename)
            dest = to_dir + src[len(from_dir):]
            if verbose:
                print >> sys.stderr, src, '->', dest
            if isdir(src):
                if not exists(dest):
                    mkdir(dest)
            else:
                if exists(dest):
                    remove(dest)
                shutil.copy2(src, dest)

    try:
        mkdir(to_dir)
    except OSError:
        pass  # destination may already exist
    walk(from_dir, make_mirror, None)
def handle(self, *args, **options): self.directory = options.get('dir', None) if not self.directory: print "Please specify full path to directory with the --dir argument" return #Run once only when the directory structure was changed. #self.runArchiveUpdate() walk(self.directory, self.processDir, None)
def check_local_path(local_path):
    """Raise if local_path does not exist; if it is a directory, list it via visit_dir."""
    if not path.exists(local_path):
        message = "不存在%s" % local_path
        print(message)
        raise Exception(message)
    if path.isdir(local_path):
        print("%s是一个目录, 其下有:" % local_path)
        path.walk(local_path, visit_dir, ())
def main(): print 'starting' data_root = sys.argv[1] catalog = dict() walk(data_root, catalog_files, (catalog, ALL_EXTENSIONS)) output_missing_files_report(catalog) output_incomplete_metadata_report(catalog) output_complete_metadata_report(catalog)
def find(directory, exts, exclude=False, blacklist=STD_BLACKLIST):
    """recursivly find files ending with the given extensions from the directory

    :type directory: str
    :param directory: directory where the search should start

    :type exts: basestring or list or tuple
    :param exts: extensions or lists or extensions to search

    :type exclude: boolean
    :param exclude:
      if this argument is True, returning files NOT ending with the given
      extensions

    :type blacklist: list or tuple
    :param blacklist:
      optional list of files or directory to ignore, default to the value of
      `logilab.common.STD_BLACKLIST`

    :rtype: list
    :return:
      the list of all matching files
    """
    # DOC FIX: the exclude description was mislabelled ':param exts:'.
    # Normalise exts to a tuple so str.endswith can test all of them at once.
    if isinstance(exts, basestring):
        exts = (exts,)
    else:
        exts = tuple(exts)
    if exclude:
        def match(filename, exts):
            # keep files NOT ending with any of the extensions
            return not filename.endswith(exts)
    else:
        def match(filename, exts):
            return filename.endswith(exts)

    def func(files, directory, fnames):
        """walk handler"""
        # remove files/directories in the black list in place so walk()
        # does not descend into them
        for norecurs in blacklist:
            try:
                fnames.remove(norecurs)
            except ValueError:
                continue
        for filename in fnames:
            src = join(directory, filename)
            if isdir(src):
                continue
            if match(filename, exts):
                files.append(src)

    files = []
    walk(directory, func, files)
    return files
def chmodwwdir(prefix):
    '''ww is world writable: recursively chmod every regular file under
    prefix to mode 0o777 (rwx for owner, group and others).'''
    def visit(z, dirname, names):
        for name in names:
            path = os.path.normpath(os.path.join(dirname, name))
            if isfile(path):
                # BUG FIX: the mode was the decimal literal 777 (== 0o1411),
                # which is not world-writable and even dropped owner read
                # permission; the intended mode is octal 0o777.
                os.chmod(path, 0o777)
    walk(prefix, visit, None)
def all_matching_files(d, pattern):
    """Return the paths of all files under directory d whose basenames match
    the fnmatch-style pattern."""
    matches = []

    def collect(acc, dirname, entries):
        # keep only names matching the pattern, qualified with their directory
        acc.extend(path.join(dirname, name)
                   for name in fnmatch.filter(entries, pattern))

    path.walk(d, collect, matches)
    return matches
def __init__(self, session, screenTitle=_("Clock Skin")):
    """Clock-skin selector: scan the LCD skin directory for candidate skins."""
    from os.path import walk
    SkinSelector.__init__(self, session, screenTitle=screenTitle)
    self.skinName = ["ClockSkinSelector", "SkinSelector"]
    self.rootDir = resolveFilename(SCOPE_LCDSKIN, "lcd_skin/")
    self.config = config.skin.clock_skin
    self.current = currentClockSkin
    self.xmlList = []
    # self.find is the walk visitor that fills self.xmlList
    walk(self.rootDir, self.find, "")
def count_lines_in_directory_tree(self, directory, extension):
    """Walk `directory`, accumulating counts via self.process_dir, and return
    the (n_dirs, n_files, n_lines) totals; counters are reset before and
    after the walk."""
    self.directory = directory
    self.clear()
    walk(directory, self.process_dir, extension)
    # capture the totals before wiping the counters again
    totals = (self.n_dirs, self.n_files, self.n_lines)
    self.clear()
    return totals
def SearchPath(self):
    """Refresh self.fileList from the currently selected path."""
    # Empty the file list in place.
    del self.fileList[:]
    # List every file under the selected directory.
    selected = self.filePath
    if self.isRasterFile:
        walk(selected, self.visitFiles, 0)
    else:
        ReadPathFromText(selected)
def getSongList(self, artist, album):
    """Return the song entries found under mediaPath/artist/album, sorted by
    their 'path' key; an empty list when nothing was found."""
    self._songList = []
    album_dir = '/'.join([self.mediaPath, artist, album])
    # self.fillSongList (the walk visitor) appends song dicts to _songList
    path.walk(path.abspath(album_dir), self.fillSongList, 0)
    if not self._songList:
        return []
    return sorted(self._songList, key=itemgetter('path'))