def makePdbxPathList(fp, cachePath=".", skipFile=None):
    """Write the list of pdbx (.cif) file paths found under cachePath to file fp.

    Args:
        fp: path of the output file; one repository file path per line.
        cachePath: root directory to walk (default: current directory).
        skipFile: optional path to a text file of entry ID codes (one per
            line) whose .cif files should be excluded from the listing.

    Returns:
        True on success, False on any failure (the exception is logged).
    """
    try:
        skipD = {}
        # Bug fix: os.access() takes an integer mode such as os.R_OK, not the
        # string "r" (which raises TypeError, previously swallowed by the
        # broad except below, silently disabling the skip list).
        if skipFile and os.access(skipFile, os.R_OK):
            with open(skipFile, "r", encoding="utf-8") as ifh:
                for line in ifh:
                    # Normalize each ID code to its lowercase .cif file name.
                    idcode = str(line[:-1]).strip().lower() + ".cif"
                    skipD[idcode] = idcode
            logger.info("Skip list length %d", len(skipD))
        #
        with open(fp, "w", encoding="utf-8") as ofh:
            for root, _, files in scandir.walk(cachePath, topdown=False):
                # Ignore any path segment flagged for removal.
                if "REMOVE" in root:
                    continue
                for name in files:
                    # Entry IDs are 4 characters, so valid file names are
                    # exactly 8 characters long ("xxxx.cif").
                    if name.endswith(".cif") and len(name) == 8 and name not in skipD:
                        ofh.write("%s\n" % os.path.join(root, name))
        return True
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        return False
def enumerate_tasks(self):
    """Yield the path of every .mp3/.aac file under self.directory.

    followlinks stays False: following symlinks can send the walk into an
    infinite loop on cyclic link structures.
    """
    audio_exts = ('.mp3', '.aac')
    for parent, _subdirs, names in walk(self.directory, followlinks=False):
        # Build full paths lazily and keep only the audio extensions;
        # splitext keeps the check cheap for the remaining-time estimator.
        for candidate in (join(parent, entry) for entry in names):
            if splitext(candidate)[1] in audio_exts:
                yield candidate
def enumerate_tasks(self):
    """Generator of audio-file paths (.mp3, .aac) beneath self.directory.

    Symlinks are deliberately not followed (followlinks=False) to avoid
    an endless walk over cyclic links.
    """
    tree = walk(self.directory, followlinks=False)
    for base, _dirs, entries in tree:
        for entry in entries:
            full_path = join(base, entry)
            # Extension test keeps memory and time-estimate bookkeeping small.
            ext = splitext(full_path)[1]
            if ext == '.mp3' or ext == '.aac':
                yield full_path
def list_files(dir, name, level=0, indentation=0,followlinks=True):
    # Recursively collect files under `dir` whose file name contains `name`,
    # returning the matching "root/file" paths as a list.
    # NOTE(review): Python 2 syntax (print statements); `dir` and `file`
    # shadow builtins; `level` is accepted but never used.
    str_ind = ' '*indentation
    print str_ind+'list_files in "%s" with name "%s"' % (dir, name)
    r = []
    walk = scandir.walk(dir, followlinks=followlinks)
    print
    for root, dirs, files in walk:
        # Skip any directory whose path contains these fragments.
        if '/root' in root:
            continue
        if '/eps' in root:
            continue
        if (len(files) > 0):
            for file in files:
                # If a .png is present, the whole directory is abandoned
                # (break leaves the file loop, so later matches in this
                # directory are never collected).
                # NOTE(review): presumably a .png marks the directory as
                # already processed — confirm against callers.
                if file.endswith('png'):
                    print 'continue in',root
                    break
                if name in file:
                    r.append(root + "/" + file)
    print str_ind+'found %d files' % len(r)
    return r
def _process_work_queue(self):
    """Pop one work item off self.work_queue and process it.

    Handles two work item types:
      * 'dir'         — walk one directory level, queue its subdirectories
                        (left side of the deque, depth-first), and process
                        its files.
      * 'partial_dir' — continue processing the leftover files of a
                        directory whose processing previously exceeded
                        HydraUtils.LONG_PROCESSING_THRESHOLD.

    If processing runs past the threshold, the remaining files are
    re-queued as a 'partial_dir' item so the main loop can pick up new
    commands between slices of work.

    Returns:
        False if the work queue was empty (state set to 'idle'),
        True otherwise.
    """
    self.log.log(9, "_process_work_queue invoked")
    start_time = time.time()
    temp_work = []
    try:
        work_item = self.work_queue.popleft()
    # Bug fix: was a bare `except:`, which also swallows KeyboardInterrupt
    # and SystemExit; deque.popleft raises IndexError when empty.
    except IndexError:
        # No work items
        self._set_state('idle')
        return False
    work_type = work_item.get('type', None)
    self.log.log(9, "Work type: %s" % work_type)
    self._set_state('processing')
    if work_type == 'dir':
        work_dir = work_item.get('path')
        self.log.log(9, "Processing directory: %s" % work_dir)
        handled = self.handle_directory_pre(work_dir)
        if handled:
            self.stats['filtered_dirs'] += 1
        else:
            temp_work = []
            for root, dirs, files in scandir.walk(work_dir):
                # Filter subdirectories and files by calling the method in
                # the derived class.
                # NOTE(review): work_dir (not root) is passed — confirm the
                # derived-class filter expects the top-level directory.
                before_filter_dirs = len(dirs)
                before_filter_files = len(files)
                dirs[:], files[:] = self.filter_subdirectories(
                    work_dir, dirs, files)
                after_filter_dirs = len(dirs)
                after_filter_files = len(files)
                if before_filter_dirs != after_filter_dirs:
                    self.stats[
                        'filtered_dirs'] += after_filter_dirs - before_filter_dirs
                if before_filter_files != after_filter_files:
                    self.stats[
                        'filtered_files'] += after_filter_files - before_filter_files
                # We queue up any new directories to the left in our work queue.
                # This leaves the directories closer to the initial ones on the
                # right side of the work queue.
                for dir in reversed(dirs):
                    self.work_queue.appendleft({
                        'type': 'dir',
                        'path': os.path.join(root, dir)
                    })
                self.stats['queued_dirs'] += len(dirs)
                for file in files:
                    # Keep track of how long we have been processing this
                    # directory. If the time exceeds LONG_PROCESSING_THRESHOLD
                    # we will queue up the files and push them onto the
                    # processing queue and let the main processing loop have a
                    # chance to pick up new commands.
                    proc_time = time.time()
                    if (proc_time - start_time) > HydraUtils.LONG_PROCESSING_THRESHOLD:
                        temp_work.append(file)
                        continue
                    if (self.handle_file(work_dir, file)):
                        self.stats['processed_files'] += 1
                    else:
                        self.stats['skipped_files'] += 1
                # We actually want to abort the tree walk as we want to handle
                # the directory structure 1 directory at a time.
                dirs[:] = []
            if temp_work:
                # Ran out of time: re-queue the leftover files for later.
                self.work_queue.appendleft({
                    'type': 'partial_dir',
                    'path': work_dir,
                    'files': temp_work
                })
            else:
                self.stats['queued_dirs'] -= 1
                self.stats['processed_dirs'] += 1
                handled = self.handle_directory_post(work_dir)
    elif work_type == 'partial_dir':
        # For a partial directory, we need to continue processing all the
        # files remaining in the directory.
        work_dir = work_item.get('path')
        self.log.log(logging.DEBUG,
                     "Processing directory (continued): %s" % work_dir)
        for file in work_item.get('files'):
            # Keep track of how long we have been processing this directory.
            # If the time exceeds LONG_PROCESSING_THRESHOLD, we will queue up
            # the files and push them onto the processing queue and let the
            # main processing loop have a chance to pick up new commands.
            proc_time = time.time()
            if (proc_time - start_time) > HydraUtils.LONG_PROCESSING_THRESHOLD:
                temp_work.append(file)
                continue
            if (self.handle_file(work_dir, file)):
                self.stats['processed_files'] += 1
            else:
                # Consistency fix: the 'dir' branch counts files handle_file
                # rejects; the continuation path now does the same so
                # skipped_files is accurate across time-sliced directories.
                self.stats['skipped_files'] += 1
        # If temp_work is empty, we finished the remainder of the directory
        # so we will do the post directory processing.
        # If not then we will re-queue the work and continue processing after
        # checking the command queue.
        if temp_work:
            self.work_queue.appendleft({
                'type': 'partial_dir',
                'path': work_dir,
                'files': temp_work
            })
        else:
            self.stats['queued_dirs'] -= 1
            self.stats['processed_dirs'] += 1
            handled = self.handle_directory_post(work_dir)
    #elif work_type == 'file':
    #  # TODO: NOT YET IMPLEMENTED
    #  self.handle_file(work_dir, file)
    #  self.stats['processed_files'] += 1
    else:
        self.log.error(
            "Unknown work type found in work queue. Queued work item: %r"
            % work_item)
    return True