def folder_walker(src_folder, dest_folder, target_height, target_width):
    """Run ``process_image`` on every ``*.jpg`` file found under *src_folder*.

    Args:
        src_folder: Location handed to ``open_fs``. It is also used as the
            string prefix of every image path, so it is presumably a plain
            directory path rather than an FS URL -- TODO confirm.
        dest_folder: Forwarded unchanged to ``process_image``.
        target_height: Forwarded unchanged to ``process_image``.
        target_width: Forwarded unchanged to ``process_image``.

    Returns:
        None.
    """
    walker = Walker(filter=['*.jpg'])
    # Open the filesystem in a ``with`` block so it is closed when the walk
    # ends, including when ``process_image`` raises part-way through.
    with open_fs(src_folder) as src_fs:
        for path in walker.files(src_fs):
            # normpath tidies the joined string (e.g. a doubled separator
            # when the walked path already starts with one).
            image_path = os.path.normpath(src_folder + os.sep + path)
            process_image(image_path, dest_folder, target_height, target_width)
    return None
def run(self):
    """Collect the XML, PDF and image files for every acronym in ``self.acrons``.

    For each acronym the folder ``<self.xml_fs.root_path>/<acron>`` is walked
    for ``*.xml`` files (names matching ``*.*.xml`` are excluded). Every XML
    whose walked path has exactly two components is passed to
    ``self.collect_xml``; the issue folder and package name it returns are
    then passed to ``self.collect_pdf`` and ``self.collect_img``.

    NOTE(review): the source formatting was collapsed; the ``return False``
    branch is read here as pairing with the ``self.check_acrons()`` test --
    confirm against the original layout.

    Returns:
        False when ``self.check_acrons()`` is falsy, otherwise None.
    """
    if not self.check_acrons():
        return False

    # The walker holds only filter settings, so one instance serves all acronyms.
    walker = Walker(filter=["*.xml"], exclude=["*.*.xml"])

    for acron in self.acrons:
        # Lazy %-args: logging formats the message only if it is emitted.
        logging.info("Process acronym: %s", acron)
        acron_folder = path.join(self.xml_fs.root_path, acron)

        # Close the per-acronym filesystem as soon as its walk is finished.
        with fs.open_fs(acron_folder) as acron_fs:
            for xml in walker.files(acron_fs):
                # Skip anything that is not exactly two path components deep.
                if len(path.iteratepath(xml)) != 2:
                    continue

                logging.info("Process XML: %s", xml)
                issue_folder, pack_name = self.collect_xml(acron, xml)
                self.collect_pdf(acron, issue_folder, pack_name)
                self.collect_img(acron, issue_folder, pack_name)

    return None
def collect_pdf(self, acron, issue_folder, pack_name):
    """Copy the PDFs whose name contains *pack_name* into the package folder.

    Walks ``<self.pdf_fs.root_path>/<acron>/<issue_folder>`` (``max_depth=2``)
    for files matching ``*<pack_name>*.pdf`` and copies each one, through
    ``self.copy`` with ``src_fs=self.pdf_fs``, to
    ``<acron>/<issue_folder>/<pack_name>/<renamed file>``, where the file name
    comes from ``self.rename_pdf_trans_filename``.

    Args:
        acron: Acronym path component.
        issue_folder: Issue folder path component.
        pack_name: Package name; used both in the file-name filter and as the
            destination sub-folder.
    """
    walker = Walker(filter=["*" + pack_name + "*.pdf"], max_depth=2)
    source_folder = path.join(self.pdf_fs.root_path, acron, issue_folder)

    # The folder is opened only to enumerate files; close it after the walk.
    with fs.open_fs(source_folder) as source_fs:
        for pdf in walker.files(source_fs):
            # NOTE(review): only the base name is kept, so a match found in a
            # sub-folder (max_depth=2 may allow that) would be looked up
            # directly under the issue folder -- confirm this is intended.
            pdf_path = path.join(acron, issue_folder, path.basename(pdf))
            target_pdf_path = path.join(
                acron,
                issue_folder,
                pack_name,
                self.rename_pdf_trans_filename(pdf),
            )
            self.copy(pdf_path, target_pdf_path, src_fs=self.pdf_fs)
def article_ALL_constructor(source_path: str, dest_path: str, in_place: bool = False) -> None:
    """Run ``article_xml_constructor`` on every XML file under *source_path*.

    Files are found with a ``*.xml`` filter, excluding names that match
    ``*.*.xml``. A failure on one file is logged and the remaining files are
    still processed.

    Args:
        source_path: Folder to walk; it is also prepended as a plain string to
            each walked path to build the file path handed on.
        dest_path: Forwarded unchanged to ``article_xml_constructor``.
        in_place: Forwarded unchanged to ``article_xml_constructor``.
    """
    logger.info("Iniciando Construção dos XMLs")
    walker = Walker(filter=["*.xml"], exclude=["*.*.xml"])

    # Keep the filesystem open only for the duration of the walk.
    with fs.open_fs(source_path) as source_fs:
        for file_xml in tqdm(walker.files(source_fs)):
            xml_path = source_path + file_xml
            try:
                article_xml_constructor(xml_path, dest_path, in_place)
            except Exception as ex:
                # Deliberate best-effort: one bad file must not stop the batch.
                logger.info("não foi possível gerar o XML do Arquivo %s: %s", xml_path, ex)
def collect_img(self, acron, issue_folder, pack_name):
    """Copy the asset files whose name contains *pack_name* into the package folder.

    Walks ``<self.img_fs.root_path>/<acron>/<issue_folder>`` (``max_depth=2``,
    skipping directories named ``html``) for files matching ``*<pack_name>*``
    and copies each one, through ``self.copy`` with ``src_fs=self.img_fs``, to
    ``<acron>/<issue_folder>/<pack_name>/<file name>``.

    Args:
        acron: Acronym path component.
        issue_folder: Issue folder path component.
        pack_name: Package name; used both in the file-name filter and as the
            destination sub-folder.
    """
    walker = Walker(
        filter=["*" + pack_name + "*"], max_depth=2, exclude_dirs=["html"]
    )
    source_folder = path.join(self.img_fs.root_path, acron, issue_folder)

    # The folder is opened only to enumerate files; close it after the walk.
    with fs.open_fs(source_folder) as source_fs:
        for img in walker.files(source_fs):
            # NOTE(review): only the base name is kept, so a match found in a
            # sub-folder (max_depth=2 may allow that) would be looked up
            # directly under the issue folder -- confirm this is intended.
            file_name = path.basename(img)
            img_path = path.join(acron, issue_folder, file_name)
            target_img_path = path.join(acron, issue_folder, pack_name, file_name)
            self.copy(img_path, target_img_path, src_fs=self.img_fs)
def read_data_from_files(file_start):
    """Load every ``<file_start>*.txt`` file found under the current directory.

    Each matching file is parsed with ``np.loadtxt`` after ``[``, ``]`` and
    ``,`` characters have been replaced by spaces.

    Args:
        file_start: Prefix of the file names to load; it is placed in front of
            ``*.txt`` to form the walker's file-name filter.

    Returns:
        ``np.array`` built from the list of per-file arrays, in the order the
        walker yielded the files.
    """
    from fs import open_fs
    from fs.walk import Walker

    # One translate pass turns brackets and commas into spaces.
    separators_to_spaces = str.maketrans("[],", "   ")

    walker = Walker(filter=[file_start + '*.txt'])
    data = []
    # Close the filesystem once the directory walk is finished.
    with open_fs('./') as home_fs:
        for path in walker.files(home_fs):
            # Prefixing '.' makes the walked path relative to the working directory.
            with open('.' + path) as f:
                data.append(
                    np.loadtxt(line.translate(separators_to_spaces) for line in f)
                )
    return np.array(data)
def index(self, conf):
    """Lazily yield a ``Pyfsfile`` for each file in the filesystem at ``conf['url']``.

    The filesystem is opened when iteration starts and is not closed here:
    every yielded ``Pyfsfile`` is constructed with that same filesystem object
    together with the walked file path.

    Args:
        conf: Mapping whose ``'url'`` entry is passed to ``open_fs``.

    Yields:
        ``Pyfsfile(filesystem, file_path)`` for each path produced by a
        default ``Walker``.
    """
    filesystem = open_fs(conf['url'])
    for file_path in Walker().files(filesystem):
        yield Pyfsfile(filesystem, file_path)