def execute(self, *args):
    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__,
        [self._src_dir, self._src_pattern, self._dest_dir, self._dest_pattern],
    )
    valid()

    # get a target file
    target_files = super().get_target_files(self._src_dir, self._src_pattern)
    if len(target_files) == 0:
        raise InvalidCount(
            "An input file %s does not exist."
            % os.path.join(self._src_dir, self._src_pattern)
        )
    elif len(target_files) > 1:
        self._logger.error("Hit target files %s" % target_files)
        raise InvalidCount("Input files must be only one.")
    self._logger.info("A target file to be converted: %s" % target_files[0])

    # convert (only .csv output is supported)
    _, dest_ext = os.path.splitext(self._dest_pattern)
    if dest_ext != ".csv":
        raise InvalidFormat(
            "%s is not a supported format in %s. The supported format is .csv"
            % (dest_ext, self._dest_pattern)
        )

    df = pandas.read_excel(target_files[0], encoding=self._encoding)
    dest_path = os.path.join(self._dest_dir, self._dest_pattern)
    self._logger.info("Convert %s to %s" % (target_files[0], dest_path))
    df.to_csv(dest_path, encoding=self._encoding)
def execute(self, *args):
    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__,
        [
            self._src_dir,
            self._src1_pattern,
            self._src2_pattern,
            self._dest_dir,
            self._dest_pattern,
        ],
    )
    valid()

    if self._dest_pattern:
        self._logger.warning(
            "'dest_pattern' will be unavailable in the near future. "
            "'dest_pattern' will change to 'dest_name'."
        )

    target1_files = File().get_target_files(self._src_dir, self._src1_pattern)
    target2_files = File().get_target_files(self._src_dir, self._src2_pattern)
    if len(target1_files) == 0:
        raise InvalidCount(
            "An input file %s does not exist."
            % os.path.join(self._src_dir, self._src1_pattern)
        )
    elif len(target2_files) == 0:
        raise InvalidCount(
            "An input file %s does not exist."
            % os.path.join(self._src_dir, self._src2_pattern)
        )
    elif len(target1_files) > 1:
        self._logger.error("Hit target files %s" % target1_files)
        raise InvalidCount("Input files must be only one.")
    elif len(target2_files) > 1:
        self._logger.error("Hit target files %s" % target2_files)
        raise InvalidCount("Input files must be only one.")

    self._logger.info("Merge %s and %s." % (target1_files[0], target2_files[0]))
    df1 = pandas.read_csv(
        os.path.join(self._src_dir, target1_files[0]),
        dtype=str,
        encoding=self._encoding,
    )
    df2 = pandas.read_csv(
        os.path.join(self._src_dir, target2_files[0]),
        dtype=str,
        encoding=self._encoding,
    )
    df = pandas.merge(df1, df2)
    # drop a stray index column if the inputs were written with one
    if "Unnamed: 0" in df.columns:
        del df["Unnamed: 0"]
    df.to_csv(
        os.path.join(self._dest_dir, self._dest_pattern),
        encoding=self._encoding,
        index=False,
    )
def execute(self, *args):
    # dump all instance attributes for debugging
    for k, v in self.__dict__.items():
        self._logger.debug("%s : %s" % (k, v))

    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__,
        [
            self._src_dir,
            self.__src1_pattern,
            self.__src2_pattern,
            self._dest_dir,
            self._dest_pattern,
        ],
    )
    valid()

    target1_files = File().get_target_files(self._src_dir, self.__src1_pattern)
    target2_files = File().get_target_files(self._src_dir, self.__src2_pattern)
    if len(target1_files) == 0:
        raise InvalidCount(
            "An input file %s does not exist."
            % os.path.join(self._src_dir, self.__src1_pattern)
        )
    elif len(target2_files) == 0:
        raise InvalidCount(
            "An input file %s does not exist."
            % os.path.join(self._src_dir, self.__src2_pattern)
        )
    elif len(target1_files) > 1:
        self._logger.error("Hit target files %s" % target1_files)
        raise InvalidCount("Input files must be only one.")
    elif len(target2_files) > 1:
        self._logger.error("Hit target files %s" % target2_files)
        raise InvalidCount("Input files must be only one.")

    self._logger.info("Merge %s and %s." % (target1_files[0], target2_files[0]))
    df1 = pandas.read_csv(
        os.path.join(self._src_dir, target1_files[0]),
        dtype=str,
        encoding=self._encoding,
    )
    df2 = pandas.read_csv(
        os.path.join(self._src_dir, target2_files[0]),
        dtype=str,
        encoding=self._encoding,
    )
    df = pandas.merge(df1, df2)
    # drop a stray index column if the inputs were written with one
    if "Unnamed: 0" in df.columns:
        del df["Unnamed: 0"]
    df.to_csv(
        os.path.join(self._dest_dir, self._dest_pattern),
        encoding=self._encoding,
        index=False,
    )
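# A minimal, standalone sketch (assumed data, not part of the classes above) of
# what pandas.merge(df1, df2) does with its defaults: an inner join on every
# column name the two frames share. The column names here are illustrative only.
import pandas

df1 = pandas.DataFrame({"id": ["1", "2"], "name": ["a", "b"]})
df2 = pandas.DataFrame({"id": ["1", "2"], "score": ["10", "20"]})
merged = pandas.merge(df1, df2)  # joins on the shared "id" column
print(merged.columns.tolist())   # ['id', 'name', 'score']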
def execute(self, *args):
    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__,
        [
            self._src_dir,
            self._src_pattern,
            self._dest_dir,
            self._dest_pattern,
            self._headers,
        ],
    )
    valid()

    if self._dest_pattern:
        self._logger.warning(
            "'dest_pattern' will be unavailable in the near future. "
            "Basically, every class that extends FileBaseTransform will accept"
            " plural input files, and output files will have the same names as"
            " the input files.\n"
            "At that time, if 'dest_dir' is given, transformed files will be created in the given directory.\n"  # noqa
            "If not, original files will be overwritten by transformed files."
        )

    target_files = super().get_target_files(self._src_dir, self._src_pattern)
    if len(target_files) == 0:
        raise InvalidCount(
            "An input file %s does not exist."
            % os.path.join(self._src_dir, self._src_pattern)
        )
    elif len(target_files) > 1:
        self._logger.error("Hit target files %s" % target_files)
        raise InvalidCount("Input files must be only one.")
    self._logger.info("A target file to be converted: %s" % target_files[0])

    dest_path = os.path.join(self._dest_dir, self._dest_pattern)
    self._logger.info(
        "Convert header of %s. An output file is %s." % (target_files[0], dest_path)
    )
    with open(target_files[0], "r", encoding=self._encoding) as s, open(
        dest_path, "w", encoding=self._encoding
    ) as d:
        reader = csv.reader(s)
        writer = csv.writer(d, quoting=csv.QUOTE_ALL)
        # rewrite the header row, then copy the remaining rows unchanged
        headers = next(reader, None)
        new_headers = self.__replace_headers(headers)
        writer.writerow(new_headers)
        for r in reader:
            writer.writerow(r)
        d.flush()
def execute(self, *args):
    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__,
        [self._src_dir, self._src_pattern, self._dest_dir, self._dest_pattern],
    )
    valid()

    if self._dest_pattern:
        self._logger.warning(
            "'dest_pattern' will be unavailable in the near future. "
            "Basically, every class that extends FileBaseTransform will accept"
            " plural input files, and output files will have the same names as"
            " the input files.\n"
            "At that time, if 'dest_dir' is given, transformed files will be created in the given directory.\n"  # noqa
            "If not, original files will be overwritten by transformed files."
        )

    # get a target file
    target_files = super().get_target_files(self._src_dir, self._src_pattern)
    if len(target_files) == 0:
        raise InvalidCount(
            "An input file %s does not exist."
            % os.path.join(self._src_dir, self._src_pattern)
        )
    elif len(target_files) > 1:
        self._logger.error("Hit target files %s" % target_files)
        raise InvalidCount("Input files must be only one.")
    self._logger.info("A target file to be converted: %s" % target_files[0])

    # convert (only .csv output is supported)
    _, dest_ext = os.path.splitext(self._dest_pattern)
    if dest_ext != ".csv":
        raise InvalidFormat(
            "%s is not a supported format in %s. The supported format is .csv"
            % (dest_ext, self._dest_pattern)
        )

    df = pandas.read_excel(target_files[0], encoding=self._encoding)
    dest_path = os.path.join(self._dest_dir, self._dest_pattern)
    self._logger.info("Convert %s to %s" % (target_files[0], dest_path))
    df.to_csv(dest_path, encoding=self._encoding)
def execute(self, *args):
    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__,
        [
            self._src_dir,
            self._src_pattern,
            self._dest_dir,
            self._dest_pattern,
            self._headers,
        ],
    )
    valid()

    target_files = super().get_target_files(self._src_dir, self._src_pattern)
    if len(target_files) == 0:
        raise InvalidCount(
            "An input file %s does not exist."
            % os.path.join(self._src_dir, self._src_pattern)
        )
    elif len(target_files) > 1:
        self._logger.error("Hit target files %s" % target_files)
        raise InvalidCount("Input files must be only one.")
    self._logger.info("A target file to be converted: %s" % target_files[0])

    dest_path = os.path.join(self._dest_dir, self._dest_pattern)
    self._logger.info(
        "Convert header of %s. An output file is %s." % (target_files[0], dest_path)
    )
    with open(target_files[0], "r", encoding=self._encoding) as s, open(
        dest_path, "w", encoding=self._encoding
    ) as d:
        reader = csv.reader(s)
        writer = csv.writer(d, quoting=csv.QUOTE_ALL)
        # rewrite the header row, then copy the remaining rows unchanged
        headers = next(reader, None)
        new_headers = self.__replace_headers(headers)
        writer.writerow(new_headers)
        for r in reader:
            writer.writerow(r)
        d.flush()
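# A standalone sketch of the kind of mapping __replace_headers is assumed to
# apply (its actual implementation is not shown here). The 'mappings' argument
# is a hypothetical list of {old_name: new_name} dicts used to rename the
# header row; names below are illustrative only.
def replace_headers_example(headers, mappings):
    converted = []
    for header in headers:
        new_name = header
        for mapping in mappings:
            if header in mapping:
                new_name = mapping[header]
                break
        converted.append(new_name)
    return converted

print(replace_headers_example(["id", "name"], [{"name": "full_name"}]))  # ['id', 'full_name']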
def execute(self, *args):
    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__,
        [self._src_dir, self._dest_dir, self._dest_pattern],
    )
    valid()

    if not self._src_pattern and not self._src_filenames:
        raise InvalidParameter(
            "Specifying either 'src_pattern' or 'src_filenames' is essential."
        )
    if self._src_pattern and self._src_filenames:
        raise InvalidParameter(
            "Cannot specify both 'src_pattern' and 'src_filenames'."
        )

    if self._src_pattern:
        files = File().get_target_files(self._src_dir, self._src_pattern)
    else:
        files = []
        for file in self._src_filenames:
            files.append(os.path.join(self._src_dir, file))
    if len(files) < 2:
        raise InvalidCount("Two or more input files are required.")

    # read the first file, then append the rest one by one
    file = files.pop(0)
    df1 = pandas.read_csv(
        file,
        dtype=str,
        encoding=self._encoding,
    )
    for file in files:
        df2 = pandas.read_csv(
            file,
            dtype=str,
            encoding=self._encoding,
        )
        df1 = pandas.concat([df1, df2])

    df1.to_csv(
        os.path.join(self._dest_dir, self._dest_pattern),
        encoding=self._encoding,
        index=False,
    )
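# A minimal, standalone sketch (assumed data, not part of the class above) of
# what the concat loop does: pandas.concat stacks frames row-wise, so each
# additional CSV is appended below the first one, and index=False on to_csv
# keeps the accumulated row index out of the output file.
import pandas

part1 = pandas.DataFrame({"id": ["1"], "name": ["a"]})
part2 = pandas.DataFrame({"id": ["2"], "name": ["b"]})
combined = pandas.concat([part1, part2])
print(len(combined))  # 2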
def __call__(self):
    for scenario_yaml_dict in self.__scenario_yaml_list:
        multi_proc_cnt = scenario_yaml_dict.get("multi_process_count")
        parallel_steps = scenario_yaml_dict.get("parallel")
        if multi_proc_cnt:
            if multi_proc_cnt < StepQueue.DEFAULT_PARALLEL_CNT:
                raise InvalidCount(
                    "Must specify %s or more as multi process count."
                    % StepQueue.DEFAULT_PARALLEL_CNT
                )
            continue
        elif parallel_steps:
            # validate every step defined inside a parallel block
            for s in parallel_steps:
                self.__exists_step(s)
                self.__exists_class(s)
        else:
            self.__exists_step(scenario_yaml_dict)
            self.__exists_class(scenario_yaml_dict)
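# A hypothetical example of the shape of entries the loop above expects: each
# element of the scenario list is either a multi_process_count setting, a
# "parallel" block holding a list of steps, or a single step entry. The
# "step"/"class" key names and the class names are illustrative assumptions,
# not taken from the code above.
scenario_yaml_list = [
    {"multi_process_count": 2},
    {
        "parallel": [
            {"step": "download_1", "class": "SampleDownloadStep"},
            {"step": "download_2", "class": "SampleDownloadStep"},
        ]
    },
    {"step": "merge", "class": "SampleMergeStep"},
]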
def execute(self, *args):
    valid = EssentialParameters(
        self.__class__.__name__,
        [self._src_dir, self._src_pattern, self._dest_pattern],
    )
    valid()

    if self._dest_pattern:
        self._logger.warning(
            "'dest_pattern' will be unavailable in the near future. "
            "Basically, every class that extends FileBaseTransform will accept"
            " plural input files, and output files will have the same names as"
            " the input files.\n"
            "At that time, if 'dest_dir' is given, transformed files will be created in the given directory.\n"  # noqa
            "If not, original files will be overwritten by transformed files."
        )

    files = super().get_target_files(self._src_dir, self._src_pattern)
    self._logger.info("Files found %s" % files)
    if len(files) == 0:
        raise InvalidCount("No files were found.")

    # archive into dest_dir if given, otherwise next to the source files
    dest_dir = self._dest_dir if self._dest_dir is not None else self._src_dir
    dest_path = os.path.join(dest_dir, self._dest_pattern + ".%s" % self._format)
    if self._format == "tar":
        with tarfile.open(dest_path, "w") as tar:
            for file in files:
                arcname = (
                    os.path.join(self._dest_pattern, os.path.basename(file))
                    if self._create_dir
                    else os.path.basename(file)
                )
                tar.add(file, arcname=arcname)
    elif self._format == "zip":
        with zipfile.ZipFile(dest_path, "w") as zp:
            for file in files:
                arcname = (
                    os.path.join(self._dest_pattern, os.path.basename(file))
                    if self._create_dir
                    else os.path.basename(file)
                )
                zp.write(file, arcname=arcname)
    else:
        raise InvalidParameter("'format' must be one of the following: [tar, zip]")
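# A minimal, standalone sketch (file and archive names are illustrative) of how
# the arcname choice above affects the archive layout: with create_dir, entries
# are nested under a directory named after dest_pattern; without it, they sit
# at the archive root.
import os
import tarfile

# create two small input files so the example is self-contained
for name in ("a.csv", "b.csv"):
    with open(name, "w", encoding="utf-8") as f:
        f.write("id\n1\n")

with tarfile.open("example.tar", "w") as tar:
    # like create_dir=True with dest_pattern="backup": nested under backup/
    tar.add("a.csv", arcname=os.path.join("backup", "a.csv"))
    # like create_dir=False: stored at the archive root
    tar.add("b.csv", arcname="b.csv")

with tarfile.open("example.tar") as tar:
    print(tar.getnames())  # ['backup/a.csv', 'b.csv']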