def execute(self, *args):
    """
    Convert a single Excel file to a csv file.

    Exactly one input file must match 'src_pattern' under 'src_dir'.
    It is read with pandas and written to 'dest_dir'/'dest_pattern'
    using the configured encoding.

    Args
        *args: not used by this method

    Raises
        InvalidCount: no input file, or more than one input file, was found
        InvalidFormat: the extension of 'dest_pattern' is not .csv
    """
    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__,
        [
            self._src_dir,
            self._src_pattern,
            self._dest_dir,
            self._dest_pattern,
        ],
    )
    valid()

    # get a target file
    target_files = super().get_target_files(self._src_dir, self._src_pattern)
    if not target_files:
        raise InvalidCount(
            "An input file %s does not exist."
            % os.path.join(self._src_dir, self._src_pattern)
        )
    if len(target_files) > 1:
        self._logger.error("Hit target files %s" % target_files)
        raise InvalidCount("Input files must be only one.")
    src_path = target_files[0]
    self._logger.info("A target file to be converted: %s" % src_path)

    # convert
    _, dest_ext = os.path.splitext(self._dest_pattern)
    if dest_ext != ".csv":
        raise InvalidFormat(
            "%s is not supported format in %s. The supported format is .csv"
            % (dest_ext, self._dest_pattern)
        )

    # read_excel() has no 'encoding' parameter (the workbook carries its own
    # encoding) and current pandas rejects unknown keywords with a TypeError,
    # so the encoding is applied only when writing the csv.
    df = pandas.read_excel(src_path)
    dest_path = os.path.join(self._dest_dir, self._dest_pattern)
    self._logger.info("Convert %s to %s" % (src_path, dest_path))
    # NOTE(review): to_csv() also writes the DataFrame index as the first
    # column by default; kept unchanged for backward compatibility.
    df.to_csv(dest_path, encoding=self._encoding)
def execute(self, *args):
    """
    Convert a single Excel file to a csv file.

    Exactly one input file must match 'src_pattern' under 'src_dir'.
    It is read with pandas and written to 'dest_dir'/'dest_pattern'
    using the configured encoding. A warning about the planned removal
    of 'dest_pattern' is logged whenever 'dest_pattern' is set.

    Args
        *args: not used by this method

    Raises
        InvalidCount: no input file, or more than one input file, was found
        InvalidFormat: the extension of 'dest_pattern' is not .csv
    """
    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__,
        [
            self._src_dir,
            self._src_pattern,
            self._dest_dir,
            self._dest_pattern,
        ],
    )
    valid()

    if self._dest_pattern:
        # The first two sentences used to be joined without a space
        # ("future.Basically"); the rest of the message is unchanged.
        self._logger.warning(
            "'dest_pattern' will be unavailable in the near future. "
            "Basically every classes which extends FileBaseTransform will be allowed"
            " plural input files, and output files will be the same name with input"
            " file names.\n"
            "At that time, if 'dest_dir' is given, "
            "transformed files will be created in the given directory.\n"
            "If not, original files will be updated by transformed files."
        )

    # get a target file
    target_files = super().get_target_files(self._src_dir, self._src_pattern)
    if not target_files:
        raise InvalidCount(
            "An input file %s does not exist."
            % os.path.join(self._src_dir, self._src_pattern)
        )
    if len(target_files) > 1:
        self._logger.error("Hit target files %s" % target_files)
        raise InvalidCount("Input files must be only one.")
    src_path = target_files[0]
    self._logger.info("A target file to be converted: %s" % src_path)

    # convert
    _, dest_ext = os.path.splitext(self._dest_pattern)
    if dest_ext != ".csv":
        raise InvalidFormat(
            "%s is not supported format in %s. The supported format is .csv"
            % (dest_ext, self._dest_pattern)
        )

    # read_excel() has no 'encoding' parameter (the workbook carries its own
    # encoding) and current pandas rejects unknown keywords with a TypeError,
    # so the encoding is applied only when writing the csv.
    df = pandas.read_excel(src_path)
    dest_path = os.path.join(self._dest_dir, self._dest_pattern)
    self._logger.info("Convert %s to %s" % (src_path, dest_path))
    # NOTE(review): to_csv() also writes the DataFrame index as the first
    # column by default; kept unchanged for backward compatibility.
    df.to_csv(dest_path, encoding=self._encoding)
def __format_insert_data(self, insert_rows):
    """
    Format insert data to pass DataFrame as the below.

    insert_data = {
        "column1": [1, 2],
        "column2": ["spam", "spam"],
        ...
    }

    Every configured column becomes a key whose value is the list of that
    column's values, one entry per input row. A row that lacks the column
    contributes None.

    Args
        insert_rows: dictionary list of input cache

    Returns
        dict which maps a column name to the list of its values

    Raises
        InvalidFormat: a configured column is found in none of the rows
            (this includes the case where insert_rows is empty)
    """
    insert_data = {}
    for column in self.__columns:
        # dict.get() returns None for a missing key, so the collected value
        # list is never empty when rows exist. Key presence has to be tested
        # explicitly for the missing-column error to be reachable.
        if not any(column in row for row in insert_rows):
            raise InvalidFormat(
                "Specified column %s does not exist in an input file." % column
            )
        insert_data[column] = [row.get(column) for row in insert_rows]
    return insert_data
def __create_insert_data(self, cache_list):
    """
    Create insert data like the below.

    insert_data = {
        "column1": [1, 2],
        "column2": ["spam", "spam"],
        ...
    }

    The column names are taken from the "name" entry of each element of
    the table schema. A row that lacks a column contributes None.

    Args
        cache_list: dictionary list of input cache

    Returns
        dict which maps a column name to the list of its values

    Raises
        InvalidFormat: a schema column is found in none of the rows
            (this includes the case where cache_list is empty)
    """
    insert_data = {}
    for name_and_type in self._table_schema:
        column = name_and_type["name"]
        # dict.get() returns None for a missing key, so the collected value
        # list is never empty when rows exist. Key presence has to be tested
        # explicitly for the missing-column error to be reachable.
        if not any(column in row for row in cache_list):
            raise InvalidFormat(
                "Specified column %s does not exist in an input file." % column
            )
        insert_data[column] = [row.get(column) for row in cache_list]
    return insert_data