Exemple #1
0
    def execute(self, *args):
        """Convert the single Excel file matching the source pattern to csv.

        The file is looked up with ``get_target_files(src_dir, src_pattern)``
        and written to ``dest_dir/dest_pattern``.

        Raises:
            InvalidCount: if no file, or more than one file, is found.
            InvalidFormat: if the extension of ``dest_pattern`` is not ``.csv``.
        """
        # Every parameter listed here is mandatory; run the check immediately.
        EssentialParameters(
            self.__class__.__name__,
            [
                self._src_dir,
                self._src_pattern,
                self._dest_dir,
                self._dest_pattern,
            ],
        )()

        # Exactly one input file is accepted.
        candidates = super().get_target_files(self._src_dir, self._src_pattern)
        hit_count = len(candidates)
        if hit_count == 0:
            missing = os.path.join(self._src_dir, self._src_pattern)
            raise InvalidCount("An input file %s does not exist." % missing)
        if hit_count > 1:
            self._logger.error("Hit target files %s" % candidates)
            raise InvalidCount("Input files must be only one.")
        source_file = candidates[0]
        self._logger.info("A target file to be converted: %s" % source_file)

        # Only csv output is supported, judged by the destination extension.
        dest_ext = os.path.splitext(self._dest_pattern)[1]
        if dest_ext != ".csv":
            raise InvalidFormat(
                "%s is not supported format in %s. The supported format is .csv"
                % (dest_ext, self._dest_pattern)
            )

        # NOTE(review): whether read_excel accepts ``encoding`` depends on the
        # installed pandas version - confirm against the pinned release.
        frame = pandas.read_excel(source_file, encoding=self._encoding)
        dest_path = os.path.join(self._dest_dir, self._dest_pattern)
        self._logger.info("Convert %s to %s" % (source_file, dest_path))
        frame.to_csv(dest_path, encoding=self._encoding)
Exemple #2
0
    def execute(self, *args):
        """Merge two csv files into a single csv file.

        Exactly one file must match each of ``src1_pattern`` and
        ``src2_pattern`` under ``src_dir``. Both files are read with every
        column typed as ``str``, combined with ``pandas.merge`` using its
        default arguments, and written to ``dest_dir/dest_pattern`` without
        the index.

        Raises:
            InvalidCount: if either pattern matches no file or more than one.
        """
        # essential parameters check
        valid = EssentialParameters(
            self.__class__.__name__,
            [
                self._src_dir,
                self._src1_pattern,
                self._src2_pattern,
                self._dest_dir,
                self._dest_pattern,
            ],
        )
        valid()

        if self._dest_pattern:
            self._logger.warning(
                "'dest_pattern' will be unavailable in the near future."
                + "'dest_pattern' will change to 'dest_name'."
            )

        target1_files = File().get_target_files(self._src_dir, self._src1_pattern)
        target2_files = File().get_target_files(self._src_dir, self._src2_pattern)
        if len(target1_files) == 0:
            raise InvalidCount(
                "An input file %s does not exist."
                % os.path.join(self._src_dir, self._src1_pattern)
            )
        elif len(target2_files) == 0:
            raise InvalidCount(
                "An input file %s does not exist."
                % os.path.join(self._src_dir, self._src2_pattern)
            )
        elif len(target1_files) > 1:
            self._logger.error("Hit target files %s" % target1_files)
            raise InvalidCount("Input files must be only one.")
        elif len(target2_files) > 1:
            self._logger.error("Hit target files %s" % target2_files)
            raise InvalidCount("Input files must be only one.")

        self._logger.info("Merge %s and %s." % (target1_files[0], target2_files[0]))
        # NOTE(review): joining src_dir again is harmless only if
        # get_target_files returns absolute paths - confirm.
        df1 = pandas.read_csv(
            os.path.join(self._src_dir, target1_files[0]),
            dtype=str,
            encoding=self._encoding,
        )
        df2 = pandas.read_csv(
            os.path.join(self._src_dir, target2_files[0]),
            dtype=str,
            encoding=self._encoding,
        )
        df = pandas.merge(df1, df2)
        # "Unnamed: 0" is a column (presumably an index written out by an
        # earlier to_csv), so it has to be looked up among the columns. The
        # previous check against df.index only inspected row labels and
        # therefore never removed it.
        if "Unnamed: 0" in df.columns:
            del df["Unnamed: 0"]
        df.to_csv(
            os.path.join(self._dest_dir, self._dest_pattern),
            encoding=self._encoding,
            index=False,
        )
Exemple #3
0
    def execute(self, *args):
        """Merge two csv files into a single csv file.

        Every instance attribute is first written to the debug log. Exactly
        one file must match each of the two source patterns under
        ``src_dir``. Both files are read with every column typed as ``str``,
        combined with ``pandas.merge`` using its default arguments, and
        written to ``dest_dir/dest_pattern`` without the index.

        Raises:
            InvalidCount: if either pattern matches no file or more than one.
        """
        for k, v in self.__dict__.items():
            self._logger.debug("%s : %s" % (k, v))

        # essential parameters check
        valid = EssentialParameters(
            self.__class__.__name__,
            [
                self._src_dir,
                self.__src1_pattern,
                self.__src2_pattern,
                self._dest_dir,
                self._dest_pattern,
            ],
        )
        valid()

        target1_files = File().get_target_files(self._src_dir,
                                                self.__src1_pattern)
        target2_files = File().get_target_files(self._src_dir,
                                                self.__src2_pattern)
        if len(target1_files) == 0:
            raise InvalidCount(
                "An input file %s does not exist." %
                os.path.join(self._src_dir, self.__src1_pattern))
        elif len(target2_files) == 0:
            raise InvalidCount(
                "An input file %s does not exist." %
                os.path.join(self._src_dir, self.__src2_pattern))
        elif len(target1_files) > 1:
            self._logger.error("Hit target files %s" % target1_files)
            raise InvalidCount("Input files must be only one.")
        elif len(target2_files) > 1:
            self._logger.error("Hit target files %s" % target2_files)
            raise InvalidCount("Input files must be only one.")

        self._logger.info("Merge %s and %s." %
                          (target1_files[0], target2_files[0]))
        # NOTE(review): joining src_dir again is harmless only if
        # get_target_files returns absolute paths - confirm.
        df1 = pandas.read_csv(
            os.path.join(self._src_dir, target1_files[0]),
            dtype=str,
            encoding=self._encoding,
        )
        df2 = pandas.read_csv(
            os.path.join(self._src_dir, target2_files[0]),
            dtype=str,
            encoding=self._encoding,
        )
        df = pandas.merge(df1, df2)
        # "Unnamed: 0" is a column (presumably an index written out by an
        # earlier to_csv), so it has to be looked up among the columns. The
        # previous check against df.index only inspected row labels and
        # therefore never removed it.
        if "Unnamed: 0" in df.columns:
            del df["Unnamed: 0"]
        df.to_csv(
            os.path.join(self._dest_dir, self._dest_pattern),
            encoding=self._encoding,
            index=False,
        )
Exemple #4
0
    def execute(self, *args):
        """Rewrite the header row of a single csv file.

        The one file matching ``src_pattern`` under ``src_dir`` is copied to
        ``dest_dir/dest_pattern``. Its first row is passed through
        ``__replace_headers``; all remaining rows are copied unchanged. Every
        output field is quoted (``csv.QUOTE_ALL``).

        Raises:
            InvalidCount: if no file, or more than one file, is found.
        """
        # essential parameters check
        valid = EssentialParameters(
            self.__class__.__name__,
            [
                self._src_dir,
                self._src_pattern,
                self._dest_dir,
                self._dest_pattern,
                self._headers,
            ],
        )
        valid()

        if self._dest_pattern:
            self._logger.warning(
                "'dest_pattern' will be unavailable in the near future."
                + "Basically every classes which extends FileBaseTransform will be allowed"
                + " plural input files, and output files will be the same name with input"
                + " file names.\n"
                "At that time, if 'dest_dir' is given, transformed files will be created in the given directory.\n" # noqa
                + "If not, original files will be updated by transformed files."
            )

        target_files = super().get_target_files(self._src_dir, self._src_pattern)
        if len(target_files) == 0:
            raise InvalidCount(
                "An input file %s does not exist."
                % os.path.join(self._src_dir, self._src_pattern)
            )
        elif len(target_files) > 1:
            self._logger.error("Hit target files %s" % target_files)
            raise InvalidCount("Input files must be only one.")
        src_path = target_files[0]
        # The placeholder used to be logged verbatim because no value was
        # supplied for it.
        self._logger.info("A target file to be converted: %s" % src_path)

        dest_path = os.path.join(self._dest_dir, self._dest_pattern)
        self._logger.info(
            "Convert header of %s. An output file is %s." % (src_path, dest_path)
        )
        # newline="" lets the csv module handle line endings itself, as its
        # documentation requires: embedded newlines in quoted fields survive
        # and no extra "\r" is written on platforms that translate "\n".
        with open(src_path, "r", encoding=self._encoding, newline="") as s, open(
            dest_path, "w", encoding=self._encoding, newline=""
        ) as d:
            reader = csv.reader(s)
            writer = csv.writer(d, quoting=csv.QUOTE_ALL)
            # NOTE(review): headers is None for an empty input file; whether
            # __replace_headers copes with that is not visible here.
            headers = next(reader, None)
            new_headers = self.__replace_headers(headers)
            writer.writerow(new_headers)
            for r in reader:
                writer.writerow(r)
            # No explicit flush: leaving the with block closes both files.
Exemple #5
0
    def execute(self, *args):
        """Convert the single Excel file matching the source pattern to csv.

        A warning about the planned removal of ``dest_pattern`` is logged
        whenever ``dest_pattern`` is set. The file is looked up with
        ``get_target_files(src_dir, src_pattern)`` and written to
        ``dest_dir/dest_pattern``.

        Raises:
            InvalidCount: if no file, or more than one file, is found.
            InvalidFormat: if the extension of ``dest_pattern`` is not ``.csv``.
        """
        # Every parameter listed here is mandatory; run the check immediately.
        EssentialParameters(
            self.__class__.__name__,
            [
                self._src_dir,
                self._src_pattern,
                self._dest_dir,
                self._dest_pattern,
            ],
        )()

        if self._dest_pattern:
            notice = (
                "'dest_pattern' will be unavailable in the near future."
                "Basically every classes which extends FileBaseTransform will be allowed"
                " plural input files, and output files will be the same name with input"
                " file names.\n"
                "At that time, if 'dest_dir' is given, transformed files will be created in the given directory.\n"  # noqa
                "If not, original files will be updated by transformed files."
            )
            self._logger.warning(notice)

        # Exactly one input file is accepted.
        candidates = super().get_target_files(self._src_dir, self._src_pattern)
        hit_count = len(candidates)
        if hit_count == 0:
            missing = os.path.join(self._src_dir, self._src_pattern)
            raise InvalidCount("An input file %s does not exist." % missing)
        if hit_count > 1:
            self._logger.error("Hit target files %s" % candidates)
            raise InvalidCount("Input files must be only one.")
        source_file = candidates[0]
        self._logger.info("A target file to be converted: %s" % source_file)

        # Only csv output is supported, judged by the destination extension.
        dest_ext = os.path.splitext(self._dest_pattern)[1]
        if dest_ext != ".csv":
            raise InvalidFormat(
                "%s is not supported format in %s. The supported format is .csv"
                % (dest_ext, self._dest_pattern)
            )

        # NOTE(review): whether read_excel accepts ``encoding`` depends on the
        # installed pandas version - confirm against the pinned release.
        frame = pandas.read_excel(source_file, encoding=self._encoding)
        dest_path = os.path.join(self._dest_dir, self._dest_pattern)
        self._logger.info("Convert %s to %s" % (source_file, dest_path))
        frame.to_csv(dest_path, encoding=self._encoding)
Exemple #6
0
    def execute(self, *args):
        """Rewrite the header row of a single csv file.

        The one file matching ``src_pattern`` under ``src_dir`` is copied to
        ``dest_dir/dest_pattern``. Its first row is passed through
        ``__replace_headers``; all remaining rows are copied unchanged. Every
        output field is quoted (``csv.QUOTE_ALL``).

        Raises:
            InvalidCount: if no file, or more than one file, is found.
        """
        # essential parameters check
        valid = EssentialParameters(
            self.__class__.__name__,
            [
                self._src_dir,
                self._src_pattern,
                self._dest_dir,
                self._dest_pattern,
                self._headers,
            ],
        )
        valid()

        target_files = super().get_target_files(self._src_dir, self._src_pattern)
        if len(target_files) == 0:
            raise InvalidCount(
                "An input file %s does not exist."
                % os.path.join(self._src_dir, self._src_pattern)
            )
        elif len(target_files) > 1:
            self._logger.error("Hit target files %s" % target_files)
            raise InvalidCount("Input files must be only one.")
        src_path = target_files[0]
        # The placeholder used to be logged verbatim because no value was
        # supplied for it.
        self._logger.info("A target file to be converted: %s" % src_path)

        dest_path = os.path.join(self._dest_dir, self._dest_pattern)
        self._logger.info(
            "Convert header of %s. An output file is %s." % (src_path, dest_path)
        )
        # newline="" lets the csv module handle line endings itself, as its
        # documentation requires: embedded newlines in quoted fields survive
        # and no extra "\r" is written on platforms that translate "\n".
        with open(src_path, "r", encoding=self._encoding, newline="") as s, open(
            dest_path, "w", encoding=self._encoding, newline=""
        ) as d:
            reader = csv.reader(s)
            writer = csv.writer(d, quoting=csv.QUOTE_ALL)
            # NOTE(review): headers is None for an empty input file; whether
            # __replace_headers copes with that is not visible here.
            headers = next(reader, None)
            new_headers = self.__replace_headers(headers)
            writer.writerow(new_headers)
            for r in reader:
                writer.writerow(r)
            # No explicit flush: leaving the with block closes both files.
Exemple #7
0
    def execute(self, *args):
        """Concatenate two or more csv files into a single csv file.

        Input files are either the ones matching ``src_pattern`` under
        ``src_dir`` or the names listed in ``src_filenames`` joined onto
        ``src_dir``; exactly one of the two options must be given. Every file
        is read with all columns typed as ``str``. The frames are stacked in
        list order and written to ``dest_dir/dest_pattern`` without the index.

        Raises:
            InvalidParameter: if neither or both of ``src_pattern`` and
                ``src_filenames`` are specified.
            InvalidCount: if fewer than two input files are available.
        """
        # essential parameters check
        valid = EssentialParameters(
            self.__class__.__name__,
            [self._src_dir, self._dest_dir, self._dest_pattern],
        )
        valid()

        if not self._src_pattern and not self._src_filenames:
            raise InvalidParameter(
                "Specifying either 'src_pattern' or 'src_filenames' is essential."
            )
        if self._src_pattern and self._src_filenames:
            raise InvalidParameter(
                "Cannot specify both 'src_pattern' and 'src_filenames'.")

        if self._src_pattern:
            files = File().get_target_files(self._src_dir, self._src_pattern)
        else:
            files = [
                os.path.join(self._src_dir, name)
                for name in self._src_filenames
            ]

        if len(files) < 2:
            raise InvalidCount("Two or more input files are required.")

        # Read everything first and concatenate once: calling concat inside
        # the loop re-copies the accumulated frame on every iteration.
        frames = [
            pandas.read_csv(path, dtype=str, encoding=self._encoding)
            for path in files
        ]
        merged = pandas.concat(frames)

        merged.to_csv(
            os.path.join(self._dest_dir, self._dest_pattern),
            encoding=self._encoding,
            index=False,
        )
Exemple #8
0
 def __call__(self):
     """Validate every entry of the scenario list.

     Each entry is handled in one of three ways:

     * a truthy ``multi_process_count`` is only compared against
       ``StepQueue.DEFAULT_PARALLEL_CNT``;
     * otherwise a truthy ``parallel`` has each of its steps checked;
     * otherwise the entry itself is checked as a step.

     Raises:
         InvalidCount: if ``multi_process_count`` is below
             ``StepQueue.DEFAULT_PARALLEL_CNT``.
     """
     for entry in self.__scenario_yaml_list:
         proc_count = entry.get("multi_process_count")
         parallel = entry.get("parallel")

         if proc_count:
             if proc_count < StepQueue.DEFAULT_PARALLEL_CNT:
                 raise InvalidCount(
                     "Must specify more than %s as multi process count." %
                     StepQueue.DEFAULT_PARALLEL_CNT)
             # A process-count entry carries no step to check.
             continue

         # A parallel entry contributes each of its steps; any other entry
         # is itself the step to check.
         steps = parallel if parallel else [entry]
         for step in steps:
             self.__exists_step(step)
             self.__exists_class(step)
Exemple #9
0
    def execute(self, *args):
        """Archive the files matching the source pattern into a tar or zip file.

        The archive is named ``dest_pattern`` plus ``.<format>`` and is placed
        in ``dest_dir``, or in ``src_dir`` when ``dest_dir`` is ``None``. When
        ``create_dir`` is truthy, members are stored under a directory named
        after ``dest_pattern``; otherwise they are stored by base name only.

        Raises:
            InvalidCount: if no file matches.
            InvalidParameter: if ``format`` is neither ``tar`` nor ``zip``.
        """
        EssentialParameters(
            self.__class__.__name__,
            [self._src_dir, self._src_pattern, self._dest_pattern],
        )()

        if self._dest_pattern:
            notice = (
                "'dest_pattern' will be unavailable in the near future."
                "Basically every classes which extends FileBaseTransform will be allowed"
                " plural input files, and output files will be the same name with input"
                " file names.\n"
                "At that time, if 'dest_dir' is given, transformed files will be created in the given directory.\n"  # noqa
                "If not, original files will be updated by transformed files."
            )
            self._logger.warning(notice)

        files = super().get_target_files(self._src_dir, self._src_pattern)
        self._logger.info("Files found %s" % files)
        if len(files) == 0:
            raise InvalidCount("No files are found.")

        # Fall back to the source directory when no destination is given.
        out_dir = self._src_dir if self._dest_dir is None else self._dest_dir
        suffix = ".%s" % self._format
        dest_path = os.path.join(out_dir, self._dest_pattern + suffix)

        def member_name(path):
            # Name of the member inside the archive.
            base = os.path.basename(path)
            if self._create_dir:
                return os.path.join(self._dest_pattern, base)
            return base

        if self._format == "tar":
            with tarfile.open(dest_path, "w") as archive:
                for path in files:
                    archive.add(path, arcname=member_name(path))
        elif self._format == "zip":
            with zipfile.ZipFile(dest_path, "w") as archive:
                for path in files:
                    archive.write(path, arcname=member_name(path))
        else:
            raise InvalidParameter(
                "'format' must set one of the followings [tar, zip]")