Example #1
0
    def execute(self, *args):
        """Extract the configured columns from every matching csv file.

        Every file returned by ``get_target_files`` for ``_src_dir`` and
        ``_src_pattern`` is handed to ``Csv`` together with a destination
        path made of ``_dest_dir`` and the source file name.

        Exactly one of ``_columns`` and ``_column_numbers`` must be set.
        ``_column_numbers`` may be a single int or a comma separated
        string of ints.

        Raises:
            InvalidParameter: neither or both of ``_columns`` and
                ``_column_numbers`` are specified.
            FileNotFound: no file matches the source pattern.
            ValueError: a token of a ``_column_numbers`` string is not
                an integer.
        """
        valid = EssentialParameters(
            self.__class__.__name__,
            [self._src_dir, self._src_pattern, self._dest_dir],
        )
        valid()

        if not self._columns and not self._column_numbers:
            raise InvalidParameter(
                "Specifying either 'column' or 'column_numbers' is essential.")
        if self._columns and self._column_numbers:
            raise InvalidParameter(
                "Cannot specify both 'column' and 'column_numbers'.")

        files = super().get_target_files(self._src_dir, self._src_pattern)
        if not files:
            raise FileNotFound("The specified csv file not found.")

        # The column numbers are the same for every file, so resolve them
        # once here instead of re-parsing them on each loop iteration.
        remain_column_numbers = None
        if not self._columns:
            if isinstance(self._column_numbers, int):
                remain_column_numbers = [self._column_numbers]
            else:
                remain_column_numbers = [
                    int(n) for n in self._column_numbers.split(",")
                ]

        for f in files:
            dest_path = os.path.join(self._dest_dir, os.path.basename(f))
            if self._columns:
                Csv.extract_columns_with_names(f, dest_path, self._columns)
            else:
                # The validation above guarantees column numbers are set
                # whenever column names are not.
                Csv.extract_columns_with_numbers(f, dest_path,
                                                 remain_column_numbers)
Example #2
0
 def test_extract_columns_with_names(self):
     """Check that extracting by column name keeps the requested column.

     Writes a two-column csv, extracts only ``key`` into a separate output
     file and verifies the values read back from that output file.
     """
     # create test csv
     os.makedirs(self._data_dir, exist_ok=True)
     test_csv = os.path.join(self._data_dir, "test.csv")
     test_csv_data = [["key", "data"], ["1", "spam"]]
     # newline="" lets the csv module control line endings itself, which
     # avoids doubled carriage returns on platforms that translate "\n".
     with open(test_csv, "w", newline="") as t:
         writer = csv.writer(t)
         writer.writerows(test_csv_data)
     output_file = os.path.join(self._data_dir, "output.csv")
     try:
         remain_columns = ["key"]
         Csv.extract_columns_with_names(test_csv, output_file,
                                        remain_columns)
         # Read back the extraction result, not the input file; otherwise
         # the assertions below pass regardless of what was extracted.
         with open(output_file, "r", newline="") as o:
             rows = list(csv.DictReader(o))
         # Guard against a vacuous pass when the output has no data rows.
         assert rows
         for r in rows:
             assert r["key"] == test_csv_data[1][0]
     finally:
         shutil.rmtree(self._data_dir)