예제 #1
0
    def write_directory(self, pan_dat, dir_path, case_space_table_names=False, index=False, **kwargs):
        """
        write the PanDat data to a collection of csv files (one file per table)

        :param pan_dat: the PanDat object to write

        :param dir_path: the directory in which to write the csv files. It is created,
                         along with any missing parent directories, if it doesn't exist.

        :param case_space_table_names: boolean - make best guesses how to add spaces and upper case
                                       characters to table names

        :param index: boolean - whether or not to write the index.

        :param kwargs: additional named arguments to pass to pandas.to_csv
                       ("index" is not allowed here - use the index argument instead)

        :return:

        caveats: The row names (index) aren't written unless the index argument is set.
        """
        verify(not os.path.isfile(dir_path), "A file is not a valid directory path")
        msg = []
        verify(self.pan_dat_factory.good_pan_dat_object(pan_dat, msg.append),
               "pan_dat not a good object for this factory : %s"%"\n".join(msg))
        verify("index" not in kwargs, "index should be passed as a non-kwargs argument")
        kwargs["index"] = index
        all_tables = self.pan_dat_factory.all_tables
        # only honor the pretty names if doing so keeps every table name distinct
        case_space_table_names = case_space_table_names and \
                                 len(set(all_tables)) == \
                                 len(set(map(case_space_to_pretty, all_tables)))
        # makedirs copes with nested paths and with the directory appearing between
        # a check and the creation, neither of which a bare isdir/mkdir pair handles
        os.makedirs(dir_path, exist_ok=True)
        for t in all_tables:
            f = os.path.join(dir_path, (case_space_to_pretty(t) if case_space_table_names else t) + ".csv")
            getattr(pan_dat, t).to_csv(f, **kwargs)
예제 #2
0
    def write_file(self, pan_dat, file_path, case_space_sheet_names=False):
        """
        write the panDat data to an excel file (one sheet per table)

        :param pan_dat: the PanDat object to write

        :param file_path: The file path of the excel file to create

        :param case_space_sheet_names: boolean - make best guesses how to add spaces and upper case
                                      characters to sheet names

        :return:

        caveats: The row names (index) isn't written.
        """
        msg = []
        verify(
            self.pan_dat_factory.good_pan_dat_object(pan_dat, msg.append),
            "pan_dat not a good object for this factory : %s" % "\n".join(msg))
        pan_dat = self.pan_dat_factory._pre_write_adjustment(pan_dat)
        verify(not os.path.isdir(file_path),
               "A directory is not a valid xls file path")
        all_tables = self.pan_dat_factory.all_tables
        # only honor the pretty names if doing so keeps every sheet name distinct
        case_space_sheet_names = case_space_sheet_names and \
                                 len(set(all_tables)) == \
                                 len(set(map(case_space_to_pretty, all_tables)))
        # The context manager saves and closes the workbook on exit. This replaces the
        # explicit writer.save() call, which no longer exists in recent pandas releases,
        # and guarantees the writer is released even if a sheet fails to write.
        with pd.ExcelWriter(file_path) as writer:
            for t in all_tables:
                getattr(pan_dat, t).to_excel(
                    writer,
                    sheet_name=case_space_to_pretty(t) if case_space_sheet_names else t,
                    index=False)
예제 #3
0
    def write_directory(self, tic_dat, dir_path, allow_overwrite = False, dialect='excel',
                        write_header = True, case_space_table_names = False):
        """

        write the ticDat data to a collection of csv files (one file per table)

        :param tic_dat: the data object

        :param dir_path: the directory in which to write the csv files

        :param allow_overwrite: boolean - are we allowed to overwrite existing
                                files?

        :param dialect: the csv dialect. Consult csv documentation for details.

        :param write_header: Boolean. Should the header information be written
                             as the first row?

        :param case_space_table_names: boolean - make best guesses how to add spaces and upper case
                                       characters to table names

        :return:

        :raises TicDatError: if tic_dat is not a valid data object for this schema
        """
        verify(csv, "csv needs to be installed to use this subroutine")
        verify(dialect in csv.list_dialects(), "Invalid dialect %s"%dialect)
        verify(not os.path.isfile(dir_path), "A file is not a valid directory path")
        if self.tic_dat_factory.generic_tables:
            dat, tdf = create_generic_free(tic_dat, self.tic_dat_factory)
            # forward every option, including case_space_table_names, so the
            # generic-free delegate writes the same files the caller asked for
            return tdf.csv.write_directory(dat, dir_path, allow_overwrite, dialect, write_header,
                                           case_space_table_names)
        tdf = self.tic_dat_factory
        msg = []
        if not tdf.good_tic_dat_object(tic_dat, msg.append):
            raise TicDatError("Not a valid TicDat object for this schema : " + " : ".join(msg))
        # only honor the pretty names if doing so keeps every table name distinct
        case_space_table_names = case_space_table_names and \
                                 len(set(tdf.all_tables)) == \
                                 len(set(map(case_space_to_pretty, tdf.all_tables)))
        # Resolve the destination paths once, so that the overwrite guard below inspects
        # exactly the files that are about to be written.
        file_paths = {t: os.path.join(dir_path,
                                      (case_space_to_pretty(t) if case_space_table_names else t) + ".csv")
                      for t in tdf.all_tables}
        if not allow_overwrite:
            for t in tdf.all_tables:
                verify(not os.path.exists(file_paths[t]),
                       "The %s path exists and overwrite is not allowed"%file_paths[t])
        if not os.path.isdir(dir_path) :
            os.mkdir(dir_path)
        for t in tdf.all_tables:
            def infinity_io_dict(d):
                # pass every cell through the factory's infinity flag handling before writing
                return {fld: self.tic_dat_factory._infinity_flag_write_cell(t, fld, x)
                        for fld, x in d.items()}
            with open(file_paths[t], 'w', newline='') as csvfile:
                writer = csv.DictWriter(csvfile, dialect=dialect, fieldnames=
                        tdf.primary_key_fields.get(t, ()) + tdf.data_fields.get(t, ()))
                if write_header:
                    writer.writeheader()
                _t = getattr(tic_dat, t)
                if dictish(_t):
                    # dict style table : the key holds the primary key value(s),
                    # the value holds the data fields
                    for p_key, data_row in _t.items():
                        primary_key_dict = dict(zip(tdf.primary_key_fields[t],
                                                    p_key if containerish(p_key) else (p_key,)))
                        writer.writerow(infinity_io_dict(dict(data_row, **primary_key_dict)))
                else:
                    # otherwise the table is a container of row dicts, or a callable yielding them
                    for data_row in (_t if containerish(_t) else _t()):
                        writer.writerow(infinity_io_dict(data_row))
예제 #4
0
    def write_file(self,
                   pan_dat,
                   db_file_path,
                   con=None,
                   if_exists='replace',
                   case_space_table_names=False):
        """

        write the PanDat data to a SQLite database (one SQL table per PanDat table)

        :param pan_dat: the PanDat object to write

        :param db_file_path: The file path of the SQLite file to create.
                             Set to falsey if using con argument.

        :param con: A connection object that can be passed to pandas to_sql.
                    Set to falsey if using db_file_path argument

        :param if_exists: ‘fail’, ‘replace’ or ‘append’. How to behave if the table already exists

        :param case_space_table_names: boolean - make best guesses how to add spaces and upper case
                                          characters to table names

        :return:

        caveats: The row names (index) isn't written. The default pandas schema generation is used,
                 and thus foreign key relationships aren't written.
        """
        # (The code to generate foreign keys is written and tested as part of TicDatFactory, and
        # thus the foreign key shortcoming could be easily rectified if need be.)
        # Note - pandas has an unfortunate tendency to push types into SQLite columns. This can
        # result in writing-reading round trips converting your numbers to text if they are
        # mixed type columns.
        exactly_one_target = bool(db_file_path) != bool(con)
        verify(
            exactly_one_target,
            "use either the con argument or the db_file_path argument but not both"
        )
        problems = []
        is_good = self.pan_dat_factory.good_pan_dat_object(pan_dat, problems.append)
        verify(is_good,
               "pan_dat not a good object for this factory : %s" % "\n".join(problems))
        pan_dat = self.pan_dat_factory._pre_write_adjustment(pan_dat)
        if db_file_path:
            verify(not os.path.isdir(db_file_path),
                   "A directory is not a valid SQLLite file path")
        tables = self.pan_dat_factory.all_tables
        # pretty names are used only when requested and when they keep the tables distinct
        use_pretty = bool(case_space_table_names) and \
            len(set(tables)) == len({case_space_to_pretty(t) for t in tables})
        # a file path gets its own managed connection; a caller supplied connection
        # is wrapped so that the same with-statement works for both
        if db_file_path:
            manager = _sql_con(db_file_path)
        else:
            manager = _DummyContextManager(con)
        with manager as opened:
            target = con or opened
            for table in tables:
                sql_name = case_space_to_pretty(table) if use_pretty else table
                frame = getattr(pan_dat, table)
                frame.to_sql(name=sql_name, con=target, if_exists=if_exists, index=False)
예제 #5
0
파일: xls.py 프로젝트: nandi6uc/ticdat
    def write_file(self,
                   tic_dat,
                   file_path,
                   allow_overwrite=False,
                   case_space_sheet_names=False):
        """
        write the ticDat data to an excel file

        :param tic_dat: the data object to write (typically a TicDat)

        :param file_path: The file path of the excel file to create
                          Needs to end in either ".xls" or ".xlsx"
                          The latter is capable of writing out larger tables,
                          but the former handles infinity seamlessly.
                          If ".xlsx", then be advised that +/- float("inf") will be replaced
                          with "inf"/"-inf", unless infinity_io_flag is being applied.

        :param allow_overwrite: boolean - are we allowed to overwrite an
                                existing file?

        :param case_space_sheet_names: boolean - make best guesses how to add spaces and upper case
                                       characters to sheet names

        :return:

        caveats: None may be written out as an empty string. This reflects the behavior of xlwt.
        """
        self._verify_differentiable_sheet_names()
        has_excel_suffix = utils.stringish(file_path) and \
            file_path.endswith((".xls", ".xlsx"))
        verify(has_excel_suffix, "file_path argument needs to end in .xls or .xlsx")
        problems = []
        if not self.tic_dat_factory.good_tic_dat_object(tic_dat, problems.append):
            raise TicDatError("Not a valid ticDat object for this schema : " +
                              " : ".join(problems))
        verify(not os.path.isdir(file_path),
               "A directory is not a valid xls file path")
        may_write = allow_overwrite or not os.path.exists(file_path)
        verify(may_write,
               "The %s path exists and overwrite is not allowed" % file_path)
        if self.tic_dat_factory.generic_tables:
            # delegate to a factory without generic tables, forwarding all options
            dat, tdf = utils.create_generic_free(tic_dat, self.tic_dat_factory)
            return tdf.xls.write_file(dat, file_path, allow_overwrite,
                                      case_space_sheet_names)
        tables = self.tic_dat_factory.all_tables
        # pretty names are used only when requested and when they keep the sheets distinct
        if case_space_sheet_names and \
                len(set(tables)) == len(set(map(case_space_to_pretty, tables))):
            sheet_names = {t: case_space_to_pretty(t) for t in tables}
        else:
            sheet_names = {t: t for t in tables}
        # the file extension picks the writer
        write = self._xls_write if file_path.endswith(".xls") else self._xlsx_write
        write(tic_dat, file_path, sheet_names)
예제 #6
0
    def write_file(self, pan_dat, json_file_path, case_space_table_names=False, orient='split',
                   index=False, indent=None, sort_keys=False, **kwargs):
        """
        write the PanDat data to a json file (or json string)

        :param pan_dat: the PanDat object to write

        :param json_file_path: the json file into which the data is to be written. If falsey, will return a
                               JSON  string

        :param case_space_table_names: boolean - make best guesses how to add spaces and upper case
                                       characters to table names

        :param orient: Indication of expected JSON string format. See pandas.to_json for more details.

        :param index: boolean - whether or not to write the index.

        :param indent: None. See json.dumps

        :param sort_keys: See json.dumps

        :param kwargs: additional named arguments to pass to pandas.to_json

        :return: a JSON string if json_file_path is falsey, otherwise None

        caveats:  +-float("inf") will be converted to "inf", "-inf"
        """
        problems = []
        is_good = self.pan_dat_factory.good_pan_dat_object(pan_dat, problems.append)
        verify(is_good,
               "pan_dat not a good object for this factory : %s"%"\n".join(problems))
        verify("orient" not in kwargs, "orient should be passed as a non-kwargs argument")
        verify("index" not in kwargs, "index should be passed as a non-kwargs argument")

        if self._modern_pandas:
            # FYI - pandas Exception: ValueError: 'index=False' is only valid when 'orient' is 'split' or 'table'
            index_is_optional = orient in ("split", "table")
            kwargs["index"] = index if index_is_optional else True
        tables = self.pan_dat_factory.all_tables
        # pretty names are used only when requested and when they keep the tables distinct
        use_pretty = bool(case_space_table_names) and \
            len(set(tables)) == len({case_space_to_pretty(t) for t in tables})
        payload = {}
        for table in tables:
            # json has no infinity literal, so the infinities are written as strings
            frame = getattr(pan_dat, table).replace(float("inf"), "inf")
            frame = frame.replace(-float("inf"), "-inf")
            parsed = json.loads(frame.to_json(path_or_buf=None, orient=orient, **kwargs))
            if orient == 'split' and not index:
                # drop the index entry if to_json emitted one anyway
                parsed.pop("index", None)
            payload[case_space_to_pretty(table) if use_pretty else table] = parsed
        if not json_file_path:
            return json.dumps(payload, indent=indent, sort_keys=sort_keys)
        with open(json_file_path, "w") as out:
            json.dump(payload, out, indent=indent, sort_keys=sort_keys)
예제 #7
0
    def write_file(self,
                   pan_dat,
                   json_file_path,
                   case_space_table_names=False,
                   orient='split',
                   index=False,
                   indent=2,
                   sort_keys=False,
                   **kwargs):
        """
        write the PanDat data to a json file (or json string)

        :param pan_dat: the PanDat object to write

        :param json_file_path: the json file into which the data is to be written. If falsey, will return a
                               JSON  string

        :param case_space_table_names: boolean - make best guesses how to add spaces and upper case
                                       characters to table names

        :param orient: Indication of expected JSON string format. See pandas.to_json for more details.

        :param index: boolean - whether or not to write the index.

        :param indent: 2. See json.dumps

        :param sort_keys: See json.dumps

        :param kwargs: additional named arguments to pass to pandas.to_json

        :return: a JSON string if json_file_path is falsey, otherwise None
        """
        problems = []
        is_good = self.pan_dat_factory.good_pan_dat_object(pan_dat, problems.append)
        verify(is_good,
               "pan_dat not a good object for this factory : %s" % "\n".join(problems))
        verify("orient" not in kwargs,
               "orient should be passed as a non-kwargs argument")
        verify("index" not in kwargs,
               "index should be passed as a non-kwargs argument")
        pan_dat = self.pan_dat_factory._pre_write_adjustment(pan_dat)

        if self._modern_pandas:
            # FYI - pandas Exception: ValueError: 'index=False' is only valid when 'orient' is 'split' or 'table'
            index_is_optional = orient in ("split", "table")
            kwargs["index"] = index if index_is_optional else True
        tables = self.pan_dat_factory.all_tables
        # pretty names are used only when requested and when they keep the tables distinct
        use_pretty = bool(case_space_table_names) and \
            len(set(tables)) == len({case_space_to_pretty(t) for t in tables})
        payload = {}
        from ticdat.pandatfactory import _faster_df_apply
        for table in tables:
            # work on a copy, with the infinities written as strings
            frame = getattr(pan_dat, table).copy(deep=True)
            frame = frame.replace(float("inf"), "inf").replace(-float("inf"), "-inf")
            table_types = self.pan_dat_factory.data_types.get(table, {})
            for field in frame.columns:
                field_type = table_types.get(field, None)
                if not (field_type and field_type.datetime):
                    continue

                # pandas can be a real PIA when trying to mix types in a column
                def json_safe(row):  # this might not always fix things
                    value = row[field]
                    if isinstance(value, (pd.Timestamp, numpy.datetime64)):
                        return str(value)
                    return None if pd.isnull(value) else value

                frame[field] = _faster_df_apply(frame, json_safe)
            parsed = json.loads(
                frame.to_json(path_or_buf=None, orient=orient, **kwargs))
            if orient == 'split' and not index:
                # drop the index entry if to_json emitted one anyway
                parsed.pop("index", None)
            payload[case_space_to_pretty(table) if use_pretty else table] = parsed
        if not json_file_path:
            return json.dumps(payload, indent=indent, sort_keys=sort_keys)
        with open(json_file_path, "w") as out:
            json.dump(payload, out, indent=indent, sort_keys=sort_keys)