Code example #1
 def test_issue_68_pd(self):
     # kind of a dumb test since the numpy types tend to be the ones pandas creates naturally, but no harm
     # in being rigorous
     if not self.can_run:
         return
     tdf = diet_schema.clone()
     pdf = PanDatFactory.create_from_full_schema(
         tdf.schema(include_ancillary_info=True))
     pgtf = pdf.pgsql
     pgtf.write_schema(self.engine,
                       test_schema,
                       include_ancillary_info=False)
     dat = tdf.copy_tic_dat(diet_dat)
     import numpy
     dat.categories["protein"]["Max Nutrition"] = numpy.int64(200)
     dat.categories["fat"]["Max Nutrition"] = numpy.float64(65)
     pan_dat = pdf.copy_pan_dat(
         tdf.copy_to_pandas(dat, drop_pk_columns=False))
     pgtf.write_data(pan_dat, self.engine, test_schema)
     pg_pan_dat = pgtf.create_pan_dat(self.engine, test_schema)
     self.assertTrue(pdf._same_data(pan_dat, pg_pan_dat))
     from ticdat.pandatfactory import _faster_df_apply
     pan_dat.categories["Max Nutrition"] = _faster_df_apply(
         pan_dat.categories, lambda row: numpy.int64(row["Max Nutrition"]))
     pan_dat.foods["Cost"] = _faster_df_apply(
         pan_dat.foods, lambda row: numpy.float64(row["Cost"]))
     # leftover debugging helper (framework_utils is not a ticdat dependency),
     # kept here but disabled so the test remains runnable
     # from framework_utils.helper_utils import memo
     # memo(pan_dat)
     pgtf.write_data(pan_dat, self.engine, test_schema)
     pg_pan_dat = pgtf.create_pan_dat(self.engine, test_schema)
     self.assertTrue(pdf._same_data(pan_dat, pg_pan_dat))
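
The test above rewrites whole columns row by row with ticdat's internal _faster_df_apply. For reference only, a helper with equivalent observable behavior to pandas' df.apply(func, axis=1) could look like the sketch below; this is an illustrative stand-in under that assumption, not ticdat's actual implementation.

import pandas as pd

def rowwise_apply(df, func):
    # illustrative stand-in: behaves like df.apply(func, axis=1), but builds a
    # plain dict per row via itertuples, which is typically faster than apply
    cols = list(df.columns)
    values = [func(dict(zip(cols, row)))
              for row in df.itertuples(index=False, name=None)]
    return pd.Series(values, index=df.index)

# usage mirroring the test above, e.g. coercing a column to a numpy scalar type:
# df["Cost"] = rowwise_apply(df, lambda row: numpy.float64(row["Cost"]))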
Code example #2
    def write_file(self,
                   pan_dat,
                   json_file_path,
                   case_space_table_names=False,
                   orient='split',
                   index=False,
                   indent=2,
                   sort_keys=False,
                   **kwargs):
        """
        Write the PanDat data to a json file (or return it as a json string).

        :param pan_dat: the PanDat object to write

        :param json_file_path: the json file into which the data is to be written. If falsey, a
                               JSON string is returned instead.

        :param case_space_table_names: boolean - make a best guess at how to add spaces and upper case
                                       characters to table names

        :param orient: Indication of expected JSON string format. See pandas.to_json for more details.

        :param index: boolean - whether or not to write the index.

        :param indent: indentation to use when writing. See json.dumps. Defaults to 2.

        :param sort_keys: See json.dumps

        :param kwargs: additional named arguments to pass to pandas.to_json

        :return: a JSON string if json_file_path is falsey, otherwise None
        """
        msg = []
        verify(
            self.pan_dat_factory.good_pan_dat_object(pan_dat, msg.append),
            "pan_dat not a good object for this factory : %s" % "\n".join(msg))
        verify("orient" not in kwargs,
               "orient should be passed as a non-kwargs argument")
        verify("index" not in kwargs,
               "index should be passed as a non-kwargs argument")
        pan_dat = self.pan_dat_factory._pre_write_adjustment(pan_dat)

        if self._modern_pandas:
            # FYI - pandas Exception: ValueError: 'index=False' is only valid when 'orient' is 'split' or 'table'
            kwargs["index"] = index if orient in ("split", "table") else True
        case_space_table_names = case_space_table_names and \
                                 len(set(self.pan_dat_factory.all_tables)) == \
                                 len(set(map(case_space_to_pretty, self.pan_dat_factory.all_tables)))
        rtn = {}
        from ticdat.pandatfactory import _faster_df_apply
        for t in self.pan_dat_factory.all_tables:
            df = getattr(pan_dat, t).copy(deep=True).replace(
                float("inf"), "inf").replace(-float("inf"), "-inf")
            for f in df.columns:
                dt = self.pan_dat_factory.data_types.get(t, {}).get(f, None)
                if dt and dt.datetime:
                    # pandas can be a real pain when trying to mix types in a column
                    def fixed(row):  # this might not always fix things
                        if isinstance(row[f],
                                      (pd.Timestamp, numpy.datetime64)):
                            return str(row[f])
                        if pd.isnull(row[f]):
                            return None
                        return row[f]

                    df[f] = _faster_df_apply(df, fixed)
            k = case_space_to_pretty(t) if case_space_table_names else t
            rtn[k] = json.loads(
                df.to_json(path_or_buf=None, orient=orient, **kwargs))
            if orient == 'split' and not index:
                rtn[k].pop("index", None)
        if json_file_path:
            with open(json_file_path, "w") as f:
                json.dump(rtn, f, indent=indent, sort_keys=sort_keys)
        else:
            return json.dumps(rtn, indent=indent, sort_keys=sort_keys)
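
For context, a minimal way this writer might be exercised through a PanDatFactory is sketched below. The one-table schema, the data, and the file name are illustrative assumptions rather than anything taken from the code above, and the call pattern assumes the json writer is reached via the factory's json attribute.

import pandas as pd
from ticdat import PanDatFactory

# illustrative schema: one table "foods" with primary key "Name" and data field "Cost"
pdf = PanDatFactory(foods=[["Name"], ["Cost"]])
dat = pdf.PanDat(foods=pd.DataFrame({"Name": ["hamburger", "chicken"],
                                     "Cost": [2.49, 2.89]}))

# write to disk; per the docstring, a falsey json_file_path returns the JSON string instead
pdf.json.write_file(dat, "diet_data.json", case_space_table_names=True)
json_string = pdf.json.write_file(dat, "", orient="split")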