def test_issue_68_pd(self):
    """Round-trip numpy scalar types (int64/float64) through the postgres pan_dat writer.

    kind of a dumb test since the numpy types tend to be the ones pandas creates naturally,
    but no harm in being rigorous.
    """
    if not self.can_run:
        return
    tdf = diet_schema.clone()
    pdf = PanDatFactory.create_from_full_schema(tdf.schema(include_ancillary_info=True))
    pgtf = pdf.pgsql
    pgtf.write_schema(self.engine, test_schema, include_ancillary_info=False)
    dat = tdf.copy_tic_dat(diet_dat)
    import numpy
    # seed the TicDat with explicit numpy scalars to exercise the type-coercion path
    dat.categories["protein"]["Max Nutrition"] = numpy.int64(200)
    dat.categories["fat"]["Max Nutrition"] = numpy.float64(65)
    pan_dat = pdf.copy_pan_dat(tdf.copy_to_pandas(dat, drop_pk_columns=False))
    pgtf.write_data(pan_dat, self.engine, test_schema)
    pg_pan_dat = pgtf.create_pan_dat(self.engine, test_schema)
    self.assertTrue(pdf._same_data(pan_dat, pg_pan_dat))
    from ticdat.pandatfactory import _faster_df_apply
    # force entire columns to numpy scalar dtypes and verify the round trip still matches
    pan_dat.categories["Max Nutrition"] = _faster_df_apply(
        pan_dat.categories, lambda row: numpy.int64(row["Max Nutrition"]))
    pan_dat.foods["Cost"] = _faster_df_apply(
        pan_dat.foods, lambda row: numpy.float64(row["Cost"]))
    # NOTE(review): removed leftover debug code - `from framework_utils.helper_utils import memo;
    # memo(pan_dat)` - which imported a package that is not a ticdat dependency and would
    # raise ImportError for anyone running the suite without that private framework installed.
    pgtf.write_data(pan_dat, self.engine, test_schema)
    pg_pan_dat = pgtf.create_pan_dat(self.engine, test_schema)
    self.assertTrue(pdf._same_data(pan_dat, pg_pan_dat))
def write_file(self, pan_dat, json_file_path, case_space_table_names=False, orient='split',
               index=False, indent=2, sort_keys=False, **kwargs):
    """
    write the PanDat data to a json file (or json string)

    :param pan_dat: the PanDat object to write

    :param json_file_path: the json file into which the data is to be written. If falsey, will
                           return a JSON string

    :param case_space_table_names: boolean - make best guesses how to add spaces and upper case
                                   characters to table names

    :param orient: Indication of expected JSON string format. See pandas.to_json for more details.

    :param index: boolean - whether or not to write the index.

    :param indent: 2. See json.dumps

    :param sort_keys: See json.dumps

    :param kwargs: additional named arguments to pass to pandas.to_json

    :return: None when writing to a file; otherwise the JSON string
    """
    msg = []
    # validate the PanDat object before doing any work; verify raises on failure
    verify(self.pan_dat_factory.good_pan_dat_object(pan_dat, msg.append),
           "pan_dat not a good object for this factory : %s" % "\n".join(msg))
    # orient/index have dedicated named parameters; forbid duplicates via kwargs
    verify("orient" not in kwargs, "orient should be passed as a non-kwargs argument")
    verify("index" not in kwargs, "index should be passed as a non-kwargs argument")
    pan_dat = self.pan_dat_factory._pre_write_adjustment(pan_dat)
    if self._modern_pandas:
        # FYI - pandas Exception: ValueError: 'index=False' is only valid when 'orient' is
        # 'split' or 'table' -- so force index=True for the other orients to avoid the raise
        kwargs["index"] = index if orient in ("split", "table") else True
    # only prettify table names if the prettified names remain unique (no collisions)
    case_space_table_names = case_space_table_names and \
                             len(set(self.pan_dat_factory.all_tables)) == \
                             len(set(map(case_space_to_pretty, self.pan_dat_factory.all_tables)))
    rtn = {}
    from ticdat.pandatfactory import _faster_df_apply
    for t in self.pan_dat_factory.all_tables:
        # deep copy so the sentinel substitutions below don't mutate the caller's data;
        # +/- infinity become "inf"/"-inf" strings so they survive the JSON round trip
        df = getattr(pan_dat, t).copy(deep=True).replace(
            float("inf"), "inf").replace(-float("inf"), "-inf")
        for f in df.columns:
            dt = self.pan_dat_factory.data_types.get(t, {}).get(f, None)
            if dt and dt.datetime:
                # pandas can be a real PIA when trying to mix types in a column
                def fixed(row):  # this might not always fix things
                    # stringify timestamps so to_json doesn't choke on mixed-type columns
                    if isinstance(row[f], (pd.Timestamp, numpy.datetime64)):
                        return str(row[f])
                    # normalize pandas nulls (NaN/NaT) to plain None for JSON
                    if pd.isnull(row[f]):
                        return None
                    return row[f]
                df[f] = _faster_df_apply(df, fixed)
        k = case_space_to_pretty(t) if case_space_table_names else t
        # round-trip through pandas' own serializer, then parse back to plain dicts/lists
        # so the full schema can be dumped as one combined JSON document below
        rtn[k] = json.loads(df.to_json(path_or_buf=None, orient=orient, **kwargs))
        if orient == 'split' and not index:
            # 'split' orient always emits an "index" entry; drop it when index wasn't requested
            rtn[k].pop("index", None)
    if json_file_path:
        with open(json_file_path, "w") as f:
            json.dump(rtn, f, indent=indent, sort_keys=sort_keys)
    else:
        return json.dumps(rtn, indent=indent, sort_keys=sort_keys)