def test_cast_str_to_str(source_stype):
    """Casting a string column to str32 and str64 must keep every value intact.

    The sample data covers ordinary strings, a missing value, the empty
    string and a 1000-character string.
    """
    samples = ["Right", "middle", None, "", "A" * 1000]
    src_frame = dt.Frame(S=samples, stype=source_stype)
    assert src_frame.stypes == (source_stype, )

    casts = [dt.str32(f.S), dt.str64(f.S)]
    cast_frame = src_frame[:, casts]
    frame_integrity_check(cast_frame)

    # Two result columns, each identical to the single source column.
    assert cast_frame.to_list() == src_frame.to_list() * 2
def test_cast_bool_to_str():
    """Booleans cast to str32/str64 become "True"/"False"; None stays None."""
    flags = [False, None, True, True, False, True]
    src_frame = dt.Frame(P=flags)
    assert src_frame.stypes == (dt.bool8, )

    cast_frame = src_frame[:, [dt.str32(f.P), dt.str64(f.P)]]
    assert cast_frame.stypes == (dt.str32, dt.str64)

    expected = ["False", None, "True", "True", "False", "True"]
    # Both target string types must produce the same textual values.
    assert cast_frame.to_list() == [expected] * 2
def test_cast_obj_to_str():
    """Objects in an obj64 column are cast to strings via ``str()``.

    The trailing ``None`` in the source data is expected to stay ``None``
    rather than become the string "None".
    """
    objects = [noop, "Hello!", ..., {}, dt, print, None]
    src_frame = dt.Frame(objects)
    assert src_frame.stypes == (dt.obj64, )

    first_col = f[0]
    cast_frame = src_frame[:, [dt.str32(first_col), dt.str64(first_col)]]
    frame_integrity_check(cast_frame)

    # Every element is stringified except the final None, which is kept.
    expected = [str(item) for item in objects[:-1]] + [None]
    assert cast_frame.to_list() == [expected, expected]
def test_cast_float_to_str(source_stype):
    """Floats cast to str32/str64 must render in the expected textual form.

    Covers ordinary values, both infinities, NaN (expected to become None),
    a large value rendered in exponent notation, and zero.
    """
    numbers = [3.5, 7.049, -3.18, math.inf, math.nan, 1.0, -math.inf, 1e16, 0]
    src_frame = dt.Frame(J=numbers, stype=source_stype)
    assert src_frame.stypes == (source_stype,)

    cast_frame = src_frame[:, [dt.str32(f.J), dt.str64(f.J)]]
    frame_integrity_check(cast_frame)

    expected = [
        "3.5", "7.049", "-3.18", "inf", None,
        "1.0", "-inf", "1.0e+16", "0.0",
    ]
    assert cast_frame.to_list() == [expected] * 2
def test_cast_int_to_str(source_stype):
    """Integers cast to str32/str64 must equal ``str()`` of the stored values.

    The expected strings are derived from the values the frame actually
    holds (``DT.to_list()``), not from the literals passed in.
    """
    DT = dt.Frame([None, 0, -3, 189, 77, 14, None, 394831,
                   -52939047130424957], stype=source_stype)
    assert DT.stypes == (source_stype, )

    RES = DT[:, [dt.str32(f.C0), dt.str64(f.C0)]]
    frame_integrity_check(RES)
    assert RES.stypes == (dt.str32, dt.str64)
    assert RES.shape == (DT.nrows, 2)

    ans = [None if v is None else str(v) for v in DT.to_list()[0]]
    # Verify BOTH result columns: previously only the str32 column (index 0)
    # was compared, so the str64 cast values were never actually checked.
    assert RES.to_list() == [ans, ans]
def test_cast_int_to_str(stype0):
    """Integers cast to str32/str64 must equal ``str()`` of the stored values.

    The expected strings are derived from the values the frame actually
    holds (``dt0.topython()``), not from the literals passed in.
    """
    dt0 = dt.Frame(
        [None, 0, -3, 189, 77, 14, None, 394831, -52939047130424957],
        stype=stype0)
    dt1 = dt0[:, [dt.str32(f.C0), dt.str64(f.C0)]]
    dt1.internal.check()
    assert dt1.stypes == (dt.str32, dt.str64)
    assert dt1.shape == (dt0.nrows, 2)

    ans = [None if v is None else str(v) for v in dt0.topython()[0]]
    # Verify BOTH result columns: previously only the str32 column (index 0)
    # was compared, so the str64 cast values were never actually checked.
    assert dt1.topython() == [ans, ans]
def test_cast_to_str(src):
    """Casting each column of ``Frame(src)`` to str32 and to str64.

    Checks frame integrity and result stypes for both casts, and compares
    the first column of each result against the element-wise expectation
    computed by ``to_str``.
    """
    def to_str(x):
        """Return the expected string form of one source value."""
        if x is None:
            return None
        if isinstance(x, bool):
            # Booleans are expected to render as "0"/"1" here.
            return str(int(x))
        # NOTE(review): a branch mapping NaN floats to None was commented
        # out in the original; NaNs go through str(x) -- confirm intended.
        return str(x)

    dt0 = dt.Frame(src)
    dt1 = dt0[:, [dt.str32(f[i]) for i in range(dt0.ncols)]]
    dt2 = dt0[:, [dt.str64(f[i]) for i in range(dt0.ncols)]]
    dt1.internal.check()
    dt2.internal.check()
    assert dt1.stypes == (dt.str32, ) * dt0.ncols
    assert dt2.stypes == (dt.str64, ) * dt0.ncols

    expected = [to_str(x) for x in src]
    assert dt1.topython()[0] == expected
    # The str64 result was previously only integrity/stype-checked; its
    # values must match the same expectation as the str32 result.
    assert dt2.topython()[0] == expected
def _write_csv(self, df, file):
    """Write ``df`` to ``file`` as header-less CSV with all columns as str64.

    Returns None in every case; nothing is written when ``df`` has no rows.
    """
    # An empty frame is skipped entirely to circumvent a bug in
    # datatable: see #36
    row_count = df.shape[0]
    if row_count == 0:
        return None

    # Every column is converted to a string before writing because:
    # - obj64 columns have to become str64: Frame.to_csv can't process them.
    # - None has to be replaced with NULL to tell MySQL that these are
    #   actual NULL values. An empty cell is sometimes, but not always a
    #   NULL value. See #30
    original_order = df.names
    as_text = df[:, f[:].remove(f[:]).extend(str64(f[:]))]
    # Re-select by name so the columns keep the input frame's order.
    as_text = as_text[:, original_order]
    as_text.replace(None, "NULL")
    as_text.to_csv(path=file, header=False)
def transform(self, X: dt.Frame):
    """Join per-zipcode features onto the first column of ``X``.

    The first column is cast to str64 and renamed to ``zip_key``; features
    for each distinct non-missing key are obtained from
    ``self.get_zipcode_features`` and joined back onto the rows. On success
    ``self._output_feature_names`` and ``self._feature_desc`` are set and
    the joined frame without the key column is returned.

    A ValueError is logged at info level and the result of
    ``self.get_zipcode_null_result`` is returned instead; a TypeError is
    logged as a warning and re-raised.
    """
    ctx = self.context
    logger = None
    if ctx and ctx.experiment_id:
        logger = make_experiment_logger(
            experiment_id=ctx.experiment_id,
            tmp_dir=ctx.tmp_dir,
            experiment_tmp_dir=ctx.experiment_tmp_dir)

    X = dt.Frame(X)
    original_zip_column_name = X.names[0]
    X = X[:, dt.str64(dt.f[0])]
    X.names = ['zip_key']

    try:
        non_missing = X[~dt.isna(dt.f.zip_key), 0]
        distinct_keys = dt.unique(non_missing).to_list()[0]
        # NOTE(review): '79936' is always appended; presumably this
        # guarantees at least one resolvable zipcode -- confirm.
        zip_list = distinct_keys + ['79936']
        zip_features = [self.get_zipcode_features(code) for code in zip_list]

        # Keyed lookup frame: one row per zipcode plus its feature columns.
        lookup = dt.Frame({"zip_key": zip_list})
        lookup.cbind(dt.Frame(zip_features))
        lookup.key = 'zip_key'

        joined = X[:, :, dt.join(lookup)]
        # Everything after the first (key) column is a feature column.
        feature_frame = joined[:, 1:]
        feature_names = list(feature_frame.names)

        output_names = []
        descriptions = []
        for feature in feature_names:
            output_names.append("{}:{}.{}".format(
                self.transformer_name,
                original_zip_column_name,
                self.replaceBannedCharacters(feature)))
        for feature in feature_names:
            descriptions.append(
                "Property '{}' of zipcode column ['{}'] from US zipcode database (recipe '{}')"
                .format(feature, original_zip_column_name,
                        self.transformer_name))
        self._output_feature_names = output_names
        self._feature_desc = descriptions
        return feature_frame
    except ValueError as ve:
        message = "Column '{}' is not a zipcode: {}".format(
            original_zip_column_name, str(ve))
        loggerinfo(logger, message)
        return self.get_zipcode_null_result(X, original_zip_column_name)
    except TypeError as te:
        message = "Column '{}' triggered TypeError: {}".format(
            original_zip_column_name, str(te))
        loggerwarning(logger, message)
        raise te