def test_troublesome_text(self):
    """
    Round-trip a string value made of a quote and angle brackets.

    Text like this has caused trouble in the past, so the library is
    written and read back, and the result must equal the input.
    """
    column = xport.Variable(["'<>"], dtype='string')
    expected = xport.Library(xport.Dataset({'a': column}, name='trouble'))
    with pytest.warns(UserWarning, match=r'Converting column dtypes'):
        actual = self.dump_and_load(expected)
        assert actual == expected
def test_dumps_name_and_label_length_validation(self):
    """
    Check that ``xport.v56.dumps`` rejects over-long names and labels.
    """
    # Names must be <= 8 characters and labels <= 40 characters, so use
    # strings that are exactly one character over each limit.
    # (SAS v8 Transport Files allow longer labels.)
    nine_chars = 'a' * 9
    forty_one_chars = 'a' * 41
    rejected = (
        xport.Library(xport.Dataset(), sas_version=nine_chars),
        xport.Library(xport.Dataset(name=nine_chars)),
        xport.Library(xport.Dataset(label=forty_one_chars)),
        xport.Library(xport.Dataset({nine_chars: [1.0]})),
        xport.Library(
            xport.Dataset({'a': xport.Variable([1.0], label=forty_one_chars)}),
        ),
    )
    for library in rejected:
        with pytest.raises(ValueError):
            xport.v56.dumps(library)
def test_invalid_values(self):
    """
    Check that writing a snowman character raises ``ValueError``.
    """
    for value in ('\N{snowman}',):
        library = xport.Library(xport.Dataset({'a': [value]}))
        # One ``with`` statement, same nesting order as before:
        # ``raises`` is the outer manager, ``warns`` the inner one.
        with pytest.raises(ValueError), \
                pytest.warns(UserWarning, match=r'Converting column dtypes'):
            xport.v56.dumps(library)
def test_numeric_type_conversion(self):
    """
    Check that int and bool values read back as float64 after writing.
    """
    for value in (1, True):
        with pytest.warns(UserWarning, match=r'Converting column dtypes'):
            dataset = xport.Dataset({'x': [value]})
            library = xport.Library({'A': dataset})
            output = self.dump_and_load(library)
            column = output['A']['x']
            assert column.dtype.name == 'float64'
            assert column.iloc[0] == 1.0
def test_invalid_types(self):
    """
    Check that unsupported value types raise ``TypeError`` on write.
    """
    unsupported = (
        b'\x00',
        object(),
        (1, 2, 3),
    )
    for value in unsupported:
        # One ``with`` statement, same nesting order as before:
        # ``warns`` is the outer manager, ``raises`` the inner one.
        with pytest.warns(UserWarning, match=r'Converting column dtypes'), \
                pytest.raises(TypeError):
            library = xport.Library(xport.Dataset({'a': [value]}))
            xport.v56.dumps(library)
def library():
    """
    Build a library holding one 4-column, 6-row dataset of text and numbers.

    The dataset is named ``'ECON'``.  Its timestamps, OS and SAS version
    are set on the dataset first and then copied onto the library.
    """
    columns = {
        'VIT_STAT': ['ALIVE'] * 3 + ['DEAD'] * 3,
        'ECON': ['POOR', 'NOT', 'UNK'] * 2,
        'COUNT': [1216, 1761, 2517, 254, 60, 137],
        'TEMP': [98.6, 95.4, 86.7, 93.4, 103.5, 56.7],
    }
    ds = xport.Dataset(
        data=columns,
        name='ECON',
        label='Blank-padded dataset label',
        dataset_type='',
    )

    timestamp = datetime(2015, 11, 13, 10, 35, 8)
    ds.created = timestamp
    ds.modified = timestamp
    ds.sas_os = 'W32_7PRO'
    ds.sas_version = '9.3'

    # (column name, label, format) -- applied in this order below.
    right_aligned_char = xport.Format(
        '$CHAR', 4, 0, xport.FormatAlignment.RIGHT,
    )
    column_metadata = (
        ('VIT_STAT', 'Vital status', '$5.'),
        ('ECON', 'Economic status', right_aligned_char),
        ('COUNT', 'Count', 'comma8.0'),
        ('TEMP', 'Temperature', '8.1'),
    )
    for name, label, fmt in column_metadata:
        # Look the column up again for every assignment, and give each
        # column its own Informat instance, exactly as the flat version did.
        ds[name].label = label
        ds[name].format = fmt
        ds[name].informat = xport.Informat()
        ds[name].width = 8

    return xport.Library(
        members=[ds],
        created=ds.created,
        modified=ds.modified,
        sas_os=ds.sas_os,
        sas_version=ds.sas_version,
    )
def test_dataframe(self):
    """
    Round-trip a library built directly from a pandas DataFrame.
    """
    frame = pd.DataFrame({'a': [1]})
    lib = xport.Library(frame)
    with pytest.warns(UserWarning, match=r'Converting column dtypes'):
        payload = xport.v56.dumps(lib)
        result = xport.v56.loads(payload)
    # The dataset is looked up as '' in the loaded library and as None in
    # the library that was written.
    matches = result[''] == lib[None]
    assert matches.all(axis=None)
var_counter_<..> += 1 var_counter_rows += 1 """ # # = Generate SAS files: = = # Source: https://github.com/selik/xport # The SAS Transport (XPORT) format only supports two kinds of data. Each value is either numeric or character, so xport.load decodes the values as either str or float. # # TA: TA_ds = xport.Dataset(TA_df, name='TA', label='Trial Arms (TA) data') # SAS variable names are limited to 8 characters. As with Pandas dataframes, you must change the name on the dataset rather than the column directly. TA_ds = TA_ds.rename(columns={k: k.upper()[:8] for k in TA_ds}) # Libraries can have multiple datasets. TA_library = xport.Library({'TA': TA_ds}) # with open('TA.xpt', 'wb') as f: xport.v56.dump(TA_library, f) # # TE: TE_ds = xport.Dataset(TE_df, name='TE', label='Trial Elements (TE) data') # SAS variable names are limited to 8 characters. As with Pandas dataframes, you must change the name on the dataset rather than the column directly. TE_ds = TE_ds.rename(columns={k: k.upper()[:8] for k in TE_ds}) # Libraries can have multiple datasets. TE_library = xport.Library({'TE': TE_ds}) # with open('TE.xpt', 'wb') as f: xport.v56.dump(TE_library, f) # # TV:
def test_create_from_dataframe(self):
    """
    Check that a library built from an empty DataFrame has a ``None`` key.
    """
    empty_frame = pd.DataFrame()
    created = xport.Library(empty_frame)
    assert None in created
def test_create_from_list(self):
    """
    Build a library from one dataset, then from a list of two datasets.

    A single default dataset is stored under ``None``.  Passing two default
    datasets in a list must emit a 'More than one dataset named' warning.
    """
    single = xport.Library(xport.Dataset())
    assert None in single

    with pytest.warns(UserWarning, match=r'More than one dataset named'):
        duplicates = [xport.Dataset(), xport.Dataset()]
        xport.Library(duplicates)
def test_create_from_mapping(self):
    """
    Build a library from a mapping of key to dataset.

    A default dataset under key ``'x'`` must emit a 'Set dataset name'
    warning and be reachable as ``'x'``.  A dataset already named ``'y'``
    under key ``'x'`` must raise ``ValueError``.
    """
    with pytest.warns(UserWarning, match=r'Set dataset name'):
        renamed = xport.Library({'x': xport.Dataset()})
    assert 'x' in renamed

    with pytest.raises(ValueError):
        conflicting = {'x': xport.Dataset(name='y')}
        xport.Library(conflicting)
def test_create_empty(self):
    """
    Check that a library can be constructed with no arguments.
    """
    # No assertion: the test passes as long as construction does not raise.
    xport.Library()