Python Datasetの例、xport.Dataset Pythonの例

コード例 #1

0

ファイルを表示

ファイル: test_v56.py プロジェクト: sanders41/xport

 def test_dataset_modified(self):
     """
     Check how a dataset's ``modified`` value is handled on write.

     The constructor is expected to keep the value as given, while
     ``xport.v56.dumps`` is expected to reject it.
     """
     bad_timestamp = datetime(2100, 1, 1)
     dataset = xport.Dataset(modified=bad_timestamp)
     assert dataset.modified == bad_timestamp
     with pytest.raises(ValueError):
         xport.v56.dumps(dataset)
     # A non-datetime value may fail either at construction or at dump time.
     wrong_type_errors = (TypeError, AttributeError)
     with pytest.raises(wrong_type_errors):
         dataset = xport.Dataset(modified=1)
         xport.v56.dumps(dataset)

コード例 #2

0

ファイルを表示

ファイル: test_v56.py プロジェクト: sanders41/xport

 def test_dataset_created(self):
     """
     Check how a dataset's ``created`` value is handled on write.

     The constructor is expected to keep the value as given, while
     ``xport.v56.dumps`` is expected to reject it.
     """
     bad_timestamp = datetime(1800, 1, 1)
     dataset = xport.Dataset(created=bad_timestamp)
     assert dataset.created == bad_timestamp
     with pytest.raises(ValueError):
         xport.v56.dumps(dataset)
     # A string in place of a datetime may fail at construction or at dump.
     wrong_type_errors = (TypeError, AttributeError)
     with pytest.raises(wrong_type_errors):
         dataset = xport.Dataset(created='2000-Jan-01')
         xport.v56.dumps(dataset)

コード例 #3

0

ファイルを表示

ファイル: test_xport.py プロジェクト: sanders41/xport

 def test_init(self):
     """
     Check that a default-constructed dataset exposes each metadata name.
     """
     dataset = xport.Dataset()
     for attribute in dataset._metadata:
         # The lookup itself is the check: it must not raise.
         getattr(dataset, attribute)

コード例 #4

0

ファイルを表示

ファイル: test_v56.py プロジェクト: sanders41/xport

 def test_troublesome_text(self):
     """
     Round-trip text patterns that have caused trouble in the past.
     """
     column = xport.Variable(["'<>"], dtype='string')
     member = xport.Dataset({'a': column}, name='trouble')
     expected = xport.Library(member)
     with pytest.warns(UserWarning, match=r'Converting column dtypes'):
         round_tripped = self.dump_and_load(expected)
         assert round_tripped == expected

コード例 #5

0

ファイルを表示

ファイル: test_v56.py プロジェクト: sanders41/xport

 def test_dumps_name_and_label_length_validation(self):
     """
     Check that over-long names and labels are rejected by ``dumps``.

     Names must be <= 8 characters and labels must be <= 40 characters.
     SAS v8 Transport Files allow longer labels.
     """
     nine_chars = 'a' * 9
     forty_one_chars = 'a' * 41
     bad_libraries = [
         xport.Library(xport.Dataset(), sas_version=nine_chars),
         xport.Library(xport.Dataset(name=nine_chars)),
         xport.Library(xport.Dataset(label=forty_one_chars)),
         xport.Library(xport.Dataset({nine_chars: [1.0]})),
         xport.Library(
             xport.Dataset({
                 'a': xport.Variable([1.0], label=forty_one_chars),
             })),
     ]
     for bad_library in bad_libraries:
         with pytest.raises(ValueError):
             xport.v56.dumps(bad_library)

コード例 #6

0

ファイルを表示

ファイル: test_v56.py プロジェクト: sanders41/xport

 def test_invalid_values(self):
     """
     Check that each listed bad value makes ``dumps`` raise ``ValueError``.
     """
     bad_values = ('\N{snowman}',)
     for bad_value in bad_values:
         bad_library = xport.Library(xport.Dataset({'a': [bad_value]}))
         # Same nesting as before: ``raises`` is outermost, ``warns`` inside.
         with pytest.raises(ValueError), \
                 pytest.warns(UserWarning, match=r'Converting column dtypes'):
             xport.v56.dumps(bad_library)

コード例 #7

0

ファイルを表示

ファイル: test_cli.py プロジェクト: sanders41/xport

def test_decode(library, library_bytestring):
    """
    Check the command line executable decodes a library.

    The library bytes are fed to the process on stdin, and the text it
    prints on stdout is parsed as CSV and compared with the first dataset
    of ``library``.
    """
    argv = 'python -m xport -'.split()
    result = subprocess.run(
        argv,
        capture_output=True,
        input=library_bytestring,
    )
    csv_text = result.stdout.decode()
    frame = pd.read_csv(StringIO(csv_text))
    actual = xport.Dataset(frame)
    expected = next(iter(library.values()))
    assert (actual == expected).all(axis=None)

コード例 #8

0

ファイルを表示

ファイル: test_cli.py プロジェクト: sanders41/xport

def test_output_file(library, library_bytestring, tmp_path):
    """
    Verify CLI can write output to a file.

    The library bytes are fed to the process on stdin, and the CSV file it
    writes under ``tmp_path`` is read back and compared with the first
    dataset of ``library``.
    """
    filepath = tmp_path / 'tmp.csv'
    # Build argv as a list. Formatting the path into a command string and
    # splitting on whitespace would break the path into several arguments
    # whenever ``tmp_path`` contains a space.
    argv = ['python', '-m', 'xport', '-', str(filepath)]
    subprocess.run(argv, capture_output=True, input=library_bytestring)
    with open(filepath) as f:
        df = pd.read_csv(f)
    ds = xport.Dataset(df)
    assert (ds == next(iter(library.values()))).all(axis=None)

コード例 #9

0

ファイルを表示

ファイル: test_v56.py プロジェクト: sanders41/xport

 def test_invalid_types(self):
     """
     Check that unsupported value types raise ``TypeError`` on write.
     """
     bad_values = (
         b'\x00',
         object(),
         (1, 2, 3),
     )
     for bad_value in bad_values:
         # Same nesting as before: ``warns`` is outermost, ``raises`` inside.
         with pytest.warns(UserWarning, match=r'Converting column dtypes'), \
                 pytest.raises(TypeError):
             bad_library = xport.Library(xport.Dataset({'a': [bad_value]}))
             xport.v56.dumps(bad_library)

コード例 #10

0

ファイルを表示

ファイル: test_v56.py プロジェクト: sanders41/xport

 def test_numeric_type_conversion(self):
     """
     Check that integer and boolean values come back as floats.
     """
     for value in (1, True):
         with pytest.warns(UserWarning, match=r'Converting column dtypes'):
             original = xport.Library({'A': xport.Dataset({'x': [value]})})
             loaded = self.dump_and_load(original)
             column = loaded['A']['x']
             assert column.dtype.name == 'float64'
             assert column.iloc[0] == 1.0

コード例 #11

0

ファイルを表示

ファイル: test_xport.py プロジェクト: sanders41/xport

 def test_contents(self):
     """
     Check the per-variable metadata summary in ``Dataset.contents``.
     """
     columns = {
         'a': [1],
         'b': xport.Variable(['x'], label='Beta'),
         'c': [None],
     }
     ds = xport.Dataset(data=columns, name='EXAMPLE', label='Example')
     # Only 'a' and 'b' get an explicit type; 'c' is left as constructed.
     ds['a'].vtype = xport.VariableType.NUMERIC
     ds['b'].vtype = xport.VariableType.CHARACTER
     summary = ds.contents
     expected_index = [1, 2, 3]
     expected_labels = ['', 'Beta', '']
     expected_types = ['Numeric', 'Character', '']
     assert list(summary.index) == expected_index
     assert list(summary['Label']) == expected_labels
     assert list(summary['Type']) == expected_types

コード例 #12

0

ファイルを表示

ファイル: test_v56.py プロジェクト: sanders41/xport

def library():
    """
    Build a library holding one 4-column, 6-row dataset of numbers and text.
    """
    columns = {
        'VIT_STAT': ['ALIVE'] * 3 + ['DEAD'] * 3,
        'ECON': ['POOR', 'NOT', 'UNK'] * 2,
        'COUNT': [1216, 1761, 2517, 254, 60, 137],
        'TEMP': [98.6, 95.4, 86.7, 93.4, 103.5, 56.7],
    }
    ds = xport.Dataset(
        data=columns,
        name='ECON',
        label='Blank-padded dataset label',
        dataset_type='',
    )
    timestamp = datetime(2015, 11, 13, 10, 35, 8)
    ds.created = timestamp
    ds.modified = timestamp
    ds.sas_os = 'W32_7PRO'
    ds.sas_version = '9.3'

    def describe(name, label, fmt):
        # Every variable gets a default informat and a width of 8.
        # ``ds[name]`` is looked up for each assignment, as the original did.
        ds[name].label = label
        ds[name].format = fmt
        ds[name].informat = xport.Informat()
        ds[name].width = 8

    describe('VIT_STAT', 'Vital status', '$5.')
    describe(
        'ECON',
        'Economic status',
        xport.Format('$CHAR', 4, 0, xport.FormatAlignment.RIGHT),
    )
    describe('COUNT', 'Count', 'comma8.0')
    describe('TEMP', 'Temperature', '8.1')

    # The library carries the same timestamps and SAS details as its member.
    return xport.Library(
        members=[ds],
        created=ds.created,
        modified=ds.modified,
        sas_os=ds.sas_os,
        sas_version=ds.sas_version,
    )

コード例 #13

0

ファイルを表示

ファイル: test_xport.py プロジェクト: sanders41/xport

 def test_copy_metadata(self):
     """
     Verify ``DataFrame`` methods that copy will keep SAS metadata.

     Covers ``copy``, appending a plain ``DataFrame``, and ``pd.concat``.
     """
     ds = xport.Dataset(
         data={
             'a': [1],
             'b': xport.Variable(['x'], label='Beta')
         },
         name='EXAMPLE',
         label='Example',
     )
     self.compare_metadata(ds.copy(), ds)
     extra_rows = pd.DataFrame({
         'a': [2],
         'b': ['y'],
     })
     # ``DataFrame.append`` was deprecated in pandas 1.4 and removed in 2.0.
     # Use it where it still exists so older environments exercise the same
     # code path; otherwise fall back to ``pd.concat``.
     if hasattr(ds, 'append'):
         appended = ds.append(extra_rows)
     else:
         appended = pd.concat([ds, extra_rows])
     self.compare_metadata(appended, ds)
     self.compare_metadata(pd.concat([ds, ds]), ds)

コード例 #14

0

ファイルを表示

ファイル: Generate_SDTM.py プロジェクト: phuse-org/PODR

	# Insert rows to DataFrame:
	TD_new_row =  {'Row': var_counter_rows, 'STUDYID': CT_STUDYID, 'DOMAIN': var_domain_<..>, ...}
	TD_df = TV_df.append(TD_new_row, ignore_index=True)
	#
	var_counter_<..> += 1
	var_counter_rows += 1

"""

#
# = Generate SAS files: = =
# Source: https://github.com/selik/xport
# The SAS Transport (XPORT) format supports only two kinds of data: every
# value is either numeric or character, so xport.load decodes values as
# either float or str.
#
# TA: wrap TA_df in an xport Dataset with a name and a label.
TA_ds = xport.Dataset(TA_df, name='TA', label='Trial Arms (TA) data')
# SAS variable names are limited to 8 characters, so upper-case each column
# name and truncate it to 8 characters. As with pandas dataframes, rename
# through the dataset rather than on the column directly.
# NOTE(review): truncation could make two names collide; confirm the column
# names stay unique in their first 8 characters.
TA_ds = TA_ds.rename(columns={k: k.upper()[:8] for k in TA_ds})
# Libraries can have multiple datasets; this one holds only TA.
TA_library = xport.Library({'TA': TA_ds})
# Write the library to TA.xpt, opened in binary mode.
with open('TA.xpt', 'wb') as f:
    xport.v56.dump(TA_library, f)
#
# TE: same steps as for TA, applied to TE_df.
TE_ds = xport.Dataset(TE_df, name='TE', label='Trial Elements (TE) data')
# Upper-case and truncate the column names to 8 characters, renaming
# through the dataset rather than on the column directly.
TE_ds = TE_ds.rename(columns={k: k.upper()[:8] for k in TE_ds})
# Libraries can have multiple datasets; this one holds only TE.
TE_library = xport.Library({'TE': TE_ds})
# NOTE(review): no dump of TE_library is visible in this excerpt; confirm
# it is written out further down.
#
#

コード例 #15

0

ファイルを表示

ファイル: test_xport.py プロジェクト: sanders41/xport

 def test_create_from_list(self):
     """
     Check building a library from one dataset and from a list of datasets.
     """
     single = xport.Library(xport.Dataset())
     # A dataset created without a name is expected under the key ``None``.
     assert None in single
     duplicate_name_warning = r'More than one dataset named'
     with pytest.warns(UserWarning, match=duplicate_name_warning):
         xport.Library([xport.Dataset(), xport.Dataset()])

コード例 #16

0

ファイルを表示

ファイル: test_xport.py プロジェクト: sanders41/xport

 def test_create_from_mapping(self):
     """
     Check building a library from a mapping of names to datasets.
     """
     rename_warning = r'Set dataset name'
     with pytest.warns(UserWarning, match=rename_warning):
         keyed = xport.Library({'x': xport.Dataset()})
     assert 'x' in keyed
     # A key that differs from the dataset's own name is expected to fail.
     with pytest.raises(ValueError):
         xport.Library({'x': xport.Dataset(name='y')})