Beispiel #1
0
    def test_nominal_as_category(self):
        """Write two unordered categorical columns and compare the resulting file lines.

        The expected output is ``EXPECTED_1`` with the entry at index 3 replaced
        by the declaration of ``attr_nominal_spaces`` that lists the categories
        as ``{"hard liquor","red wine",mate}``.
        """
        nominal = pandas.Categorical(
            ['water', 'wine', 'beer', None, 'wine', 'water'],
            ordered=False)
        nominal_spaces = pandas.Categorical(
            ["'red wine'", "'hard liquor'", None, 'mate', "'hard liquor'", 'mate'],
            ordered=False)
        data = pandas.DataFrame({
            'attr_nominal': nominal,
            'attr_nominal_spaces': nominal_spaces,
        })

        tmp = tempfile.NamedTemporaryFile(delete=False)
        path = tmp.name
        try:
            # Leaving the ``with`` block closes the wrapper, so everything is
            # flushed to disk before the file is opened again for reading.
            with TextIOWrapper(tmp) as stream:
                writearff(data, stream, relation_name='test_nominal', index=False)

            with open(path, 'r') as written:
                actual_lines = written.readlines()
        finally:
            # delete=False leaves the file on disk, so remove it explicitly.
            os.unlink(path)

        expected = EXPECTED_1.copy()
        expected[3] = "@attribute attr_nominal_spaces\t{\"hard liquor\",\"red wine\",mate}\n"
        self.assertListEqual(expected, actual_lines)
Beispiel #2
0
def _make_and_write_data(fp,
                         n_samples,
                         n_features,
                         with_index,
                         with_labels,
                         seed,
                         column_prefix="V"):
    """Build a data frame from generated data, write it with ``writearff`` and return it.

    Parameters
    ----------
    fp :
        Destination handed to ``writearff`` unchanged.
    n_samples : int
        Number of rows; forwarded to ``_make_survival_data`` and used as the
        length of the generated index.
    n_features : int
        Number of feature columns; forwarded to ``_make_survival_data``.
    with_index : bool
        If true, the frame gets a shuffled floating point index holding the
        values ``0 .. n_samples - 1`` and the index is written as well.
    with_labels : bool
        If true, the ``event`` and ``time`` values are appended as two extra
        columns named ``"event"`` and ``"time"``.
    seed :
        Forwarded to ``_make_survival_data``.
    column_prefix : str, optional
        Prefix of the feature column names, which are ``<prefix>0``,
        ``<prefix>1``, ...

    Returns
    -------
    pandas.DataFrame
        The frame that was passed to ``writearff``.
    """
    x, event, time = _make_survival_data(n_samples, n_features, seed)

    columns = ["{}{}".format(column_prefix, i) for i in range(n_features)]
    if with_labels:
        columns += ["event", "time"]
        arr = numpy.column_stack((x, event, time))
    else:
        arr = x

    if with_index:
        # ``numpy.float_`` was removed in NumPy 2.0; ``numpy.float64`` is the
        # very same type and is available in every NumPy release.
        index = numpy.arange(n_samples, dtype=numpy.float64)
        # Fixed seed: the shuffled order is independent of ``seed``.
        numpy.random.RandomState(0).shuffle(index)
    else:
        index = None

    dataset = pandas.DataFrame(arr, index=index, columns=columns)
    dataset.index.name = "index"

    writearff(dataset, fp, index=with_index)
    return dataset
Beispiel #3
0
def test_writearff_unsupported_column_type(temp_file):
    """Expect ``writearff`` to raise ``TypeError`` for a ``complex128`` column.

    ``temp_file`` is passed to ``writearff`` as the output target
    (presumably a pytest fixture -- confirm against the test setup).
    """
    complex_values = numpy.array(
        [2 + 3j, 45.1 - 1j, 0 - 1j, 7 + 0j, 132 - 3j, 1 - 0.41j],
        dtype="complex128")
    data = pandas.DataFrame({"attr_datetime": complex_values})

    expected_message = "unsupported type complex128"
    with pytest.raises(TypeError, match=expected_message):
        writearff(data, temp_file, relation_name='test_delta', index=False)
Beispiel #4
0
def test_writearff_nominal_with_quotes(temp_file):
    """Write two string columns, one with single-quoted values, and expect ``EXPECTED_1``.

    ``temp_file`` is the output target given to ``writearff``; the file at
    ``temp_file.name`` is read back afterwards for the comparison.
    """
    columns = {
        'attr_nominal': ['water', 'wine', 'beer', None, 'wine', 'water'],
        'attr_nominal_spaces': [
            "'red wine'", "'hard liquor'", None, 'mate', "'hard liquor'", 'mate',
        ],
    }
    data = pandas.DataFrame(columns)

    writearff(data, temp_file, relation_name='test_nominal', index=False)

    with open(temp_file.name, 'r') as written:
        actual_lines = written.readlines()

    assert EXPECTED_1 == actual_lines
Beispiel #5
0
def test_writearff_datetime(temp_file):
    """Write a single ``datetime64`` column and expect ``EXPECTED_DATETIME``.

    ``temp_file`` is the output target given to ``writearff``; the file at
    ``temp_file.name`` is read back afterwards for the comparison.
    """
    timestamps = numpy.array(
        ["2014-10-31 14:13:01", "2004-03-13 19:49:31", "1998-12-06 09:10:11"],
        dtype="datetime64")
    data = pandas.DataFrame({"attr_datetime": timestamps})

    writearff(data, temp_file, relation_name='test_datetime', index=False)

    with open(temp_file.name, 'r') as written:
        actual_lines = written.readlines()

    assert EXPECTED_DATETIME == actual_lines
Beispiel #6
0
def test_writearff_nominal_as_category(temp_file):
    """Write two unordered categorical columns and compare the resulting file lines.

    The expected output is ``EXPECTED_1`` with the entry at index 3 replaced
    by the declaration of ``attr_nominal_spaces`` that lists the categories
    as ``{"hard liquor","red wine",mate}``.
    """
    nominal = pandas.Categorical(
        ['water', 'wine', 'beer', None, 'wine', 'water'],
        ordered=False)
    nominal_spaces = pandas.Categorical(
        ["'red wine'", "'hard liquor'", None, 'mate', "'hard liquor'", 'mate'],
        ordered=False)
    data = pandas.DataFrame({
        'attr_nominal': nominal,
        'attr_nominal_spaces': nominal_spaces,
    })

    writearff(data, temp_file, relation_name='test_nominal', index=False)

    with open(temp_file.name, 'r') as written:
        actual_lines = written.readlines()

    expected = EXPECTED_1.copy()
    expected[3] = "@attribute attr_nominal_spaces\t{\"hard liquor\",\"red wine\",mate}\n"
    assert expected == actual_lines
Beispiel #7
0
def test_writearff_nominal_with_category_ordering(temp_file):
    """Write a categorical with an explicit category list and check its declaration.

    ``attr_nominal`` declares the categories ``water, coke, beer, wine`` in
    that order (``coke`` never occurs in the values). The expected output is
    ``EXPECTED_1`` with the entry at index 2 replaced by a declaration that
    lists exactly those categories in the same order.
    """
    nominal = pandas.Categorical(
        ['water', 'wine', 'beer', None, 'wine', 'water'],
        categories=['water', 'coke', 'beer', 'wine'],
        ordered=False)
    nominal_spaces = ["'red wine'", "'hard liquor'", None, 'mate', "'hard liquor'", 'mate']
    data = pandas.DataFrame({
        'attr_nominal': nominal,
        'attr_nominal_spaces': nominal_spaces,
    })

    writearff(data, temp_file, relation_name='test_nominal', index=False)

    with open(temp_file.name, 'r') as written:
        actual_lines = written.readlines()

    expected = EXPECTED_1.copy()
    expected[2] = "@attribute attr_nominal\t{water,coke,beer,wine}\n"
    assert expected == actual_lines
Beispiel #8
0
    def test_nominal_with_quotes(self):
        """Write two string columns, one with single-quoted values, and expect ``EXPECTED_1``."""
        columns = {
            'attr_nominal': ['water', 'wine', 'beer', None, 'wine', 'water'],
            'attr_nominal_spaces': [
                "'red wine'", "'hard liquor'", None, 'mate', "'hard liquor'", 'mate',
            ],
        }
        data = pandas.DataFrame(columns)

        tmp = tempfile.NamedTemporaryFile(delete=False)
        path = tmp.name
        try:
            # Leaving the ``with`` block closes the wrapper, so everything is
            # flushed to disk before the file is opened again for reading.
            with TextIOWrapper(tmp) as stream:
                writearff(data, stream, relation_name='test_nominal', index=False)

            with open(path, 'r') as written:
                actual_lines = written.readlines()
        finally:
            # delete=False leaves the file on disk, so remove it explicitly.
            os.unlink(path)

        self.assertListEqual(EXPECTED_1, actual_lines)
Beispiel #9
0
    def test_datetime(self):
        """Write a single ``datetime64`` column and expect ``EXPECTED_DATETIME``."""
        timestamps = numpy.array(
            ["2014-10-31 14:13:01", "2004-03-13 19:49:31", "1998-12-06 09:10:11"],
            dtype="datetime64")
        data = pandas.DataFrame({"attr_datetime": timestamps})

        tmp = tempfile.NamedTemporaryFile(delete=False)
        path = tmp.name
        try:
            # Leaving the ``with`` block closes the wrapper, so everything is
            # flushed to disk before the file is opened again for reading.
            with TextIOWrapper(tmp) as stream:
                writearff(data, stream, relation_name='test_datetime', index=False)

            with open(path, 'r') as written:
                actual_lines = written.readlines()
        finally:
            # delete=False leaves the file on disk, so remove it explicitly.
            os.unlink(path)

        self.assertListEqual(EXPECTED_DATETIME, actual_lines)