def test_badtype_parsing(self):
        """Test parsing wrong type of attribute from their value."""
        # A context manager closes the file even when read_header raises;
        # the original never closed it at all.
        with open(test3) as ofile:
            _, attrs = read_header(ofile)

        # Every attribute value read from this header must be rejected.
        for _, value in attrs:
            assert_raises(ParseArffError, parse_type, value)
Example #2
0
    def test_badtype_parsing(self):
        """Test parsing wrong type of attribute from their value."""
        # A context manager closes the file even when read_header raises;
        # the original never closed it at all.
        with open(test3) as ofile:
            _, attrs = read_header(ofile)

        # Every attribute value read from this header must be rejected.
        for _, value in attrs:
            assert_raises(ParseArffError, parse_type, value)
Example #3
0
    def test_type_parsing(self):
        # Test parsing type of attribute from their value.
        with open(test2) as ofile:
            _, attrs = read_header(ofile)

        expected = ['numeric', 'numeric', 'numeric', 'numeric', 'numeric',
                    'numeric', 'string', 'string', 'nominal', 'nominal']

        # Without this check a header that yields too few attributes would
        # pass silently, because the loop only visits what was parsed.
        assert_(len(attrs) == len(expected))

        for attr, expected_type in zip(attrs, expected):
            assert_(attr.type_name == expected_type)
Example #4
0
    def test_type_parsing(self):
        """Test parsing type of attribute from their value."""
        # A context manager closes the file even when read_header raises;
        # the original never closed it at all.
        with open(test2) as ofile:
            _, attrs = read_header(ofile)

        expected = ['numeric', 'numeric', 'numeric', 'numeric', 'numeric',
                    'numeric', 'string', 'string', 'nominal', 'nominal']

        # Without this check a header that yields too few attributes would
        # pass silently, because the loop only visits what was parsed.
        assert_(len(attrs) == len(expected))

        # Each attribute is indexed as (name, value); the value is parsed.
        for attr, expected_type in zip(attrs, expected):
            assert_(parse_type(attr[1]) == expected_type)
Example #5
0
    def test_type_parsing(self):
        """Test parsing type of attribute from their value."""
        # A context manager closes the file even when read_header raises;
        # the original never closed it at all.
        with open(test2) as ofile:
            _, attrs = read_header(ofile)

        expected = ['numeric', 'numeric', 'numeric', 'numeric', 'numeric',
                    'numeric', 'string', 'string', 'nominal', 'nominal']

        # Without this check a header that yields too few attributes would
        # pass silently, because the loop only visits what was parsed.
        assert len(attrs) == len(expected)

        # Each attribute is indexed as (name, value); the value is parsed.
        for attr, expected_type in zip(attrs, expected):
            assert parse_type(attr[1]) == expected_type
Example #6
0
    def test_badtype_parsing(self):
        """Test parsing wrong type of attribute from their value."""
        # A context manager closes the file even when read_header raises;
        # the original never closed it at all.
        with open(test3) as ofile:
            _, attrs = read_header(ofile)

        for _, value in attrs:
            try:
                parse_type(value)
            except ParseArffError:
                # Expected outcome: the value must be rejected.
                pass
            else:
                # Raised outside the try body so the except clause above can
                # never swallow the failure signal.
                # NOTE(review): `Error` is not defined in the visible code -
                # confirm it is imported where this test lives.
                raise Error("Could parse type of crap, should not happen.")
Example #7
0
    def test_badtype_parsing(self):
        """Test parsing wrong type of attribute from their value."""
        # A context manager closes the file even when read_header raises;
        # the original never closed it at all.
        with open(test3) as ofile:
            _, attrs = read_header(ofile)

        for _, value in attrs:
            try:
                parse_type(value)
            except ParseArffError:
                # Expected outcome: the value must be rejected.
                pass
            else:
                # Raised outside the try body so the except clause above can
                # never swallow the failure signal.
                # NOTE(review): `Error` is not defined in the visible code -
                # confirm it is imported where this test lives.
                raise Error("Could parse type of crap, should not happen.")
Example #8
0
    def test_type_parsing(self):
        # Test parsing type of attribute from their value.
        # `with` replaces the open/close pair so the file is also closed
        # when read_header raises.
        with open(test2) as ofile:
            _, attrs = read_header(ofile)

        expected = ['numeric', 'numeric', 'numeric', 'numeric', 'numeric',
                    'numeric', 'string', 'string', 'nominal', 'nominal']

        # Without this check a header that yields too few attributes would
        # pass silently, because the loop only visits what was parsed.
        assert_(len(attrs) == len(expected))

        for attr, expected_type in zip(attrs, expected):
            assert_(attr.type_name == expected_type)
Example #9
0
def iter_arff(filepath_or_buffer,
              target: str = None,
              compression="infer") -> base.typing.Stream:
    """Iterates over rows from an ARFF file.

    Parameters
    ----------
    filepath_or_buffer
        Either a string indicating the location of an ARFF file, or a buffer object that has a
        `read` method.
    target
        Name of the target field. When falsy, the yielded target is `None`.
    compression
        For on-the-fly decompression of on-disk data. If this is set to 'infer' and
        `filepath_or_buffer` is a path, then the decompression method is inferred for the
        following extensions: '.gz', '.zip'.

    Yields
    ------
    A `(x, y)` pair for each line following the header: `x` maps attribute names to values
    (numeric attributes are converted with `float`, all others are left as strings) and `y`
    is the value popped from `x` under `target`, or `None` when no target is given.

    Raises
    ------
    arffread.ParseArffError
        If reading the header raises a `ValueError`.
    KeyError
        If `target` is not among the fields parsed from a row.

    """

    # If a file is not opened, then we open it
    buffer = filepath_or_buffer
    if not hasattr(buffer, "read"):
        buffer = utils.open_filepath(buffer, compression)

    # The try/finally guarantees that a file opened here is released even when
    # the header is invalid, a row fails to parse, or the consumer stops
    # iterating before the end of the file.
    try:
        try:
            _, attrs = arffread.read_header(buffer)
        except ValueError as e:
            msg = f"Error while parsing header, error was: {e}"
            # Chain the original error so the root cause stays in the traceback
            raise arffread.ParseArffError(msg) from e

        names = [attr.name for attr in attrs]
        # Only numeric attributes get a converter; None means "keep the raw string"
        types = [
            float if isinstance(attr, arffread.NumericAttribute) else None
            for attr in attrs
        ]

        for r in buffer:
            values = r.rstrip().split(",")
            # zip stops at the shortest sequence, so a short row yields fewer keys
            x = {
                name: typ(val) if typ else val
                for name, typ, val in zip(names, types, values)
            }
            try:
                y = x.pop(target) if target else None
            except KeyError:
                # Show the offending raw line before propagating the error
                print(r)
                raise

            yield x, y
    finally:
        # Close the file if we opened it
        if buffer is not filepath_or_buffer:
            buffer.close()
    def test_dateheader_unsupported(self):
        # Header of the test8 file: both date attributes are expected to come
        # back as (name, raw type string) pairs.
        # `with` replaces the open/close pair so the file is also closed
        # when read_header raises.
        with open(test8) as ofile:
            rel, attrs = read_header(ofile)

        assert_(rel == 'test8')

        assert_(len(attrs) == 2)
        assert_(attrs[0][0] == 'attr_datetime_utc')
        assert_(attrs[0][1] == 'DATE "yyyy-MM-dd HH:mm Z"')

        assert_(attrs[1][0] == 'attr_datetime_full')
        assert_(attrs[1][1] == 'DATE "yy-MM-dd HH:mm:ss z"')
Example #11
0
    def test_dateheader_unsupported(self):
        # Header of the test8 file: both date attributes are expected to come
        # back as (name, raw type string) pairs.
        # `with` replaces the open/close pair so the file is also closed
        # when read_header raises.
        with open(test8) as ofile:
            rel, attrs = read_header(ofile)

        assert_(rel == 'test8')

        assert_(len(attrs) == 2)
        assert_(attrs[0][0] == 'attr_datetime_utc')
        assert_(attrs[0][1] == 'DATE "yyyy-MM-dd HH:mm Z"')

        assert_(attrs[1][0] == 'attr_datetime_full')
        assert_(attrs[1][1] == 'DATE "yy-MM-dd HH:mm:ss z"')
Example #12
0
    def test_fullheader1(self):
        # Header of the test1 file: relation name, four numeric attributes
        # and one trailing nominal attribute.
        with open(test1) as handle:
            rel, attrs = read_header(handle)

        # Relation name
        assert_(rel == 'test1')

        # The first four attributes are numeric and named attr0..attr3
        assert_(len(attrs) == 5)
        for idx, numeric_attr in enumerate(attrs[:4]):
            assert_(numeric_attr.name == 'attr%d' % idx)
            assert_(numeric_attr.type_name == 'numeric')

        # The last attribute is the nominal class label
        nominal_attr = attrs[4]
        assert_(nominal_attr.name == 'class')
        assert_(nominal_attr.values == ('class0', 'class1', 'class2', 'class3'))
Example #13
0
    def test_fullheader1(self):
        # Parsing trivial header with nothing.
        # `with` replaces the open/close pair so the file is also closed
        # when read_header raises.
        with open(test1) as ofile:
            rel, attrs = read_header(ofile)

        # Test relation
        assert_(rel == 'test1')

        # Test numerical attributes: each entry is a (name, type string) pair
        assert_(len(attrs) == 5)
        for i in range(4):
            assert_(attrs[i][0] == 'attr%d' % i)
            assert_(attrs[i][1] == 'REAL')

        # Test nominal attribute
        assert_(attrs[4][0] == 'class')
        assert_(attrs[4][1] == '{class0, class1, class2, class3}')
Example #14
0
    def test_fullheader1(self):
        """Parsing trivial header with nothing."""
        # A context manager closes the file even when read_header raises;
        # the original never closed it at all.
        with open(test1) as ofile:
            rel, attrs = read_header(ofile)

        # Test relation
        assert rel == 'test1'

        # Test numerical attributes: each entry is a (name, type string) pair
        assert len(attrs) == 5
        for i in range(4):
            assert attrs[i][0] == 'attr%d' % i
            assert attrs[i][1] == 'REAL'

        # Test nominal attribute
        assert attrs[4][0] == 'class'
        assert attrs[4][1] == '{class0, class1, class2, class3}'
    def test_fullheader1(self):
        """Parsing trivial header with nothing."""
        # A context manager closes the file even when read_header raises;
        # the original never closed it at all.
        with open(test1) as ofile:
            rel, attrs = read_header(ofile)

        # Test relation
        assert_(rel == "test1")

        # Test numerical attributes: each entry is a (name, type string) pair
        assert_(len(attrs) == 5)
        for i in range(4):
            assert_(attrs[i][0] == "attr%d" % i)
            assert_(attrs[i][1] == "REAL")

        # Test nominal attribute
        assert_(attrs[4][0] == "class")
        assert_(attrs[4][1] == "{class0, class1, class2, class3}")
Example #16
0
    def test_fullheader1(self):
        """Parsing trivial header with nothing."""
        # A context manager closes the file even when read_header raises;
        # the original never closed it at all.
        with open(test1) as ofile:
            rel, attrs = read_header(ofile)

        # Test relation
        assert rel == 'test1'

        # Test numerical attributes: each entry is a (name, type string) pair
        assert len(attrs) == 5
        for i in range(4):
            assert attrs[i][0] == 'attr%d' % i
            assert attrs[i][1] == 'REAL'

        # Test nominal attribute
        assert attrs[4][0] == 'class'
        assert attrs[4][1] == '{class0, class1, class2, class3}'
Example #17
0
    def test_fullheader1(self):
        # Parsing trivial header with nothing.
        # `with` replaces the open/close pair so the file is also closed
        # when read_header raises.
        with open(test1) as ofile:
            rel, attrs = read_header(ofile)

        # Test relation
        assert_(rel == 'test1')

        # Test numerical attributes
        assert_(len(attrs) == 5)
        for i in range(4):
            assert_(attrs[i].name == 'attr%d' % i)
            assert_(attrs[i].type_name == 'numeric')

        # Test nominal attribute
        assert_(attrs[4].name == 'class')
        assert_(attrs[4].values == ('class0', 'class1', 'class2', 'class3'))
    def test_fullheader1(self):
        # Parsing trivial header with nothing.
        # `with` replaces the open/close pair so the file is also closed
        # when read_header raises.
        with open(test1) as ofile:
            rel, attrs = read_header(ofile)

        # Test relation
        assert_(rel == 'test1')

        # Test numerical attributes: each entry is a (name, type string) pair
        assert_(len(attrs) == 5)
        for i in range(4):
            assert_(attrs[i][0] == 'attr%d' % i)
            assert_(attrs[i][1] == 'REAL')

        # Test nominal attribute
        assert_(attrs[4][0] == 'class')
        assert_(attrs[4][1] == '{class0, class1, class2, class3}')
Example #19
0
    def test_type_parsing(self):
        # Test parsing type of attribute from their value.
        # `with` replaces the open/close pair so the file is also closed
        # when read_header raises.
        with open(test2) as ofile:
            _, attrs = read_header(ofile)

        expected = [
            "numeric",
            "numeric",
            "numeric",
            "numeric",
            "numeric",
            "numeric",
            "string",
            "string",
            "nominal",
            "nominal",
        ]

        # Without this check a header that yields too few attributes would
        # pass silently, because the loop only visits what was parsed.
        assert_(len(attrs) == len(expected))

        # Each attribute is indexed as (name, value); the value is parsed.
        for attr, expected_type in zip(attrs, expected):
            assert_(parse_type(attr[1]) == expected_type)
Example #20
0
    def test_dateheader(self):
        # Header of the test7 file: five date attributes, each checked for
        # its name and its converted date format, in header order.
        with open(test7) as handle:
            rel, attrs = read_header(handle)

        assert_(rel == 'test7')

        assert_(len(attrs) == 5)

        expected = (
            ('attr_year', '%Y'),
            ('attr_month', '%Y-%m'),
            ('attr_date', '%Y-%m-%d'),
            ('attr_datetime_local', '%Y-%m-%d %H:%M'),
            ('attr_datetime_missing', '%Y-%m-%d %H:%M'),
        )
        # The length was asserted above, so zip pairs every attribute.
        for attr, (attr_name, attr_format) in zip(attrs, expected):
            assert_(attr.name == attr_name)
            assert_(attr.date_format == attr_format)
Example #21
0
    def test_dateheader(self):
        # Header of the test7 file: five date attributes, each expected as a
        # (name, raw type string) pair.
        # `with` replaces the open/close pair so the file is also closed
        # when read_header raises.
        with open(test7) as ofile:
            rel, attrs = read_header(ofile)

        assert_(rel == "test7")

        assert_(len(attrs) == 5)

        assert_(attrs[0][0] == "attr_year")
        assert_(attrs[0][1] == "DATE yyyy")

        assert_(attrs[1][0] == "attr_month")
        assert_(attrs[1][1] == "DATE yyyy-MM")

        assert_(attrs[2][0] == "attr_date")
        assert_(attrs[2][1] == "DATE yyyy-MM-dd")

        assert_(attrs[3][0] == "attr_datetime_local")
        assert_(attrs[3][1] == 'DATE "yyyy-MM-dd HH:mm"')

        assert_(attrs[4][0] == "attr_datetime_missing")
        assert_(attrs[4][1] == 'DATE "yyyy-MM-dd HH:mm"')
Example #22
0
    def test_dateheader(self):
        # Header of the test7 file: five date attributes, each checked for
        # its name and its converted date format.
        # `with` replaces the open/close pair so the file is also closed
        # when read_header raises.
        with open(test7) as ofile:
            rel, attrs = read_header(ofile)

        assert_(rel == 'test7')

        assert_(len(attrs) == 5)

        assert_(attrs[0].name == 'attr_year')
        assert_(attrs[0].date_format == '%Y')

        assert_(attrs[1].name == 'attr_month')
        assert_(attrs[1].date_format == '%Y-%m')

        assert_(attrs[2].name == 'attr_date')
        assert_(attrs[2].date_format == '%Y-%m-%d')

        assert_(attrs[3].name == 'attr_datetime_local')
        assert_(attrs[3].date_format == '%Y-%m-%d %H:%M')

        assert_(attrs[4].name == 'attr_datetime_missing')
        assert_(attrs[4].date_format == '%Y-%m-%d %H:%M')
    def test_dateheader(self):
        # Header of the test7 file: five date attributes, each expected as a
        # (name, raw type string) pair.
        # `with` replaces the open/close pair so the file is also closed
        # when read_header raises.
        with open(test7) as ofile:
            rel, attrs = read_header(ofile)

        assert_(rel == 'test7')

        assert_(len(attrs) == 5)

        assert_(attrs[0][0] == 'attr_year')
        assert_(attrs[0][1] == 'DATE yyyy')

        assert_(attrs[1][0] == 'attr_month')
        assert_(attrs[1][1] == 'DATE yyyy-MM')

        assert_(attrs[2][0] == 'attr_date')
        assert_(attrs[2][1] == 'DATE yyyy-MM-dd')

        assert_(attrs[3][0] == 'attr_datetime_local')
        assert_(attrs[3][1] == 'DATE "yyyy-MM-dd HH:mm"')

        assert_(attrs[4][0] == 'attr_datetime_missing')
        assert_(attrs[4][1] == 'DATE "yyyy-MM-dd HH:mm"')
Example #24
0
    def test_dateheader(self):
        # Header of the test7 file: five date attributes, each expected as a
        # (name, raw type string) pair.
        # `with` replaces the open/close pair so the file is also closed
        # when read_header raises.
        with open(test7) as ofile:
            rel, attrs = read_header(ofile)

        assert_(rel == 'test7')

        assert_(len(attrs) == 5)

        assert_(attrs[0][0] == 'attr_year')
        assert_(attrs[0][1] == 'DATE yyyy')

        assert_(attrs[1][0] == 'attr_month')
        assert_(attrs[1][1] == 'DATE yyyy-MM')

        assert_(attrs[2][0] == 'attr_date')
        assert_(attrs[2][1] == 'DATE yyyy-MM-dd')

        assert_(attrs[3][0] == 'attr_datetime_local')
        assert_(attrs[3][1] == 'DATE "yyyy-MM-dd HH:mm"')

        assert_(attrs[4][0] == 'attr_datetime_missing')
        assert_(attrs[4][1] == 'DATE "yyyy-MM-dd HH:mm"')
Example #25
0
 def read_dateheader_unsupported():
     # Read the header of the test8 file and discard both returned values.
     # NOTE(review): presumably used as a callable that is expected to
     # raise - confirm against the caller.
     with open(test8) as handle:
         _rel, _attrs = read_header(handle)
Example #26
0
 def badtype_read():
     # Read the header of the test3 file and discard both returned values.
     # NOTE(review): presumably used as a callable that is expected to
     # raise - confirm against the caller.
     with open(test3) as handle:
         _rel, _attrs = read_header(handle)
Example #27
0
 def read_dateheader_unsupported():
     # Read the header of the test8 file and discard both returned values.
     # NOTE(review): presumably used as a callable that is expected to
     # raise - confirm against the caller. If so, the original open/close
     # pair never reached close(); `with` closes the file on that path too.
     with open(test8) as ofile:
         _rel, _attrs = read_header(ofile)
Example #28
0
 def badtype_read():
     # Read the header of the test3 file and discard both returned values.
     # NOTE(review): presumably used as a callable that is expected to
     # raise - confirm against the caller. If so, the original open/close
     # pair never reached close(); `with` closes the file on that path too.
     with open(test3) as ofile:
         _rel, _attrs = read_header(ofile)
Example #29
0
 def read_dateheader_unsupported():
     # Read the header of the test8 file and discard both returned values.
     # NOTE(review): presumably used as a callable that is expected to
     # raise - confirm against the caller. If so, the original open/close
     # pair never reached close(); `with` closes the file on that path too.
     with open(test8) as ofile:
         _rel, _attrs = read_header(ofile)
Example #30
0
 def badtype_read():
     # Read the header of the test3 file and discard both returned values.
     # NOTE(review): presumably used as a callable that is expected to
     # raise - confirm against the caller. If so, the original open/close
     # pair never reached close(); `with` closes the file on that path too.
     with open(test3) as ofile:
         _rel, _attrs = read_header(ofile)