def test_append(self):
    """Append one row to a private CSV copy and check chunked reads.

    ``self.buf`` is written to a fresh temporary file so the shared
    fixture is never modified.  After extending with a fourth row, reading
    in chunks of two must give two chunks of two records each, and
    extending with ``[3.3]`` must raise ``ValueError``.
    """
    # mkstemp creates the file atomically; mktemp only returns a name and is
    # deprecated because another process can claim that name first.
    fd, csv_file = tempfile.mkstemp(".csv")
    os.close(fd)
    try:
        with open(csv_file, "w") as f:
            f.write(self.buf)
        dd = CSV(csv_file, schema=self.schema, mode='r+')
        dd.extend([["k4", "v4", 4, True]])
        vals = [nd.as_py(v) for v in dd.chunks(blen=2)]
        self.assertEqual(vals, [
            [{u'f0': u'k1', u'f1': u'v1', u'f2': 1, u'f3': False},
             {u'f0': u'k2', u'f1': u'v2', u'f2': 2, u'f3': True}],
            [{u'f0': u'k3', u'f1': u'v3', u'f2': 3, u'f3': False},
             {u'f0': u'k4', u'f1': u'v4', u'f2': 4, u'f3': True}],
        ])
        self.assertRaises(ValueError, lambda: dd.extend([3.3]))
    finally:
        # Remove the temporary file even when an assertion above fails.
        os.remove(csv_file)
Example #2
0
def csv(schema):
    """Yield a ``CSV`` written to ``test.csv`` and delete the file afterwards.

    The descriptor is opened in write mode with the given ``schema`` and
    filled with the module-level ``data`` before being handed out.
    """
    descriptor = CSV('test.csv', schema=schema, mode='w')
    descriptor.extend(data)
    yield descriptor
    # Best-effort cleanup: the file may already be gone.
    try:
        os.remove(descriptor.path)
    except OSError:
        pass
Example #3
0
def test_table_resource():
    """``Data`` built from a CSV filename wraps a ``CSV`` with the same rows."""
    with tmpfile('csv') as filename:
        source = CSV(filename, 'w', schema='{x: int, y: int}')
        source.extend([[1, 2], [10, 20]])

        table = Data(filename)
        assert isinstance(table.data, CSV)
        computed = list(compute(table))
        assert computed == list(source)
Example #4
0
def csv():
    """Yield a ``CSV`` written to ``test.csv`` and delete the file afterwards.

    Uses the module-level ``schema`` and ``data`` to create and fill the
    descriptor in write mode.
    """
    descriptor = CSV('test.csv', schema=schema, mode='w')
    descriptor.extend(data)
    yield descriptor
    # Best-effort cleanup: the file may already be gone.
    try:
        os.remove(descriptor.path)
    except OSError:
        pass
Example #5
0
def test_table_resource():
    """Check that ``Data(filename)`` exposes a ``CSV`` matching what was written."""
    with tmpfile('csv') as filename:
        written = CSV(filename, 'w', schema='{x: int, y: int}')
        written.extend([[1, 2], [10, 20]])

        table = Data(filename)
        assert isinstance(table.data, CSV)
        from_table = list(compute(table))
        assert from_table == list(written)
class Test_Dialect(unittest.TestCase):
    """Tests for ``CSV`` opened with an explicit dialect and a space delimiter."""

    # Raw fixture text (after ``sanitize``) written to the temp file in setUp.
    buf = sanitize(u"""Name Amount
        Alice 100
        Bob 200
        Alice 50
    """)

    # Record schema passed to every ``CSV`` built in setUp.
    schema = "{ f0: string, f1: int }"

    def setUp(self):
        """Write ``buf`` to a fresh temporary file and open it as ``self.dd``."""
        # mkstemp creates the file atomically; mktemp only returns a name and
        # is deprecated because another process can claim that name first.
        fd, self.csv_file = tempfile.mkstemp(".csv")
        os.close(fd)
        with open(self.csv_file, "w") as f:
            f.write(self.buf)
        self.dd = CSV(self.csv_file,
                      dialect='excel',
                      schema=self.schema,
                      delimiter=' ',
                      mode='r+')

    def tearDown(self):
        """Delete the temporary file created in setUp."""
        os.remove(self.csv_file)

    def test_has_header(self):
        """``has_header`` reports a header for the fixture text."""
        assert has_header(self.buf)

    def test_overwrite_delimiter(self):
        """An explicit ``delimiter`` overrides the one from the named dialect."""
        # assertEqual: the assertEquals alias is deprecated and was removed
        # in Python 3.12.
        self.assertEqual(self.dd.dialect['delimiter'], ' ')

    def test_content(self):
        """Both names from the fixture appear when iterating the descriptor."""
        s = str(list(self.dd))
        assert 'Alice' in s and 'Bob' in s

    def test_append(self):
        """A tuple row appended via ``extend`` becomes the file's last line."""
        self.dd.extend([('Alice', 100)])
        with open(self.csv_file) as f:
            self.assertEqual(f.readlines()[-1].strip(), 'Alice 100')

    def test_append_dict(self):
        """A dict row appended via ``extend`` becomes the file's last line."""
        self.dd.extend([{'f0': 'Alice', 'f1': 100}])
        with open(self.csv_file) as f:
            self.assertEqual(f.readlines()[-1].strip(), 'Alice 100')

    def test_extend_structured(self):
        """Appending to a comma-delimited file yields three rows on read-back.

        Either a list-of-lists or a list-of-dicts representation is accepted.
        """
        with filetext('1,1.0\n2,2.0\n') as fn:
            csv = CSV(fn, 'r+', schema='{x: int32, y: float32}', delimiter=',')
            csv.extend([(3, 3)])
            # Read the file once and compare against both accepted shapes.
            rows = list(csv)
            as_lists = [[1, 1.0], [2, 2.0], [3, 3.0]]
            as_dicts = [{'x': 1, 'y': 1.0},
                        {'x': 2, 'y': 2.0},
                        {'x': 3, 'y': 3.0}]
            assert rows == as_lists or rows == as_dicts
Example #7
0
def test_a_mode():
    """Rows appended through an ``'a'``-mode ``CSV`` show up in the name column."""
    text = ("id, name, balance\n1, Alice, 100\n2, Bob, 200\n"
            "3, Charlie, 300\n4, Denis, 400\n5, Edith, 500")
    with filetext(text) as fn:
        sink = CSV(fn, 'a')
        extra_rows = [(6, 'Frank', 600), (7, 'Georgina', 700)]
        sink.extend(extra_rows)

        names = set(sink[:, 'name'])
        assert 'Georgina' in names
Example #8
0
 def test_extend_structured_many_newlines(self):
     """Appending after several blank lines keeps them as NaN-filled rows.

     The expected read-back is the two original rows, three rows of
     (int32-cast NaN, NaN) for the blank lines, then the appended row.
     """
     # Whatever int32 value NaN casts to on this platform.
     int_nan = np.array([np.nan]).astype('int32').item()
     with filetext('1,1.0\n2,2.0\n\n\n\n') as fn:
         csv = CSV(fn, 'r+', schema='{x: int32, y: float32}', delimiter=',')
         csv.extend([(3, 3)])
         result = tuplify(tuple(csv))
         blank_row = (int_nan, np.nan)
         expected = ((1, 1.0), (2, 2.0)) + (blank_row,) * 3 + ((3, 3.0),)
         matches = np.isclose(result, expected, equal_nan=True)
         assert matches.all()
Example #9
0
def date_data():
    """Yield a read-mode ``CSV`` over three (name, amount, date) rows.

    The rows are first written to a temporary file through a write-mode
    ``CSV``; the last row has ``None`` as its date.
    """
    rows = [
        ('Alice', 100.0, datetime(2014, 9, 11)),
        ('Alice', -200.0, datetime(2014, 9, 10)),
        ('Bob', 300.0, None),
    ]
    record_type = dshape('{name: string, amount: float32, date: ?datetime}')
    with tmpfile('.csv') as path:
        writer = CSV(path, schema=record_type, mode='w')
        writer.extend(rows)
        yield CSV(path, schema=record_type, mode='r')
Example #10
0
    def test_a_mode(self):
        """Rows appended in ``'a'`` mode are visible via ``.py[:, 'name']``."""
        text = ("id, name, balance\n1, Alice, 100\n2, Bob, 200\n"
                "3, Charlie, 300\n4, Denis, 400\n5, Edith, 500")
        with filetext(text) as fn:
            sink = CSV(fn, 'a')
            extra_rows = [(6, 'Frank', 600), (7, 'Georgina', 700)]
            sink.extend(extra_rows)

            names = set(sink.py[:, 'name'])
            assert 'Georgina' in names
Example #11
0
def date_data():
    """Write three dated rows to a temporary CSV and yield it opened read-only.

    One row carries ``None`` for the optional ``date`` field.
    """
    schema = dshape('{name: string, amount: float32, date: ?datetime}')
    records = [
        ('Alice', 100.0, datetime(2014, 9, 11)),
        ('Alice', -200.0, datetime(2014, 9, 10)),
        ('Bob', 300.0, None),
    ]
    with tmpfile('.csv') as path:
        CSV(path, schema=schema, mode='w').extend(records)
        yield CSV(path, schema=schema, mode='r')
Example #12
0
def test_extend(tmpcsv, schema):
    """Extending a write-mode ``CSV`` writes the rows and keeps a var-length dshape."""
    dd = CSV(tmpcsv, 'w', schema=schema, delimiter=' ')
    dd.extend(data)
    with open(tmpcsv) as f:
        written = f.readlines()
    for index, wanted in enumerate(('Alice 100', 'Bob 200', 'Alice 50')):
        assert written[index].strip() == wanted

    # The descriptor's dshape should be "var * <record schema>".
    length = datashape.Var()
    record = datashape.dshape(schema)
    expected_dshape = datashape.DataShape(length, record)

    assert str(dd.dshape) == str(expected_dshape)
Example #13
0
    def test_json_csv_structured(self):
        """Records from a ``JSON_Streaming`` source can be appended to a ``CSV``.

        Two ``{x, y}`` records are written as newline-delimited JSON, copied
        into an initially empty CSV via ``extend``, and read back as tuples.
        """
        data = [{'x': 1, 'y': 1}, {'x': 2, 'y': 2}]
        text = '\n'.join(map(json.dumps, data))
        schema = '{x: int, y: int}'

        with filetext(text) as json_fn:
            with filetext('') as csv_fn:
                js = JSON_Streaming(json_fn, schema=schema)
                csv = CSV(csv_fn, mode='r+', schema=schema)

                csv.extend(js)

                # assertEqual: the assertEquals alias is deprecated and was
                # removed in Python 3.12.
                self.assertEqual(tuple(map(tuple, csv)), ((1, 1), (2, 2)))
Example #14
0
def test_extend(tmpcsv, schema):
    """Check the lines written by ``extend`` and the resulting dshape string."""
    dd = CSV(tmpcsv, 'w', schema=schema, delimiter=' ')
    dd.extend(data)
    with open(tmpcsv) as f:
        file_lines = f.readlines()
    wanted_lines = ('Alice 100', 'Bob 200', 'Alice 50')
    for position, wanted in enumerate(wanted_lines):
        assert file_lines[position].strip() == wanted

    # Expect a variable-length collection of the record schema.
    expected_dshape = datashape.DataShape(
        datashape.Var(),
        datashape.dshape(schema),
    )
    actual_text = str(dd.dshape)
    assert actual_text == str(expected_dshape)
Example #15
0
    def test_extend(self):
        """Extending writes the three fixture rows and keeps a var-length dshape."""
        dd = CSV(self.filename, 'w', schema=self.schema, delimiter=' ')
        dd.extend(self.data)
        with open(self.filename) as f:
            stripped = [line.strip() for line in f.readlines()]
        self.assertEqual(stripped[0], 'Alice 100')
        self.assertEqual(stripped[1], 'Bob 200')
        self.assertEqual(stripped[2], 'Alice 50')

        expected_dshape = datashape.DataShape(datashape.Var(), self.schema)
        # TODO: datashape comparison is broken, so compare the string forms
        # with spaces removed instead.
        actual_text = str(dd.dshape).replace(' ', '')
        wanted_text = str(expected_dshape).replace(' ', '')
        self.assertEqual(actual_text, wanted_text)
Example #16
0
    def test_json_csv_structured(self):
        """Copy newline-delimited JSON records into a ``CSV`` and read them back.

        The source holds two ``{x, y}`` records; after ``extend`` the CSV must
        iterate as ``((1, 1), (2, 2))`` once each row is converted to a tuple.
        """
        data = [{'x': 1, 'y': 1}, {'x': 2, 'y': 2}]
        text = '\n'.join(map(json.dumps, data))
        schema = '{x: int, y: int}'

        with filetext(text) as json_fn:
            with filetext('') as csv_fn:
                js = JSON_Streaming(json_fn, schema=schema)
                csv = CSV(csv_fn, mode='r+', schema=schema)

                csv.extend(js)

                # assertEqual: the assertEquals alias is deprecated and was
                # removed in Python 3.12.
                self.assertEqual(tuple(map(tuple, csv)),
                                 ((1, 1), (2, 2)))
class Test_Dialect(unittest.TestCase):
    """Tests for ``CSV`` opened with an explicit dialect and a space delimiter."""

    # Raw fixture text (after ``sanitize``) written to the temp file in setUp.
    buf = sanitize(
    u"""Name Amount
        Alice 100
        Bob 200
        Alice 50
    """)

    # Record schema passed to the ``CSV`` built in setUp.
    schema = "{ f0: string, f1: int }"

    def setUp(self):
        """Write ``buf`` to a fresh temporary file and open it as ``self.dd``."""
        # mkstemp creates the file atomically; mktemp only returns a name and
        # is deprecated because another process can claim that name first.
        fd, self.csv_file = tempfile.mkstemp(".csv")
        os.close(fd)
        with open(self.csv_file, "w") as f:
            f.write(self.buf)
        self.dd = CSV(self.csv_file, dialect='excel', schema=self.schema,
                      delimiter=' ', mode='r+')

    def tearDown(self):
        """Delete the temporary file created in setUp."""
        os.remove(self.csv_file)

    def test_has_header(self):
        """``has_header`` reports a header for the fixture text."""
        assert has_header(self.buf)

    def test_overwrite_delimiter(self):
        """An explicit ``delimiter`` overrides the one from the named dialect."""
        # assertEqual: the assertEquals alias is deprecated and was removed
        # in Python 3.12.
        self.assertEqual(self.dd.dialect['delimiter'], ' ')

    def test_content(self):
        """Both names from the fixture appear when iterating the descriptor."""
        s = str(list(self.dd))
        assert 'Alice' in s and 'Bob' in s

    def test_append(self):
        """A tuple row appended via ``extend`` becomes the file's last line."""
        self.dd.extend([('Alice', 100)])
        with open(self.csv_file) as f:
            self.assertEqual(f.readlines()[-1].strip(), 'Alice 100')

    def test_append_dict(self):
        """A dict row appended via ``extend`` becomes the file's last line."""
        self.dd.extend([{'f0': 'Alice', 'f1': 100}])
        with open(self.csv_file) as f:
            self.assertEqual(f.readlines()[-1].strip(), 'Alice 100')

    def test_extend_structured(self):
        """Appending to a comma-delimited file yields three rows on read-back.

        Either a list-of-lists or a list-of-dicts representation is accepted.
        """
        with filetext('1,1.0\n2,2.0\n') as fn:
            csv = CSV(fn, 'r+', schema='{x: int32, y: float32}',
                      delimiter=',')
            csv.extend([(3, 3)])
            # Read the file once and compare against both accepted shapes.
            rows = list(csv)
            as_lists = [[1, 1.0], [2, 2.0], [3, 3.0]]
            as_dicts = [{'x': 1, 'y': 1.0},
                        {'x': 2, 'y': 2.0},
                        {'x': 3, 'y': 3.0}]
            assert rows == as_lists or rows == as_dicts
    def test_extend(self):
        """Check the first three written lines and the descriptor's dshape."""
        dd = CSV(self.filename, 'w', schema=self.schema, delimiter=' ')
        dd.extend(self.data)
        with open(self.filename) as f:
            stripped = [line.strip() for line in f.readlines()]
        self.assertEqual(stripped[0], 'Alice 100')
        self.assertEqual(stripped[1], 'Bob 200')
        self.assertEqual(stripped[2], 'Alice 50')

        expected_dshape = datashape.DataShape(datashape.Var(), self.schema)
        # TODO: datashape comparison is broken, so compare the string forms
        # with spaces removed instead.
        actual_text = str(dd.dshape).replace(' ', '')
        wanted_text = str(expected_dshape).replace(' ', '')
        self.assertEqual(actual_text, wanted_text)
 def test_append(self):
     """Append one row to a private CSV copy and check chunked reads.

     ``self.buf`` is written to a fresh temporary file so the shared
     fixture is never modified.  After extending with a fourth row, reading
     in chunks of two must give two chunks of two records each, and
     extending with ``[3.3]`` must raise ``ValueError``.
     """
     # mkstemp creates the file atomically; mktemp only returns a name and is
     # deprecated because another process can claim that name first.
     fd, csv_file = tempfile.mkstemp(".csv")
     os.close(fd)
     try:
         with open(csv_file, "w") as f:
             f.write(self.buf)
         dd = CSV(csv_file, schema=self.schema, mode='r+')
         dd.extend([["k4", "v4", 4, True]])
         vals = [nd.as_py(v) for v in dd.chunks(blen=2)]
         self.assertEqual(vals, [
             [{u'f0': u'k1', u'f1': u'v1', u'f2': 1, u'f3': False},
              {u'f0': u'k2', u'f1': u'v2', u'f2': 2, u'f3': True}],
             [{u'f0': u'k3', u'f1': u'v3', u'f2': 3, u'f3': False},
              {u'f0': u'k4', u'f1': u'v4', u'f2': 4, u'f3': True}]])
         self.assertRaises(ValueError, lambda: dd.extend([3.3]))
     finally:
         # Remove the temporary file even when an assertion above fails.
         os.remove(csv_file)
Example #20
0
    def test_re_dialect(self):
        """Copying between ``CSV`` objects rewrites rows in the target dialect.

        The source uses ``,`` and newline; the destination uses ``;`` and
        ``--``.  After ``dst.extend(src)`` the destination file must contain
        the same values with the destination's delimiter and terminator.
        """
        dialect1 = {'delimiter': ',', 'lineterminator': '\n'}
        dialect2 = {'delimiter': ';', 'lineterminator': '--'}

        text = '1,1\n2,2\n'

        schema = '2 * int32'

        with filetext(text) as source_fn:
            with filetext('') as dest_fn:
                src = CSV(source_fn, schema=schema, **dialect1)
                dst = CSV(dest_fn, mode='w', schema=schema, **dialect2)

                # Perform copy
                dst.extend(src)

                with open(dest_fn) as f:
                    # assertEqual: the assertEquals alias is deprecated and
                    # was removed in Python 3.12.
                    self.assertEqual(f.read(), '1;1--2;2--')
    def test_re_dialect(self):
        """Rows copied into a ``CSV`` are written using that ``CSV``'s dialect.

        Source text is comma/newline delimited; the destination is opened
        with ``;`` as delimiter and ``--`` as line terminator, and its raw
        file content is checked after the copy.
        """
        dialect1 = {'delimiter': ',', 'lineterminator': '\n'}
        dialect2 = {'delimiter': ';', 'lineterminator': '--'}

        text = '1,1\n2,2\n'

        schema = '2 * int32'

        with filetext(text) as source_fn:
            with filetext('') as dest_fn:
                src = CSV(source_fn, schema=schema, **dialect1)
                dst = CSV(dest_fn, mode='w', schema=schema, **dialect2)

                # Perform copy
                dst.extend(src)

                with open(dest_fn) as f:
                    # assertEqual: the assertEquals alias is deprecated and
                    # was removed in Python 3.12.
                    self.assertEqual(f.read(), '1;1--2;2--')
Example #22
0
 def test_append(self):
     """Writing only the first fixture row puts it on the file's first line."""
     dd = CSV(self.filename, 'w', schema=self.schema, delimiter=' ')
     first_row = self.data[0]
     dd.extend([first_row])
     with open(self.filename) as f:
         first_line = f.readlines()[0]
         self.assertEqual(first_line.strip(), 'Alice 100')
Example #23
0
 def test_extend_structured(self):
     """A row appended in ``'r+'`` mode follows the two existing rows on read-back."""
     with filetext('1,1.0\n2,2.0\n') as fn:
         csv = CSV(fn, 'r+', schema='{x: int32, y: float32}', delimiter=',')
         csv.extend([(3, 3)])
         rows = tuplify(tuple(csv))
         assert rows == ((1, 1.0), (2, 2.0), (3, 3.0))
Example #24
0
class Test_Dialect(unittest.TestCase):
    """Tests for ``CSV`` dialect handling, schema detection and appending."""

    # Raw fixture text (after ``sanitize``) written to the temp file in setUp.
    buf = sanitize(
    u"""Name Amount
        Alice 100
        Bob 200
        Alice 50
    """)

    # Record schema passed to the ``CSV`` built in setUp.
    schema = "{ f0: string, f1: int }"

    def setUp(self):
        """Write ``buf`` to a fresh temporary file and open it as ``self.dd``."""
        # mkstemp creates the file atomically; mktemp only returns a name and
        # is deprecated because another process can claim that name first.
        fd, self.csv_file = tempfile.mkstemp(".csv")
        os.close(fd)
        with open(self.csv_file, "w") as f:
            f.write(self.buf)
        self.dd = CSV(self.csv_file, dialect='excel', schema=self.schema,
                      delimiter=' ', mode='r+')

    def tearDown(self):
        """Delete the temporary file created in setUp."""
        os.remove(self.csv_file)

    def test_schema_detection(self):
        """Without a schema, field names come from the header or ``columns``."""
        dd = CSV(self.csv_file)
        assert dd.schema == dshape('{Name: string, Amount: ?int64}')

        dd = CSV(self.csv_file, columns=['foo', 'bar'])
        assert dd.schema == dshape('{foo: string, bar: ?int64}')

    @min_python_version
    def test_has_header(self):
        """``has_header`` reports a header for the fixture text."""
        assert has_header(self.buf)

    def test_overwrite_delimiter(self):
        """An explicit ``delimiter`` overrides the one from the named dialect."""
        # assertEqual: the assertEquals alias is deprecated and was removed
        # in Python 3.12.
        self.assertEqual(self.dd.dialect['delimiter'], ' ')

    def test_content(self):
        """Both names from the fixture appear when iterating the descriptor."""
        s = str(list(self.dd))
        assert 'Alice' in s and 'Bob' in s

    def test_append(self):
        """A tuple row appended via ``extend`` becomes the file's last line."""
        self.dd.extend([('Alice', 100)])
        with open(self.csv_file) as f:
            self.assertEqual(f.readlines()[-1].strip(), 'Alice 100')

    def test_append_dict(self):
        """A dict row appended via ``extend`` becomes the file's last line."""
        self.dd.extend([{'f0': 'Alice', 'f1': 100}])
        with open(self.csv_file) as f:
            self.assertEqual(f.readlines()[-1].strip(), 'Alice 100')

    def test_extend_structured(self):
        """A row appended in ``'r+'`` mode follows the existing rows on read-back."""
        with filetext('1,1.0\n2,2.0\n') as fn:
            csv = CSV(fn, 'r+', schema='{x: int32, y: float32}',
                      delimiter=',')
            csv.extend([(3, 3)])
            assert tuplify(tuple(csv)) == ((1, 1.0), (2, 2.0), (3, 3.0))

    def test_discover_dialect(self):
        """``discover_dialect`` returns the expected settings for CRLF text."""
        s = '1,1\r\n2,2'
        self.assertEqual(discover_dialect(s),
                         {'escapechar': None,
                          'skipinitialspace': False,
                          'quoting': 0,
                          'delimiter': ',',
                          'lineterminator': '\r\n',
                          'quotechar': '"',
                          'doublequote': False})
Example #25
0
def test_extend_structured_many_newlines():
    """Appending after trailing blank lines reads back as six (int64, float64) rows."""
    with filetext('1,1.0\n2,2.0\n\n\n\n') as fn:
        target = CSV(fn, 'r+', schema='{x: int32, y: float32}', delimiter=',')
        target.extend([(3, 3)])
        result = tuplify(tuple(target))
        discovered = discover(result)
        assert discovered == dshape('6 * (int64, float64)')
Example #26
0
def test_append(tmpcsv, schema):
    """Writing only the first ``data`` row puts it on the file's first line."""
    dd = CSV(tmpcsv, 'w', schema=schema, delimiter=' ')
    first_row = data[0]
    dd.extend([first_row])
    with open(tmpcsv) as f:
        first_line = f.readlines()[0]
    assert first_line.strip() == 'Alice 100'
Example #27
0
def test_extend_structured_no_newline():
    """Appending to a file that lacks a trailing newline still yields three rows."""
    with filetext('1,1.0\n2,2.0') as fn:
        target = CSV(fn, 'r+', schema='{x: int32, y: float32}', delimiter=',')
        target.extend([(3, 3)])
        rows = tuplify(tuple(target))
        assert rows == ((1, 1.0), (2, 2.0), (3, 3.0))
Example #28
0
def test_extend_structured_many_newlines():
    """With three trailing blank lines, read-back discovers ``6 * (int64, float64)``."""
    with filetext('1,1.0\n2,2.0\n\n\n\n') as fn:
        target = CSV(fn, 'r+', schema='{x: int32, y: float32}', delimiter=',')
        target.extend([(3, 3)])
        rows = tuplify(tuple(target))
        shape = discover(rows)
        assert shape == dshape('6 * (int64, float64)')
Example #29
0
def test_write_csv_with_header_emits_header():
    """A ``CSV`` written with ``header=True`` contains the column name ``a``."""
    with tmpfile('.csv') as fn:
        sink = CSV(fn, header=True, schema='{a: int, b: int}', mode='w')
        sink.extend([(1, 2), (10, 20)])
        with open(fn) as f:
            contents = f.read()
            assert 'a' in contents
 def test_append(self):
     """Extending with one fixture row writes it as the first line of the file."""
     dd = CSV(self.filename, 'w', schema=self.schema, delimiter=' ')
     row = self.data[0]
     dd.extend([row])
     with open(self.filename) as f:
         written = f.readlines()
         self.assertEqual(written[0].strip(), 'Alice 100')
Example #31
0
def test_append(tmpcsv, schema):
    """Extending with one ``data`` row writes it as the first line of the file."""
    dd = CSV(tmpcsv, 'w', schema=schema, delimiter=' ')
    row = data[0]
    dd.extend([row])
    with open(tmpcsv) as f:
        written = f.readlines()
    s = written[0].strip()
    assert s == 'Alice 100'