Ejemplo n.º 1
0
 def test_save(self):
     """Saving an XArray should leave a `_SUCCESS` file under the target path."""
     array = XArray([1, 2, 3])
     target = '{}/tmp/array-csv'.format(hdfs_prefix)
     array.save(target)
     # The test only checks that the marker file exists, not the saved data.
     marker = os.path.join(target, '_SUCCESS')
     self.assertTrue(fileio.is_file(marker))
     # Clean up so later tests start from an empty location.
     fileio.delete(target)
Ejemplo n.º 2
0
    def save_as_csv(self, path, **params):
        """
        Saves the elements to a single file in CSV form, one element per row.

        Anything already at `path` is deleted before writing.  The keyword
        arguments in `params` are passed to csv.writer as formatting
        parameters.
        """
        self._entry(path=path)

        def to_csv(row):
            # Format one element as a single-column CSV record.
            sio = StringIO.StringIO()
            writer = csv.writer(sio, **params)
            try:
                # writerow() accepts only the row itself; the formatting
                # parameters were already given to csv.writer above.
                writer.writerow([row])
                return sio.getvalue()
            except IOError:
                # Best effort: a row that cannot be written contributes nothing.
                return ''

        fileio.delete(path)
        with fileio.open_file(path, 'w') as f:
            self.begin_iterator()
            elems_at_a_time = 10000
            while True:
                batch = self.iterator_get_next(elems_at_a_time)
                for row in batch:
                    f.write(to_csv(row))
                # A batch that is not full means the iterator is exhausted.
                if len(batch) != elems_at_a_time:
                    break
Ejemplo n.º 3
0
    def save_as_csv(self, path, **params):
        """
        Saves the elements to a single file in CSV form, one element per row.

        Anything already at `path` is deleted before writing.  The keyword
        arguments in `params` are passed to csv.writer as formatting
        parameters.
        """
        self._entry(path=path)

        def to_csv(row):
            # Format one element as a single-column CSV record.
            sio = StringIO.StringIO()
            writer = csv.writer(sio, **params)
            try:
                # writerow() accepts only the row itself; the formatting
                # parameters were already given to csv.writer above.
                writer.writerow([row])
                return sio.getvalue()
            except IOError:
                # Best effort: a row that cannot be written contributes nothing.
                return ''

        fileio.delete(path)
        with fileio.open_file(path, 'w') as f:
            self.begin_iterator()
            elems_at_a_time = 10000
            while True:
                batch = self.iterator_get_next(elems_at_a_time)
                for row in batch:
                    f.write(to_csv(row))
                # A batch that is not full means the iterator is exhausted.
                if len(batch) != elems_at_a_time:
                    break
Ejemplo n.º 4
0
 def test_save_format(self):
     """Saving an XArray with format='csv' should write one element per line."""
     array = XArray([1, 2, 3])
     target = '{}/tmp/array-csv'.format(hdfs_prefix)
     array.save(target, format='csv')
     with fileio.open_file(target) as handle:
         # Lines are compared in order after stripping surrounding whitespace.
         for expected in ('1', '2', '3'):
             self.assertEqual(expected, handle.readline().strip())
     fileio.delete(target)
Ejemplo n.º 5
0
 def test_save(self):
     """Saving an XFrame in binary format should pickle names and types to `_metadata`."""
     frame = XFrame({'id': [30, 20, 10], 'val': ['a', 'b', 'c']})
     target = '{}/tmp/frame'.format(hdfs_prefix)
     frame.save(target, format='binary')
     metadata_file = os.path.join(target, '_metadata')
     with fileio.open_file(metadata_file) as handle:
         stored = pickle.load(handle)
     # The metadata is expected to be [column names, column types].
     expected = [['id', 'val'], [int, str]]
     self.assertListEqual(expected, stored)
     # TODO find some way to check the data
     fileio.delete(target)
Ejemplo n.º 6
0
    def test_save(self):
        """Saving an XFrame as csv should write a heading line followed by the rows."""
        frame = XFrame({'id': [30, 20, 10], 'val': ['a', 'b', 'c']})
        target = '{}/tmp/frame-csv'.format(hdfs_prefix)
        frame.save(target, format='csv')

        # The output is read from the target path with a '.csv' suffix.
        with fileio.open_file(target + '.csv') as handle:
            # First the heading, then the rows in their original order.
            for expected in ('id,val', '30,a', '20,b', '10,c'):
                self.assertEqual(expected, handle.readline().rstrip())
        fileio.delete(target + '.csv')
Ejemplo n.º 7
0
    def test_read_parquet_str(self):
        """An XFrame saved as parquet should read back with the same columns, types and rows."""
        t = XFrame({'id': [1, 2, 3], 'val': ['a', 'b', 'c']})
        path = '{}/tmp/frame-parquet'.format(hdfs_prefix)
        t.save(path, format='parquet')

        # The saved data is read back from the path with a '.parquet' suffix.
        parquet_path = '{}/tmp/frame-parquet.parquet'.format(hdfs_prefix)
        res = XFrame(parquet_path)
        # results may not come back in the same order
        res = res.sort('id')
        self.assertEqualLen(3, res)
        self.assertListEqual(['id', 'val'], res.column_names())
        self.assertListEqual([int, str], res.column_types())
        self.assertDictEqual({'id': 1, 'val': 'a'}, res[0])
        self.assertDictEqual({'id': 2, 'val': 'b'}, res[1])
        self.assertDictEqual({'id': 3, 'val': 'c'}, res[2])
        # Delete the location that was actually read, not the un-suffixed path,
        # so the saved data is not left behind.
        fileio.delete(parquet_path)
Ejemplo n.º 8
0
    def save_as_text(self, path):
        """
        Saves the RDD to file as text.

        Anything already at `path` is deleted first.  After the RDD is
        written, the element type is pickled to `_metadata` and the lineage is
        saved to `_lineage`, both under `path`.

        Raises TypeError if writing the RDD fails.
        """
        self._entry(path=path)
        fileio.delete(path)
        try:
            self._rdd.saveAsTextFile(path)
        except Exception:
            # Catch Exception rather than everything, so KeyboardInterrupt and
            # SystemExit are not reported as a failed save.
            # TODO distinguish between filesystem errors and pickle errors
            raise TypeError('The XArray save failed.')
        metadata = self.elem_type
        metadata_path = os.path.join(path, '_metadata')
        with fileio.open_file(metadata_path, 'w') as f:
            # TODO detect filesystem errors
            pickle.dump(metadata, f)

        lineage_path = os.path.join(path, '_lineage')
        self.lineage.save(lineage_path)
Ejemplo n.º 9
0
    def save_as_text(self, path):
        """
        Saves the RDD to file as text.

        Anything already at `path` is deleted first.  After the RDD is
        written, the element type is pickled to `_metadata` and the lineage is
        saved to `_lineage`, both under `path`.

        Raises TypeError if writing the RDD fails.
        """
        self._entry(path=path)
        fileio.delete(path)
        try:
            self._rdd.saveAsTextFile(path)
        except Exception:
            # Catch Exception rather than everything, so KeyboardInterrupt and
            # SystemExit are not reported as a failed save.
            # TODO distinguish between filesystem errors and pickle errors
            raise TypeError('The XArray save failed.')
        metadata = self.elem_type
        metadata_path = os.path.join(path, '_metadata')
        with fileio.open_file(metadata_path, 'w') as f:
            # TODO detect filesystem errors
            pickle.dump(metadata, f)

        lineage_path = os.path.join(path, '_lineage')
        self.lineage.save(lineage_path)
Ejemplo n.º 10
0
    def save(self, path):
        """
        Saves the RDD to file in pickled form.

        Anything already at `path` is deleted first.  After the RDD is
        written, the element type is pickled to `_metadata` and the lineage is
        saved to `_lineage`, both under `path`.

        Raises TypeError if writing the RDD fails.
        """
        self._entry(path=path)
        # this only works for local files
        fileio.delete(path)
        try:
            self._rdd.saveAsPickleFile(path)  # action ?
        except Exception:
            # Catch Exception rather than everything, so KeyboardInterrupt and
            # SystemExit are not reported as a failed save.
            # TODO distinguish between filesystem errors and pickle errors
            raise TypeError('The XArray save failed.')
        metadata = self.elem_type
        metadata_path = os.path.join(path, '_metadata')
        with fileio.open_file(metadata_path, 'w') as f:
            # TODO detect filesystem errors
            pickle.dump(metadata, f)

        lineage_path = os.path.join(path, '_lineage')
        self.lineage.save(lineage_path)
Ejemplo n.º 11
0
    def save(self, path):
        """
        Saves the RDD to file in pickled form.

        Anything already at `path` is deleted first.  After the RDD is
        written, the element type is pickled to `_metadata` and the lineage is
        saved to `_lineage`, both under `path`.

        Raises TypeError if writing the RDD fails.
        """
        self._entry(path=path)
        # this only works for local files
        fileio.delete(path)
        try:
            self._rdd.saveAsPickleFile(path)          # action ?
        except Exception:
            # Catch Exception rather than everything, so KeyboardInterrupt and
            # SystemExit are not reported as a failed save.
            # TODO distinguish between filesystem errors and pickle errors
            raise TypeError('The XArray save failed.')
        metadata = self.elem_type
        metadata_path = os.path.join(path, '_metadata')
        with fileio.open_file(metadata_path, 'w') as f:
            # TODO detect filesystem errors
            pickle.dump(metadata, f)

        lineage_path = os.path.join(path, '_lineage')
        self.lineage.save(lineage_path)
Ejemplo n.º 12
0
 def test_save(self):
     """Saving an XFrame in parquet format should complete without raising."""
     frame = XFrame({'id': [30, 20, 10], 'val': ['a', 'b', 'c']})
     target = '{}/tmp/frame-parquet'.format(hdfs_prefix)
     frame.save(target, format='parquet')
     # TODO verify
     # Cleanup removes the target path with a '.parquet' suffix.
     fileio.delete(target + '.parquet')