Example #1
0
    def test_excel_cell_error_na(self):
        # Sheet1 of test3.xls must parse to a single NaN under the 'Test'
        # column (presumably the fixture holds an Excel error cell, going by
        # the test name -- confirm against the fixture file).
        _skip_if_no_xlrd()

        workbook_path = os.path.join(self.dirpath, 'test3.xls')
        result = ExcelFile(workbook_path).parse('Sheet1')

        wanted = DataFrame([[np.nan]], columns=['Test'])
        tm.assert_frame_equal(result, wanted)
Example #2
0
    def test_sheets(self):
        # Writes self.frame and self.tsframe to two sheets of one workbook and
        # checks the parsed data as well as the sheet names and their order.
        _skip_if_no_xlrd()
        path = '__tmp_to_excel_from_excel_sheets__.' + self.ext

        with ensure_clean(path) as path:
            self.frame['A'][:5] = nan

            # Path-based writes with the basic option variants; each call
            # targets the same sheet name.
            for options in ({}, {'cols': ['A', 'B']},
                            {'header': False}, {'index': False}):
                self.frame.to_excel(path, 'test1', **options)

            # Test writing to separate sheets
            writer = ExcelWriter(path)
            for sheet, frame in (('test1', self.frame),
                                 ('test2', self.tsframe)):
                frame.to_excel(writer, sheet)
            writer.save()

            reader = ExcelFile(path)
            for sheet, frame in (('test1', self.frame),
                                 ('test2', self.tsframe)):
                recons = reader.parse(sheet, index_col=0)
                tm.assert_frame_equal(frame, recons)

            # exactly two sheets, in the order they were written
            np.testing.assert_equal(2, len(reader.sheet_names))
            for position, sheet in enumerate(('test1', 'test2')):
                np.testing.assert_equal(sheet, reader.sheet_names[position])
Example #3
0
    def test_excel_stop_iterator(self):
        # Sheet1 of test2.xls must parse to exactly one data row under the
        # "Test" / "Test1" headers.
        _skip_if_no_xlrd()

        source = os.path.join(self.dirpath, "test2.xls")
        got = ExcelFile(source).parse("Sheet1")

        want = DataFrame([["aaaa", "bbbbb"]], columns=["Test", "Test1"])
        tm.assert_frame_equal(got, want)
Example #4
0
    def test_to_excel_unicode_filename(self):
        # Round-trips a small float frame through a workbook whose file name
        # contains a non-ASCII character, writing with float_format='%.2f'.
        _skip_if_no_xlrd()
        filename = u('\u0192u.') + self.ext

        # Probe the filesystem first; skip when the name cannot be encoded.
        try:
            probe = open(filename, 'wb')
        except UnicodeEncodeError:
            raise nose.SkipTest('no unicode file names on this system')
        probe.close()

        row_labels = ['A', 'B']
        col_labels = ['X', 'Y', 'Z']
        df = DataFrame([[0.123456, 0.234567, 0.567567],
                        [12.32112, 123123.2, 321321.2]],
                       index=row_labels, columns=col_labels)

        with ensure_clean(filename) as filename:
            df.to_excel(filename, 'test1', float_format='%.2f')

            rs = ExcelFile(filename).parse('test1', index_col=None)
            # the values as they look after two-decimal formatting
            xp = DataFrame([[0.12, 0.23, 0.57],
                            [12.32, 123123.20, 321321.20]],
                           index=row_labels, columns=col_labels)
            tm.assert_frame_equal(rs, xp)
Example #5
0
    def test_to_excel_multiindex_dates(self):
        # Round-trips self.tsframe with a (dates, integer) MultiIndex, first
        # with explicit index labels and then letting the reader infer the
        # index.
        _skip_if_no_xlrd()
        ext = self.ext
        path = '__tmp_to_excel_multiindex_dates__' + ext + '__.' + ext

        # try multiindex with dates
        tsframe = self.tsframe
        old_index = tsframe.index
        new_index = [old_index, np.arange(len(old_index))]
        tsframe.index = MultiIndex.from_arrays(new_index)

        try:
            with ensure_clean(path) as path:
                tsframe.to_excel(path, 'test1', index_label=['time', 'foo'])
                reader = ExcelFile(path)
                recons = reader.parse('test1', index_col=[0, 1])

                tm.assert_frame_equal(tsframe, recons, check_names=False)
                # assertEqual: assertEquals is a deprecated alias
                self.assertEqual(recons.index.names, ('time', 'foo'))

                # infer index
                tsframe.to_excel(path, 'test1')
                reader = ExcelFile(path)
                recons = reader.parse('test1')
                tm.assert_frame_equal(tsframe, recons)
        finally:
            # Restore the fixture even when an assertion above fails, so a
            # failure here cannot leak the MultiIndex into other tests.
            self.tsframe.index = old_index  # needed if setUP becomes classmethod
Example #6
0
    def test_to_excel_unicode_filename(self):
        # For both "xls" and "xlsx": write a float frame to a file whose name
        # contains a non-ASCII character (float_format="%.2f") and compare the
        # parsed result with the two-decimal values.
        _skip_if_no_excelsuite()

        row_labels = ["A", "B"]
        col_labels = ["X", "Y", "Z"]

        for ext in ("xls", "xlsx"):
            filename = u"\u0192u." + ext

            # skip when the filesystem cannot encode the name
            try:
                handle = open(filename, "wb")
            except UnicodeEncodeError:
                raise nose.SkipTest("no unicode file names on this system")
            handle.close()

            raw_values = [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]]
            df = DataFrame(raw_values, index=row_labels, columns=col_labels)

            with ensure_clean(filename) as filename:
                df.to_excel(filename, "test1", float_format="%.2f")

                rs = ExcelFile(filename).parse("test1", index_col=None)
                rounded_values = [[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]]
                xp = DataFrame(rounded_values, index=row_labels, columns=col_labels)
                tm.assert_frame_equal(rs, xp)
Example #7
0
    def test_excel_table_sheet_by_index(self, read_ext, df_ref):
        # Sheets addressed by integer position must parse to df_ref through
        # both pd.read_excel and ExcelFile.parse (sheet 1 with skiprows=[1]).
        # Also covers skipfooter, the deprecated skip_footer spelling, and the
        # error raised for an unknown sheet name.
        #
        # The workbook is opened in a ``with`` block so it is closed even
        # when one of the assertions fails.
        with ExcelFile('test1' + read_ext) as excel:
            df1 = pd.read_excel(excel, 0, index_col=0)
            df2 = pd.read_excel(excel, 1, skiprows=[1], index_col=0)
            tm.assert_frame_equal(df1, df_ref, check_names=False)
            tm.assert_frame_equal(df2, df_ref, check_names=False)

            df1 = excel.parse(0, index_col=0)
            df2 = excel.parse(1, skiprows=[1], index_col=0)
            tm.assert_frame_equal(df1, df_ref, check_names=False)
            tm.assert_frame_equal(df2, df_ref, check_names=False)

            df3 = pd.read_excel(excel, 0, index_col=0, skipfooter=1)
            tm.assert_frame_equal(df3, df1.iloc[:-1])

            # old keyword spelling still works but must warn
            with tm.assert_produces_warning(FutureWarning,
                                            check_stacklevel=False):
                df4 = pd.read_excel(excel, 0, index_col=0, skip_footer=1)
                tm.assert_frame_equal(df3, df4)

            df3 = excel.parse(0, index_col=0, skipfooter=1)
            tm.assert_frame_equal(df3, df1.iloc[:-1])

            import xlrd  # will move to engine-specific tests as new ones are added
            with pytest.raises(xlrd.XLRDError):
                pd.read_excel(excel, 'asdf')
Example #8
0
    def test_excel_stop_iterator(self):
        # test2.xls / Sheet1 is expected to contain one data row below the
        # 'Test' and 'Test1' column headers.
        _skip_if_no_xlrd()

        expected = DataFrame([['aaaa', 'bbbbb']], columns=['Test', 'Test1'])

        workbook = ExcelFile(os.path.join(self.dirpath, 'test2.xls'))
        tm.assert_frame_equal(workbook.parse('Sheet1'), expected)
Example #9
0
    def _check_extension_mixed(self, ext):
        # Helper: write self.mixed_frame to a temporary workbook with the
        # given extension and require the re-read frame to match it.
        tmp_name = "__tmp_to_excel_from_excel_mixed__." + ext

        with ensure_clean(tmp_name) as path:
            self.mixed_frame.to_excel(path, "test1")
            reread = ExcelFile(path).parse("test1", index_col=0)
            tm.assert_frame_equal(self.mixed_frame, reread)
Example #10
0
    def test_mixed(self):
        # self.mixed_frame must come back unchanged after being written to
        # sheet 'test1' and parsed with the first column as index.
        _skip_if_no_xlrd()

        with ensure_clean(self.ext) as tmp_path:
            self.mixed_frame.to_excel(tmp_path, 'test1')
            roundtripped = ExcelFile(tmp_path).parse('test1', index_col=0)
            tm.assert_frame_equal(self.mixed_frame, roundtripped)
Example #11
0
    def test_inf_roundtrip(self):
        # +inf and -inf cells must come back unchanged after writing and
        # re-reading the sheet.
        _skip_if_no_xlrd()

        original = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)])
        with ensure_clean(self.ext) as tmp_path:
            original.to_excel(tmp_path, 'test1')
            reread = ExcelFile(tmp_path).parse('test1')
            tm.assert_frame_equal(original, reread)
Example #12
0
    def _check_extension_tsframe(self, ext):
        # Helper: round-trip the first five rows of a generated time-series
        # frame through a temporary workbook with the given extension.
        tmp_name = "__tmp_to_excel_from_excel_tsframe__." + ext

        ts_head = tm.makeTimeDataFrame()[:5]

        with ensure_clean(tmp_name) as path:
            ts_head.to_excel(path, "test1")
            reread = ExcelFile(path).parse("test1")
            tm.assert_frame_equal(ts_head, reread)
Example #13
0
    def test_tsframe(self):
        # The first five rows of a generated time-series frame must survive
        # a write/parse cycle through sheet 'test1'.
        _skip_if_no_xlrd()

        ts_head = tm.makeTimeDataFrame()[:5]

        with ensure_clean(self.ext) as tmp_path:
            ts_head.to_excel(tmp_path, 'test1')
            reread = ExcelFile(tmp_path).parse('test1')
            tm.assert_frame_equal(ts_head, reread)
Example #14
0
class ExcelExtractor(Extractor):
    '''
    An extractor for excel files.
    One sheet only for now.
    Expects column names in first row, rest of rows mapped 1:1 to incoming table rows.
    Unique identifier (or unique for domain) in first column.
    '''

    def __init__(self, incoming_table_class, file_name):
        '''
        Opens the workbook ``file_name`` found under ``conf.INPUT_DIR`` and
        initialises the base extractor with ``incoming_table_class``.
        '''
        self._incoming_table_class = incoming_table_class
        self.file_name = file_name

        self.workbook = ExcelFile(os.path.join(conf.INPUT_DIR, file_name))

        super(ExcelExtractor, self).__init__(self._incoming_table_class)

    def _get_workbook_rowdicts(self):
        '''
        returns list of key-value dicts for all rows in sheet, with keys in first row. empty values are removed.
        '''
        rows = self.workbook.parse().to_dict(outtype='records')
        # keep only the cells that actually hold a value
        return [dict((k, v) for k, v in row.iteritems() if notnull(v))
                for row in rows]

    def _get_workbook_keys(self):
        '''
        returns the column names of the sheet (the keys taken from the first row)
        '''
        return self.workbook.parse().to_dict().keys()

    def do_extract(self):
        '''
        Creates one incoming-table record per sheet row.

        Columns whose name is in ``self._get_db_cols`` are passed as regular
        keyword arguments; every other column is converted with ``unicode``
        and collected into a dict stored under ``self._get_hstore_db_col``.
        (Both attributes come from outside this class -- presumably the
        base ``Extractor``; verify there.)
        '''
        db_cols = self._get_db_cols
        # Every _get_workbook_keys() call re-parses the workbook, so read the
        # column names once and reuse them for both partitions.
        workbook_keys = list(self._get_workbook_keys())

        db_col_keys = set(k for k in workbook_keys if k in db_cols)
        hstore_keys = set(h for h in workbook_keys if h not in db_cols)

        for row in self._get_workbook_rowdicts():
            db_col_dict = dict((k, v) for k, v in row.iteritems() if k in db_col_keys)
            hstore_col_dict = dict((k, unicode(v)) for k, v in row.iteritems() if k in hstore_keys)

            insert_dict = db_col_dict
            insert_dict[self._get_hstore_db_col] = hstore_col_dict

            self._incoming_table_class.create(**insert_dict)

    def do_cleanup(self):
        '''
        Executes a delete query on the incoming table class and logs the
        value returned by ``execute()`` as the number of deleted records.
        '''
        delete_q = self._incoming_table_class.delete()
        rows = delete_q.execute()
        logger.info('Deleted %d records in incoming data table %s' % (rows, self._incoming_table_class._meta.db_table))
Example #15
0
    def test_excel_passes_na(self):
        # Parses Sheet1 of test2.xlsx twice with na_values=["apple"]:
        # with keep_default_na=False the "NA" strings stay strings, with
        # keep_default_na=True they are expected to become NaN.
        _skip_if_no_xlrd()

        excel_data = ExcelFile(os.path.join(self.dirpath, "test2.xlsx"))

        cases = (
            (False, [["NA"], [1], ["NA"], [np.nan], ["rabbit"]]),
            (True, [[np.nan], [1], [np.nan], [np.nan], ["rabbit"]]),
        )
        for keep_default, rows in cases:
            parsed = excel_data.parse("Sheet1", keep_default_na=keep_default, na_values=["apple"])
            expected = DataFrame(rows, columns=["Test"])
            tm.assert_frame_equal(parsed, expected)
Example #16
0
    def test_sheet_name(self, read_ext, df_ref):
        # ExcelFile.parse must accept sheet_name as a keyword, regardless of
        # where it appears among the keyword arguments.
        filename = "test1"
        sheet_name = "Sheet1"

        # context manager closes the workbook even if parsing raises
        with ExcelFile(filename + read_ext) as excel:
            df1_parse = excel.parse(sheet_name=sheet_name, index_col=0)  # doc
            df2_parse = excel.parse(index_col=0,
                                    sheet_name=sheet_name)

        tm.assert_frame_equal(df1_parse, df_ref, check_names=False)
        tm.assert_frame_equal(df2_parse, df_ref, check_names=False)
Example #17
0
    def test_float_types(self):
        # For each float width: write one random column, parse it back, cast
        # the parsed frame to that width and compare (dtype check disabled).
        _skip_if_no_xlrd()

        float_types = (np.float16, np.float32, np.float64)
        for np_type in float_types:
            with ensure_clean(self.ext) as tmp_path:
                written = DataFrame(np.random.random_sample(10), dtype=np_type)
                written.to_excel(tmp_path, 'test1')

                parsed = ExcelFile(tmp_path).parse('test1')
                recast = parsed.astype(np_type)
                tm.assert_frame_equal(written, recast, check_dtype=False)
Example #18
0
    def test_bool_types(self):
        # For each boolean dtype: write a four-value column, parse it back,
        # cast the parsed frame to that dtype and compare.
        _skip_if_no_xlrd()

        bool_types = (np.bool8, np.bool_)
        for np_type in bool_types:
            with ensure_clean(self.ext) as tmp_path:
                written = DataFrame([1, 0, True, False], dtype=np_type)
                written.to_excel(tmp_path, 'test1')

                parsed = ExcelFile(tmp_path).parse('test1')
                recast = parsed.astype(np_type)
                tm.assert_frame_equal(written, recast)
Example #19
0
    def test_to_excel_periodindex(self):
        # A monthly period-indexed resample of self.tsframe is written out;
        # the parsed sheet is converted back with to_period('M') before the
        # comparison.
        _skip_if_no_xlrd()

        xp = self.tsframe.resample('M', kind='period')

        with ensure_clean(self.ext) as tmp_path:
            xp.to_excel(tmp_path, 'sht1')

            rs = ExcelFile(tmp_path).parse('sht1', index_col=0,
                                           parse_dates=True)
            tm.assert_frame_equal(xp, rs.to_period('M'))
Example #20
0
    def check_excel_sheet_by_name_raise(self, ext):
        # Sheet position 0 must parse, while the *string* '0' is treated as a
        # sheet name and must raise xlrd.XLRDError.
        import xlrd
        target = os.path.join(self.dirpath, 'testit.{0}'.format(ext))

        with ensure_clean(target) as pth:
            written = DataFrame(np.random.randn(10, 2))
            written.to_excel(pth)

            workbook = ExcelFile(pth)
            by_position = workbook.parse(0)
            tm.assert_frame_equal(written, by_position)

            with self.assertRaises(xlrd.XLRDError):
                workbook.parse('0')
Example #21
0
    def test_excel_sheet_by_name_raise(self):
        # Sheet position 0 must parse, while the *string* '0' is treated as a
        # sheet name and must raise xlrd.XLRDError.
        _skip_if_no_xlrd()
        import xlrd

        with ensure_clean(self.ext) as tmp_path:
            written = DataFrame(np.random.randn(10, 2))
            written.to_excel(tmp_path)

            workbook = ExcelFile(tmp_path)
            by_position = workbook.parse(0)
            tm.assert_frame_equal(written, by_position)

            with self.assertRaises(xlrd.XLRDError):
                workbook.parse('0')
Example #22
0
    def test_excel_roundtrip_datetime(self):
        # A copy of self.tsframe is re-indexed with plain date objects and
        # written out; the parsed sheet must equal the original tsframe.
        _skip_if_no_xlrd()

        # datetime.date, not sure what to test here exactly
        dated = self.tsframe.copy()
        with ensure_clean(self.ext) as tmp_path:

            dated.index = [stamp.date() for stamp in self.tsframe.index]
            dated.to_excel(tmp_path, 'test1', merge_cells=self.merge_cells)

            reread = ExcelFile(tmp_path).parse('test1')
            tm.assert_frame_equal(self.tsframe, reread)
Example #23
0
 def test_stringio_writer(self):
     # Writes self.frame through the xlsxwriter engine into an in-memory
     # BytesIO buffer, then reads sheet 'test1' back from that buffer.
     _skip_if_no_xlsxwriter()
     _skip_if_no_xlrd()

     buf = BytesIO()
     writer_kwargs = {'options': {'in-memory': True}}
     with ExcelWriter(buf, engine='xlsxwriter', **writer_kwargs) as ew:
         self.frame.to_excel(ew, 'test1', engine='xlsxwriter')
         ew.save()
         # rewind so the reader starts at the beginning of the buffer
         buf.seek(0)
         found_df = ExcelFile(buf).parse('test1')
         tm.assert_frame_equal(self.frame, found_df)
     buf.close()
Example #24
0
    def test_excel_read_buffer(self):
        # ExcelFile must accept an already-open binary file object for both
        # the .xls and the .xlsx fixture; parsing without an exception is the
        # whole check ("it works").
        _skip_if_no_xlrd()
        _skip_if_no_openpyxl()

        for name in ("test.xls", "test.xlsx"):
            pth = os.path.join(self.dirpath, name)
            # ``with`` guarantees the handle is closed, also when parsing
            # raises; parsing stays inside the block so the buffer is still
            # open while it is read.
            with open(pth, "rb") as f:
                ExcelFile(f).parse("Sheet1", index_col=0, parse_dates=True)
Example #25
0
    def test_to_excel_periodindex(self):
        # For both "xls" and "xlsx": write a monthly period-indexed resample
        # of self.tsframe and compare it with the parsed sheet converted back
        # via to_period("M").
        _skip_if_no_excelsuite()

        for ext in ("xls", "xlsx"):
            tmp_name = "__tmp_to_excel_periodindex__." + ext
            xp = self.tsframe.resample("M", kind="period")

            with ensure_clean(tmp_name) as path:
                xp.to_excel(path, "sht1")

                rs = ExcelFile(path).parse("sht1", index_col=0, parse_dates=True)
                tm.assert_frame_equal(xp, rs.to_period("M"))
Example #26
0
    def test_excel_roundtrip_indexname(self):
        # The index name 'foo' must be preserved when the frame is written
        # and the first sheet is parsed with column 0 as the index.
        _skip_if_no_xlrd()

        original = DataFrame(np.random.randn(10, 4))
        original.index.name = 'foo'

        with ensure_clean(self.ext) as tmp_path:
            original.to_excel(tmp_path)

            workbook = ExcelFile(tmp_path)
            first_sheet = workbook.sheet_names[0]
            result = workbook.parse(first_sheet, index_col=0)

            tm.assert_frame_equal(result, original)
            self.assertEqual(result.index.name, 'foo')
Example #27
0
    def test_to_excel_periodindex(self):
        # Runs the period-index round trip once per extension: the monthly
        # period resample is written to 'sht1' and the parsed sheet is turned
        # back into monthly periods before comparing.
        _skip_if_no_excelsuite()

        extensions = ('xls', 'xlsx')
        for ext in extensions:
            tmp_name = '__tmp_to_excel_periodindex__.' + ext
            source = self.tsframe
            xp = source.resample('M', kind='period')

            with ensure_clean(tmp_name) as path:
                xp.to_excel(path, 'sht1')

                parsed = ExcelFile(path).parse('sht1', index_col=0,
                                               parse_dates=True)
                rs_periods = parsed.to_period('M')
                tm.assert_frame_equal(xp, rs_periods)
Example #28
0
    def test_excel_roundtrip_datetime(self):
        # A copy of self.tsframe is re-indexed with plain date objects and
        # written to an .xls file; the parsed sheet must equal the original
        # tsframe.
        _skip_if_no_xlrd()
        _skip_if_no_xlwt()

        # datetime.date, not sure what to test here exactly
        tmp_name = "__tmp_excel_roundtrip_datetime__.xls"
        dated = self.tsframe.copy()
        with ensure_clean(tmp_name) as path:

            dated.index = [stamp.date() for stamp in self.tsframe.index]
            dated.to_excel(path, "test1")

            reread = ExcelFile(path).parse("test1")
            tm.assert_frame_equal(self.tsframe, reread)
Example #29
0
    def test_parse_cols_int(self):
        # With an integer parse_cols=3, both sheets of test.xls / test.xlsx
        # must match the CSV reference restricted to columns A, B and C.
        _skip_if_no_openpyxl()
        _skip_if_no_xlrd()

        for s in ("", "x"):
            pth = os.path.join(self.dirpath, "test.xls%s" % s)
            xls = ExcelFile(pth)

            df = xls.parse("Sheet1", index_col=0, parse_dates=True, parse_cols=3)

            reference = self.read_csv(self.csv1, index_col=0, parse_dates=True)
            reference = reference.reindex(columns=["A", "B", "C"])

            # Sheet2 needs its second row skipped to line up with the reference
            df3 = xls.parse("Sheet2", skiprows=[1], index_col=0, parse_dates=True, parse_cols=3)

            tm.assert_frame_equal(df, reference, check_names=False)  # TODO add index to xls file)
            tm.assert_frame_equal(df3, reference, check_names=False)
Example #30
0
    def test_excel_passes_na(self):
        # Parses Sheet1 of test2.xlsx with na_values=['apple'] in both
        # keep_default_na modes; only with keep_default_na=True are the 'NA'
        # strings expected to turn into NaN.
        _skip_if_no_xlrd()

        workbook = ExcelFile(os.path.join(self.dirpath, 'test2.xlsx'))
        extra_na = ['apple']

        # default NA strings disabled: 'NA' survives as text
        without_defaults = workbook.parse('Sheet1', keep_default_na=False,
                                          na_values=extra_na)
        tm.assert_frame_equal(
            without_defaults,
            DataFrame([['NA'], [1], ['NA'], [np.nan], ['rabbit']],
                      columns=['Test']))

        # default NA strings enabled: 'NA' becomes NaN as well
        with_defaults = workbook.parse('Sheet1', keep_default_na=True,
                                       na_values=extra_na)
        tm.assert_frame_equal(
            with_defaults,
            DataFrame([[np.nan], [1], [np.nan], [np.nan], ['rabbit']],
                      columns=['Test']))
Example #31
0
    def test_float_types(self):
        # For each float width: exercise the basic to_excel variants on
        # self.frame, then round-trip one random column and compare after
        # casting the parsed data back to that width.
        _skip_if_no_xlrd()
        ext = self.ext
        filename = '__tmp_to_excel_from_excel_float_types__.' + ext

        for np_type in (np.float16, np.float32, np.float64):
            # The temp path gets its own name: rebinding the loop-invariant
            # file name here would feed the value yielded by ensure_clean
            # back into ensure_clean on the next iteration.
            with ensure_clean(filename) as path:
                self.frame['A'][:5] = nan

                self.frame.to_excel(path, 'test1')
                self.frame.to_excel(path, 'test1', cols=['A', 'B'])
                self.frame.to_excel(path, 'test1', header=False)
                self.frame.to_excel(path, 'test1', index=False)

                # Test np.float values read come back as float.
                frame = DataFrame(np.random.random_sample(10), dtype=np_type)
                frame.to_excel(path, 'test1')
                reader = ExcelFile(path)
                recons = reader.parse('test1').astype(np_type)
                tm.assert_frame_equal(frame, recons, check_dtype=False)
Example #32
0
    def test_bool_types(self):
        # For each boolean dtype: exercise the basic to_excel variants on
        # self.frame, then round-trip a four-value column and compare after
        # casting the parsed data back to that dtype.
        _skip_if_no_xlrd()
        ext = self.ext
        filename = '__tmp_to_excel_from_excel_bool_types__.' + ext

        for np_type in (np.bool8, np.bool_):
            # The temp path gets its own name: rebinding the loop-invariant
            # file name here would feed the value yielded by ensure_clean
            # back into ensure_clean on the next iteration.
            with ensure_clean(filename) as path:
                self.frame['A'][:5] = nan

                self.frame.to_excel(path, 'test1')
                self.frame.to_excel(path, 'test1', cols=['A', 'B'])
                self.frame.to_excel(path, 'test1', header=False)
                self.frame.to_excel(path, 'test1', index=False)

                # The parsed values are cast back to the boolean dtype
                # before the comparison.
                frame = (DataFrame([1, 0, True, False], dtype=np_type))
                frame.to_excel(path, 'test1')
                reader = ExcelFile(path)
                recons = reader.parse('test1').astype(np_type)
                tm.assert_frame_equal(frame, recons)
Example #33
0
    def test_ts_frame(self, tsframe, path):
        # Round-trips the time-series fixture through sheet "test1" after
        # dropping the index frequency.

        # freq doesn't round-trip
        freqless_index = pd.DatetimeIndex(np.asarray(tsframe.index), freq=None)
        tsframe.index = freqless_index

        tsframe.to_excel(path, "test1")
        with ExcelFile(path) as reader:
            reread = pd.read_excel(reader, sheet_name="test1", index_col=0)
        tm.assert_frame_equal(tsframe, reread)
Example #34
0
    def test_float_types(self, np_type, path):
        # Writes one random column of dtype np_type, reads it back and casts
        # the result to np_type before comparing.
        written = DataFrame(np.random.random_sample(10), dtype=np_type)
        written.to_excel(path, "test1")

        with ExcelFile(path) as reader:
            parsed = pd.read_excel(reader, sheet_name="test1", index_col=0)
            recast = parsed.astype(np_type)

        tm.assert_frame_equal(written, recast)
Example #35
0
    def _check_excel_multiindex(self, ext):
        # Helper: give self.frame a two-level integer MultiIndex, write it
        # with a few option variants and check that data and index names
        # survive the round trip.
        path = '__tmp_to_excel_multiindex__' + ext + '__.' + ext

        frame = self.frame
        old_index = frame.index
        arrays = np.arange(len(old_index) * 2).reshape(2, -1)
        new_index = MultiIndex.from_arrays(arrays, names=['first', 'second'])
        frame.index = new_index

        try:
            with ensure_clean(path) as path:
                frame.to_excel(path, 'test1', header=False)
                frame.to_excel(path, 'test1', cols=['A', 'B'])

                # round trip
                frame.to_excel(path, 'test1')
                reader = ExcelFile(path)
                df = reader.parse('test1', index_col=[0, 1], parse_dates=False)
                tm.assert_frame_equal(frame, df)
                self.assertEqual(frame.index.names, df.index.names)
        finally:
            # Restore the fixture even when an assertion above fails, so a
            # failure here cannot leak the MultiIndex into other tests.
            self.frame.index = old_index  # needed if setUP becomes a classmethod
Example #36
0
    def test_excel_roundtrip_indexname(self, merge_cells, path):
        # The index name "foo" must be preserved when the frame is written
        # (with the given merge_cells setting) and the first sheet is read
        # back with column 0 as the index.
        df = DataFrame(np.random.randn(10, 4))
        df.index.name = "foo"

        df.to_excel(path, merge_cells=merge_cells)

        # context manager closes the workbook even if reading raises
        with ExcelFile(path) as xf:
            result = pd.read_excel(xf, xf.sheet_names[0], index_col=0)

        tm.assert_frame_equal(result, df)
        assert result.index.name == "foo"
Example #37
0
    def test_excel_roundtrip_datetime(self, merge_cells, tsframe, path):
        # A copy of tsframe is re-indexed with plain date objects and written
        # out; the sheet read back must equal the original tsframe.

        # datetime.date, not sure what to test here exactly
        tsf = tsframe.copy()

        tsf.index = [x.date() for x in tsframe.index]
        tsf.to_excel(path, "test1", merge_cells=merge_cells)

        # context manager closes the workbook even if reading raises
        with ExcelFile(path) as reader:
            recons = pd.read_excel(reader, "test1", index_col=0)

        tm.assert_frame_equal(tsframe, recons)
Example #38
0
    def test_bool_types(self, np_type, path):
        # Writes a four-value column of dtype np_type, reads it back and
        # casts the result to np_type before comparing.
        written = DataFrame([1, 0, True, False], dtype=np_type)
        written.to_excel(path, "test1")

        with ExcelFile(path) as reader:
            parsed = pd.read_excel(reader, sheet_name="test1", index_col=0)
            recast = parsed.astype(np_type)

        tm.assert_frame_equal(written, recast)
Example #39
0
    def test_reader_closes_file(self):
        # Leaving the ExcelFile ``with`` block must close the file object
        # that was handed to it.
        _skip_if_no_xlrd()
        _skip_if_no_openpyxl()

        handle = open(os.path.join(self.dirpath, 'test.xlsx'), 'rb')
        with ExcelFile(handle) as xlsx:
            # parses okay
            xlsx.parse('Sheet1', index_col=0)

        self.assertTrue(handle.closed)
Example #40
0
    def test_to_excel_multiindex(self):
        # Gives self.frame a two-level integer MultiIndex and checks that
        # data and index names survive a write/parse cycle.
        _skip_if_no_xlrd()

        frame = self.frame
        n_rows = len(frame.index)
        level_values = np.arange(n_rows * 2).reshape(2, -1)
        frame.index = MultiIndex.from_arrays(level_values,
                                             names=['first', 'second'])

        with ensure_clean(self.ext) as tmp_path:
            # option variants; their output is overwritten by the next write
            frame.to_excel(tmp_path, 'test1', header=False)
            frame.to_excel(tmp_path, 'test1', cols=['A', 'B'])

            # round trip
            frame.to_excel(tmp_path, 'test1', merge_cells=self.merge_cells)
            workbook = ExcelFile(tmp_path)
            parse_options = dict(index_col=[0, 1],
                                 parse_dates=False,
                                 has_index_names=self.merge_cells)
            df = workbook.parse('test1', **parse_options)

            tm.assert_frame_equal(frame, df)
            self.assertEqual(frame.index.names, df.index.names)
Example #41
0
    def test_excel_sheet_by_name_raise(self, path, engine):
        # Sheet position 0 must read back the written frame, while the
        # *string* "0" is treated as a sheet name and must raise ValueError.
        written = DataFrame(np.random.randn(10, 2))
        written.to_excel(path)

        with ExcelFile(path) as xl:
            by_position = pd.read_excel(xl, sheet_name=0, index_col=0)

        tm.assert_frame_equal(written, by_position)

        expected_message = "Worksheet named '0' not found"
        with pytest.raises(ValueError, match=expected_message):
            pd.read_excel(xl, "0")
Example #42
0
    def test_to_excel_multiindex_dates(self, merge_cells, tsframe, path):
        # Round-trips tsframe with a (dates, integer) MultiIndex named
        # ("time", "foo") and checks both data and index names.

        # try multiindex with dates
        new_index = [tsframe.index, np.arange(len(tsframe.index))]
        tsframe.index = MultiIndex.from_arrays(new_index)

        tsframe.index.names = ["time", "foo"]
        tsframe.to_excel(path, "test1", merge_cells=merge_cells)

        # context manager closes the workbook even if reading raises
        with ExcelFile(path) as reader:
            recons = pd.read_excel(reader, "test1", index_col=[0, 1])

        tm.assert_frame_equal(tsframe, recons)
        assert recons.index.names == ("time", "foo")
Example #43
0
    def test_to_excel_empty_multiindex(self, path):
        # GH 19543.
        # An empty frame with an empty two-level MultiIndex (names 0 and 1)
        # and one column (2) must read back as an empty frame with columns
        # 0, 1 and 2.
        empty_index = MultiIndex.from_tuples([], names=[0, 1])
        DataFrame([], index=empty_index, columns=[2]).to_excel(path, "test1")

        with ExcelFile(path) as reader:
            result = pd.read_excel(reader, sheet_name="test1")

        expected = DataFrame([], columns=[0, 1, 2])
        tm.assert_frame_equal(
            result, expected, check_index_type=False, check_dtype=False
        )
Example #44
0
    def test_colaliases(self):
        # Writing self.frame2 with header=<Index of aliases> must produce a
        # sheet whose columns carry those aliases.
        _skip_if_no_xlrd()
        path = '__tmp_to_excel_from_excel_aliases__.' + self.ext

        with ensure_clean(path) as path:
            self.frame['A'][:5] = nan

            # Path-based writes with the basic option variants; each call
            # targets the same sheet name.
            for options in ({}, {'cols': ['A', 'B']},
                            {'header': False}, {'index': False}):
                self.frame.to_excel(path, 'test1', **options)

            # column aliases
            col_aliases = Index(['AA', 'X', 'Y', 'Z'])
            self.frame2.to_excel(path, 'test1', header=col_aliases)

            rs = ExcelFile(path).parse('test1', index_col=0)

            # expected: the same data relabelled with the aliases
            xp = self.frame2.copy()
            xp.columns = col_aliases
            tm.assert_frame_equal(xp, rs)
Example #45
0
    def test_excel_read_buffer(self):
        # ExcelFile must accept an already-open binary file object for both
        # the .xls and the .xlsx fixture; parsing without an exception is the
        # whole check ("it works").
        _skip_if_no_xlrd()
        _skip_if_no_openpyxl()

        for name in ('test.xls', 'test.xlsx'):
            pth = os.path.join(self.dirpath, name)
            # ``with`` guarantees the handle is closed, also when parsing
            # raises; parsing stays inside the block so the buffer is still
            # open while it is read.
            with open(pth, 'rb') as f:
                ExcelFile(f).parse('Sheet1', index_col=0, parse_dates=True)
Example #46
0
    def test_to_excel_multiindex_dates(self):
        # Round-trips self.tsframe with a (dates, integer) MultiIndex, first
        # with explicit index labels and then letting the reader infer the
        # index.
        _skip_if_no_xlrd()
        ext = self.ext
        path = '__tmp_to_excel_multiindex_dates__' + ext + '__.' + ext

        # try multiindex with dates
        tsframe = self.tsframe
        old_index = tsframe.index
        new_index = [old_index, np.arange(len(old_index))]
        tsframe.index = MultiIndex.from_arrays(new_index)

        try:
            with ensure_clean(path) as path:
                tsframe.to_excel(path, 'test1', index_label=['time', 'foo'])
                reader = ExcelFile(path)
                recons = reader.parse('test1', index_col=[0, 1])

                tm.assert_frame_equal(tsframe, recons, check_names=False)
                # assertEqual: assertEquals is a deprecated alias
                self.assertEqual(recons.index.names, ('time', 'foo'))

                # infer index
                tsframe.to_excel(path, 'test1')
                reader = ExcelFile(path)
                recons = reader.parse('test1')
                tm.assert_frame_equal(tsframe, recons)
        finally:
            # Restore the fixture even when an assertion above fails, so a
            # failure here cannot leak the MultiIndex into other tests.
            self.tsframe.index = old_index  # needed if setUP becomes classmethod
Example #47
0
    def test_to_excel_multiindex(self, merge_cells, engine, ext, frame):
        # Gives the frame fixture a two-level integer MultiIndex and checks
        # that it survives a write/read cycle.
        arrays = np.arange(len(frame.index) * 2).reshape(2, -1)
        new_index = MultiIndex.from_arrays(arrays, names=['first', 'second'])
        frame.index = new_index

        # option variants; their output is overwritten by the next write
        frame.to_excel(self.path, 'test1', header=False)
        frame.to_excel(self.path, 'test1', columns=['A', 'B'])

        # round trip
        frame.to_excel(self.path, 'test1', merge_cells=merge_cells)
        # context manager closes the workbook even if reading raises
        with ExcelFile(self.path) as reader:
            df = pd.read_excel(reader, 'test1', index_col=[0, 1])
        tm.assert_frame_equal(frame, df)
Example #48
0
    def test_int_types(self):
        # For each integer width: values must read back as int by default
        # (via ExcelFile.parse and read_excel) and as float when
        # convert_float=False is passed.
        _skip_if_no_xlrd()

        int_types = (np.int8, np.int16, np.int32, np.int64)
        for np_type in int_types:

            with ensure_clean(self.ext) as tmp_path:
                # Test np.int values read come back as int (rather than float
                # which is Excel's format).
                values = np.random.randint(-10, 10, size=(10, 2))
                frame = DataFrame(values, dtype=np_type)
                frame.to_excel(tmp_path, 'test1')

                int_frame = frame.astype(int)
                via_excelfile = ExcelFile(tmp_path).parse('test1')
                tm.assert_frame_equal(int_frame, via_excelfile)
                via_read_excel = read_excel(tmp_path, 'test1')
                tm.assert_frame_equal(int_frame, via_read_excel)

                # test with convert_float=False comes back as float
                float_frame = frame.astype(float)
                unconverted = read_excel(tmp_path, 'test1',
                                         convert_float=False)
                tm.assert_frame_equal(unconverted, float_frame)
Example #49
0
    def test_excelwriter_contextmanager(self):
        # Sheets written inside an ExcelWriter ``with`` block must be
        # readable afterwards without an explicit save call.
        _skip_if_no_xlrd()

        with ensure_clean(self.ext) as tmp_path:
            with ExcelWriter(tmp_path) as writer:
                for sheet, frame in (('Data1', self.frame),
                                     ('Data2', self.frame2)):
                    frame.to_excel(writer, sheet)

            with ExcelFile(tmp_path) as reader:
                first = reader.parse('Data1')
                second = reader.parse('Data2')
                tm.assert_frame_equal(first, self.frame)
                tm.assert_frame_equal(second, self.frame2)
Example #50
0
    def test_to_excel_multiindex(self, merge_cells, frame, path):
        # Gives the frame fixture a two-level integer MultiIndex and checks
        # that it survives a write/read cycle.
        arrays = np.arange(len(frame.index) * 2).reshape(2, -1)
        new_index = MultiIndex.from_arrays(arrays, names=["first", "second"])
        frame.index = new_index

        # option variants; their output is overwritten by the next write
        frame.to_excel(path, "test1", header=False)
        frame.to_excel(path, "test1", columns=["A", "B"])

        # round trip
        frame.to_excel(path, "test1", merge_cells=merge_cells)
        # context manager closes the workbook even if reading raises
        with ExcelFile(path) as reader:
            df = pd.read_excel(reader, "test1", index_col=[0, 1])
        tm.assert_frame_equal(frame, df)
Example #51
0
    def test_excel_writer_context_manager(self, frame, path):
        # Two sheets written inside an ExcelWriter ``with`` block must be
        # readable afterwards; the second sheet holds a copy of the frame
        # with its column labels reversed.
        reversed_labels = frame.copy()
        reversed_labels.columns = frame.columns[::-1]

        with ExcelWriter(path) as writer:
            frame.to_excel(writer, "Data1")
            reversed_labels.to_excel(writer, "Data2")

        with ExcelFile(path) as reader:
            first = pd.read_excel(reader, "Data1", index_col=0)
            second = pd.read_excel(reader, "Data2", index_col=0)

            tm.assert_frame_equal(first, frame)
            tm.assert_frame_equal(second, reversed_labels)
Example #52
0
    def test_excel_sheet_by_name_raise(self, path):
        # Sheet position 0 must read back the written frame, while the
        # *string* "0" is treated as a sheet name and must raise
        # xlrd.XLRDError.
        import xlrd

        gt = DataFrame(np.random.randn(10, 2))
        gt.to_excel(path)

        # context manager closes the workbook even if an assertion fails
        with ExcelFile(path) as xl:
            df = pd.read_excel(xl, 0, index_col=0)

            tm.assert_frame_equal(gt, df)

            with pytest.raises(xlrd.XLRDError):
                pd.read_excel(xl, "0")
Example #53
0
    def test_to_excel_multiindex_dates(self, merge_cells, engine, ext,
                                       tsframe):
        # Round-trips tsframe with a (dates, integer) MultiIndex named
        # ('time', 'foo') and checks both data and index names.

        # try multiindex with dates
        new_index = [tsframe.index, np.arange(len(tsframe.index))]
        tsframe.index = MultiIndex.from_arrays(new_index)

        tsframe.index.names = ['time', 'foo']
        tsframe.to_excel(self.path, 'test1', merge_cells=merge_cells)

        # context manager closes the workbook even if reading raises
        with ExcelFile(self.path) as reader:
            recons = pd.read_excel(reader, 'test1', index_col=[0, 1])

        tm.assert_frame_equal(tsframe, recons)
        assert recons.index.names == ('time', 'foo')
Example #54
0
    def test_to_excel_unicode_filename(self):
        """Write and read back a workbook whose file name contains a
        non-ASCII character.

        The test is skipped when opening such a file name raises
        ``UnicodeEncodeError``.
        """
        _skip_if_no_xlrd()
        with ensure_clean(u('\u0192u.') + self.ext) as filename:
            # Probe whether this system accepts the file name at all.
            try:
                f = open(filename, 'wb')
            except UnicodeEncodeError:
                raise nose.SkipTest('no unicode file names on this system')
            else:
                f.close()

            df = DataFrame([[0.123456, 0.234567, 0.567567],
                            [12.32112, 123123.2, 321321.2]],
                           index=['A', 'B'], columns=['X', 'Y', 'Z'])

            df.to_excel(filename, 'test1', float_format='%.2f')

            # Close the reader before ``ensure_clean`` exits so the handle is
            # not still open when the surrounding context finishes.
            with ExcelFile(filename) as reader:
                rs = reader.parse('test1', index_col=None)
            xp = DataFrame([[0.12, 0.23, 0.57],
                            [12.32, 123123.20, 321321.20]],
                           index=['A', 'B'], columns=['X', 'Y', 'Z'])
            tm.assert_frame_equal(rs, xp)
Example #55
0
    def test_to_excel_float_format(self, engine, ext):
        """Write floats with ``float_format="%.2f"`` and check that the
        values read back match the two-decimal expected frame."""
        df = DataFrame([[0.123456, 0.234567, 0.567567],
                        [12.32112, 123123.2, 321321.2]],
                       index=["A", "B"], columns=["X", "Y", "Z"])
        df.to_excel(self.path, "test1", float_format="%.2f")

        # Context manager ensures the workbook handle is closed even when
        # reading raises.
        with ExcelFile(self.path) as reader:
            result = pd.read_excel(reader, "test1", index_col=0)

        expected = DataFrame([[0.12, 0.23, 0.57],
                              [12.32, 123123.20, 321321.20]],
                             index=["A", "B"], columns=["X", "Y", "Z"])
        tm.assert_frame_equal(result, expected)
Example #56
0
    def test_excel_date_datetime_format(self, engine, ext):
        """Check that custom ``date_format``/``datetime_format`` strings on
        the writer do not change the values read back.

        The same frame is written once with a default writer and once with a
        writer given explicit format strings; both files must read back equal
        to each other and to ``df_expected``.
        """
        # see gh-4133
        #
        # Excel output format strings
        df = DataFrame([[date(2014, 1, 31),
                         date(1999, 9, 24)],
                        [datetime(1998, 5, 26, 23, 33, 4),
                         datetime(2014, 2, 28, 13, 5, 13)]],
                       index=["DATE", "DATETIME"], columns=["X", "Y"])
        df_expected = DataFrame([[datetime(2014, 1, 31),
                                  datetime(1999, 9, 24)],
                                 [datetime(1998, 5, 26, 23, 33, 4),
                                  datetime(2014, 2, 28, 13, 5, 13)]],
                                index=["DATE", "DATETIME"], columns=["X", "Y"])

        with ensure_clean(ext) as filename2:
            # Each writer is closed by its context manager, so a failure in
            # ``to_excel`` no longer leaves a writer open.
            with ExcelWriter(self.path) as writer1:
                df.to_excel(writer1, "test1")

            with ExcelWriter(filename2,
                             date_format="DD.MM.YYYY",
                             datetime_format="DD.MM.YYYY HH-MM-SS") as writer2:
                df.to_excel(writer2, "test1")

            # Likewise, close each reader as soon as its sheet has been read.
            with ExcelFile(self.path) as reader1:
                rs1 = pd.read_excel(reader1, "test1", index_col=0)

            with ExcelFile(filename2) as reader2:
                rs2 = pd.read_excel(reader2, "test1", index_col=0)

            tm.assert_frame_equal(rs1, rs2)

            # Since the reader returns a datetime object for dates,
            # we need to use df_expected to check the result.
            tm.assert_frame_equal(rs2, df_expected)
Example #57
0
    def test_excel_passes_na(self, read_ext):
        """Check how ``keep_default_na`` and ``na_values`` combine when
        reading ``Sheet1`` of the ``test4`` and ``test5`` workbooks.

        With ``keep_default_na=False`` only the explicit ``na_values`` entry
        ('apple') is expected to become NaN; with ``keep_default_na=True`` the
        other NA-like strings in the expected frames become NaN as well.
        """
        # Each workbook is opened in its own context manager so its handle is
        # closed before the next one is opened.
        with ExcelFile('test4' + read_ext) as excel:
            parsed = pd.read_excel(excel,
                                   'Sheet1',
                                   keep_default_na=False,
                                   na_values=['apple'])
            expected = DataFrame([['NA'], [1], ['NA'], [np.nan], ['rabbit']],
                                 columns=['Test'])
            tm.assert_frame_equal(parsed, expected)

            parsed = pd.read_excel(excel,
                                   'Sheet1',
                                   keep_default_na=True,
                                   na_values=['apple'])
            expected = DataFrame([[np.nan], [1], [np.nan], [np.nan],
                                  ['rabbit']],
                                 columns=['Test'])
            tm.assert_frame_equal(parsed, expected)

        # 13967
        with ExcelFile('test5' + read_ext) as excel:
            parsed = pd.read_excel(excel,
                                   'Sheet1',
                                   keep_default_na=False,
                                   na_values=['apple'])
            expected = DataFrame([['1.#QNAN'], [1], ['nan'], [np.nan],
                                  ['rabbit']],
                                 columns=['Test'])
            tm.assert_frame_equal(parsed, expected)

            parsed = pd.read_excel(excel,
                                   'Sheet1',
                                   keep_default_na=True,
                                   na_values=['apple'])
            expected = DataFrame([[np.nan], [1], [np.nan], [np.nan],
                                  ['rabbit']],
                                 columns=['Test'])
            tm.assert_frame_equal(parsed, expected)
Example #58
0
def test_excel_file_warning_with_xlsx_file(datapath):
    """Check the warning behaviour when an .xlsx file is opened with
    ``engine=None``.

    If ``import_optional_dependency("openpyxl", errors="ignore")`` returns a
    module, reading the file must produce no warning. Otherwise constructing
    an ``ExcelFile`` must produce a ``FutureWarning`` whose message matches
    the xlrd-maintenance text.
    """
    # GH 29375
    path = datapath("io", "data", "excel", "test1.xlsx")
    openpyxl = import_optional_dependency("openpyxl", errors="ignore")

    if openpyxl is not None:
        with tm.assert_produces_warning(None):
            pd.read_excel(path, "Sheet1", engine=None)
        return

    with tm.assert_produces_warning(
        FutureWarning,
        raise_on_extra_warnings=False,
        match="The xlrd engine is no longer maintained",
    ):
        ExcelFile(path, engine=None)
Example #59
0
    def test_to_excel_interval_no_labels(self, path):
        """Write a column of unlabelled intervals produced by ``pd.cut`` and
        check that it reads back as the intervals' string form."""
        # see gh-19242
        #
        # Test writing Interval without labels.
        raw_values = np.random.randint(-10, 10, size=(20, 1))
        df = DataFrame(raw_values, dtype=np.int64)
        expected = df.copy()

        # The expected frame carries the same 10-bin cut, converted to str.
        expected["new"] = pd.cut(expected[0], 10).astype(str)
        df["new"] = pd.cut(df[0], 10)

        df.to_excel(path, "test1")
        with ExcelFile(path) as reader:
            recons = pd.read_excel(reader, sheet_name="test1", index_col=0)
        tm.assert_frame_equal(expected, recons)
Example #60
0
    def test_parse_cols_int(self):
        """Parse ``Sheet1`` and ``Sheet2`` of ``test.xls`` and ``test.xlsx``
        with an integer ``parse_cols`` and compare both results against the
        CSV reference restricted to columns A, B and C."""
        _skip_if_no_openpyxl()
        _skip_if_no_xlrd()

        # Keyword arguments shared by both sheet reads.
        parse_kwargs = dict(index_col=0, parse_dates=True, parse_cols=3)

        for s in ('', 'x'):
            pth = os.path.join(self.dirpath, 'test.xls%s' % s)
            xls = ExcelFile(pth)

            df = xls.parse('Sheet1', **parse_kwargs)

            reference = self.read_csv(self.csv1, index_col=0,
                                      parse_dates=True)
            df2 = reference.reindex(columns=['A', 'B', 'C'])

            # Sheet2 is read with the row at position 1 skipped.
            df3 = xls.parse('Sheet2', skiprows=[1], **parse_kwargs)

            # TODO: add index to xls file
            tm.assert_frame_equal(df, df2, check_names=False)
            tm.assert_frame_equal(df3, df2, check_names=False)