Example #1
File: jsontd.py Project: nandi6uc/ticdat
    def _create_jdict(self, path_or_buf):
        if stringish(path_or_buf) and os.path.exists(path_or_buf):
            reasonable_string = path_or_buf
            verify(os.path.isfile(path_or_buf),
                   "path_or_buf is not a valid file path.")
            try:
                with open(path_or_buf, "r") as fp:
                    jdict = json.load(fp)
            except Exception as e:
                raise TicDatError("Unable to interpret %s as json file : %s" %
                                  (path_or_buf, e))
        else:
            verify(stringish(path_or_buf), "%s isn't a string" % path_or_buf)
            reasonable_string = path_or_buf[:10]
            try:
                jdict = json.loads(path_or_buf)
            except Exception as e:
                raise TicDatError(
                    "Unable to interpret %s as json string : %s" %
                    (reasonable_string, e))

        verify(dictish(jdict),
               "%s failed to load a dictionary" % reasonble_string)
        verify(
            all(map(stringish, jdict)),
            "The dictionary loaded from %s isn't indexed by strings" %
            reasonable_string)
        verify(
            all(map(containerish, jdict.values())),
            "The dictionary loaded from %s doesn't have containers as values" %
            reasonable_string)
        return jdict
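
A minimal usage sketch of the path-or-string duality this helper enables. The one-table schema and file name are hypothetical; create_tic_dat is the public entry point that appears to sit on top of _create_jdict:

    from ticdat import TicDatFactory

    tdf = TicDatFactory(parameters=[["Name"], ["Value"]])  # hypothetical schema
    # either a file path or a raw JSON string is accepted
    dat_from_string = tdf.json.create_tic_dat('{"parameters": [["p1", 10]]}')
    dat_from_file = tdf.json.create_tic_dat("data.json")  # assumes this file exists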
Example #2
 def get_fld_type(t, f, default_type):
     if (t, f) in forced_field_types:
         return forced_field_types[t, f]
     if t == "parameters" and self.tdf.parameters:
         return "text"
     fld_type = self.tdf.data_types.get(t, {}).get(f)
     if not fld_type:
         return default_type
     if fld_type.datetime:
         return "timestamp"
     verify(
         not (fld_type.number_allowed and fld_type.strings_allowed),
         f"Select one of string or numeric for {t}.{f} if declaring type and using postgres"
     )
     if fld_type.strings_allowed:
         return 'text'
     if fld_type.number_allowed:
         if fld_type.must_be_int:
             return 'integer'
         else:
             return 'float'
     else:
         raise TicDatError(
             f"Allow one of text or numeric for {t}.{f} if declaring type and using postgres"
         )
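
This helper maps a ticdat field type to a postgres column type, with forced_field_types letting the caller pin a type per (table, field) pair. A hedged sketch of how that might be driven from the schema-writing side; the write_schema signature and the table/field names are assumptions, and engine is a sqlalchemy postgres engine as in the sketch under Example #6:

    # keys of forced_field_types are (table, field) tuples, values are postgres types
    tdf.pgsql.write_schema(engine, "my_schema",
                           forced_field_types={("categories", "Name"): "text"})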
Example #3
 def write_file(self,
                tic_dat,
                json_file_path,
                allow_overwrite=False,
                verbose=False):
     """
     write the ticDat data to a json file
     :param tic_dat: the data object to write (typically a TicDat)
     :param json_file_path: The file path of the json file to create.
     :param allow_overwrite: boolean - are we allowed to overwrite an
                             existing file?
     :param verbose: boolean. Verbose mode writes the data rows as dicts
                     keyed by field name. Otherwise, they are lists.
     :return:
     """
     _standard_verify(self.tic_dat_factory)
     verify(not (os.path.exists(json_file_path) and not allow_overwrite),
            "%s exists and allow_overwrite is not enabled" % json_file_path)
     msg = []
     if not self.tic_dat_factory.good_tic_dat_object(
             tic_dat, lambda m: msg.append(m)):
         raise TicDatError("Not a valid TicDat object for this schema : " +
                           " : ".join(msg))
     jdict = make_json_dict(self.tic_dat_factory, tic_dat, verbose)
     with open(json_file_path, "w") as fp:
         json.dump(jdict, fp, sort_keys=True, indent=2)
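
A short usage sketch against a hypothetical one-table schema:

    from ticdat import TicDatFactory

    tdf = TicDatFactory(parameters=[["Name"], ["Value"]])  # hypothetical schema
    dat = tdf.TicDat(parameters=[["p1", 10]])
    # verbose=True writes each data row as a dict keyed by field name
    tdf.json.write_file(dat, "out.json", allow_overwrite=True, verbose=True)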
Example #4
 def write_directory(self,
                     tic_dat,
                     dir_path,
                     allow_overwrite=False,
                     dialect='excel',
                     write_header=True):
     """
     write the ticDat data to a collection of csv files
     :param tic_dat: the data object
     :param dir_path: the directory in which to write the csv files
     :param allow_overwrite: boolean - are we allowed to overwrite existing
                             files?
     :param dialect: the csv dialect. Consult csv documentation for details.
     :param write_header: Boolean. Should the header information be written
                          as the first row?
     :return:
     """
     verify(csv, "csv needs to be installed to use this subroutine")
     verify(dialect in csv.list_dialects(), "Invalid dialect %s" % dialect)
     verify(not os.path.isfile(dir_path),
            "A file is not a valid directory path")
     if self.tic_dat_factory.generic_tables:
         dat, tdf = create_generic_free(tic_dat, self.tic_dat_factory)
         return tdf.csv.write_directory(dat, dir_path, allow_overwrite,
                                        dialect, write_header)
     tdf = self.tic_dat_factory
     msg = []
     if not self.tic_dat_factory.good_tic_dat_object(
             tic_dat, lambda m: msg.append(m)):
         raise TicDatError("Not a valid TicDat object for this schema : " +
                           " : ".join(msg))
     if not allow_overwrite:
         for t in tdf.all_tables:
             f = os.path.join(dir_path, t + ".csv")
             verify(not os.path.exists(f),
                    "The %s path exists and overwrite is not allowed" % f)
     if not os.path.isdir(dir_path):
         os.mkdir(dir_path)
     for t in tdf.all_tables:
         f = os.path.join(dir_path, t + ".csv")
         with open(f, 'w') as csvfile:
             writer = csv.DictWriter(
                 csvfile,
                 dialect=dialect,
                 fieldnames=tdf.primary_key_fields.get(t, ()) +
                 tdf.data_fields.get(t, ()))
             if write_header:
                 writer.writeheader()
             _t = getattr(tic_dat, t)
             if dictish(_t):
                 for p_key, data_row in _t.items():
                     primary_key_dict = {
                         f: v
                         for f, v in zip(
                             tdf.primary_key_fields[t],
                             p_key if containerish(p_key) else (p_key, ))
                     }
                     writer.writerow(dict(data_row, **primary_key_dict))
             else:
                 for data_row in (_t if containerish(_t) else _t()):
                     writer.writerow(dict(data_row))
Example #5
    def write_db_data(self, tic_dat, db_file_path, allow_overwrite = False):
        """
        write the ticDat data to an SQLite database file

        :param tic_dat: the data object to write

        :param db_file_path: the file path of the SQLite database to populate

        :param allow_overwrite: boolean - are we allowed to overwrite pre-existing data

        :return:

        caveats : float("inf"), float("-inf") are written as "inf", "-inf"
        """
        verify(sql, "sqlite3 needs to be installed to use this subroutine")
        msg = []
        if not self.tic_dat_factory.good_tic_dat_object(tic_dat, lambda m : msg.append(m)) :
            raise TicDatError("Not a valid TicDat object for this schema : " + " : ".join(msg))
        verify(not os.path.isdir(db_file_path), "A directory is not a valid SQLite file path")
        if self.tic_dat_factory.generic_tables:
             dat, tdf = create_generic_free(tic_dat, self.tic_dat_factory)
             return tdf.sql.write_db_data(dat, db_file_path, allow_overwrite)
        if not os.path.exists(db_file_path) :
            self.write_db_schema(db_file_path)
        table_names = self._check_tables_fields(db_file_path, self.tic_dat_factory.all_tables)
        with _sql_con(db_file_path, foreign_keys=False) as con:
            for t in self.tic_dat_factory.all_tables:
                verify(table_names[t] == t, "Failed to find table %s in path %s"%
                                            (t, db_file_path))
                verify(allow_overwrite or not any(True for _ in con.execute("Select * from %s" % t)),
                        "allow_overwrite is False, but there are already data records in %s" % t)
                if allow_overwrite:
                    con.execute("Delete from %s" % t)
            for sql_str, data in self._get_data(tic_dat, as_sql=False):
                con.execute(sql_str, list(data))
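
A usage sketch (tdf/dat as in the sketch under Example #3); per the code above, the SQLite schema is created automatically when the file does not yet exist:

    tdf.sql.write_db_data(dat, "data.db", allow_overwrite=True)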
Example #6
    def write_data(self,
                   tic_dat,
                   engine,
                   schema,
                   dsn=None,
                   pre_existing_rows=None):
        """
        write the ticDat data to a PostGres database

        :param tic_dat: the data object to write

        :param engine: a sqlalchemy database engine with drivername postgresql

        :param schema: the postgres schema to write to (call self.write_schema explicitly as needed)

        :param dsn: optional - if truthy, a dict that can be unpacked as arguments to
                    psycopg2.connect. Will speed up bulk writing compared to engine.execute
                    If truthy and not a dict, then will be passed directly to psycopg2.connect as the sole argument.

        :param pre_existing_rows: if provided, a dict mapping table name to either "delete" or "append"
                                  default behavior is "delete"

        :return:
        """
        verify(sa, "sqalchemy needs to be installed to use this subroutine")
        verify(engine.name == 'postgresql',
               "a sqlalchemy engine with drivername='postgres' is required")
        verify(not dsn or psycopg2,
               "need psycopg2 to use the faster dsn write option")
        self._check_good_pgtd_compatible_table_field_names()
        msg = []
        if not self.tdf.good_tic_dat_object(tic_dat, lambda m: msg.append(m)):
            raise TicDatError("Not a valid TicDat object for this schema : " +
                              " : ".join(msg))
        verify(not self.tdf.generic_tables,
               "TicDat for postgres does not yet support generic tables")
        self.check_tables_fields(
            engine, schema,
            error_on_missing_table=True)  # call self.write_schema as needed
        self._handle_prexisting_rows(engine, schema, pre_existing_rows or {})
        if dsn:
            connect_kwargs = dsn if dsn and dictish(dsn) else {}
            connect_args = [dsn] if dsn and not dictish(dsn) else []
            with psycopg2.connect(*connect_args, **connect_kwargs) as db:
                with db.cursor() as cursor:
                    for k, v in self._get_data(tic_dat,
                                               schema,
                                               dump_format="dict").items():
                        psycopg2.extras.execute_values(cursor, k, v)
        else:
            all_dat = self._get_data(tic_dat, schema)
            if len(all_dat) > 1000:
                print(
                    "***pgtd.py not using most efficient data writing technique**"
                )
            for sql_str, data in all_dat:
                engine.execute(sql_str, data)
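
A hedged usage sketch (tdf/dat as in the sketch under Example #3, connection URL hypothetical); write_schema is called first because write_data errors on missing tables:

    from sqlalchemy import create_engine

    engine = create_engine("postgresql://user:pass@localhost/mydb")  # hypothetical DSN
    tdf.pgsql.write_schema(engine, "my_schema")
    tdf.pgsql.write_data(dat, engine, "my_schema")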
Example #7
File: xls.py Project: nandi6uc/ticdat
    def write_file(self,
                   tic_dat,
                   file_path,
                   allow_overwrite=False,
                   case_space_sheet_names=False):
        """
        write the ticDat data to an excel file

        :param tic_dat: the data object to write (typically a TicDat)

        :param file_path: The file path of the excel file to create
                          Needs to end in either ".xls" or ".xlsx"
                          The latter is capable of writing out larger tables,
                          but the former handles infinity seamlessly.
                          If ".xlsx", then be advised that +/- float("inf") will be replaced
                          with "inf"/"-inf", unless infinity_io_flag is being applied.

        :param allow_overwrite: boolean - are we allowed to overwrite an
                                existing file?

        :param case_space_sheet_names: boolean - make best guesses how to add spaces and upper case
                                       characters to sheet names

        :return:

        caveats: None may be written out as an empty string. This reflects the behavior of xlwt.
        """
        self._verify_differentiable_sheet_names()
        verify(
            utils.stringish(file_path)
            and (file_path.endswith(".xls") or file_path.endswith(".xlsx")),
            "file_path argument needs to end in .xls or .xlsx")
        msg = []
        if not self.tic_dat_factory.good_tic_dat_object(
                tic_dat, lambda m: msg.append(m)):
            raise TicDatError("Not a valid ticDat object for this schema : " +
                              " : ".join(msg))
        verify(not os.path.isdir(file_path),
               "A directory is not a valid xls file path")
        verify(allow_overwrite or not os.path.exists(file_path),
               "The %s path exists and overwrite is not allowed" % file_path)
        if self.tic_dat_factory.generic_tables:
            dat, tdf = utils.create_generic_free(tic_dat, self.tic_dat_factory)
            return tdf.xls.write_file(dat, file_path, allow_overwrite,
                                      case_space_sheet_names)
        case_space_sheet_names = case_space_sheet_names and \
                                 len(set(self.tic_dat_factory.all_tables)) == \
                                 len(set(map(case_space_to_pretty, self.tic_dat_factory.all_tables)))
        tbl_name_mapping = {
            t: case_space_to_pretty(t) if case_space_sheet_names else t
            for t in self.tic_dat_factory.all_tables
        }
        if file_path.endswith(".xls"):
            self._xls_write(tic_dat, file_path, tbl_name_mapping)
        else:
            self._xlsx_write(tic_dat, file_path, tbl_name_mapping)
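
A usage sketch (tdf/dat as in the sketch under Example #3); with case_space_sheet_names=True, a table named cost_matrix would get a best-guess pretty sheet name along the lines of "Cost Matrix":

    tdf.xls.write_file(dat, "out.xlsx", allow_overwrite=True,
                       case_space_sheet_names=True)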
Example #8
File: jsontd.py Project: nandi6uc/ticdat
    def write_file(self,
                   tic_dat,
                   json_file_path,
                   allow_overwrite=False,
                   verbose=False,
                   to_pandas=False):
        """
        write the ticDat data to a json file (or json string)

        :param tic_dat: the data object to write (typically a TicDat)

        :param json_file_path: The file path of the json file to create. If empty string, then return a JSON string.

        :param allow_overwrite: boolean - are we allowed to overwrite an
                                existing file?

        :param verbose: boolean. Verbose mode writes the data rows as dicts
                        keyed by field name. Otherwise, they are lists.

        :param to_pandas: boolean. if truthy, then use the PanDatFactory method of writing to json.

        :return:
        """
        _standard_verify(self.tic_dat_factory)
        verify(not (to_pandas and verbose),
               "verbose argument is inconsistent with to_pandas")
        verify(
            not (json_file_path and os.path.exists(json_file_path)
                 and not allow_overwrite),
            "%s exists and allow_overwrite is not enabled" % json_file_path)
        if to_pandas:
            from ticdat import PanDatFactory
            pdf = PanDatFactory.create_from_full_schema(
                self.tic_dat_factory.schema(include_ancillary_info=True))
            return pdf.json.write_file(
                self.tic_dat_factory.copy_to_pandas(tic_dat,
                                                    drop_pk_columns=False),
                json_file_path)
        msg = []
        if not self.tic_dat_factory.good_tic_dat_object(
                tic_dat, lambda m: msg.append(m)):
            raise TicDatError("Not a valid TicDat object for this schema : " +
                              " : ".join(msg))
        jdict = make_json_dict(self.tic_dat_factory,
                               tic_dat,
                               verbose,
                               use_infinity_io_flag_if_provided=True)
        if not json_file_path:
            return json.dumps(jdict, sort_keys=True, indent=2)
        with open(json_file_path, "w") as fp:
            json.dump(jdict, fp, sort_keys=True, indent=2)
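
Per the docstring, an empty json_file_path returns the JSON text instead of writing a file. A sketch (tdf/dat as in the sketch under Example #3):

    json_text = tdf.json.write_file(dat, "", verbose=True)
    print(json_text)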
Example #9
 def _create_jdict(self, json_file_path):
     verify(os.path.isfile(json_file_path), "json_file_path is not a valid file path.")
     try :
         with open(json_file_path, "r") as fp:
             jdict = json.load(fp)
     except Exception as e:
         raise TicDatError("Unable to interpret %s as json file : %s"%
                           (json_file_path, e.message))
     verify(dictish(jdict), "%s failed to load a dictionary"%json_file_path)
     verify(all(map(stringish, jdict)),
            "The dictionary loaded from %s isn't indexed by strings"%json_file_path)
     verify(all(map(containerish, jdict.values())),
            "The dictionary loaded from %s doesn't have containers as values"%json_file_path)
     return jdict
Example #10
 def _get_sheet_names(self, xls_file_path):
     sheets = defaultdict(list)
     try :
         xl = pd.ExcelFile(xls_file_path)
     except Exception as e:
         raise TicDatError("Unable to open %s as xls file : %s"%(xls_file_path, e.message))
     for table, sheet in product(self.pan_dat_factory.all_tables, xl.sheet_names) :
         if table.lower()[:_longest_sheet] == sheet.lower().replace(' ', '_')[:_longest_sheet]:
             sheets[table].append(sheet)
     duplicated_sheets = tuple(_t for _t,_s in sheets.items() if len(_s) > 1)
     verify(not duplicated_sheets, "The following sheet names were duplicated : " +
            ",".join(duplicated_sheets))
     sheets = FrozenDict({k:v[0] for k,v in sheets.items()})
     return sheets
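
The matching rule above lower-cases the sheet name, replaces spaces with underscores, and compares up to _longest_sheet characters. A standalone illustration of that normalization, assuming a limit of 31 (Excel's sheet-name cap):

    def normalize(name, limit=31):  # limit stands in for _longest_sheet
        return name.lower().replace(' ', '_')[:limit]

    assert normalize("Cost Matrix") == normalize("cost_matrix")  # sheet matches table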
Example #11
File: xls.py Project: nandi6uc/ticdat
 def _get_sheets_and_fields(self,
                            xls_file_path,
                            all_tables,
                            row_offsets,
                            headers_present,
                            print_missing_tables=False):
     verify(
         utils.stringish(xls_file_path) and os.path.exists(xls_file_path),
         "xls_file_path argument %s is not a valid file path." %
         xls_file_path)
     try:
         book = xlrd.open_workbook(xls_file_path)
     except Exception as e:
         raise TicDatError("Unable to open %s as xls file : %s" %
                           (xls_file_path, e))
     sheets = defaultdict(list)
     for table, sheet in product(all_tables, book.sheets()):
         if table.lower()[:_longest_sheet] == sheet.name.lower().replace(
                 ' ', '_')[:_longest_sheet]:
             sheets[table].append(sheet)
     duplicated_sheets = tuple(_t for _t, _s in sheets.items()
                               if len(_s) > 1)
     verify(
         not duplicated_sheets,
         "The following sheet names were duplicated : " +
         ",".join(duplicated_sheets))
     sheets = FrozenDict({k: v[0] for k, v in sheets.items()})
     missing_tables = {t for t in all_tables if t not in sheets}
     if missing_tables and print_missing_tables:
         print(
             "The following table names could not be found in the %s file.\n%s\n"
             % (xls_file_path, "\n".join(missing_tables)))
     field_indicies, missing_fields, dup_fields = {}, {}, {}
     for table, sheet in sheets.items():
         field_indicies[table], missing_fields[table], dup_fields[table] = \
             self._get_field_indicies(table, sheet, row_offsets[table], headers_present)
     verify(
         not any(_ for _ in missing_fields.values()),
         "The following field names could not be found : \n" +
         "\n".join("%s : " % t + ",".join(bf)
                   for t, bf in missing_fields.items() if bf))
     verify(
         not any(_ for _ in dup_fields.values()),
         "The following field names were duplicated : \n" +
         "\n".join("%s : " % t + ",".join(bf)
                   for t, bf in dup_fields.items() if bf))
     return sheets, field_indicies, book.datemode
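
This helper backs the read side of the xls factory. A usage sketch of the public entry point it supports (tdf as in the sketch under Example #3, file name hypothetical):

    dat = tdf.xls.create_tic_dat("in.xls")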
Example #12
File: xls.py Project: austin-bren/ticdat
 def _get_sheets_and_fields(self, xls_file_path, all_tables, row_offsets, headers_present,
                            print_missing_tables = False):
     verify(utils.stringish(xls_file_path) and os.path.exists(xls_file_path),
            "xls_file_path argument %s is not a valid file path."%xls_file_path)
     try :
         book = xlrd.open_workbook(xls_file_path) if xls_file_path.endswith(".xls") else \
             openpyxl.load_workbook(xls_file_path, data_only=True)
     except Exception as e:
         raise TicDatError("Unable to open %s as xls file : %s"%(xls_file_path, e))
     sheet_name = lambda sheet: sheet.name if xls_file_path.endswith(".xls") else sheet.title
     sheets = defaultdict(list)
     book_sheets = lambda: book.sheets() if xls_file_path.endswith(".xls") else book.worksheets
     for table, sheet in product(all_tables, book_sheets()) :
         if table.lower()[:_longest_sheet] == sheet_name(sheet).lower().replace(' ', '_')[:_longest_sheet]:
             sheets[table].append(sheet)
     duplicated_sheets = tuple(_t for _t,_s in sheets.items() if len(_s) > 1)
     verify(not duplicated_sheets, "The following sheet names were duplicated : " +
            ",".join(duplicated_sheets))
     wrapped_sheet = lambda sheet: _XlrdSheetWrapper(sheet, book.datemode) if xls_file_path.endswith(".xls") else \
                                   _OpenPyxlSheetWrapper(sheet, prune_trailing_empty_rows=
                                     self.tic_dat_factory.xlsx_trailing_empty_rows == "prune")
     sheets = FrozenDict({k: wrapped_sheet(v[0]) for k,v in sheets.items()})
     missing_tables = {t for t in all_tables if t not in sheets}
     if missing_tables and print_missing_tables:
         print ("The following table names could not be found in the %s file.\n%s\n"%
                (xls_file_path,"\n".join(missing_tables)))
     field_indicies, missing_fields, dup_fields = {}, {}, {}
     for table, sheet in sheets.items() :
         field_indicies[table], missing_fields[table], dup_fields[table] = \
             self._get_field_indicies(table, sheet, row_offsets[table], headers_present)
     verify(not any(_ for _ in missing_fields.values()),
            "The following field names could not be found : \n" +
            "\n".join("%s : "%t + ",".join(bf) for t,bf in missing_fields.items() if bf))
     verify(not any(_ for _ in dup_fields.values()),
            "The following field names were duplicated : \n" +
            "\n".join("%s : "%t + ",".join(bf) for t,bf in dup_fields.items() if bf))
     return sheets, field_indicies
Example #13
 def write_file(self, tic_dat, file_path, allow_overwrite=False):
     """
     write the ticDat data to an excel file
     :param tic_dat: the data object to write (typically a TicDat)
     :param file_path: The file path of the excel file to create
                       Needs to end in either ".xls" or ".xlsx"
                       The latter is capable of writing out larger tables,
                       but the former handles infinity seamlessly.
                       If ".xlsx", then be advised that +/- float("inf") will be replaced
                       with +/- 1e+100
     :param allow_overwrite: boolean - are we allowed to overwrite an
                             existing file?
     :return:
     caveats: None may be written out as an empty string. This reflects the behavior of xlwt.
     """
     self._verify_differentiable_sheet_names()
     verify(
         utils.stringish(file_path)
         and (file_path.endswith(".xls") or file_path.endswith(".xlsx")),
         "file_path argument needs to end in .xls or .xlsx")
     msg = []
     if not self.tic_dat_factory.good_tic_dat_object(
             tic_dat, lambda m: msg.append(m)):
         raise TicDatError("Not a valid ticDat object for this schema : " +
                           " : ".join(msg))
     verify(not os.path.isdir(file_path),
            "A directory is not a valid xls file path")
     verify(allow_overwrite or not os.path.exists(file_path),
            "The %s path exists and overwrite is not allowed" % file_path)
     if self.tic_dat_factory.generic_tables:
         dat, tdf = utils.create_generic_free(tic_dat, self.tic_dat_factory)
         return tdf.xls.write_file(dat, file_path, allow_overwrite)
     if file_path.endswith(".xls"):
         self._xls_write(tic_dat, file_path)
     else:
         self._xlsx_write(tic_dat, file_path)
Example #14
 def write_file(self, tic_dat, mdb_file_path, allow_overwrite=False):
     """
     write the ticDat data to an Access (mdb) database file
     :param tic_dat: the data object to write
     :param mdb_file_path: the file path of the Access database to populate
     :param allow_overwrite: boolean - are we allowed to overwrite pre-existing data
     :return:
     caveats : Numbers with absolute values larger than 1e+100 will
               be written as 1e+100 or -1e+100
     NB - thrown Exceptions of the form "Data type mismatch in criteria expression"
          generally result either from Access's inability to store different data
          types in the same field, or from a mismatch between the data object
          and the default field types ticdat uses when creating an Access schema.
          For the latter, feel free to call the write_schema function on the data
          file first with explicitly identified field types.
     """
     _standard_verify(self.tic_dat_factory.generic_tables)
     msg = []
     if not self.tic_dat_factory.good_tic_dat_object(
             tic_dat, lambda m: msg.append(m)):
         raise TicDatError("Not a valid TicDat object for this schema : " +
                           " : ".join(msg))
     verify(not os.path.isdir(mdb_file_path),
            "A directory is not a valid Access file path")
     if not os.path.exists(mdb_file_path):
         self.write_schema(mdb_file_path)
     table_names = self._check_tables_fields(
         mdb_file_path, self.tic_dat_factory.all_tables)
     with _connect(_connection_str(mdb_file_path)) as con:
         for t in self.tic_dat_factory.all_tables:
             verify(
                 table_names[t] == t,
                 "Failed to find table %s in path %s" % (t, mdb_file_path))
             if not allow_overwrite:
                 with con.cursor() as cur:
                     cur.execute("Select * from %s" % t)
                     verify(
                         not any(True for _ in cur.fetchall()),
                         "allow_overwrite is False, but there are already data records in %s"
                         % t)
             con.cursor().execute("Delete from %s" %
                                  t).commit() if allow_overwrite else None
             _t = getattr(tic_dat, t)
             if dictish(_t):
                 primary_keys = tuple(
                     self.tic_dat_factory.primary_key_fields[t])
                 for pk_row, sql_data_row in _t.items():
                     _items = tuple(sql_data_row.items())
                     fields = _brackets(primary_keys +
                                        tuple(x[0] for x in _items))
                     data_row = ((pk_row,) if len(primary_keys)==1 else pk_row) + \
                               tuple(_write_data(x[1]) for x in _items)
                     assert len(data_row) == len(fields)
                     str = "INSERT INTO %s (%s) VALUES (%s)"%\
                           (t, ",".join(fields), ",".join("?" for _ in fields))
                     con.cursor().execute(str, data_row).commit()
             else:
                 for sql_data_row in (_t if containerish(_t) else _t()):
                     str = "INSERT INTO %s (%s) VALUES (%s)" % (t, ",".join(
                         _brackets(sql_data_row.keys())), ",".join(
                             ["?"] * len(sql_data_row)))
                     con.cursor().execute(
                         str, tuple(map(_write_data,
                                        sql_data_row.values())))
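
A usage sketch (tdf/dat as in the sketch under Example #3); per the code above the Access schema is created when the file does not exist, and an Access ODBC driver (typically on Windows) is required:

    tdf.mdb.write_file(dat, "data.mdb", allow_overwrite=True)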
Example #15
    def create_pan_dat(self, xls_file_path, fill_missing_fields=False):
        """
        Create a PanDat object from an Excel file

        :param xls_file_path: An Excel file containing sheets whose names match
                              the table names in the schema.

        :param fill_missing_fields: boolean. If truthy, missing fields will be filled in
                                    with their default value. Otherwise, missing fields
                                    throw an Exception.

        :return: a PanDat object populated by the matching sheets.

        caveats: Missing sheets resolve to an empty table, but missing fields
                 on matching sheets throw an Exception (unless fill_missing_fields is truthy).
                 Table names are matched to sheets with case-space insensitivity, but spaces and
                 case are respected for field names.
                 (ticdat supports whitespace in field names but not table names).

        Note that if you save a DataFrame to excel and then recover it, the type of data might change. For example

            df = pd.DataFrame({"a":["100", "200", "300"]})
            df.to_excel("something.xlsx")
            df2 = pd.read_excel("something.xlsx")

        results in a numeric column in df2. To address this, you need to use set_data_type for your
        PanDatFactory.

        This problem is even worse with df = pd.DataFrame({"a":["0100", "1200", "2300"]})
        """
        rtn = {}
        try:
            xl = pd.ExcelFile(xls_file_path)
        except Exception as e:
            raise TicDatError("Unable to open %s as xls file : %s" %
                              (xls_file_path, e))
        for t, s in self._get_sheet_names(xl).items():
            rtn[t] = pd.read_excel(
                xl, s, dtype=self.pan_dat_factory._dtypes_for_pandas_read(t))
        missing_tables = {
            t
            for t in self.pan_dat_factory.all_tables if t not in rtn
        }
        if missing_tables:
            print(
                "The following table names could not be found in the %s file.\n%s\n"
                % (xls_file_path, "\n".join(missing_tables)))
        missing_fields = {(t, f)
                          for t in rtn
                          for f in all_fields(self.pan_dat_factory, t)
                          if f not in rtn[t].columns}
        if fill_missing_fields:
            for t, f in missing_fields:
                rtn[t][f] = self.pan_dat_factory.default_values[t][f]
        verify(
            fill_missing_fields or not missing_fields,
            "The following are (table, field) pairs missing from the %s file.\n%s"
            % (xls_file_path, missing_fields))
        xl.close()
        rtn = _clean_pandat_creator(self.pan_dat_factory,
                                    rtn,
                                    print_missing_tables=True)
        if self.pan_dat_factory.xlsx_trailing_empty_rows == "prune":
            from ticdat.pandatfactory import remove_trailing_all_nan
            for t in self.pan_dat_factory.all_tables:
                setattr(rtn, t, remove_trailing_all_nan(getattr(rtn, t)))
        return rtn
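
A usage sketch that also addresses the round-trip retyping caveat from the docstring by pinning a column to string via set_data_type (schema and file name hypothetical):

    from ticdat import PanDatFactory

    pdf = PanDatFactory(parameters=[["Name"], ["Value"]])  # hypothetical schema
    # keep "Name" textual so values like "0100" survive the excel round trip
    pdf.set_data_type("parameters", "Name", number_allowed=False, strings_allowed='*')
    pan_dat = pdf.xls.create_pan_dat("in.xlsx")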