Code example #1
File: jsontd.py Project: nandi6uc/ticdat
    def _create_jdict(self, path_or_buf):
        if stringish(path_or_buf) and os.path.exists(path_or_buf):
            reasonable_string = path_or_buf
            verify(os.path.isfile(path_or_buf),
                   "json_file_path is not a valid file path.")
            try:
                with open(path_or_buf, "r") as fp:
                    jdict = json.load(fp)
            except Exception as e:
                raise TicDatError("Unable to interpret %s as json file : %s" %
                                  (path_or_buf, e))
        else:
            verify(stringish(path_or_buf), "%s isn't a string" % path_or_buf)
            reasonable_string = path_or_buf[:10]
            try:
                jdict = json.loads(path_or_buf)
            except Exception as e:
                raise TicDatError(
                    "Unable to interpret %s as json string : %s" %
                    (reasonable_string, e))

        verify(dictish(jdict),
               "%s failed to load a dictionary" % reasonble_string)
        verify(
            all(map(stringish, jdict)),
            "The dictionary loaded from %s isn't indexed by strings" %
            reasonable_string)
        verify(
            all(map(containerish, jdict.values())),
            "The dictionary loaded from %s doesn't have containers as values" %
            reasonable_string)
        return jdict
Code example #2
def _read_data_format(x):
    if stringish(x) and x.lower() in ("inf", "-inf"):
        return float(x)
    if stringish(x) and x.lower() == "true":
        return True
    if stringish(x) and x.lower() == "false":
        return False
    return x
Code example #3
 def _read_data_cell(self, t, f, x):
     if stringish(x) and x.lower() in ("inf", "-inf") and self.tic_dat_factory.infinity_io_flag == "N/A" and \
         not (t == "parameters" and self.tic_dat_factory.parameters):
         return float(x)
     if stringish(x) and x.lower() == "true":
         return True
     if stringish(x) and x.lower() == "false":
         return False
     return self.tic_dat_factory._general_read_cell(t, f, x)
Code example #4
 def __init__(self, model_type='gurobi', model_name="model"):
     """
     or another
     :param model_type: either gurobi, cplex or xpress
     :return: a Model object that encapsulates the appropriate engine model object
     """
     engines = {"gurobi": gurobi, "cplex": cplex, "xpress": xpress}
     verify(model_type in engines,
            "engine_type needs to be one of 'gurobi', cplex', 'xpress'")
     verify(
         not utils.stringish(engines[model_type]),
         "You need to have the %s package installed to build this model type."
         % engines[model_type])
     env = {"env": gurobi_env()} if model_type == "gurobi" else {}
     self._core_model = getattr(engines[model_type], {
         "gurobi": "Model",
         "cplex": "Model",
         "xpress": "problem"
     }[model_type])(model_name, **env)
     self._model_type = model_type
     self._sum = ({
         "gurobi": lambda: gurobi.quicksum,
         "cplex": lambda: self.core_model.sum,
         "xpress": lambda: xpress.Sum
     }[model_type])()
Code example #5
File: xls.py Project: adampkehoe/ticdat
 def _read_cell(x, field):
     dv, dt = self._get_dv_dt(table, field)
     rtn = x[field_indicies[field]]
     if rtn == "" and ((dt and dt.nullable) or (not dt and dv is None)):
         return None
     if treat_inf_as_infinity and utils.stringish(
             rtn) and rtn.lower() in ["inf", "-inf"]:
         return float(rtn.lower())
     if utils.numericish(rtn) and utils.safe_apply(int)(
             rtn) == rtn and dt and dt.must_be_int:
         rtn = int(rtn)
     if rtn == "":
         try_rtn = self.tic_dat_factory._general_read_cell(
             table, field, None)  # None as infinity flagging
         if utils.numericish(try_rtn):
             return try_rtn
     if utils.numericish(rtn) and dt and dt.datetime:
         rtn = utils.safe_apply(
             lambda: xlrd.xldate_as_tuple(rtn, datemode))()
         if rtn is not None:
             f = datetime.datetime
             if utils.pd:
                 f = utils.pd.Timestamp
             return f(year=rtn[0],
                      month=rtn[1],
                      day=rtn[2],
                      hour=rtn[3],
                      minute=rtn[4],
                      second=rtn[5])
     return self.tic_dat_factory._general_read_cell(table, field, rtn)
Code example #6
    def set_ampl_data(self, ampl_dat, ampl, table_to_set_name = None):
        """
        performs bulk setData on the AMPL-esque first argument.

        :param ampl_dat: an AmplTicDat object created by calling copy_to_ampl

        :param ampl: an amplpy.AMPL object

        :param table_to_set_name: a mapping of table_name to ampl set name

        :return:
        """
        verify(all(a.startswith("_") or a in self.all_tables for a in dir(ampl_dat)),
               "bad ampl_dat argument")
        verify(hasattr(ampl, "setData"), "bad ampl argument")
        table_to_set_name = table_to_set_name or {}
        verify(dictish(table_to_set_name) and all(hasattr(ampl_dat, k) and
                   utils.stringish(v) for k,v in table_to_set_name.items()),
               "bad table_to_set_name argument")
        for t in set(self.all_tables).intersection(dir(ampl_dat)):
            try:
                ampl.setData(getattr(ampl_dat, t), *([table_to_set_name[t]]
                                                if t in table_to_set_name else []))
            except Exception:
                raise utils.TicDatError(t + " cannot be passed as an argument to AMPL.setData()")
Code example #7
def read_lingo_text(tdf,results_text):
    """
    Read Lingo .ldt strings
    :param tdf: A TicDatFactory defining the schema
    :param results_text: A dictionary mapping table name to a string of Lingo results text
    :return: A TicDat object consistent with tdf
    """

    for i in results_text.values():
        verify(stringish(i), "text needs to be a string")

    def _get_as_type(val):
        try:
            return float(val)
        except ValueError:
            return val

    dict_with_lists = defaultdict(list)

    for tbn in results_text:
        rows = []
        text = results_text[tbn].strip().split("\n")
        for line in text:
            rows.append([_get_as_type(k) for k in line.strip().split()])
        dict_with_lists[tbn] = rows


    assert not find_duplicates_from_dict_ticdat(tdf, dict_with_lists), \
            "duplicates were found - if asserts are disabled, duplicate rows will overwrite"

    return tdf.TicDat(**{k.replace(tdf.lingo_prepend,"",1):v for k,v in dict_with_lists.items()})
Code example #8
File: jsontd.py Project: nandi6uc/ticdat
 def _create_tic_dat_dict(self, jdict):
     tdf = self.tic_dat_factory
     rtn = {}
     table_keys = defaultdict(list)
     for t in tdf.all_tables:
         for t2 in jdict:
             if stringish(t2) and t.lower() == t2.replace(" ", "_").lower():
                 table_keys[t].append(t2)
         if len(table_keys[t]) >= 1:
             verify(
                 len(table_keys[t]) < 2,
                 "Found duplicate matching keys for table %s" % t)
             rtn[t] = jdict[table_keys[t][0]]
     orig_rtn, rtn = rtn, {}
     for t, rows in orig_rtn.items():
         all_fields = tdf.primary_key_fields.get(t,
                                                 ()) + tdf.data_fields.get(
                                                     t, ())
         rtn[t] = []
         for row in rows:
             if dictish(row):
                 rtn[t].append({
                     f: tdf._general_read_cell(t, f, x)
                     for f, x in row.items()
                 })
             else:
                 rtn[t].append([
                     tdf._general_read_cell(t, f, x)
                     for f, x in zip(all_fields, row)
                 ])
     return rtn
Code example #9
File: xls.py Project: nandi6uc/ticdat
 def _read_cell(x, field):
     # reminder - data fields have a default default of zero, primary keys don't get a default default
     dv = self.tic_dat_factory.default_values.get(table, {}).get(
         field, ["LIST", "NOT", "POSSIBLE"])
     dt = self.tic_dat_factory.data_types.get(table, {}).get(field)
     rtn = x[field_indicies[field]]
     if rtn == "" and ((dt and dt.nullable) or (not dt and dv is None)):
         return None
     if treat_inf_as_infinity and utils.stringish(
             rtn) and rtn.lower() in ["inf", "-inf"]:
         return float(rtn.lower())
     if utils.numericish(rtn) and utils.safe_apply(int)(
             rtn) == rtn and dt and dt.must_be_int:
         rtn = int(rtn)
     if rtn == "":
         try_rtn = self.tic_dat_factory._general_read_cell(
             table, field, None)  # None as infinity flagging
         if utils.numericish(try_rtn):
             return try_rtn
     if utils.numericish(rtn) and dt and dt.datetime:
         rtn = utils.safe_apply(
             lambda: xlrd.xldate_as_tuple(rtn, datemode))()
         if rtn is not None:
             f = datetime.datetime
             if utils.pd:
                 f = utils.pd.Timestamp
             return f(year=rtn[0],
                      month=rtn[1],
                      day=rtn[2],
                      hour=rtn[3],
                      minute=rtn[4],
                      second=rtn[5])
     return self.tic_dat_factory._general_read_cell(table, field, rtn)
Code example #10
def convert_to_dicts_that_can_be_turned_into_DataFrames(
        tdf, dat, field_renamings=None):
    '''
    utility routine to help de-ticdat-ify small examples so that they can then be passed to
    amplpy team in a more easily understood notebook example with hard coded data.
    the inner dicts returned below can each be passed as an argument to pandas.DataFrame, and from there
    the `set_ampl_data` logic can be broken out explicitly
    :param tdf: a TicDatFactory
    :param dat: a TicDat object created by tdf
    :param field_renamings: the same argument used by copy_to_ampl
    :return:
    '''
    assert field_renamings is None or (utils.dictish(field_renamings) and
       all(utils.containerish(k) and len(k) == 2 and k[0] in tdf.all_tables and
           k[1] in tdf.primary_key_fields[k[0]] + tdf.data_fields[k[0]] and
           utils.stringish(v) and v not in tdf.primary_key_fields[k[0]] + tdf.data_fields[k[0]]
           for k,v in field_renamings.items())), "invalid field_renamings argument"
    dat = tdf.copy_to_pandas(dat, drop_pk_columns=False)

    def do_renames(t, df):
        for f in tdf.primary_key_fields[t] + tdf.data_fields[t]:
            if (t, f) in (field_renamings or []):
                df[field_renamings[t, f]] = df[f]
                df.drop(f, axis=1, inplace=True)
        return df

    rtn = {
        t: do_renames(t,
                      getattr(dat, t).reset_index(drop=True)).to_dict()
        for t in tdf.all_tables
    }
    return rtn
Code example #11
File: testmodel.py Project: austin-bren/ticdat
 def testGurobi(self):
     self.assertFalse(utils.stringish(gurobi))
     self._testCog("gurobi")
     self._testDiet("gurobi")
     self._testNetflow("gurobi")
     self._testFantop("gurobi")
     self._testParameters("gurobi")
Code example #12
File: testmodel.py Project: austin-bren/ticdat
 def testCplex(self):
     self.assertFalse(utils.stringish(cplex))
     self._testCog("cplex")
     self._testDiet("cplex")
     self._testNetflow("cplex")
     self._testFantop("cplex")
     self._testParameters("cplex")
Code example #13
def _insert_format(x):
    # note that in Python 1 == True and 0 == False, even though 1 is not True and 0 is not False
    if stringish(x):
        return "'%s'" % _fix_str(x)
    if x in (float("inf"), -float("inf")) or isinstance(x, datetime.datetime):
        return "'%s'" % x
    if x is None:
        return "null"
    return str(x)
Code example #14
File: pgtd.py Project: austin-bren/ticdat
 def db_default(t, f):
     rtn = self.tdf.default_values[t][f]
     if forced_field_types.get((t, f)) in ("bool", "boolean"):
         return bool(rtn)
     if rtn is None or rtn == "":
         return "NULL"
     if stringish(rtn) and rtn:
         return f"'{rtn}'"
     return rtn
Code example #15
    def set_data_type(self, table, field, number_allowed = True,
                      inclusive_min = True, inclusive_max = False, min = 0, max = float("inf"),
                      must_be_int = False, strings_allowed= (), nullable = False):
        """
        sets the data type for a field. By default, fields don't have types. Adding a data type doesn't block
        data of the wrong type from being entered. Data types are useful for recognizing errant data entries
        with find_data_type_failures(). Errant data entries can be replaced with replace_data_type_failures().

        :param table: a table in the schema

        :param field: a data field for this table

        :param number_allowed: boolean does this field allow numbers?

        :param inclusive_min: boolean : if number allowed, is the min inclusive?

        :param inclusive_max: boolean : if number allowed, is the max inclusive?

        :param min: if number allowed, the minimum value

        :param max: if number allowed, the maximum value

        :param must_be_int: boolean : if number allowed, must the number be integral?

        :param strings_allowed: if a collection - then a list of the strings allowed.
                                The empty collection prohibits strings.
                                If a "*", then any string is accepted.
        :param nullable: boolean : can this value contain null (aka None, aka nan, since pandas treats null as nan)

        :return:
        """
        verify(not self._has_been_used,
               "The data types can't be changed after a PanDatFactory has been used.")
        verify(table in self.all_tables, "Unrecognized table name %s"%table)
        verify(table not in self.generic_tables, "Cannot set data type for generic table")
        verify(field in self.data_fields[table] + self.primary_key_fields[table],
               "%s does not refer to a field for %s"%(field, table))

        verify((strings_allowed == '*') or
               (containerish(strings_allowed) and all(utils.stringish(x) for x in strings_allowed)),
"""The strings_allowed argument should be a container of strings, or the single '*' character.""")
        if utils.containerish(strings_allowed):
            strings_allowed = tuple(strings_allowed) # defensive copy
        if number_allowed:
            verify(utils.numericish(max), "max should be numeric")
            verify(utils.numericish(min), "min should be numeric")
            verify(max >= min, "max cannot be smaller than min")
            self._data_types[table][field] = TypeDictionary(number_allowed=True,
                strings_allowed=strings_allowed,  nullable = bool(nullable),
                min = min, max = max, inclusive_min= bool(inclusive_min), inclusive_max = bool(inclusive_max),
                must_be_int = bool(must_be_int))
        else :
            self._data_types[table][field] = TypeDictionary(number_allowed=False,
                strings_allowed=strings_allowed,  nullable = bool(nullable),
                min = 0, max = float("inf"), inclusive_min= True, inclusive_max = True,
                must_be_int = False)
Code example #16
File: xls.py Project: nandi6uc/ticdat
    def write_file(self,
                   tic_dat,
                   file_path,
                   allow_overwrite=False,
                   case_space_sheet_names=False):
        """
        write the ticDat data to an excel file

        :param tic_dat: the data object to write (typically a TicDat)

        :param file_path: The file path of the excel file to create
                          Needs to end in either ".xls" or ".xlsx"
                          The latter is capable of writing out larger tables,
                          but the former handles infinity seamlessly.
                          If ".xlsx", then be advised that +/- float("inf") will be replaced
                          with "inf"/"-inf", unless infinity_io_flag is being applied.

        :param allow_overwrite: boolean - are we allowed to overwrite an
                                existing file?
        :param case_space_sheet_names: boolean - make best guesses how to add spaces and upper case
                                       characters to sheet names

        :return:

        caveats: None may be written out as an empty string. This reflects the behavior of xlwt.
        """
        self._verify_differentiable_sheet_names()
        verify(
            utils.stringish(file_path)
            and (file_path.endswith(".xls") or file_path.endswith(".xlsx")),
            "file_path argument needs to end in .xls or .xlsx")
        msg = []
        if not self.tic_dat_factory.good_tic_dat_object(
                tic_dat, lambda m: msg.append(m)):
            raise TicDatError("Not a valid ticDat object for this schema : " +
                              " : ".join(msg))
        verify(not os.path.isdir(file_path),
               "A directory is not a valid xls file path")
        verify(allow_overwrite or not os.path.exists(file_path),
               "The %s path exists and overwrite is not allowed" % file_path)
        if self.tic_dat_factory.generic_tables:
            dat, tdf = utils.create_generic_free(tic_dat, self.tic_dat_factory)
            return tdf.xls.write_file(dat, file_path, allow_overwrite,
                                      case_space_sheet_names)
        case_space_sheet_names = case_space_sheet_names and \
                                 len(set(self.tic_dat_factory.all_tables)) == \
                                 len(set(map(case_space_to_pretty, self.tic_dat_factory.all_tables)))
        tbl_name_mapping = {
            t: case_space_to_pretty(t) if case_space_sheet_names else t
            for t in self.tic_dat_factory.all_tables
        }
        if file_path.endswith(".xls"):
            self._xls_write(tic_dat, file_path, tbl_name_mapping)
        else:
            self._xlsx_write(tic_dat, file_path, tbl_name_mapping)
Code example #17
 def _create_tic_dat_dict(self, jdict):
     tdf = self.tic_dat_factory
     rtn = {}
     table_keys = defaultdict(list)
     for t in tdf.all_tables:
         for t2 in jdict:
             if stringish(t2) and t.lower() == t2.replace(" ", "_").lower():
                 table_keys[t].append(t2)
         verify(len(table_keys[t]) >= 1, "Unable to find a matching key for table %s"%t)
         verify(len(table_keys[t]) < 2, "Found duplicate matching keys for table %s"%t)
         rtn[t] = jdict[table_keys[t][0]]
     return rtn
Code example #18
File: lingo.py Project: vn8317x/opalytics-ticdat
def create_lingo_text(tdf, tic_dat, infinity=INFINITY):
    """
    Generate a Lingo .ldt string from a TicDat object

    :param tdf: A TicDatFactory defining the schema

    :param tic_dat: A TicDat object consistent with tdf

    :param infinity: A number used to represent infinity in lingo

    :return: A string consistent with the Lingo .ldt format
    """
    msg = []
    verify(tdf.good_tic_dat_object(tic_dat, msg.append),
           "tic_dat not a good object for this factory : %s" % "\n".join(msg))
    verify(not tdf.generator_tables, "doesn't work with generator tables.")
    verify(
        not tdf.generic_tables,
        "doesn't work with generic tables. (not yet - will add ASAP as needed) "
    )
    dict_with_lists = defaultdict(list)
    dict_tables = {t for t, pk in tdf.primary_key_fields.items() if pk}
    prepend = getattr(tdf, "lingo_prepend", "")
    for t in dict_tables:
        for k, r in getattr(tic_dat, t).items():
            row = list(k) if containerish(k) else [k]
            for f in tdf.data_fields.get(t, []):
                row.append(r[f])
            dict_with_lists[t].append(row)
    for t in set(tdf.all_tables).difference(dict_tables):
        for r in getattr(tic_dat, t):
            row = [r[f] for f in tdf.data_fields[t]]
            dict_with_lists[t].append(row)
    rtn = "data:\n"
    for t in _sorted_tables(tdf):
        rtn += "%s" % (prepend + t)
        for field in tdf.data_fields[t]:
            rtn += ',' + prepend + t + "_" + field.replace(" ", "_").lower()
        rtn += "=\n"
        for row in dict_with_lists[t]:
            rtn += "\t"
            for field in row:
                if stringish(field):
                    rtn += field + " "
                else:
                    rtn += (str(infinity) if float('inf') == field else str(field)) + " "
            rtn += "\n"
        rtn += ";\n"
    rtn += "enddata"
    return rtn
Code example #19
    def copy_to_ampl(self, pan_dat, field_renamings = None, excluded_tables = None):
        """
        copies the pan_dat object into a new pan_dat object populated with amplpy.DataFrame objects
        performs a deep copy

        :param pan_dat: a PanDat object

        :param field_renamings: dict or None. If fields are to be renamed in the copy, then
                                a mapping from (table_name, field_name) -> new_field_name
                                If a data field is to be omitted, then new_field_name can be falsey
                                table_name cannot refer to an excluded table. (see below)
                                field_name doesn't have to refer to an element of
                                self.data_fields[t], but it does have to refer to a column in
                                the pan_dat.table_name DataFrame

        :param excluded_tables: If truthy, a list of tables to be excluded from the copy.
                                Tables without primary key fields are always excluded.

        :return: a deep copy of the tic_dat argument into amplpy.DataFrames
        """
        verify(amplpy, "amplpy needs to be installed in order to enable AMPL functionality")
        msg  = []
        verify(self.good_pan_dat_object(pan_dat, msg.append),
               "pan_dat not a good object for this factory : %s"%"\n".join(msg))
        verify(not excluded_tables or (containerish(excluded_tables) and
                                       set(excluded_tables).issubset(self.all_tables)),
               "bad excluded_tables argument")
        copy_tables = {t for t in self.all_tables if self.primary_key_fields[t]}.\
                      difference(excluded_tables or [])
        field_renamings = field_renamings or {}
        verify(dictish(field_renamings), "invalid field_renamings argument")
        for k,v in field_renamings.items():
            verify(containerish(k) and len(k) == 2 and k[0] in copy_tables and
                   k[1] in getattr(pan_dat, k[0]).columns and
                   ((v and utils.stringish(v)) or (not bool(v) and k[1] not in self.primary_key_fields[k[0]])),
                   "invalid field_renamings argument %s:%s"%(k,v))
        class AmplPanDat(object):
            def __repr__(self):
                return "td:" + tuple(copy_tables).__repr__()
        rtn = AmplPanDat()
        for t in copy_tables:
            rename = lambda f : field_renamings.get((t, f), f)
            df_ampl = amplpy.DataFrame(index=tuple(map(rename, self.primary_key_fields[t])))
            for f in self.primary_key_fields[t]:
                df_ampl.setColumn(rename(f), list(getattr(pan_dat, t)[f]))
            for f in {f for _t,f in field_renamings if _t == t}.union(self.data_fields[t]):
                if rename(f):
                    df_ampl.addColumn(rename(f), list(getattr(pan_dat, t)[f]))
            setattr(rtn, t, df_ampl)
        return rtn
Code example #20
File: pandatio.py Project: vn8317x/opalytics-ticdat
    def create_pan_dat(self, path_or_buf, fill_missing_fields=False, orient='split', **kwargs):
        """
        Create a PanDat object from a JSON file or string

        :param path_or_buf:  a valid JSON string or file-like

        :param fill_missing_fields: boolean. If truthy, missing fields will be filled in
                                    with their default value. Otherwise, missing fields
                                    throw an Exception.

        :param orient: Indication of expected JSON string format. See pandas.read_json for more details.

        :param kwargs: additional named arguments to pass to pandas.read_json

        :return: a PanDat object populated by the matching tables.

        caveats: Missing tables always throw an Exception.
                 Table names are matched with case-space insensitivity, but spaces
                 are respected for field names.
                 (ticdat supports whitespace in field names but not table names).
                 +- "inf", "-inf" strings will be converted to +-float("inf")
        """
        if os.path.exists(path_or_buf):
            verify(os.path.isfile(path_or_buf), "%s appears to be a directory and not a file." % path_or_buf)
            with open(path_or_buf, "r") as f:
                loaded_dict = json.load(f)
        else:
            verify(stringish(path_or_buf), "%s isn't a string" % path_or_buf)
            loaded_dict = json.loads(path_or_buf)
        verify(dictish(loaded_dict), "the json.load result doesn't resolve to a dictionary")
        verify(all(map(dictish, loaded_dict.values())),
               "the json.load result doesn't resolve to a dictionary whose values are themselves dictionaries")

        tbl_names = self._get_table_names(loaded_dict)
        verify("orient" not in kwargs, "orient should be passed as a non-kwargs argument")
        rtn = {t: pd.read_json(json.dumps(loaded_dict[f]), orient=orient, **kwargs) for t,f in tbl_names.items()}
        missing_fields = {(t, f) for t in rtn for f in all_fields(self.pan_dat_factory, t)
                          if f not in rtn[t].columns}
        if fill_missing_fields:
            for t,f in missing_fields:
                rtn[t][f] = self.pan_dat_factory.default_values[t][f]
        verify(fill_missing_fields or not missing_fields,
               "The following (table, field) pairs are missing fields.\n%s" % [(t, f) for t,f in missing_fields])
        for v in rtn.values():
            v.replace("inf", float("inf"), inplace=True)
            v.replace("-inf", -float("inf"), inplace=True)
        rtn = self.pan_dat_factory.PanDat(**rtn)
        msg = []
        assert self.pan_dat_factory.good_pan_dat_object(rtn, msg.append), str(msg)
        return rtn
Code example #21
File: opl.py Project: nandi6uc/ticdat
def create_opl_text(tdf, tic_dat, infinity=INFINITY):
    """
    Generate a OPL .dat string from a TicDat object
    :param tdf: A TicDatFactory defining the schema
    :param tic_dat: A TicDat object consistent with tdf
    :param infinity: A number used to represent infinity in OPL
    :return: A string consistent with the OPL .dat format
    """
    msg = []
    verify(tdf.good_tic_dat_object(tic_dat, msg.append),
           "tic_dat not a good object for this factory : %s" % "\n".join(msg))
    verify(not tdf.generator_tables, "doesn't work with generator tables.")
    verify(
        not tdf.generic_tables,
        "doesn't work with generic tables. (not yet - will add ASAP as needed) "
    )
    dict_with_lists = defaultdict(list)
    dict_tables = {t for t, pk in tdf.primary_key_fields.items() if pk}
    for t in dict_tables:
        for k, r in getattr(tic_dat, t).items():
            row = list(k) if containerish(k) else [k]
            for f in tdf.data_fields.get(t, []):
                row.append(r[f])
            dict_with_lists[t].append(row)
    for t in set(tdf.all_tables).difference(dict_tables):
        for r in getattr(tic_dat, t):
            row = [r[f] for f in tdf.data_fields[t]]
            dict_with_lists[t].append(row)

    rtn = ""
    for i, (t, l) in enumerate(dict_with_lists.items()):
        rtn += "\n" if i > 0 else ""
        rtn += "%s = {" % (tdf.opl_prepend + t)
        if len(l[0]) > 1:
            rtn += "\n"
        for x, r in enumerate(l):
            if len(r) > 1:
                rtn += "<"
            for j, v in enumerate(r):  # j, not i, to avoid shadowing the outer table index
                rtn += ('"%s"' % v if stringish(v) else
                        (str(infinity) if float('inf') == v else str(v))) + (
                            ", " if j < len(r) - 1 else "")
            if len(r) == 1 and len(l) - 1 != x:
                rtn += ', '
            if len(r) > 1:
                rtn += ">\n"
        rtn += "};\n"

    return rtn
Code example #22
File: xls.py Project: austin-bren/ticdat
 def _read_cell(x, field):
     dv, dt = self._get_dv_dt(table, field)
     rtn = x[field_indicies[field]]
     if rtn == "" and ((dt and dt.nullable) or (not dt and dv is None)):
         return None
     if treat_inf_as_infinity and utils.stringish(rtn) and rtn.lower() in ["inf", "-inf"]:
         return float(rtn.lower())
     if utils.numericish(rtn) and utils.safe_apply(int)(rtn) == rtn and dt and dt.must_be_int:
         rtn = int(rtn)
     if rtn == "":
         try_rtn = self.tic_dat_factory._general_read_cell(table, field, None) # None as infinity flagging
         if utils.numericish(try_rtn):
             return try_rtn
     if utils.numericish(rtn) and dt and dt.datetime and hasattr(sheet, "xldate_as_tuple_munge"):
         rtn = sheet.xldate_as_tuple_munge(rtn)
     return self.tic_dat_factory._general_read_cell(table, field, rtn)
Code example #23
File: xls.py Project: nandi6uc/ticdat
 def _get_sheets_and_fields(self,
                            xls_file_path,
                            all_tables,
                            row_offsets,
                            headers_present,
                            print_missing_tables=False):
     verify(
         utils.stringish(xls_file_path) and os.path.exists(xls_file_path),
         "xls_file_path argument %s is not a valid file path." %
         xls_file_path)
     try:
         book = xlrd.open_workbook(xls_file_path)
     except Exception as e:
         raise TicDatError("Unable to open %s as xls file : %s" %
                           (xls_file_path, e))
     sheets = defaultdict(list)
     for table, sheet in product(all_tables, book.sheets()):
         if table.lower()[:_longest_sheet] == sheet.name.lower().replace(
                 ' ', '_')[:_longest_sheet]:
             sheets[table].append(sheet)
     duplicated_sheets = tuple(_t for _t, _s in sheets.items()
                               if len(_s) > 1)
     verify(
         not duplicated_sheets,
         "The following sheet names were duplicated : " +
         ",".join(duplicated_sheets))
     sheets = FrozenDict({k: v[0] for k, v in sheets.items()})
     missing_tables = {t for t in all_tables if t not in sheets}
     if missing_tables and print_missing_tables:
         print(
             "The following table names could not be found in the %s file.\n%s\n"
             % (xls_file_path, "\n".join(missing_tables)))
     field_indicies, missing_fields, dup_fields = {}, {}, {}
     for table, sheet in sheets.items():
         field_indicies[table], missing_fields[table], dup_fields[table] = \
             self._get_field_indicies(table, sheet, row_offsets[table], headers_present)
     verify(
         not any(_ for _ in missing_fields.values()),
         "The following field names could not be found : \n" +
         "\n".join("%s : " % t + ",".join(bf)
                   for t, bf in missing_fields.items() if bf))
     verify(
         not any(_ for _ in dup_fields.values()),
         "The following field names were duplicated : \n" +
         "\n".join("%s : " % t + ",".join(bf)
                   for t, bf in dup_fields.items() if bf))
     return sheets, field_indicies, book.datemode
Code example #24
def _try_create_space_case_mapping(tdf, ticdat):
    '''
    :param tdf: a TicDatFactory
    :param ticdat: a ticdat for the tdf
    :return: {"mapping:mapping} if a good mapping can be made, else {"failures":failures}
    '''
    assert tdf.good_tic_dat_object(ticdat), "ticdat not a good object for the tdf"
    rtn = defaultdict(set)
    for t in tdf.all_tables:
        if tdf.primary_key_fields.get(t):
            for ks in getattr(ticdat, t):
                for k in (ks if containerish(ks) else [ks]):
                    if stringish(k):
                        newk = ''.join(list(map(lambda c: c.upper() if c.isalnum() else '_', k)))
                        rtn[newk].add(k)
    failures = {k:tuple(sorted(v)) for k,v in rtn.items() if len(v) > 1}
    if failures:
        return {"failures":failures}
    return {"mapping": {k:next(iter(v)) for k,v in rtn.items()}}
Code example #25
File: xls.py Project: austin-bren/ticdat
 def _get_sheets_and_fields(self, xls_file_path, all_tables, row_offsets, headers_present,
                            print_missing_tables = False):
     verify(utils.stringish(xls_file_path) and os.path.exists(xls_file_path),
            "xls_file_path argument %s is not a valid file path."%xls_file_path)
     try :
         book = xlrd.open_workbook(xls_file_path) if xls_file_path.endswith(".xls") else \
             openpyxl.load_workbook(xls_file_path, data_only=True)
     except Exception as e:
         raise TicDatError("Unable to open %s as xls file : %s"%(xls_file_path, e))
     sheet_name = lambda sheet: sheet.name if xls_file_path.endswith(".xls") else sheet.title
     sheets = defaultdict(list)
     book_sheets = lambda: book.sheets() if xls_file_path.endswith(".xls") else book.worksheets
     for table, sheet in product(all_tables, book_sheets()) :
         if table.lower()[:_longest_sheet] == sheet_name(sheet).lower().replace(' ', '_')[:_longest_sheet]:
             sheets[table].append(sheet)
     duplicated_sheets = tuple(_t for _t,_s in sheets.items() if len(_s) > 1)
     verify(not duplicated_sheets, "The following sheet names were duplicated : " +
            ",".join(duplicated_sheets))
     wrapped_sheet = lambda sheet: _XlrdSheetWrapper(sheet, book.datemode) if xls_file_path.endswith(".xls") else \
                                   _OpenPyxlSheetWrapper(sheet, prune_trailing_empty_rows=
                                     self.tic_dat_factory.xlsx_trailing_empty_rows == "prune")
     sheets = FrozenDict({k: wrapped_sheet(v[0]) for k,v in sheets.items()})
     missing_tables = {t for t in all_tables if t not in sheets}
     if missing_tables and print_missing_tables:
         print ("The following table names could not be found in the %s file.\n%s\n"%
                (xls_file_path,"\n".join(missing_tables)))
     field_indicies, missing_fields, dup_fields = {}, {}, {}
     for table, sheet in sheets.items() :
         field_indicies[table], missing_fields[table], dup_fields[table] = \
             self._get_field_indicies(table, sheet, row_offsets[table], headers_present)
     verify(not any(_ for _ in missing_fields.values()),
            "The following field names could not be found : \n" +
            "\n".join("%s : "%t + ",".join(bf) for t,bf in missing_fields.items() if bf))
     verify(not any(_ for _ in dup_fields.values()),
            "The following field names were duplicated : \n" +
            "\n".join("%s : "%t + ",".join(bf) for t,bf in dup_fields.items() if bf))
     return sheets, field_indicies
Code example #26
 def write_file(self, tic_dat, file_path, allow_overwrite=False):
     """
     write the ticDat data to an excel file
     :param tic_dat: the data object to write (typically a TicDat)
     :param file_path: The file path of the excel file to create
                       Needs to end in either ".xls" or ".xlsx"
                       The latter is capable of writing out larger tables,
                       but the former handles infinity seamlessly.
                       If ".xlsx", then be advised that +/- float("inf") will be replaced
                       with +/- 1e+100
     :param allow_overwrite: boolean - are we allowed to overwrite an
                             existing file?
     :return:
     caveats: None may be written out as an empty string. This reflects the behavior of xlwt.
     """
     self._verify_differentiable_sheet_names()
     verify(
         utils.stringish(file_path)
         and (file_path.endswith(".xls") or file_path.endswith(".xlsx")),
         "file_path argument needs to end in .xls or .xlsx")
     msg = []
     if not self.tic_dat_factory.good_tic_dat_object(
             tic_dat, lambda m: msg.append(m)):
         raise TicDatError("Not a valid ticDat object for this schema : " +
                           " : ".join(msg))
     verify(not os.path.isdir(file_path),
            "A directory is not a valid xls file path")
     verify(allow_overwrite or not os.path.exists(file_path),
            "The %s path exists and overwrite is not allowed" % file_path)
     if self.tic_dat_factory.generic_tables:
         dat, tdf = utils.create_generic_free(tic_dat, self.tic_dat_factory)
         return tdf.xls.write_file(dat, file_path, allow_overwrite)
     if file_path.endswith(".xls"):
         self._xls_write(tic_dat, file_path)
     else:
         self._xlsx_write(tic_dat, file_path)
Code example #27
def _deep_anonymize(x)  :
    if not hasattr(x, "__contains__") or utils.stringish(x):
        return x
    if utils.dictish(x) :
        return {_deep_anonymize(k):_deep_anonymize(v) for k,v in x.items()}
    return list(map(_deep_anonymize,x))
Code example #28
def read_opl_text(tdf,text, commaseperator = True):
    """
    Read an OPL .dat string
    :param tdf: A TicDatFactory defining the schema
    :param text: A string consistent with the OPL .dat format
    :return: A TicDat object consistent with tdf
    """
    verify(stringish(text), "text needs to be a string")
    # probably want to verify something about the ticdat factory, look at the wiki
    dict_with_lists = defaultdict(list)
    NONE, TABLE, ROW, ROWSTRING, ROWNUM, FIELD, STRING,  NUMBER = 1, 2, 3, 4, 5, 6, 7, 8
    mode = NONE
    field = ''
    table_name = ''
    row = []

    def to_number(st, pos):
        try:
            return float(st)
        except ValueError:
            verify(False,
                   "Badly formatted string - Field '%s' is not a valid number. Character position [%s]." % (st, pos))

    for i,c in enumerate(text):
        if mode not in [STRING, ROWSTRING] and (c.isspace() or c == '{' or c == ';'):
            if mode in [NUMBER, ROWNUM, FIELD] and not commaseperator:
                c = ','
            else:
                continue
        if mode in [STRING, ROWSTRING]:
            if c == '"':
                if text[i-1] == '\\':
                    field = field[:-1] + '"'
                else:
                    if mode is ROWSTRING:
                        row.append(field)
                        field = ''
                        verify(len(row) == len((dict_with_lists[table_name] or [row])[0]),
                               "Inconsistent row lengths found for table %s" % table_name)
                        dict_with_lists[table_name].append(row)
                        row = []
                        mode = TABLE
                    else:
                        mode = FIELD
            else:
                field += c
        elif c == '=':
            verify(mode is NONE, "Badly formatted string, unrecognized '='. Character position [%s]"%i)
            verify(len(table_name) > 0, "Badly formatted string, table name can't be blank. Character position [%s]"%i)
            verify(table_name not in dict_with_lists.keys(), "Can't have duplicate table name. [Character position [%s]"%i)
            dict_with_lists[table_name] = []
            mode = TABLE
        elif c == '<':
            verify(mode is TABLE, "Badly formatted string, unrecognized '<'. Character position [%s]"%i)
            mode = ROW

        elif c == ',':
            verify(mode in [ROW, FIELD, NUMBER, ROWNUM, TABLE], "Badly formatted string, unrecognized ','. \
                                                                    Character position [%s]"%i)
            if mode is TABLE:
                continue
            if mode is ROWNUM:
                field = to_number(field,i)
                row.append(field)
                field = ''
                verify(len(row) == len((dict_with_lists[table_name] or [row])[0]),
                       "Inconsistent row lengths found for table %s" % table_name)
                dict_with_lists[table_name].append(row)
                row = []
                mode = TABLE
            else:
                if mode is NUMBER:
                    field = to_number(field,i)
                row.append(field)
                field = ''
                mode = ROW

        elif c == '"':
            verify(mode in [ROW, TABLE], "Badly formatted string, unrecognized '\"'. Character position [%s]"%i)
            if mode is ROW:
                mode = STRING
            if mode is TABLE:
                mode = ROWSTRING

        elif c == '}':
            verify(mode in [TABLE, ROWNUM], "Badly formatted string, unrecognized '}'. Character position [%s]"%i)
            if mode is ROWNUM:
                field = to_number(field,i)
                row.append(field)
                field = ''
                verify(len(row) == len((dict_with_lists[table_name] or [row])[0]),
                       "Inconsistent row lengths found for table %s" % table_name)
                dict_with_lists[table_name].append(row)
            row = []
            table_name = ''
            mode = NONE

        elif c == '>':
            verify(mode in [ROW, FIELD, NUMBER], "Badly formatted string, unrecognized '>'. \
                                                                    Character position [%s]"%i)
            if mode is NUMBER:
                field = to_number(field,i)
                mode = FIELD
            if mode is FIELD:
                row.append(field)
                field = ''
            verify(len(row) == len((dict_with_lists[table_name] or [row])[0]),
                   "Inconsistent row lengths found for table %s"%table_name)
            dict_with_lists[table_name].append(row)
            row = []
            mode = TABLE
        else:
            verify(mode in [NONE, ROW, ROWNUM, FIELD, NUMBER], "Badly formatted string, \
                                                                    unrecognized '%s'. Character position [%s]"%(c,i))
            if mode is NONE:
                table_name += c
            elif mode is TABLE:
                mode = ROWNUM
                field += c
            else:
                mode = NUMBER
                field += c
    assert not find_duplicates_from_dict_ticdat(tdf, dict_with_lists), \
            "duplicates were found - if asserts are disabled, duplicate rows will overwrite"

    return tdf.TicDat(**{k.replace(tdf.opl_prepend,"",1):v for k,v in dict_with_lists.items()})
Code example #29
    def create_pan_dat(self,
                       path_or_buf,
                       fill_missing_fields=False,
                       orient='split',
                       **kwargs):
        """
        Create a PanDat object from a JSON file or string

        :param path_or_buf:  a valid JSON string or file-like

        :param fill_missing_fields: boolean. If truthy, missing fields will be filled in
                                    with their default value. Otherwise, missing fields
                                    throw an Exception.

        :param orient: Indication of expected JSON string format. See pandas.read_json for more details.

        :param kwargs: additional named arguments to pass to pandas.read_json

        :return: a PanDat object populated by the matching tables.

        caveats: Missing tables always resolve to an empty table.

                 Table names are matched with case-space insensitivity, but spaces
                 are respected for field names.

                 (ticdat supports whitespace in field names but not table names).

        Note that if you save a DataFrame to json and then recover it, the type of data might change.
        Specifically, text that looks numeric might be recovered as a number, to include the loss of leading zeros.
        To address this, you need to either use set_data_type for your
        PanDatFactory, or specify "dtype" in kwargs. (The former is obviously better).
        """
        if stringish(path_or_buf) and os.path.exists(path_or_buf):
            verify(
                os.path.isfile(path_or_buf),
                "%s appears to be a directory and not a file." % path_or_buf)
            with open(path_or_buf, "r") as f:
                loaded_dict = json.load(f)
        else:
            verify(stringish(path_or_buf), "%s isn't a string" % path_or_buf)
            loaded_dict = json.loads(path_or_buf)
        verify(dictish(loaded_dict),
               "the json.load result doesn't resolve to a dictionary")
        verify(
            all(map(dictish, loaded_dict.values())),
            "the json.load result doesn't resolve to a dictionary whose values are themselves dictionaries"
        )

        tbl_names = self._get_table_names(loaded_dict)
        verify("orient" not in kwargs,
               "orient should be passed as a non-kwargs argument")
        rtn = {}
        for t, f in tbl_names.items():
            kwargs_ = dict(kwargs)
            if "dtype" not in kwargs_:
                kwargs_["dtype"] = self.pan_dat_factory._dtypes_for_pandas_read(t)
            rtn[t] = pd.read_json(json.dumps(loaded_dict[f]),
                                  orient=orient,
                                  **kwargs_)
        missing_fields = {(t, f)
                          for t in rtn
                          for f in all_fields(self.pan_dat_factory, t)
                          if f not in rtn[t].columns}
        if fill_missing_fields:
            for t, f in missing_fields:
                rtn[t][f] = self.pan_dat_factory.default_values[t][f]
        verify(
            fill_missing_fields or not missing_fields,
            "The following (table, field) pairs are missing fields.\n%s" %
            [(t, f) for t, f in missing_fields])
        missing_tables = sorted(
            set(self.pan_dat_factory.all_tables).difference(rtn))
        if missing_tables:
            print(
                "The following table names could not be found in the JSON source.\n%s\n"
                % "\n".join(missing_tables))
        return _clean_pandat_creator(self.pan_dat_factory, rtn, json_read=True)
Code example #30
 def _find_table_matchings(self, inputset):
     rtn = defaultdict(list)
     for t, x in product(self.tic_dat_factory.all_tables, inputset.schema):
         if stringish(x) and t.lower() == x.lower().replace(" ", "_"):
             rtn[t].append(x)
     return rtn