Code Example #1
File: xls.py Project: vn8317x/opalytics-ticdat
 def _xls_write(self, tic_dat, file_path, tbl_name_mapping):
     verify(xlwt,
            "Can't write .xls files because xlwt package isn't installed.")
     tdf = self.tic_dat_factory
     book = xlwt.Workbook()
     for t in sorted(sorted(tdf.all_tables),
                     key=lambda x: len(tdf.primary_key_fields.get(x, ()))):
         sheet = book.add_sheet(tbl_name_mapping[t][:_longest_sheet])
         for i, f in enumerate(
                 tdf.primary_key_fields.get(t, ()) +
                 tdf.data_fields.get(t, ())):
             sheet.write(0, i, f)
         _t = getattr(tic_dat, t)
         if utils.dictish(_t):
             for row_ind, (p_key, data) in enumerate(_t.items()):
                 for field_ind, cell in enumerate(
                     (p_key if containerish(p_key) else (p_key, )) +
                         tuple(data[_f]
                               for _f in tdf.data_fields.get(t, ()))):
                     sheet.write(row_ind + 1, field_ind, cell)
         else:
             for row_ind, data in enumerate(
                     _t if containerish(_t) else _t()):
                 for field_ind, cell in enumerate(
                         tuple(data[_f] for _f in tdf.data_fields[t])):
                     sheet.write(row_ind + 1, field_ind, cell)
     if os.path.exists(file_path):
         os.remove(file_path)
     book.save(file_path)
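This private writer is reached through the factory's public xls interface. A minimal usage sketch (the one-table schema and data are hypothetical; assumes ticdat and xlwt are installed):

from ticdat import TicDatFactory

tdf = TicDatFactory(foods=[["Name"], ["Cost"]])  # one primary key field, one data field
dat = tdf.TicDat(foods={"pizza": {"Cost": 2.5}, "milk": {"Cost": 1.1}})
tdf.xls.write_file(dat, "diet.xls")  # a .xls path routes to the xlwt-based _xls_write above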
Code Example #2
 def write_directory(self,
                     tic_dat,
                     dir_path,
                     allow_overwrite=False,
                     dialect='excel',
                     write_header=True):
     """
     write the ticDat data to a collection of csv files
     :param tic_dat: the data object
     :param dir_path: the directory in which to write the csv files
     :param allow_overwrite: boolean - are we allowed to overwrite existing
                             files?
     :param dialect: the csv dialect. Consult csv documentation for details.
     :param write_header: Boolean. Should the header information be written
                          as the first row?
     :return:
     """
     verify(csv, "csv needs to be installed to use this subroutine")
     verify(dialect in csv.list_dialects(), "Invalid dialect %s" % dialect)
     verify(not os.path.isfile(dir_path),
            "A file is not a valid directory path")
     if self.tic_dat_factory.generic_tables:
         dat, tdf = create_generic_free(tic_dat, self.tic_dat_factory)
         return tdf.csv.write_directory(dat, dir_path, allow_overwrite,
                                        dialect, write_header)
     tdf = self.tic_dat_factory
     msg = []
     if not self.tic_dat_factory.good_tic_dat_object(
             tic_dat, lambda m: msg.append(m)):
         raise TicDatError("Not a valid TicDat object for this schema : " +
                           " : ".join(msg))
     if not allow_overwrite:
         for t in tdf.all_tables:
             f = os.path.join(dir_path, t + ".csv")
             verify(not os.path.exists(f),
                    "The %s path exists and overwrite is not allowed" % f)
     if not os.path.isdir(dir_path):
         os.mkdir(dir_path)
     for t in tdf.all_tables:
         f = os.path.join(dir_path, t + ".csv")
         with open(f, 'w') as csvfile:
             writer = csv.DictWriter(
                 csvfile,
                 dialect=dialect,
                 fieldnames=tdf.primary_key_fields.get(t, ()) +
                 tdf.data_fields.get(t, ()))
             if write_header:
                 writer.writeheader()
             _t = getattr(tic_dat, t)
             if dictish(_t):
                 for p_key, data_row in _t.items():
                     primaryKeyDict = {
                         f: v
                         for f, v in zip(
                             tdf.primary_key_fields[t],
                             p_key if containerish(p_key) else (p_key, ))
                     }
                     writer.writerow(dict(data_row, **primaryKeyDict))
             else:
                 for data_row in (_t if containerish(_t) else _t()):
                     writer.writerow(dict(data_row))
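A short usage sketch for the routine above, producing one csv file per table (the schema is hypothetical):

from ticdat import TicDatFactory

tdf = TicDatFactory(foods=[["Name"], ["Cost"]])
dat = tdf.TicDat(foods={"pizza": {"Cost": 2.5}})
# creates diet_csvs/foods.csv with "Name,Cost" as its header row
tdf.csv.write_directory(dat, "diet_csvs", allow_overwrite=True, write_header=True)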
Code Example #3
File: xls.py Project: austin-bren/ticdat
 def _xlsx_write(self, tic_dat, file_path, tbl_name_mapping):
     verify(xlsx, "Can't write .xlsx files because xlsxwriter package isn't installed.")
     tdf = self.tic_dat_factory
     if os.path.exists(file_path):
         os.remove(file_path)
     book = xlsx.Workbook(file_path)
     def clean_for_write(t, f, x):
         if self.tic_dat_factory.infinity_io_flag != "N/A" or \
            (t == "parameters" and self.tic_dat_factory.parameters):
             return self.tic_dat_factory._infinity_flag_write_cell(t, f, x)
         if x in [float("inf"), -float("inf")] or isinstance(x, datetime.datetime):
             return str(x)
         return x
     for t in sorted(sorted(tdf.all_tables),
                     key=lambda x: len(tdf.primary_key_fields.get(x, ()))):
         all_flds = tdf.primary_key_fields.get(t, ()) + tdf.data_fields.get(t, ())
         sheet = book.add_worksheet(tbl_name_mapping[t][:_longest_sheet])
         for i, f in enumerate(all_flds):
             sheet.write(0, i, f)
         _t = getattr(tic_dat, t)
         if utils.dictish(_t):
             for row_ind, (p_key, data) in enumerate(_t.items()):
                 for field_ind, cell in enumerate(
                         (p_key if containerish(p_key) else (p_key,)) +
                         tuple(data[_f] for _f in tdf.data_fields.get(t, ()))):
                     sheet.write(row_ind + 1, field_ind,
                                 clean_for_write(t, all_flds[field_ind], cell))
         else:
             for row_ind, data in enumerate(_t if containerish(_t) else _t()):
                 for field_ind, cell in enumerate(
                         tuple(data[_f] for _f in tdf.data_fields[t])):
                     sheet.write(row_ind + 1, field_ind,
                                 clean_for_write(t, all_flds[field_ind], cell))
     book.close()
Code Example #4
    def set_data_type(self, table, field, number_allowed = True,
                      inclusive_min = True, inclusive_max = False, min = 0, max = float("inf"),
                      must_be_int = False, strings_allowed= (), nullable = False):
        """
        sets the data type for a field. By default, fields don't have types. Adding a data type doesn't block
        data of the wrong type from being entered. Data types are useful for recognizing errant data entries
        with find_data_type_failures(). Errant data entries can be replaced with replace_data_type_failures().

        :param table: a table in the schema

        :param field: a data field for this table

        :param number_allowed: boolean : does this field allow numbers?

        :param inclusive_min: boolean : if number allowed, is the min inclusive?

        :param inclusive_max: boolean : if number allowed, is the max inclusive?

        :param min: if number allowed, the minimum value

        :param max: if number allowed, the maximum value

        :param must_be_int: boolean : if number allowed, must the number be integral?

        :param strings_allowed: if a collection - then a list of the strings allowed.
                                The empty collection prohibits strings.
                                If a "*", then any string is accepted.
        :param nullable : boolean : can this value contain null (aka None aka nan (since pandas treats null as nan))

        :return:
        """
        verify(not self._has_been_used,
               "The data types can't be changed after a PanDatFactory has been used.")
        verify(table in self.all_tables, "Unrecognized table name %s"%table)
        verify(table not in self.generic_tables, "Cannot set data type for generic table")
        verify(field in self.data_fields[table] + self.primary_key_fields[table],
               "%s does not refer to a field for %s"%(field, table))

        verify((strings_allowed == '*') or
               (containerish(strings_allowed) and all(utils.stringish(x) for x in strings_allowed)),
"""The strings_allowed argument should be a container of strings, or the single '*' character.""")
        if utils.containerish(strings_allowed):
            strings_allowed = tuple(strings_allowed) # defensive copy
        if number_allowed:
            verify(utils.numericish(max), "max should be numeric")
            verify(utils.numericish(min), "min should be numeric")
            verify(max >= min, "max cannot be smaller than min")
            self._data_types[table][field] = TypeDictionary(number_allowed=True,
                strings_allowed=strings_allowed,  nullable = bool(nullable),
                min = min, max = max, inclusive_min= bool(inclusive_min), inclusive_max = bool(inclusive_max),
                must_be_int = bool(must_be_int))
        else :
            self._data_types[table][field] = TypeDictionary(number_allowed=False,
                strings_allowed=strings_allowed,  nullable = bool(nullable),
                min = 0, max = float("inf"), inclusive_min= True, inclusive_max = True,
                must_be_int = False)
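As the docstring stresses, declaring a type doesn't block bad entries; it enables find_data_type_failures (named above) to locate them. A hedged sketch of that workflow with a hypothetical schema:

from ticdat import PanDatFactory

pdf = PanDatFactory(foods=[["Name"], ["Cost"]])
pdf.set_data_type("foods", "Cost", min=0, max=float("inf"), inclusive_min=True)
dat = pdf.PanDat(foods={"Name": ["pizza"], "Cost": [-2.5]})
failures = pdf.find_data_type_failures(dat)  # should flag the negative Cost entry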
Code Example #5
def assertTicDatTablesSame(t1, t2, _goodTicDatTable,
                           _assertTrue = assertTrue, _assertFalse = assertFalse) :
    _assertTrue(set(t1) == set(t2))
    _assertTrue(_goodTicDatTable(t1) and _goodTicDatTable(t2))
    if not dictish(t1) and not dictish(t2) :
        return
    if dictish(t1) != dictish(t2) and dictish(t2) :
        t1,t2 = t2,t1
    if not dictish(t2) :
        _assertTrue(all(containerish(x) and len(x) == 0 for x in t1.values()))
        return
    for k1,v1 in t1.items() :
        v2 = t2[k1]
        if dictish(v1) != dictish(v2) and dictish(v2) :
            v2, v1 = v1, v2
        if dictish(v1) and dictish(v2) :
            _assertTrue(set(v1) == set(v2))
            for _k1 in v1 :
                _assertTrue(v1[_k1] == v2[_k1])
        elif dictish(v1) and containerish(v2) :
            _assertTrue(sorted(v1.values()) == sorted(v2))
        elif dictish(v1) :
            _assertTrue(len(v1) == 1 and next(iter(v1.values())) == v2)
        else :
            if containerish(v1) != containerish(v2) and containerish(v2) :
                v2, v1 = v1, v2
            if containerish(v1) and containerish(v2) :
                _assertTrue(len(v1) == len(v2))
                _assertTrue(all(v1[x] == v2[x] for x in range(len(v1))))
            elif containerish(v1) :
                _assertTrue(len(v1) == 1 and v1[0] == v2)
            else :
                _assertTrue(v1 == v2)
Code Example #6
    def copy_to_ampl(self, pan_dat, field_renamings = None, excluded_tables = None):
        """
        copies the pan_dat object into a new object populated with amplpy.DataFrame objects,
        performing a deep copy

        :param pan_dat: a PanDat object

        :param field_renamings: dict or None. If fields are to be renamed in the copy, then
                                a mapping from (table_name, field_name) -> new_field_name
                                If a data field is to be omitted, then new_field can be falsey
                                table_name cannot refer to an excluded table. (see below)
                                field_name doesn't have to refer to an element of
                                self.data_fields[t], but it does have to refer to a column in
                                the pan_dat.table_name DataFrame

        :param excluded_tables: If truthy, a list of tables to be excluded from the copy.
                                Tables without primary key fields are always excluded.

        :return: a deep copy of the pan_dat argument into amplpy.DataFrames
        """
        verify(amplpy, "amplpy needs to be installed in order to enable AMPL functionality")
        msg  = []
        verify(self.good_pan_dat_object(pan_dat, msg.append),
               "pan_dat not a good object for this factory : %s"%"\n".join(msg))
        verify(not excluded_tables or (containerish(excluded_tables) and
                                       set(excluded_tables).issubset(self.all_tables)),
               "bad excluded_tables argument")
        copy_tables = {t for t in self.all_tables if self.primary_key_fields[t]}.\
                      difference(excluded_tables or [])
        field_renamings = field_renamings or {}
        verify(dictish(field_renamings), "invalid field_renamings argument")
        for k,v in field_renamings.items():
            verify(containerish(k) and len(k) == 2 and k[0] in copy_tables and
                   k[1] in getattr(pan_dat, k[0]).columns and
                   ((v and utils.stringish(v)) or (not bool(v) and k[1] not in self.primary_key_fields[k[0]])),
                   "invalid field_renamings argument %s:%s"%(k,v))
        class AmplPanDat(object):
            def __repr__(self):
                return "td:" + tuple(copy_tables).__repr__()
        rtn = AmplPanDat()
        for t in copy_tables:
            rename = lambda f : field_renamings.get((t, f), f)
            df_ampl = amplpy.DataFrame(index=tuple(map(rename, self.primary_key_fields[t])))
            for f in self.primary_key_fields[t]:
                df_ampl.setColumn(rename(f), list(getattr(pan_dat, t)[f]))
            for f in {f for _t,f in field_renamings if _t == t}.union(self.data_fields[t]):
                if rename(f):
                    df_ampl.addColumn(rename(f), list(getattr(pan_dat, t)[f]))
            setattr(rtn, t, df_ampl)
        return rtn
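A sketch of the field_renamings convention documented above; pdf and dat are assumed to be a PanDatFactory and a matching PanDat, and "Notes" is a hypothetical data field:

ampl_dat = pdf.copy_to_ampl(dat, field_renamings={
    ("foods", "Cost"): "cost",  # renamed in the amplpy copy
    ("foods", "Notes"): "",     # falsey new name -> this data field is omitted
})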
Code Example #7
def assertTicDatTablesSame(t1, t2, _goodTicDatTable,
                           _assertTrue = assertTrue, _assertFalse = assertFalse) :
    _assertTrue(set(t1) == set(t2))
    _assertTrue(_goodTicDatTable(t1) and _goodTicDatTable(t2))
    if not dictish(t1) and not dictish(t2) :
        return
    if dictish(t1) != dictish(t2) and dictish(t2) :
        t1,t2 = t2,t1
    if not dictish(t2) :
        _assertTrue(all(containerish(x) and len(x) == 0 for x in t1.values()))
        return
    for k1,v1 in t1.items() :
        v2 = t2[k1]
        if dictish(v1) != dictish(v2) and dictish(v2) :
            v2, v1 = v1, v2
        if dictish(v1) and dictish(v2) :
            _assertTrue(set(v1) == set(v2))
            for _k1 in v1 :
                _assertTrue(v1[_k1] == v2[_k1])
        elif dictish(v1) and containerish(v2) :
            _assertTrue(sorted(map(str, v1.values())) == sorted(map(str, v2)))
        elif dictish(v1) :
            _assertTrue(len(v1) == 1 and next(iter(v1.values())) == v2)
        else :
            if containerish(v1) != containerish(v2) and containerish(v2) :
                v2, v1 = v1, v2
            if containerish(v1) and containerish(v2) :
                _assertTrue(len(v1) == len(v2))
                _assertTrue(all(v1[x] == v2[x] for x in range(len(v1))))
            elif containerish(v1) :
                _assertTrue(len(v1) == 1 and v1[0] == v2)
            else :
                _assertTrue(v1 == v2)
Code Example #8
def convert_to_dicts_that_can_be_turned_into_DataFrames(
        tdf, dat, field_renamings=None):
    '''
    Utility routine to help de-ticdat-ify small examples so that they can then be passed to the
    amplpy team in a more easily understood notebook example with hard-coded data.
    The inner dicts returned below can each be passed as an argument to pandas.DataFrame, and from
    there the `set_ampl_data` logic can be broken out explicitly.
    :param tdf: a TicDatFactory
    :param dat: a TicDat object created by tdf
    :param field_renamings: the same argument used by copy_to_ampl
    :return:
    '''
    assert field_renamings is None or (utils.dictish(field_renamings) and
       all(utils.containerish(k) and len(k) == 2 and k[0] in tdf.all_tables and
           k[1] in tdf.primary_key_fields[k[0]] + tdf.data_fields[k[0]] and
           utils.stringish(v) and v not in tdf.primary_key_fields[k[0]] + tdf.data_fields[k[0]]
           for k,v in field_renamings.items())), "invalid field_renamings argument"
    dat = tdf.copy_to_pandas(dat, drop_pk_columns=False)

    def do_renames(t, df):
        for f in tdf.primary_key_fields[t] + tdf.data_fields[t]:
            if (t, f) in (field_renamings or []):
                df[field_renamings[t, f]] = df[f]
                df.drop(f, axis=1, inplace=True)
        return df

    rtn = {
        t: do_renames(t,
                      getattr(dat, t).reset_index(drop=True)).to_dict()
        for t in tdf.all_tables
    }
    return rtn
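As the docstring says, each inner dict can be handed directly to pandas.DataFrame; a brief sketch, with tdf and dat assumed from context:

import pandas as pd

dicts = convert_to_dicts_that_can_be_turned_into_DataFrames(tdf, dat, field_renamings={})
frames = {t: pd.DataFrame(d) for t, d in dicts.items()}  # one DataFrame per table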
Code Example #9
File: jsontd.py Project: nandi6uc/ticdat
def make_json_dict(tdf,
                   tic_dat,
                   verbose=False,
                   use_infinity_io_flag_if_provided=False):
    assert tdf.good_tic_dat_object(tic_dat)

    def write_cell(t, f, x):
        if isinstance(x, datetime.datetime):
            return str(x)
        return x if not use_infinity_io_flag_if_provided else tdf._infinity_flag_write_cell(
            t, f, x)

    jdict = defaultdict(list)
    for t in tdf.all_tables:
        all_fields = tdf.primary_key_fields.get(t, ()) + tdf.data_fields.get(
            t, ())

        def make_row(row):
            assert containerish(row) and len(row) == len(all_fields)
            row = [write_cell(t, f, x) for f, x in zip(all_fields, row)]
            return {f: v for f, v in zip(all_fields, row)} if verbose else row

        appender = lambda row: jdict[t].append(make_row(row))
        tbl = getattr(tic_dat, t)
        if tdf.primary_key_fields.get(t):
            for pk, data_row in tbl.items():
                appender((list(pk) if containerish(pk) else [pk]) +
                         [data_row[df] for df in tdf.data_fields[t]])
        else:
            for data_row in tbl:
                appender([data_row[df] for df in tdf.data_fields[t]])
    return jdict
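The verbose flag switches each emitted row between a plain list and a field-keyed dict; a sketch with tdf and dat assumed from context (table and values hypothetical):

jdict = make_json_dict(tdf, dat, verbose=True)
# verbose=True  -> jdict["foods"] resembles [{"Name": "pizza", "Cost": 2.5}, ...]
# verbose=False -> jdict["foods"] resembles [["pizza", 2.5], ...]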
Code Example #10
 def _get_data(self, tic_dat, as_sql):
     rtn = []
     for t in self.tic_dat_factory.all_tables:
         _t = getattr(tic_dat, t)
         if dictish(_t):
             primarykeys = tuple(self.tic_dat_factory.primary_key_fields[t])
             for pkrow, sqldatarow in _t.items():
                 _items = list(sqldatarow.items())
                 fields = primarykeys + tuple(x[0] for x in _items)
                 datarow = ((pkrow, ) if len(primarykeys) == 1 else
                            pkrow) + tuple(x[1] for x in _items)
                 assert len(datarow) == len(fields)
                 datarow = tuple(
                     self._write_data_cell(t, f, x)
                     for f, x in zip(fields, datarow))
                 stmt = "INSERT INTO [%s] (%s) VALUES (%s)" % (t, ",".join(
                     _brackets(fields)), ",".join("%s" if as_sql else "?"
                                                  for _ in fields))
                 if as_sql:
                     rtn.append((stmt %
                                 tuple(map(_insert_format, datarow))) + ";")
                 else:
                     rtn.append((stmt, datarow))
         else:
             for sqldatarow in (_t if containerish(_t) else _t()):
                 k, v = zip(*sqldatarow.items())
                 stmt = "INSERT INTO [%s] (%s) VALUES (%s)" % \
                          (t, ",".join(_brackets(k)), ",".join(
                             ["%s" if as_sql else "?"] * len(sqldatarow)))
                 if as_sql:
                     rtn.append((stmt % tuple(map(_insert_format, v))) + ";")
                 else:
                     rtn.append((stmt, v))
     return tuple(rtn)
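For orientation, the two as_sql branches yield either literal SQL statements or parameterized (statement, data) pairs; roughly, for a hypothetical single-key table:

# as_sql=True  -> 'INSERT INTO [foods] ([Name],[Cost]) VALUES (pizza,2.5);'  (after _insert_format)
# as_sql=False -> ('INSERT INTO [foods] ([Name],[Cost]) VALUES (?,?)', ('pizza', 2.5))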
Code Example #11
    def testSilly(self):
        if not self.can_run:
            return
        tdf = TicDatFactory(**sillyMeSchema())
        ticDat = tdf.TicDat(**sillyMeData())
        schema2 = sillyMeSchema()
        schema2["b"][0] = ("bField2", "bField1", "bField3")
        schema3 = sillyMeSchema()
        schema3["a"][1] = ("aData2", "aData3", "aData1")
        schema4 = sillyMeSchema()
        schema4["a"][1] = ("aData1", "aData3")
        schema5 = sillyMeSchema()
        _tuple = lambda x: tuple(x) if utils.containerish(x) else (x, )
        for t in ("a", "b"):
            schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0])
        schema5["a"][0], schema5["b"][0] = (), []
        schema6 = sillyMeSchema()
        schema6["d"] = [["dField"], ()]

        tdf2, tdf3, tdf4, tdf5, tdf6 = (TicDatFactory(**x)
                                        for x in (schema2, schema3, schema4,
                                                  schema5, schema6))
        tdf5.set_generator_tables(("a", "c"))
        tdf5 = tdf5.clone()
        filePath = os.path.join(_scratchDir, "silly.db")
        tdf.sql.write_db_data(ticDat, filePath)
        self.assertFalse(tdf.sql.find_duplicates(filePath))

        ticDat2 = tdf2.sql.create_tic_dat(filePath)
        self.assertFalse(tdf._same_data(ticDat, ticDat2))

        ticDat3 = tdf3.sql.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, ticDat3))

        ticDat4 = tdf4.sql.create_tic_dat(filePath)
        for t in ["a", "b"]:
            for k, v in getattr(ticDat4, t).items():
                for _k, _v in v.items():
                    self.assertTrue(getattr(ticDat, t)[k][_k] == _v)
                if set(v) == set(getattr(ticDat, t)[k]):
                    self.assertTrue(t == "b")
                else:
                    self.assertTrue(t == "a")

        ticDat5 = tdf5.sql.create_tic_dat(filePath)
        self.assertTrue(tdf5._same_data(tdf._keyless(ticDat), ticDat5))
        self.assertTrue(
            callable(ticDat5.a) and callable(ticDat5.c)
            and not callable(ticDat5.b))

        self.assertTrue("table d" in self.firesException(
            lambda: tdf6.sql.create_tic_dat(filePath)))

        ticDat.a["theboger"] = (1, None, 12)
        tdf.sql.write_db_data(ticDat, makeCleanPath(filePath))
        ticDatNone = tdf.sql.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, ticDatNone))
        self.assertTrue(ticDatNone.a["theboger"]["aData2"] == None)
Code Example #12
File: xls.py Project: vn8317x/opalytics-ticdat
    def _xlsx_write(self, tic_dat, file_path, tbl_name_mapping):
        verify(
            xlsx,
            "Can't write .xlsx files because xlsxwriter package isn't installed."
        )
        tdf = self.tic_dat_factory
        if os.path.exists(file_path):
            os.remove(file_path)
        book = xlsx.Workbook(file_path)

        def clean_inf(x):
            if x == float("inf"):
                return "inf"
            if x == -float("inf"):
                return "-inf"
            return x

        for t in sorted(sorted(tdf.all_tables),
                        key=lambda x: len(tdf.primary_key_fields.get(x, ()))):
            sheet = book.add_worksheet(tbl_name_mapping[t])
            for i, f in enumerate(
                    tdf.primary_key_fields.get(t, ()) +
                    tdf.data_fields.get(t, ())):
                sheet.write(0, i, f)
            _t = getattr(tic_dat, t)
            if utils.dictish(_t):
                for row_ind, (p_key, data) in enumerate(_t.items()):
                    for field_ind, cell in enumerate(
                        (p_key if containerish(p_key) else (p_key, )) +
                            tuple(data[_f]
                                  for _f in tdf.data_fields.get(t, ()))):
                        sheet.write(row_ind + 1, field_ind, clean_inf(cell))
            else:
                for row_ind, data in enumerate(
                        _t if containerish(_t) else _t()):
                    for field_ind, cell in enumerate(
                            tuple(data[_f] for _f in tdf.data_fields[t])):
                        sheet.write(row_ind + 1, field_ind, clean_inf(cell))
        book.close()
Code Example #13
File: lingo.py Project: vn8317x/opalytics-ticdat
def create_lingo_text(tdf, tic_dat, infinity=INFINITY):
    """
    Generate a Lingo .ldt string from a TicDat object

    :param tdf: A TicDatFactory defining the schema

    :param tic_dat: A TicDat object consistent with tdf

    :param infinity: A number used to represent infinity in lingo

    :return: A string consistent with the Lingo .ldt format
    """
    msg = []
    verify(tdf.good_tic_dat_object(tic_dat, msg.append),
           "tic_dat not a good object for this factory : %s" % "\n".join(msg))
    verify(not tdf.generator_tables, "doesn't work with generator tables.")
    verify(
        not tdf.generic_tables,
        "doesn't work with generic tables. (not yet - will add ASAP as needed) "
    )
    dict_with_lists = defaultdict(list)
    dict_tables = {t for t, pk in tdf.primary_key_fields.items() if pk}
    prepend = getattr(tdf, "lingo_prepend", "")
    for t in dict_tables:
        for k, r in getattr(tic_dat, t).items():
            row = list(k) if containerish(k) else [k]
            for f in tdf.data_fields.get(t, []):
                row.append(r[f])
            dict_with_lists[t].append(row)
    for t in set(tdf.all_tables).difference(dict_tables):
        for r in getattr(tic_dat, t):
            row = [r[f] for f in tdf.data_fields[t]]
            dict_with_lists[t].append(row)
    rtn = "data:\n"
    for t in _sorted_tables(tdf):
        rtn += "%s" % (prepend + t)
        for field in tdf.data_fields[t]:
            rtn += ',' + prepend + t + "_" + field.replace(" ", "_").lower()
        rtn += "=\n"
        for row in dict_with_lists[t]:
            rtn += "\t"
            for field in row:
                if stringish(field):
                    rtn += field + " "
                else:
                    rtn += (str(infinity) if float('inf') == field
                            else str(field)) + " "
            rtn += "\n"
        rtn += ";\n"
    rtn += "enddata"
    return rtn
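Roughly, for a hypothetical table foods with a single data field Cost, the string built above takes this shape:

# data:
# foods,foods_cost=
#     pizza 2.5
# ;
# enddata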
Code Example #14
File: opl.py Project: nandi6uc/ticdat
def create_opl_text(tdf, tic_dat, infinity=INFINITY):
    """
    Generate a OPL .dat string from a TicDat object
    :param tdf: A TicDatFactory defining the schema
    :param tic_dat: A TicDat object consistent with tdf
    :param infinity: A number used to represent infinity in OPL
    :return: A string consistent with the OPL .dat format
    """
    msg = []
    verify(tdf.good_tic_dat_object(tic_dat, msg.append),
           "tic_dat not a good object for this factory : %s" % "\n".join(msg))
    verify(not tdf.generator_tables, "doesn't work with generator tables.")
    verify(
        not tdf.generic_tables,
        "doesn't work with generic tables. (not yet - will add ASAP as needed) "
    )
    dict_with_lists = defaultdict(list)
    dict_tables = {t for t, pk in tdf.primary_key_fields.items() if pk}
    for t in dict_tables:
        for k, r in getattr(tic_dat, t).items():
            row = list(k) if containerish(k) else [k]
            for f in tdf.data_fields.get(t, []):
                row.append(r[f])
            dict_with_lists[t].append(row)
    for t in set(tdf.all_tables).difference(dict_tables):
        for r in getattr(tic_dat, t):
            row = [r[f] for f in tdf.data_fields[t]]
            dict_with_lists[t].append(row)

    rtn = ""
    for i, (t, l) in enumerate(dict_with_lists.items()):
        rtn += "\n" if i > 0 else ""
        rtn += "%s = {" % (tdf.opl_prepend + t)
        if len(l[0]) > 1:
            rtn += "\n"
        for x in range(len(l)):
            r = l[x]
            if len(r) > 1:
                rtn += "<"
            for i, v in enumerate(r):
                rtn += ('"%s"' % v if stringish(v) else
                        (str(infinity) if float('inf') == v else str(v))) + (
                            ", " if i < len(r) - 1 else "")
            if len(r) == 1 and len(l) - 1 != x:
                rtn += ', '
            if len(r) > 1:
                rtn += ">\n"
        rtn += "};\n"

    return rtn
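Correspondingly, a sketch of the OPL .dat text built above for a hypothetical two-column table (multi-field rows are wrapped in angle brackets):

# foods = {
# <"pizza", 2.5>
# };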
Code Example #15
File: testsql.py Project: Dr-Irv/opalytics-ticdat
    def testSilly(self):
        tdf = TicDatFactory(**sillyMeSchema())
        ticDat = tdf.TicDat(**sillyMeData())
        schema2 = sillyMeSchema()
        schema2["b"][0] = ("bField2", "bField1", "bField3")
        schema3 = sillyMeSchema()
        schema3["a"][1] = ("aData2", "aData3", "aData1")
        schema4 = sillyMeSchema()
        schema4["a"][1] = ("aData1", "aData3")
        schema5 = sillyMeSchema()
        _tuple = lambda x : tuple(x) if utils.containerish(x) else (x,)
        for t in ("a", "b") :
            schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0])
        schema5["a"][0], schema5["b"][0] =  (),  []
        schema6 = sillyMeSchema()
        schema6["d"] =  [["dField"],()]

        tdf2, tdf3, tdf4, tdf5, tdf6 = (TicDatFactory(**x) for x in (schema2, schema3, schema4, schema5, schema6))
        tdf5.set_generator_tables(("a","c"))
        filePath = os.path.join(_scratchDir, "silly.db")
        tdf.sql.write_db_data(ticDat, filePath)

        ticDat2 = tdf2.sql.create_tic_dat(filePath)
        self.assertFalse(tdf._same_data(ticDat, ticDat2))

        ticDat3 = tdf3.sql.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, ticDat3))

        ticDat4 = tdf4.sql.create_tic_dat(filePath)
        for t in ["a","b"]:
            for k,v in getattr(ticDat4, t).items() :
                for _k, _v in v.items() :
                    self.assertTrue(getattr(ticDat, t)[k][_k] == _v)
                if set(v) == set(getattr(ticDat, t)[k]) :
                    self.assertTrue(t == "b")
                else :
                    self.assertTrue(t == "a")

        ticDat5 = tdf5.sql.create_tic_dat(filePath)
        self.assertTrue(tdf5._same_data(tdf._keyless(ticDat), ticDat5))
        self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b))

        self.assertTrue("table d" in self.firesException(lambda  : tdf6.sql.create_tic_dat(filePath)))

        ticDat.a["theboger"] = (1, None, 12)
        tdf.sql.write_db_data(ticDat, makeCleanPath(filePath))
        ticDatNone = tdf.sql.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, ticDatNone))
        self.assertTrue(ticDatNone.a["theboger"]["aData2"] == None)
Code Example #16
def make_json_dict(tdf, tic_dat, verbose=False):
    assert tdf.good_tic_dat_object(tic_dat)
    jdict = defaultdict(list)
    for t in tdf.all_tables:
        all_fields = tdf.primary_key_fields.get(t,()) + tdf.data_fields.get(t,())
        def make_row(row):
            assert containerish(row) and len(row) == len(all_fields)
            return {f:v for f,v in zip(all_fields, row)} if verbose else row
        appender = lambda row : jdict[t].append(make_row(row))
        tbl = getattr(tic_dat, t)
        if tdf.primary_key_fields.get(t):
            for pk, data_row in tbl.items():
                appender((list(pk) if containerish(pk) else [pk]) +
                         [data_row[df] for df in tdf.data_fields[t]])
        else:
            for data_row in tbl:
                appender([data_row[df] for df in tdf.data_fields[t]])
    return jdict
Code Example #17
def _try_create_space_case_mapping(tdf, ticdat):
    '''
    :param tdf: a TicDatFactory
    :param ticdat: a ticdat for the tdf
    :return: {"mapping": mapping} if a good mapping can be made, else {"failures": failures}
    '''
    assert tdf.good_tic_dat_object(ticdat), "ticdat not a good object for the tdf"
    rtn = defaultdict(set)
    for t in tdf.all_tables:
        if tdf.primary_key_fields.get(t):
            for ks in getattr(ticdat, t):
                for k in (ks if containerish(ks) else [ks]):
                    if stringish(k):
                        newk = ''.join(list(map(lambda c: c.upper() if c.isalnum() else '_', k)))
                        rtn[newk].add(k)
    failures = {k:tuple(sorted(v)) for k,v in rtn.items() if len(v) > 1}
    if failures:
        return {"failures":failures}
    return {"mapping": {k:next(iter(v)) for k,v in rtn.items()}}
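A sketch of the mapping rule: alphanumeric characters are upper-cased and everything else becomes an underscore, so distinct keys can collide (tdf and dat assumed from context):

# "widget one" -> "WIDGET_ONE" and "Widget One" -> "WIDGET_ONE", so together they yield
# {"failures": {"WIDGET_ONE": ("Widget One", "widget one")}}
result = _try_create_space_case_mapping(tdf, dat)
clean_mapping = result.get("mapping")  # None whenever there were collisions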
Code Example #18
File: jsontd.py Project: nandi6uc/ticdat
 def make_row(row):
     assert containerish(row) and len(row) == len(all_fields)
     row = [write_cell(t, f, x) for f, x in zip(all_fields, row)]
     return {f: v for f, v in zip(all_fields, row)} if verbose else row
Code Example #19
File: jsontd.py Project: snelson3/opalytics-ticdat
 def make_row(row):
     assert containerish(row) and len(row) == len(all_fields)
     return {f: v for f, v in zip(all_fields, row)} if verbose else row
Code Example #20
    def __init__(self, **init_fields):
        """
        create a PanDatFactory

        :param init_fields: a mapping of tables to primary key fields
                            and data fields. Each field listing consists
                            of two sub lists ... first the primary key fields,
                            then the data fields.
        ex: PanDatFactory (categories =  [["name"],["Min Nutrition", "Max Nutrition"]],
                           foods  =  [["Name"],["Cost"]]
                           nutritionQuantities = [["Food", "Category"],["Qty"]])
                           Use '*' instead of a pair of lists for generic tables
        ex: PanDatFactory (typical_table = [["Primary Key Field"],["Data Field"]],
                           generic_table = '*')
        :return: a PanDatFactory
        """
        verify(DataFrame and pd, "Need to install pandas in order to create a PanDatFactory")
        self._has_been_used = False
        verify(not any(x.startswith("_") for x in init_fields),
               "table names shouldn't start with underscore")
        verify(not any(" " in x for x in init_fields), "table names shouldn't have white space")
        verify(len(init_fields) == len({_.lower() for _ in init_fields}),
               "there are case insensitive duplicate table names")
        for k,v in init_fields.items():
            verify(v == '*' or
                   (containerish(v) and len(v) == 2 and all(containerish(_) for _ in v)),
                   ("Table %s needs to indicate it is a generic table by using '*'\n" +
                    "or specify two sublists, one for primary key fields and one for data fields")
                   %k)
            if v != '*':
                verify(all(utils.stringish(s) for _ in v for s in _),
                       "The field names for %s need to be strings"%k)
                verify(v[0] or v[1], "No field names specified for table %s"%k)
                verify(len(set(v[0]).union(v[1])) == len(v[0])+len(v[1]),
                       "There are duplicate field names for table %s"%k)
                verify(len({_.lower() for _ in list(v[0]) + list(v[1])}) == len(v[0])+len(v[1]),
                       "There are case insensitive duplicate field names for %s"%k)
        self.generic_tables = frozenset(k for k,v in init_fields.items() if v == '*')
        self._primary_key_fields = FrozenDict({k : tuple(v[0])for k,v in init_fields.items()
                                               if v != '*'})
        self._data_fields = FrozenDict({k : tuple(v[1]) for k,v in init_fields.items() if v != '*'})
        self._default_values = clt.defaultdict(dict)
        for tbl,flds in self._data_fields.items():
            for fld in flds:
                self._default_values[tbl][fld] = 0
        self._data_types = clt.defaultdict(dict)
        self._data_row_predicates = clt.defaultdict(dict)
        self._foreign_keys = clt.defaultdict(set)
        self.all_tables = frozenset(init_fields)
        superself = self
        class PanDat(object):
            def __repr__(self):
                tlen = lambda t: len(getattr(self, t)) if isinstance(getattr(self, t), DataFrame) else None
                return "pd: {" + ", ".join("%s: %s"%(t, tlen(t)) for t in superself.all_tables) + "}"
            def __init__(self, **init_tables):
                superself._trigger_has_been_used()
                for t in init_tables :
                    verify(t in superself.all_tables, "Unexpected table name %s"%t)
                    tbl = safe_apply(DataFrame)(init_tables[t])
                    if tbl is None and dictish(init_tables[t]) and all(map(stringish, init_tables[t])):
                        tbl = safe_apply(DataFrame)(**init_tables[t])
                    verify(isinstance(tbl, DataFrame),
                           "Failed to provide a valid DataFrame or DataFrame construction argument for %s"%t)
                    setattr(self, t, tbl.copy())
                    df = getattr(self, t)
                    if list(df.columns) == list(range(len(df.columns))) and \
                       len(df.columns) >= len(superself._all_fields(t)):
                        df.rename(columns={f1:f2 for f1, f2 in zip(df.columns, superself._all_fields(t))},
                                  inplace=True)
                for t in set(superself.all_tables).difference(init_tables):
                    setattr(self, t, DataFrame({f:[] for f in utils.all_fields(superself, t)}))
                missing_fields = {(t, f) for t in superself.all_tables for f in superself._all_fields(t)
                                  if f not in getattr(self, t).columns}
                verify(not missing_fields,
                       "The following are (table, field) pairs missing from the data.\n%s"%missing_fields)
                for t in superself.all_tables:
                    af = list(superself._all_fields(t))
                    df = getattr(self, t)
                    if list(df.columns)[:len(af)] != af:
                        extra_cols = [_ for _ in list(df.columns) if _ not in af]
                        setattr(self, t, df[af + extra_cols])
                        assert list(getattr(self, t)) == af + extra_cols

        self.PanDat = PanDat
        self.xls = pandatio.XlsPanFactory(self)
        self.sql = pandatio.SqlPanFactory(self)
        self.csv = pandatio.CsvPanFactory(self)
        self.json = pandatio.JsonPanFactory(self)
        self.opalytics = pandatio.OpalyticsPanFactory(self)
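The docstring's diet-style construction, written out as a runnable sketch:

from ticdat import PanDatFactory

pdf = PanDatFactory(categories=[["Name"], ["Min Nutrition", "Max Nutrition"]],
                    foods=[["Name"], ["Cost"]],
                    nutrition_quantities=[["Food", "Category"], ["Qty"]])
dat = pdf.PanDat(foods={"Name": ["pizza"], "Cost": [2.5]})  # omitted tables become empty DataFrames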
Code Example #21
 def write_file(self, tic_dat, mdb_file_path, allow_overwrite=False):
     """
     write the ticDat data to an Access (mdb) database file
     :param tic_dat: the data object to write
     :param mdb_file_path: the file path of the Access database to populate
     :param allow_overwrite: boolean - are we allowed to overwrite pre-existing data
     :return:
     caveats : Numbers with absolute values larger than 1e+100 will
               be written as 1e+100 or -1e+100
     NB - thrown Exceptions of the form "Data type mismatch in criteria expression"
          generally result either from Access's inability to store different data
          types in the same field, or from a mismatch between the data object
          and the default field types ticdat uses when creating an Access schema.
          For the latter, feel free to call the write_schema function on the data
          file first with explicitly identified field types.
     """
     _standard_verify(self.tic_dat_factory.generic_tables)
     msg = []
     if not self.tic_dat_factory.good_tic_dat_object(
             tic_dat, lambda m: msg.append(m)):
         raise TicDatError("Not a valid TicDat object for this schema : " +
                           " : ".join(msg))
     verify(not os.path.isdir(mdb_file_path),
            "A directory is not a valid Access file path")
     if not os.path.exists(mdb_file_path):
         self.write_schema(mdb_file_path)
     table_names = self._check_tables_fields(
         mdb_file_path, self.tic_dat_factory.all_tables)
     with _connect(_connection_str(mdb_file_path)) as con:
         for t in self.tic_dat_factory.all_tables:
             verify(
                 table_names[t] == t,
                 "Failed to find table %s in path %s" % (t, mdb_file_path))
             if not allow_overwrite:
                 with con.cursor() as cur:
                     cur.execute("Select * from %s" % t)
                     verify(
                         not any(True for _ in cur.fetchall()),
                         "allow_overwrite is False, but there are already data records in %s"
                         % t)
             if allow_overwrite:
                 con.cursor().execute("Delete from %s" % t).commit()
             _t = getattr(tic_dat, t)
             if dictish(_t):
                 primary_keys = tuple(
                     self.tic_dat_factory.primary_key_fields[t])
                 for pk_row, sql_data_row in _t.items():
                     _items = tuple(sql_data_row.items())
                     fields = _brackets(primary_keys +
                                        tuple(x[0] for x in _items))
                     data_row = ((pk_row,) if len(primary_keys)==1 else pk_row) + \
                               tuple(_write_data(x[1]) for x in _items)
                     assert len(data_row) == len(fields)
                     stmt = "INSERT INTO %s (%s) VALUES (%s)" % \
                            (t, ",".join(fields), ",".join("?" for _ in fields))
                     con.cursor().execute(stmt, data_row).commit()
             else:
                 for sql_data_row in (_t if containerish(_t) else _t()):
                     stmt = "INSERT INTO %s (%s) VALUES (%s)" % (t, ",".join(
                         _brackets(sql_data_row.keys())), ",".join(
                             ["?"] * len(sql_data_row)))
                     con.cursor().execute(
                         stmt, tuple(map(_write_data,
                                         sql_data_row.values())))
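Per the docstring's advice, the default Access field types can be pinned down with write_schema before writing; a sketch mirroring the calls in the test examples below:

tdf.mdb.write_schema("silly.mdb", a={"aData3": "text"}, b={"bField1": "int"})
tdf.mdb.write_file(ticDat, "silly.mdb")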
Code Example #22
File: testmdb.py Project: Dr-Irv/opalytics-ticdat
    def testSilly(self):
        tdf = TicDatFactory(**sillyMeSchema())
        ticDat = tdf.TicDat(**sillyMeData())
        filePath = os.path.join(_scratchDir, "silly.mdb")
        self.assertTrue(firesException(lambda : tdf.mdb.write_file(ticDat, makeCleanPath(filePath))))
        def sillyMeCleanData() :
            return {
                "a" : {"1" : (1, 2, "3"), "b" : (12, 12.2, "twelve"), "c" : (11, 12, "thirt")},
                "b" : {(1, 2, "3") : 1, (3, 4, "b") : 12},
                "c" : ((1, "2", 3, 4), (0.2, "b", 0.3, 0.4), (1.2, "b", 12, 24) )
            }
        ticDat = tdf.TicDat(**sillyMeCleanData())
        self.assertTrue(firesException(lambda : tdf.mdb.write_file(ticDat, makeCleanPath(filePath))))
        def makeCleanSchema() :
            tdf.mdb.write_schema(makeCleanPath(filePath), a={"aData3" : "text"},
                        b = {"bField1" : "int", "bField2" : "int"}, c={"cData2" : "text"})
            return filePath
        tdf.mdb.write_file(ticDat, makeCleanSchema())
        mdbTicDat = tdf.mdb.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, mdbTicDat))

        schema2 = sillyMeSchema()
        schema2["b"][0] = ("bField2", "bField1", "bField3")
        schema3 = sillyMeSchema()
        schema3["a"][1] = ("aData2", "aData3", "aData1")
        schema4 = sillyMeSchema()
        schema4["a"][1] = ("aData1", "aData3")
        schema5 = sillyMeSchema()
        _tuple = lambda x : tuple(x) if utils.containerish(x) else (x,)
        for t in ("a", "b") :
            schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0])
        schema5["a"][0], schema5["b"][0] =  (),  []
        schema6 = sillyMeSchema()
        schema6["d"] =  [["dField"],()]

        tdf2, tdf3, tdf4, tdf5, tdf6 = (TicDatFactory(**x) for x in (schema2, schema3, schema4, schema5, schema6))
        tdf5.set_generator_tables(("a","c"))

        ticDat2 = tdf2.mdb.create_tic_dat(filePath)
        self.assertFalse(tdf._same_data(ticDat, ticDat2))

        ticDat3 = tdf3.mdb.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, ticDat3))

        ticDat4 = tdf4.mdb.create_tic_dat(filePath)
        for t in ["a","b"]:
            for k,v in getattr(ticDat4, t).items() :
                for _k, _v in v.items() :
                    self.assertTrue(getattr(ticDat, t)[k][_k] == _v)
                if set(v) == set(getattr(ticDat, t)[k]) :
                    self.assertTrue(t == "b")
                else :
                    self.assertTrue(t == "a")

        ticDat5 = tdf5.mdb.create_tic_dat(filePath)
        self.assertTrue(tdf5._same_data(tdf._keyless(ticDat), ticDat5))
        self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b))

        self.assertTrue("table d" in self.firesException(lambda  : tdf6.mdb.create_tic_dat(filePath)))

        ticDat.a["theboger"] = (1, None, "twelve")
        tdf.mdb.write_file(ticDat, makeCleanSchema())
        ticDatNone = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, ticDatNone))
        self.assertTrue(ticDatNone.a["theboger"]["aData2"] == None)
Code Example #23
File: testxls.py Project: Dr-Irv/opalytics-ticdat
    def testSilly(self):
        tdf = TicDatFactory(**sillyMeSchema())
        ticDat = tdf.TicDat(**sillyMeData())
        schema2 = sillyMeSchema()
        schema2["b"][0] = ("bField2", "bField1", "bField3")
        schema3 = sillyMeSchema()
        schema3["a"][1] = ("aData2", "aData3", "aData1")
        schema4 = sillyMeSchema()
        schema4["a"][1] = ("aData1", "aData3")
        schema5 = sillyMeSchema()
        _tuple = lambda x : tuple(x) if utils.containerish(x) else (x,)
        for t in ("a", "b") :
            schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0])
        schema5["a"][0], schema5["b"][0] =  (),  []
        schema6 = sillyMeSchema()
        schema6["d"] =  [["dField"],()]

        tdf2, tdf3, tdf4, tdf5, tdf6 = (TicDatFactory(**x) for x in (schema2, schema3, schema4, schema5, schema6))
        tdf5.set_generator_tables(("a","c"))
        filePath = os.path.join(_scratchDir, "silly.xls")
        tdf.xls.write_file(ticDat, filePath)

        ticDat2 = tdf2.xls.create_tic_dat(filePath)
        self.assertFalse(tdf._same_data(ticDat, ticDat2))

        ticDat3 = tdf3.xls.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, ticDat3))

        ticDat4 = tdf4.xls.create_tic_dat(filePath)
        for t in ["a","b"]:
            for k,v in getattr(ticDat4, t).items() :
                for _k, _v in v.items() :
                    self.assertTrue(getattr(ticDat, t)[k][_k] == _v)
                if set(v) == set(getattr(ticDat, t)[k]) :
                    self.assertTrue(t == "b")
                else :
                    self.assertTrue(t == "a")

        ticDat5 = tdf5.xls.create_tic_dat(filePath)
        self.assertTrue(tdf5._same_data(tdf._keyless(ticDat), ticDat5))
        self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b))

        ticDat6 = tdf6.xls.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, ticDat6))
        self.assertTrue(firesException(lambda : tdf6._same_data(ticDat, ticDat6)))
        self.assertTrue(hasattr(ticDat6, "d") and utils.dictish(ticDat6.d))

        def writeData(data, write_header = True):
            import xlwt
            book = xlwt.Workbook()
            for t in tdf.all_tables :
                sheet = book.add_sheet(t)
                if write_header :
                    for i,f in enumerate(tdf.primary_key_fields.get(t, ()) + tdf.data_fields.get(t, ())) :
                        sheet.write(0, i, f)
                for rowInd, row in enumerate(data) :
                    for fieldInd, cellValue in enumerate(row):
                        sheet.write(rowInd+ (1 if write_header else 0), fieldInd, cellValue)
            if os.path.exists(filePath):
                os.remove(filePath)
            book.save(filePath)

        writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)])
        ticDatMan = tdf.xls.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
        self.assertTrue(ticDatMan.b[1, 20, 30]["bData"] == 40)
        rowCount = tdf.xls.get_duplicates(filePath)
        self.assertTrue(set(rowCount) == {'a'} and set(rowCount["a"]) == {1} and rowCount["a"][1]==2)

        writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)], write_header=False)
        self.assertTrue(self.firesException(lambda  : tdf.xls.create_tic_dat(filePath, freeze_it=True)))
        ticDatMan = tdf.xls.create_tic_dat(filePath, freeze_it=True, headers_present=False)
        self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
        self.assertTrue(ticDatMan.b[1, 20, 30]["bData"] == 40)
        rowCount = tdf.xls.get_duplicates(filePath, headers_present=False)
        self.assertTrue(set(rowCount) == {'a'} and set(rowCount["a"]) == {1} and rowCount["a"][1]==2)

        ticDat.a["theboger"] = (1, None, 12)
        tdf.xls.write_file(ticDat, filePath, allow_overwrite=True)
        ticDatNone = tdf.xls.create_tic_dat(filePath, freeze_it=True)
        # THIS IS A FLAW - but a minor one. Nones are hard to represent; this one turns into the
        # empty string here. Not sure how to handle this, but documenting it for now.
        self.assertFalse(tdf._same_data(ticDat, ticDatNone))
        self.assertTrue(ticDatNone.a["theboger"]["aData2"] == "")

        writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40), (1,20,30,12)])
        rowCount = tdf.xls.get_duplicates(filePath)
        self.assertTrue(set(rowCount) == {'a', 'b'} and set(rowCount["a"]) == {1} and rowCount["a"][1]==3)
        self.assertTrue(set(rowCount["b"]) == {(1,20,30)} and rowCount["b"][1,20,30]==2)
Code Example #24
 def apply_mapping(k):
     if containerish(k):
         return tuple(map(apply_mapping, k))
     return mapping.get(k, k)
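A brief illustration of the recursion, with mapping assumed to be the enclosing dict of key renamings:

# with mapping = {"widget one": "WIDGET_ONE"}:
# apply_mapping("widget one")       -> "WIDGET_ONE"
# apply_mapping(("widget one", 7))  -> ("WIDGET_ONE", 7)
# apply_mapping(7)                  -> 7  (unmapped keys pass through)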
Code Example #25
    def testSilly(self):
        if not _can_accdb_unit_test:
            return
        tdf = TicDatFactory(**sillyMeSchema())
        ticDat = tdf.TicDat(**sillyMeData())
        filePath = os.path.join(_scratchDir, "silly.accdb")
        self.assertTrue(firesException(lambda : tdf.mdb.write_file(ticDat, makeCleanPath(filePath))))
        def sillyMeCleanData() :
            return {
                "a" : {"1" : (1, 2, "3"), "b" : (12, 12.2, "twelve"), "c" : (11, 12, "thirt")},
                "b" : {(1, 2, "3") : 1, (3, 4, "b") : 12},
                "c" : ((1, "2", 3, 4), (0.2, "b", 0.3, 0.4), (1.2, "b", 12, 24) )
            }
        ticDat = tdf.TicDat(**sillyMeCleanData())
        self.assertTrue(firesException(lambda : tdf.mdb.write_file(ticDat, makeCleanPath(filePath))))
        def makeCleanSchema() :
            tdf.mdb.write_schema(makeCleanPath(filePath), a={"aData3" : "text"},
                        b = {"bField1" : "int", "bField2" : "int"}, c={"cData2" : "text"})
            return filePath
        tdf.mdb.write_file(ticDat, makeCleanSchema())
        self.assertFalse(tdf.mdb.find_duplicates(filePath))
        accdbTicDat = tdf.mdb.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, accdbTicDat))

        schema2 = sillyMeSchema()
        schema2["b"][0] = ("bField2", "bField1", "bField3")
        schema3 = sillyMeSchema()
        schema3["a"][1] = ("aData2", "aData3", "aData1")
        schema4 = sillyMeSchema()
        schema4["a"][1] = ("aData1", "aData3")
        schema5 = sillyMeSchema()
        _tuple = lambda x : tuple(x) if utils.containerish(x) else (x,)
        for t in ("a", "b") :
            schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0])
        schema5["a"][0], schema5["b"][0] =  (),  []
        schema6 = sillyMeSchema()
        schema6["d"] =  [["dField"],()]

        tdf2, tdf3, tdf4, tdf5, tdf6 = (TicDatFactory(**x) for x in (schema2, schema3, schema4, schema5, schema6))
        tdf5.set_generator_tables(("a","c"))

        ticDat2 = tdf2.mdb.create_tic_dat(filePath)
        self.assertFalse(tdf._same_data(ticDat, ticDat2))

        ticDat3 = tdf3.mdb.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, ticDat3))

        ticDat4 = tdf4.mdb.create_tic_dat(filePath)
        for t in ["a","b"]:
            for k,v in getattr(ticDat4, t).items() :
                for _k, _v in v.items() :
                    self.assertTrue(getattr(ticDat, t)[k][_k] == _v)
                if set(v) == set(getattr(ticDat, t)[k]) :
                    self.assertTrue(t == "b")
                else :
                    self.assertTrue(t == "a")

        ticDat5 = tdf5.mdb.create_tic_dat(filePath)
        self.assertTrue(tdf5._same_data(tdf._keyless(ticDat), ticDat5))
        self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b))

        self.assertTrue("table d" in self.firesException(lambda  : tdf6.mdb.create_tic_dat(filePath)))

        ticDat.a["theboger"] = (1, None, "twelve")
        tdf.mdb.write_file(ticDat, makeCleanSchema())
        ticDatNone = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, ticDatNone))
        self.assertTrue(ticDatNone.a["theboger"]["aData2"] == None)
Code Example #26
File: testcsv.py Project: Dr-Irv/opalytics-ticdat
        def doTest(headersPresent) :
            tdf = TicDatFactory(**sillyMeSchema())
            ticDat = tdf.TicDat(**sillyMeData())
            schema2 = sillyMeSchema()
            schema2["b"][0] = ("bField2", "bField1", "bField3")
            schema3 = sillyMeSchema()
            schema3["a"][1] = ("aData2", "aData3", "aData1")
            schema4 = sillyMeSchema()
            schema4["a"][1] = ("aData1", "aData3")
            schema5 = sillyMeSchema()
            _tuple = lambda x : tuple(x) if utils.containerish(x) else (x,)
            for t in ("a", "b") :
                schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0])
            schema5["a"][0], schema5["b"][0] = (), []
            schema5b = sillyMeSchema()
            for t in ("a", "b") :
                schema5b[t][1] = _tuple(schema5b[t][0]) + _tuple(schema5b[t][1])
            schema5b["a"][0], schema5b["b"][0] = (), []
            schema6 = sillyMeSchema()
            schema6["d"] = [("dField",),[]]

            tdf2, tdf3, tdf4, tdf5, tdf5b, tdf6 = (TicDatFactory(**x) for x in
                            (schema2, schema3, schema4, schema5, schema5b, schema6))
            tdf5.set_generator_tables(["a", "c"])
            tdf5b.set_generator_tables(("a", "c"))


            dirPath = makeCleanDir(os.path.join(_scratchDir, "silly"))
            tdf.csv.write_directory(ticDat, dirPath, write_header=headersPresent)

            ticDat2 = tdf2.csv.create_tic_dat(dirPath, headers_present=headersPresent)
            (self.assertFalse if headersPresent else self.assertTrue)(tdf._same_data(ticDat, ticDat2))

            ticDat3 = tdf3.csv.create_tic_dat(dirPath, headers_present=headersPresent)
            (self.assertTrue if headersPresent else self.assertFalse)(tdf._same_data(ticDat, ticDat3))

            if headersPresent :
                ticDat4 = tdf4.csv.create_tic_dat(dirPath, headers_present=headersPresent)
                for t in ("a", "b") :
                    for k,v in getattr(ticDat4, t).items() :
                        for _k, _v in v.items() :
                            self.assertTrue(getattr(ticDat, t)[k][_k] == _v)
                        if set(v) == set(getattr(ticDat, t)[k]) :
                            self.assertTrue(t == "b")
                        else :
                            self.assertTrue(t == "a")
            else :
                self.assertTrue(self.firesException(lambda :
                                    tdf4.csv.create_tic_dat(dirPath, headers_present=headersPresent)))

            ticDat5 = tdf5.csv.create_tic_dat(dirPath, headers_present=headersPresent)
            (self.assertTrue if headersPresent else self.assertFalse)(
                                                    tdf5._same_data(tdf._keyless(ticDat), ticDat5))
            self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b))

            ticDat5b = tdf5b.csv.create_tic_dat(dirPath, headers_present=headersPresent)
            self.assertTrue(tdf5b._same_data(tdf._keyless(ticDat), ticDat5b))
            self.assertTrue(callable(ticDat5b.a) and callable(ticDat5b.c) and not callable(ticDat5b.b))


            ticDat6 = tdf6.csv.create_tic_dat(dirPath, headers_present=headersPresent)
            self.assertTrue(tdf._same_data(ticDat, ticDat6))
            self.assertTrue(firesException(lambda : tdf6._same_data(ticDat, ticDat6)))
            self.assertTrue(hasattr(ticDat6, "d") and utils.dictish(ticDat6.d))
            allDataTdf = TicDatFactory(**{t:[[], tdf.primary_key_fields.get(t, ()) + tdf.data_fields.get(t, ())]
                             for t in tdf.all_tables})

            def writeData(data):
                td = allDataTdf.TicDat(a = data, b=data, c=data)
                allDataTdf.csv.write_directory(td, dirPath, allow_overwrite=True, write_header=headersPresent)

            writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)])
            ticDatMan = tdf.csv.create_tic_dat(dirPath, headers_present=headersPresent, freeze_it=True)
            self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
            self.assertTrue(ticDatMan.b[(1, 20, 30)]["bData"] == 40)
            rowCount = tdf.csv.find_duplicates(dirPath, headers_present=headersPresent)
            self.assertTrue(set(rowCount) == {'a'} and set(rowCount["a"]) == {1} and rowCount["a"][1]==2)


            writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40), (1,20,30,12)])
            rowCount = tdf.csv.find_duplicates(dirPath, headers_present=headersPresent)
            self.assertTrue(set(rowCount) == {'a', 'b'} and set(rowCount["a"]) == {1} and rowCount["a"][1]==3)
            self.assertTrue(set(rowCount["b"]) == {(1,20,30)} and rowCount["b"][1,20,30]==2)
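The example above round-trips a TicDat object through a directory of csv files, with and without a header row, and then checks duplicate-row reporting. Below is a minimal standalone sketch of that write/read pattern, assuming a hypothetical one-table "parts" schema (not part of the scraped test):

import os
import tempfile
from ticdat import TicDatFactory

tdf = TicDatFactory(parts=[["Name"], ["Cost"]])
dat = tdf.TicDat(parts={"widget": [1.0], "gadget": [2.5]})
dir_path = os.path.join(tempfile.mkdtemp(), "parts_csv")
# write_header=False omits the field-name row, so the reader must be told
# headers_present=False and will then map columns to fields positionally.
tdf.csv.write_directory(dat, dir_path, write_header=False)
dat2 = tdf.csv.create_tic_dat(dir_path, headers_present=False)
assert tdf._same_data(dat, dat2)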
Code example #27
File: testxls.py Project: austin-bren/ticdat
    def testSilly(self):
        if not self.can_run:
            return
        tdf = TicDatFactory(**sillyMeSchema())
        ticDat = tdf.TicDat(**sillyMeData())
        schema2 = sillyMeSchema()
        schema2["b"][0] = ("bField2", "bField1", "bField3")
        schema3 = sillyMeSchema()
        schema3["a"][1] = ("aData2", "aData3", "aData1")
        schema4 = sillyMeSchema()
        schema4["a"][1] = ("aData1", "aData3")
        schema5 = sillyMeSchema()
        _tuple = lambda x: tuple(x) if utils.containerish(x) else (x, )
        for t in ("a", "b"):
            schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0])
        schema5["a"][0], schema5["b"][0] = (), []
        schema6 = sillyMeSchema()
        schema6["d"] = [["dField"], ()]

        tdf2, tdf3, tdf4, tdf5, tdf6 = (TicDatFactory(**x)
                                        for x in (schema2, schema3, schema4,
                                                  schema5, schema6))
        tdf5.set_generator_tables(("a", "c"))
        filePath = os.path.join(_scratchDir, "silly.xls")
        tdf.xls.write_file(ticDat, filePath)

        ticDat2 = tdf2.xls.create_tic_dat(filePath)
        self.assertFalse(tdf._same_data(ticDat, ticDat2))

        ticDat3 = tdf3.xls.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, ticDat3))

        ticDat4 = tdf4.xls.create_tic_dat(filePath)
        for t in ["a", "b"]:
            for k, v in getattr(ticDat4, t).items():
                for _k, _v in v.items():
                    self.assertTrue(getattr(ticDat, t)[k][_k] == _v)
                if set(v) == set(getattr(ticDat, t)[k]):
                    self.assertTrue(t == "b")
                else:
                    self.assertTrue(t == "a")

        ticDat5 = tdf5.xls.create_tic_dat(filePath,
                                          treat_inf_as_infinity=False)
        self.assertTrue(tdf5._same_data(tdf._keyless(ticDat), ticDat5))
        self.assertTrue(
            callable(ticDat5.a) and callable(ticDat5.c)
            and not callable(ticDat5.b))

        ticDat6 = tdf6.xls.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, ticDat6))
        self.assertTrue(
            firesException(lambda: tdf6._same_data(ticDat, ticDat6)))
        self.assertTrue(hasattr(ticDat6, "d") and utils.dictish(ticDat6.d))

        def writeData(data, write_header="same"):
            assert filePath.endswith(".xls")
            assert not write_header or write_header in ("lower", "same",
                                                        "duped")
            import xlwt
            book = xlwt.Workbook()
            for t in tdf.all_tables:
                sheet = book.add_sheet(t)
                if write_header:
                    all_fields = tdf.primary_key_fields.get(
                        t, ()) + tdf.data_fields.get(t, ())
                    for i, f in enumerate(
                        (2 if write_header == "duped" else 1) * all_fields):
                        sheet.write(
                            0, i,
                            f.lower() if write_header == "lower"
                            or i >= len(all_fields) else f)
                for rowInd, row in enumerate(data):
                    for fieldInd, cellValue in enumerate(
                        (2 if write_header == "duped" else 1) * row):
                        sheet.write(rowInd + (1 if write_header else 0),
                                    fieldInd, cellValue)
            if os.path.exists(filePath):
                os.remove(filePath)
            book.save(filePath)
            if write_header in ["lower", "same"]:  # use pandas to generate the .xlsx file version
                file_path_x = filePath + "x"
                if os.path.exists(file_path_x):
                    os.remove(file_path_x)
                writer = utils.pd.ExcelWriter(file_path_x)
                for t, (pks, dfs) in tdf.schema().items():
                    fields = pks + dfs
                    if write_header == "lower":
                        fields = [_.lower() for _ in fields]
                    d = {f: [] for f in fields}
                    for row in data:
                        for f, c in zip(fields, row):
                            d[f].append(c)
                    utils.pd.DataFrame(d).to_excel(writer, t, index=False)
                writer.save()

        writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)],
                  write_header="duped")
        self.assertTrue(
            self.firesException(
                lambda: tdf.xls.create_tic_dat(filePath, freeze_it=True)))

        writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)])
        ticDatMan = tdf.xls.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
        self.assertTrue(ticDatMan.b[1, 20, 30]["bData"] == 40)
        for f in [filePath, filePath + "x"]:
            rowCount = tdf.xls.find_duplicates(f)
            self.assertTrue(
                set(rowCount) == {'a'} and set(rowCount["a"]) == {1}
                and rowCount["a"][1] == 2)

        writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)],
                  write_header="lower")
        ticDatMan = tdf.xls.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
        self.assertTrue(ticDatMan.b[1, 20, 30]["bData"] == 40)
        for f in [filePath, filePath + "x"]:
            rowCount = tdf.xls.find_duplicates(f)
            self.assertTrue(
                set(rowCount) == {'a'} and set(rowCount["a"]) == {1}
                and rowCount["a"][1] == 2)

        writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)],
                  write_header=False)
        self.assertTrue(
            self.firesException(
                lambda: tdf.xls.create_tic_dat(filePath, freeze_it=True)))
        ticDatMan = tdf.xls.create_tic_dat(filePath,
                                           freeze_it=True,
                                           headers_present=False)
        self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
        self.assertTrue(ticDatMan.b[1, 20, 30]["bData"] == 40)
        rowCount = tdf.xls.find_duplicates(filePath, headers_present=False)
        self.assertTrue(
            set(rowCount) == {'a'} and set(rowCount["a"]) == {1}
            and rowCount["a"][1] == 2)

        ticDat.a["theboger"] = (1, None, 12)
        tdf.xls.write_file(ticDat, filePath, allow_overwrite=True)
        ticDatNone = tdf.xls.create_tic_dat(filePath, freeze_it=True)
        # THIS IS A FLAW - but a minor one. None values are hard to represent in .xls;
        # here None is read back as the empty string. Not sure how to handle this better,
        # so documenting it for now.
        self.assertFalse(tdf._same_data(ticDat, ticDatNone))
        self.assertTrue(ticDatNone.a["theboger"]["aData2"] == "")
        # the workaround for this flaw is to set the data type to be nullable but not allow the empty string
        tdfwa = TicDatFactory(**sillyMeSchema())
        tdfwa.set_data_type("a", "aData2", nullable=True)
        ticDatNone = tdfwa.xls.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, ticDatNone))
        self.assertTrue(ticDatNone.a["theboger"]["aData2"] is None)

        # checking the same thing with .xlsx - using openpyxl, None is indeed recovered even without tdfwa munging!
        tdf.xls.write_file(ticDat, filePath + "x", allow_overwrite=True)
        ticDatNone = tdf.xls.create_tic_dat(filePath + "x", freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, ticDatNone))
        self.assertTrue(ticDatNone.a["theboger"]["aData2"] is None)
        ticDatNone = tdfwa.xls.create_tic_dat(filePath + "x", freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, ticDatNone))
        self.assertTrue(ticDatNone.a["theboger"]["aData2"] is None)

        writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40),
                   (1, 20, 30, 12)])
        for f in [filePath, filePath + "x"]:
            rowCount = tdf.xls.find_duplicates(f)
            self.assertTrue(
                set(rowCount) == {'a', 'b'} and set(rowCount["a"]) == {1}
                and rowCount["a"][1] == 3)
            self.assertTrue(
                set(rowCount["b"]) == {(1, 20, 30)}
                and rowCount["b"][1, 20, 30] == 2)