Beispiel #1
0
 def setUp(self):
     self.multitable_db_filepath = getTestFileName(
         'four_tables_cnx_metadata')
     self.multitable_db = Datafile(self.multitable_db_filepath, debug=True)
     self.tables = getTestFileTables('four_tables_cnx_metadata')
     if len(self.tables) <= 1:
         raise Exception('Bad test db setup -- should contain >1 tables')
Beispiel #2
0
class TestGetSingularTableZeroTables(TestCase):
    def setUp(self):
        self.zerotable_db_filepath = getTestFileName('zero_tables')
        self.zerotable_db = Datafile(self.zerotable_db_filepath, debug=True)
        self.tables = getTestFileTables('zero_tables')
        if len(self.tables) != 0:
            raise Exception('Bad test db setup -- should contain 0 tables')

    def testZeroFail(self):
        self.zerotable_db.openConnection()
        self.assertRaises(Exception, self.zerotable_db.getSingularTable)
Beispiel #3
0
class TestGetSingularTableMultipleTables(TestCase):
    def setUp(self):
        self.multitable_db_filepath = getTestFileName(
            'four_tables_cnx_metadata')
        self.multitable_db = Datafile(self.multitable_db_filepath, debug=True)
        self.tables = getTestFileTables('four_tables_cnx_metadata')
        if len(self.tables) <= 1:
            raise Exception('Bad test db setup -- should contain >1 tables')

    def testMultiFail(self):
        self.multitable_db.openConnection()
        self.assertRaises(Exception, self.multitable_db.getSingularTable)
Beispiel #4
0
class TestGetSingularTableSuccess(TestCase):
    def setUp(self):
        self.onetable_db_filepath = getTestFileName('single_simple_table')
        self.onetable_db = Datafile(self.onetable_db_filepath, debug=True)
        self.tables = getTestFileTables('single_simple_table')
        if len(self.tables) != 1:
            raise Exception('Bad test db setup -- should only contain 1 table')

    def testSuccess(self):
        self.onetable_db.openConnection()
        expected = self.tables[0]
        actual = self.onetable_db.getSingularTable()
        self.onetable_db.closeConnection()
        self.assertEqual(expected, actual)
 def setUp(self):
     self.nonexistent_db_filepath = getTestFileName("nonexistent_sqlite_file")
     try:
         os.remove(self.nonexistent_db_filepath)
     except:
         pass
     self.nonexistent_db = Datafile(self.nonexistent_db_filepath, debug=True)
Beispiel #6
0
    def read(self, incoming_connection_name, batch_size=1):

        if self.debug:
            print(
                'Attempting to read in cached data for incoming connection "{}"'
                .format(incoming_connection_name))

        # get the filepath of the data
        input_data_metadata = self.__getIncomingConnectionMetadata(
            incoming_connection_name)
        input_data_filename = input_data_metadata["filename"]
        input_data_filetype = input_data_metadata["filetype"]
        # create datafile object
        # (by not specifying the fileformat paramter, it will assume the file
        # type from the file's extension)
        with Datafile(input_data_filename,
                      fileformat=input_data_filetype,
                      debug=self.debug) as db:
            msg_action = 'reading input data "{}"'.format(
                incoming_connection_name)
            try:
                # get the data from the sql db (if only one table exists, no need to specify the table name)
                data = db.getData(batch_size=batch_size)
                # print success message
                print("".join(["SUCCESS: ", msg_action]))
                # return the data
                return data
            except:
                print("".join(["ERROR: ", msg_action]))
                raise
Beispiel #7
0
class TestSqliteConnectionOpen(TestCase):
    def setUp(self):
        self.existing_db_filepath = getTestFileName('single_simple_table')
        self.existing_db = Datafile(self.existing_db_filepath, debug=True)
        self.existing_db.openConnection()

    def testConnectionCreated(self):
        self.assertTrue(
            hasattr(self.existing_db, 'connection')
            and type(self.existing_db.connection) is DbConnection)

    def testConnectionClosed(self):
        self.existing_db.closeConnection()
        self.assertFalse(
            hasattr(self.existing_db, 'connection')
            and type(self.existing_db.connection) is DbConnection)
Beispiel #8
0
 def testAyxWriteDataContents(self):
     write(self.data, self.connection)
     expected = self.data
     with Datafile(self.filename, create_new=False) as result_db:
         actual = result_db.getData()
     print(expected.head())
     print(actual.head())
     pandas.testing.assert_frame_equal(expected, actual)
Beispiel #9
0
 def testGetTableNames(self):
     for datafile in self.datafiles:
         # create temp db connection
         db = Datafile(getTestFileName(datafile))
         db.openConnection()
         # generate expected/actual results
         expected = getTestFileTables(datafile)
         actual = db.getTableNames()
         # close db connection
         db.closeConnection()
         # assert table names match
         self.assertCountEqual(expected, actual)
 def setUp(self):
     self.invalid_db_filepath = getTestFileName("invalid_sqlite_file")
     self.invalid_db = Datafile(self.invalid_db_filepath, debug=True)
Beispiel #11
0
 def setUp(self):
     self.onetable_db_filepath = getTestFileName('single_simple_table')
     with Datafile(self.onetable_db_filepath, debug=True) as db:
         self.data = db.getData()
Beispiel #12
0
    def readMetadata(self, incoming_connection_name):
        if self.debug:
            print(
                'Attempting to get (cached) metadata for for incoming connection "{}"'
                .format(incoming_connection_name))
        # create a flag indicating whether input is a pandas dataframe
        pandas_df_input_flag = isinstance(incoming_connection_name,
                                          pd.core.frame.DataFrame)

        # if the input is a dataframe, then write the first row to a temporary
        # sqlite file, and get the metadata from it
        if pandas_df_input_flag:
            input_df_head = incoming_connection_name.head(1)
            temp_table_name = str(uuid1())
            filetype = self.output_datafile_format["filetype"]
            temp_file_path = ".".join(
                [temp_table_name, self.output_datafile_format["extension"]])
            with Datafile(
                    temp_file_path,
                    create_new=True,
                    temporary=True,
                    fileformat=filetype,
                    debug=self.debug,
            ) as db:
                db.writeData(input_df_head, "data")
                raw_metadata = db.getMetadata()
        # otherwise, if not a dataframe, assume input argument value is a
        # connection name string (function called will validate string type)
        else:
            pandas_df_input_flag = False
            # get the filepath of the data
            input_data_metadata = self.__getIncomingConnectionMetadata(
                incoming_connection_name)
            input_data_filename = input_data_metadata["filename"]
            filetype = input_data_metadata["filetype"]
            # get the data from the sqlite file
            with Datafile(
                    input_data_filename,
                    create_new=False,
                    fileformat=filetype,
                    debug=self.debug,
            ) as db:
                raw_metadata = db.getMetadata()

        # initiate the a MetadataTools object
        metadata_tools = MetadataTools(debug=self.debug)
        metadata_dict = {}
        # for index, field in raw_metadata.iterrows():
        for index, field in enumerate(raw_metadata):
            if pandas_df_input_flag:
                if field["name"] == str(input_df_head.columns[index]):
                    field_name = input_df_head.columns[index]
                else:
                    raise ReferenceError(" ".join([
                        "error: pandas dataframe columns appear",
                        "to be in a different order than the correspond",
                        "datafile table for some reason...",
                        "> pandas dataframe columns: {}".format(
                            input_df_head.columns),
                        "> datafile dataframe columns: {}".format(
                            list(raw_metadata["name"])),
                    ]))
            else:
                field_name = field["name"]
            field_type_str = field["type"]

            # parse out field type (str) and length (tuple) from string
            field_type_and_length_d = metadata_tools.parseFieldTypeAndLengthStr(
                field_type_str, context=filetype)
            field_type = field_type_and_length_d["type"]
            field_length = field_type_and_length_d["length"]
            # set metadata
            conversion = metadata_tools.convertTypeString(
                "{} {}".format(field_type, field_length),
                from_context=filetype,
                to_context="yxdb",
            )
            metadata_dict[field_name] = {
                "type": conversion["type"],
                "length": conversion["length"],
            }
            updated_field_metadata = metadata_tools.supplementWithDefaultLengths(
                metadata_dict[field_name]["type"],
                metadata_dict[field_name]["length"],
                context="yxdb",
            )
            updated_field_metadata[
                "length"] = metadata_tools.convertLengthTupleToContext(
                    updated_field_metadata["length"], context="yxdb")
            metadata_dict[field_name] = updated_field_metadata

            # now deal with source...
            if "source" not in field:
                metadata_dict[field_name]["source"] = None
            else:
                metadata_dict[field_name]["source"] = field["source"]
            # ...and description
            if "description" not in field:
                metadata_dict[field_name]["description"] = None
            else:
                metadata_dict[field_name]["description"] = field["description"]

        if self.debug:
            print("CachedData.readMetadata({}): {}".format(
                incoming_connection_name, metadata_dict))

        return metadata_dict
Beispiel #13
0
    def write(self,
              pandas_df,
              outgoing_connection_number,
              batch_size=1,
              columns=None):

        if self.debug:
            print(
                'Alteryx.write() -- attempting to write out cached data to outgoing connection "{}"'
                .format(outgoing_connection_number))

        try:
            outgoing_connection_number = self.__checkOutgoingConnectionNumber__(
                outgoing_connection_number)
            if pandas_df is None:
                raise TypeError(
                    "A pandas dataframe is required for passing data to outgoing connections in Alteryx"
                )
            elif not isinstance(pandas_df, pd.core.frame.DataFrame):
                raise TypeError(
                    "Currently only pandas dataframes can be used to pass data to outgoing connections in Alteryx"
                )
        except Exception as err:
            print(
                "ERROR: Alteryx.write(pandas_df, outgoing_connection_number):")
            print(err)
            raise

        if columns is None:
            pass
        elif not isinstance(columns, dict):
            raise TypeError(
                "columns (metadata) is optional, but if provided, must be a dict or list"
            )

        # get list of columns in input data frame
        pandas_cols = list(pandas_df.columns)

        if self.debug:
            print("pandas_df.columns:")
            print(pandas_df.columns)
            # print(dir(pandas_df.columns.dtype))
            # print(pandas_df.columns.name)
            for index, colname in enumerate(pandas_df.columns):
                coltype = pandas_df.dtypes[index]
                print("  {}: {}".format(colname, coltype))
                # print("  {}: {}".format(col, pandas_df.columns[col]))

        metadata_tools = MetadataTools(debug=self.debug)
        expected_column_attributes = ["name", "type", "length"]

        cols_tmp = {}

        from_context = "pandas"
        to_context = "yxdb"
        for index, colname in enumerate(pandas_df.columns):
            coltype = str(pandas_df.dtypes[index])
            try:
                db_col_metadata = metadata_tools.convertTypeString(
                    coltype, from_context=from_context, to_context=to_context)
                yxdb_type = db_col_metadata["type"]
                yxdb_length = db_col_metadata["length"]
                cols_tmp[colname] = {
                    "name": colname,
                    "type": yxdb_type,
                    "length": yxdb_length,
                }
            except:
                print(
                    'couldn\'t find conversion for {} ("{}") from {} to {} -> skipping'
                    .format(colname, coltype, from_context, to_context))

            # include any metadata provided
            new_column_info = None
            if isinstance(columns, dict) and colname in columns:
                new_column_info = columns[colname]
            elif isinstance(columns, list) and index < len(columns):
                new_column_info = columns[index]
            if new_column_info is not None:
                for updated_attr in new_column_info:
                    cols_tmp[colname][updated_attr] = new_column_info[
                        updated_attr]

            if "type" in cols_tmp:
                new_type = cols_tmp["type"]
            else:
                new_type = None
            if "length" in cols_tmp:
                new_length = cols_tmp["length"]
            else:
                new_length = None

            if self.debug:
                print(
                    "[CachedData.write] name: {}, type/length: {}, from_context: {}, to_context: {} -> type: {}, length: {}"
                    .format(colname, coltype, from_context, to_context,
                            new_type, new_length))

        renames = {}
        write_metadata = {}

        from_context = "yxdb"
        to_context = self.output_datafile_format["filetype"]

        for colname in cols_tmp:
            col_metadata = cols_tmp[colname]
            if self.debug:
                print(
                    "[CachedData.write] name: {}, from_context: {}, to_context: {}, metadata: {}"
                    .format(colname, from_context, to_context, col_metadata))

            col_name = str(
                col_metadata["name"]
            )  # coerce unnamed (ordered) columns from int to str
            col_type = col_metadata["type"]
            col_length = col_metadata["length"]

            # using the *new* column name for metadata
            write_metadata[col_name] = {}

            # copy any non-name/type/length attributes to write_metadata dict (eg, source, description)
            for attr in col_metadata.keys():
                if attr not in ["name", "type", "length"]:
                    write_metadata[col_name][attr] = col_metadata[attr]

            # if name changed, add to renames dict
            if col_name != colname:
                renames[colname] = col_name

            conversion = metadata_tools.convertTypeString(
                "{} {}".format(col_type, col_length),
                from_context=from_context,
                to_context=to_context,
            )

            # supplement with default column type lengths
            type_lengths = metadata_tools.supplementWithDefaultLengths(
                conversion["type"], conversion["length"], context=to_context)
            new_type = type_lengths["type"]
            new_length = type_lengths["length"]

            if self.debug:
                print("\n-----\n{}\n------\n".format(type_lengths))

            # concatenate type and length
            col_type_length = None
            if new_length is not None and len(str(new_length)) > 0:
                if new_type is None or len(str(new_type)) == 0:
                    raise ValueError("cannot set a column length without type")
                col_type_length = "{} {}".format(new_type, new_length)
            elif new_type is not None and len(str(new_type)) > 0:
                col_type_length = new_type

            # convert type/length to output format
            if col_type_length is not None:
                try:
                    db_col_metadata = metadata_tools.convertTypeString(
                        col_type_length,
                        from_context=to_context,
                        to_context=to_context)
                    db_col_type_only = db_col_metadata["type"]
                    db_col_length_only = db_col_metadata["length"]

                    if self.debug:
                        print("\n-----\n{}\n------\n".format(db_col_metadata))

                    # concatenate type and length (use default if necessary)
                    db_col_type = metadata_tools.concatTypeLength(
                        db_col_type_only,
                        db_col_length_only,
                        context=to_context)

                    # set in dtypes dict
                    write_metadata[col_name]["type"] = db_col_type_only
                    write_metadata[col_name]["length"] = db_col_length_only
                    write_metadata[col_name]["type_length"] = db_col_type

                    if self.debug:
                        print(
                            "[CachedData.write] name: {}, from_context: {}, to_context: {}, {} -> {}}"
                            .format(
                                colname,
                                from_context,
                                to_context,
                                col_type_length,
                                db_col_type,
                            ))
                except:
                    if self.debug:
                        print(
                            '[CachedData.write] unable to convert {} ("{}") from {} to {} -> skipping'
                            .format(colname, col_type_length, from_context,
                                    to_context))

        if len(write_metadata.keys()) == 0:
            write_metadata = None

        if len(renames.keys()) == 0:
            renames = None
            pandas_df_out = pandas_df
        else:
            if self.debug:
                print("renaming columns before output:")
                print(renames)
            pandas_df_out = pandas_df.rename(columns=renames, inplace=False)

        # create custom sqlite object
        # (TODO: update to yxdb)
        with Datafile(
                "output_{}.{}".format(
                    outgoing_connection_number,
                    self.output_datafile_format["extension"]),
                create_new=True,
                debug=self.debug,
        ) as db:
            msg_action = "writing outgoing connection data {}".format(
                outgoing_connection_number)
            try:
                # get the data from the sql db (if only one table exists, no need to specify the table name)
                data = db.writeData(
                    pandas_df_out,
                    "data",
                    metadata=write_metadata,
                    batch_size=batch_size,
                )
                # print success message
                print("".join(["SUCCESS: ", msg_action]))
                # return the data
                return data
            except:
                print("".join(["ERROR: ", msg_action]))
                raise
Beispiel #14
0
 def setUp(self):
     self.existing_db_filepath = getTestFileName('single_simple_table')
     self.existing_db = Datafile(self.existing_db_filepath, debug=True)
     self.existing_db.openConnection()
Beispiel #15
0
 def setUp(self):
     self.zerotable_db_filepath = getTestFileName('zero_tables')
     self.zerotable_db = Datafile(self.zerotable_db_filepath, debug=True)
     self.tables = getTestFileTables('zero_tables')
     if len(self.tables) != 0:
         raise Exception('Bad test db setup -- should contain 0 tables')
Beispiel #16
0
 def setUp(self):
     self.existing_db_filepath = getTestFileName('single_simple_table')
     self.existing_db = Datafile(self.existing_db_filepath)
Beispiel #17
0
 def setUp(self):
     self.onetable_db_filepath = getTestFileName('single_simple_table')
     self.onetable_db = Datafile(self.onetable_db_filepath, debug=True)
     self.tables = getTestFileTables('single_simple_table')
     if len(self.tables) != 1:
         raise Exception('Bad test db setup -- should only contain 1 table')