コード例 #1
0
 def getSingularTable(self):
     """Return the name of the one and only table in the datafile.

     The table names are obtained from ``self.getTableNames()``.

     Returns:
         The single table name (the first element of the table list).

     Raises:
         ValueError: if the datafile holds no tables, or more than one.
     """
     if self.debug:
         print(
             "Attempting to find the name of the table in the Datafile (assuming only one table exists)"
         )

     table_names = self.getTableNames()
     how_many = len(table_names)

     # guard: an empty datafile has nothing to return
     if how_many == 0:
         raise ValueError(
             fileErrorMsg("Datafile does not contain any tables",
                          self.filepath))

     # guard: more than one table makes the choice ambiguous
     if how_many > 1:
         raise ValueError(
             fileErrorMsg(
                 "Datafile should only contain 1 table, but instead has multiple: {}"
                 .format(table_names),
                 self.filepath,
             ))

     # exactly one table remains at this point
     only_table = table_names[0]
     if self.debug:
         print(
             "One table was found in Datafile -- the table name is: {}".
             format(only_table))
     return only_table
コード例 #2
0
    def __isConnectionOpen(self, error_if_closed=None):
        """Report whether this object currently holds an open connection.

        The connection counts as open when ``self.connection`` exists, is not
        ``None`` and is an instance of ``self.fileformat.connection_class``.

        Args:
            error_if_closed: when truthy, raise instead of returning ``False``.
                ``None`` (the default) behaves like ``False``.

        Returns:
            bool: ``True`` when a connection of the expected class is present,
            ``False`` otherwise.

        Raises:
            AttributeError: if ``self.connection`` holds an object of an
                unexpected type, or if the connection is closed while
                ``error_if_closed`` is truthy.
        """
        if self.debug:
            if not hasattr(self, "connection"):
                print("Connection status: [doesn't exist]")
            else:
                print("Connection status: {}".format(self.connection))

        must_be_open = bool(error_if_closed)

        # a missing attribute and an explicit None both mean "closed"
        current = getattr(self, "connection", None)
        if current is None:
            connection_is_open = False
        elif isinstance(current, self.fileformat.connection_class):
            connection_is_open = True
        else:
            raise AttributeError("\n".join((
                "connection type appears to be invalid:",
                "> Expected: {}".format(self.fileformat.connection_class),
                "> Actual: {}".format(type(current)),
            )))

        if must_be_open and not connection_is_open:
            raise AttributeError(
                fileErrorMsg("datafile connection is closed", self.filepath))

        return connection_is_open
コード例 #3
0
    def __returnConnection(self):
        """Open and return a connection to the datafile at ``self.filepath``.

        When ``self.create_new`` is set no connection is opened: a file that
        already exists at ``self.filepath`` is deleted, the containing
        directory is checked for write access, and ``None`` is returned.

        Otherwise the file is checked with ``fileExists`` and opened according
        to ``self.fileformat.filetype`` ("sqlite" or "yxdb"); a small read is
        issued right away to confirm the file really is of that format.

        Returns:
            The open connection object, or ``None`` when ``create_new`` is set.

        Raises:
            ConnectionError: for any failure along the way. The underlying
                exception is attached as ``__cause__``.
        """
        error_msg = "Unable to connect to input data"
        # bound up front so the error handler can tell whether there is
        # anything to close
        connection = None
        try:
            if self.create_new:
                # creating a new file: clear out whatever is already there
                if os.path.isfile(self.filepath):
                    try:
                        deleteFile(self.filepath, debug=self.debug)
                    except Exception as err:
                        raise PermissionError(
                            "unable to delete file: {}".format(
                                self.filepath)) from err

                    # dirname() is "" for a bare filename, which os.access()
                    # always rejects; fall back to the current directory
                    target_dir = os.path.dirname(self.filepath) or "."
                    if not os.access(target_dir, os.W_OK):
                        raise PermissionError(
                            "unable to write to filepath: {}".format(
                                self.filepath))
                return None

            if fileExists(self.filepath,
                          throw_error=not self.create_new,
                          msg=error_msg):
                # open connection and attempt a small read
                # to confirm that it is a valid file
                if self.fileformat.filetype == "sqlite":
                    connection = sqlite3.connect(self.filepath)
                    connection.execute("select * from sqlite_master limit 1")
                elif self.fileformat.filetype == "yxdb":
                    connection = pyxdb.AlteryxYXDB()
                    connection.open(self.filepath)
                    # test that its a real yxdb file
                    connection.get_num_records()
                else:
                    # NOTE(review): presumably raises for unknown formats
                    self.__formatNotSupportedYet()
                return connection
            return None
        except Exception as err:
            # best-effort cleanup of a half-opened connection; the original
            # failure is the one worth reporting
            if connection is not None:
                try:
                    connection.close()
                except Exception:
                    pass
            raise ConnectionError(fileErrorMsg(error_msg,
                                               self.filepath)) from err
コード例 #4
0
    def writeData(self, pandas_df, table, metadata=None, batch_size=1):
        """Write a pandas dataframe to the datafile and return the dataframe.

        sqlite: a new connection is created and the dataframe is written with
        ``DataFrame.to_sql`` (``if_exists="replace"``, ``index=False``).
        Column dtypes are taken from each column's ``"type_length"`` entry in
        ``metadata`` when present.

        yxdb: ``metadata`` is converted into the list-of-dicts layout handed to
        ``__createConnection``, then rows are appended one at a time
        (``batch_size == 1``) or in batches. ``table`` is only used in
        messages for this format.

        Args:
            pandas_df: the dataframe to write.
            table: name of the target table.
            metadata: dict keyed by column name. For sqlite each value may
                hold ``"name"`` and ``"type_length"``; for yxdb each value
                must hold ``"type"`` and ``"length"`` (indexable: size, then
                optional scale) and may hold ``"source"`` and
                ``"description"``.
            batch_size: number of rows per ``append_records`` call (yxdb only).

        Returns:
            ``pandas_df``, unchanged.

        Raises:
            Whatever the underlying write raises; an error line is printed
            before the exception is re-raised.
        """
        if self.debug:
            print(
                '[CachedData.writeData] Attempting to write data to table "{}"'
                .format(table))
            print("[Datafile.writeData] metadata: {}".format(metadata))
        try:
            if self.fileformat.filetype == "sqlite":
                self.__createConnection()

                # prepare dtype arg for pandas
                dtypes = {}
                if isinstance(metadata, dict):
                    for col, col_metadata in metadata.items():
                        name = (col_metadata["name"]
                                if "name" in col_metadata else col)
                        if "type_length" in col_metadata:
                            type_length = col_metadata["type_length"]
                            if type_length is not None:
                                dtypes[name] = type_length
                if not dtypes:
                    # pandas expects None rather than an empty mapping
                    dtypes = None

                if self.debug:
                    print("[Datafile.writeData] dtypes: {}".format(dtypes))

                # write to database
                pandas_df.to_sql(
                    table,
                    self.connection,
                    if_exists="replace",
                    index=False,
                    dtype=dtypes,
                )
            elif self.fileformat.filetype == "yxdb":
                # prepare metadata dict for AlteryxYXDB().create_from_dict (list)
                metadata_list = []
                # positional column index -> name of the builtin used to
                # convert values; assumes dataframe columns are in the same
                # order as the metadata keys -- TODO confirm
                column_conversions = {}
                pythontool_source = "PythonTool:"
                for index, (col, metadata_col) in enumerate(metadata.items()):
                    if self.debug:
                        print("\n[Datafile.writeData] input column: {}".format(
                            metadata_col))
                    alteryx_type = metadata_col["type"]
                    field_type = pyxdbLookupFieldTypeEnum(alteryx_type)
                    field_length = metadata_col["length"]
                    field_size = field_length[0]
                    if len(field_length) > 1:
                        field_scale = int(field_length[1])
                    else:
                        field_scale = -1

                    # prepare source metadata
                    source = (metadata_col["source"]
                              if "source" in metadata_col else "")
                    if len(source) > 0:
                        # only a source ending in ":" gets the suffix appended
                        if source[-1:] == ":":
                            source = "{}{}".format(source, pythontool_source)
                    else:
                        source = pythontool_source
                    description = (metadata_col["description"]
                                   if "description" in metadata_col else "")

                    yxdb_metadata = {
                        "name": col,
                        "type": field_type,
                        "size": field_size,
                        "scale": field_scale,
                        "source": source,
                        "description": description,
                    }

                    if alteryx_type == "Boolean":
                        column_conversions[index] = "bool"
                    elif alteryx_type in ("Byte", "Int16", "Int32", "Int64"):
                        column_conversions[index] = "int"
                    elif alteryx_type in ("Float", "Fixed Decimal", "Double"):
                        column_conversions[index] = "float"

                    if self.debug:
                        print("[Datafile.writeData] yxdb column: {}".format(
                            yxdb_metadata))

                    metadata_list.append(yxdb_metadata)
                if self.debug:
                    print("\nmetadata_list: {}".format(metadata_list))
                    print(
                        "\ncolumn_conversions: {}".format(column_conversions))

                try:
                    self.__createConnection(metadata_list)
                    row_count = pandas_df.shape[0]
                    if self.debug:
                        print("[Datafile.writeData] row count: {}".format(
                            row_count))
                    last_index = row_count - 1
                    pending = []
                    for i in range(row_count):
                        if self.debug:
                            print("[Datafile.writeData] i: {}".format(i))

                        # get row (as list) from pandas dataframe
                        row = list(pandas_df.iloc[i])
                        # convert numeric types to base python (instead of
                        # numpy types, which are used by pandas)
                        for col_i, type_name in column_conversions.items():
                            if pd.isnull(row[col_i]):
                                row[col_i] = None
                            else:
                                row[col_i] = getattr(builtins,
                                                     type_name)(row[col_i])

                        if batch_size == 1:
                            self.connection.append_record(row)
                            continue

                        # buffer rows and flush on a full batch or the final
                        # row; the buffer is reset after every flush so the
                        # next batch starts empty
                        pending.append(row)
                        if len(pending) >= batch_size or i == last_index:
                            self.connection.append_records(pending)
                            pending = []
                finally:
                    # best-effort close: a failure here must not mask the
                    # outcome of the write itself
                    try:
                        self.connection.close()
                    except Exception:
                        pass
            else:
                self.__formatNotSupportedYet()
            if self.debug:
                print(
                    fileErrorMsg(
                        'Success writing output table "{}"'.format(table),
                        self.filepath))
            return pandas_df
        except Exception:
            print(
                fileErrorMsg(
                    'Error: unable to write output table "{}"'.format(table),
                    self.filepath,
                ))
            raise
コード例 #5
0
    def getData(self, table=None, batch_size=1):
        """Read a whole table from the datafile into a pandas dataframe.

        Requires an open connection. When ``table`` is omitted the datafile's
        single table (see ``getSingularTable``) is used.

        Args:
            table: name of the table to read, or ``None`` to use the only
                table in the datafile.
            batch_size: number of records fetched per read call for yxdb
                files; must be at least 1. Not used for sqlite files.

        Returns:
            pandas.DataFrame holding every row of the table.

        Raises:
            ValueError: if ``batch_size`` is below 1 when reading a yxdb file.
            Any error from the connection check, table validation or the read
            itself; read failures print an error line and are re-raised.
        """
        if self.debug:
            print('Attempting to get data from table "{}"'.format(table))

        self.__isConnectionOpen(error_if_closed=True)

        # if no table specified, check to see if there is only table and use that
        if table is None:
            table = self.getSingularTable()

        self.__validateTableName(table)

        # now that the table name has been retrieved, get the data as pandas df
        try:
            if self.fileformat.filetype == "sqlite":
                # identifiers cannot be bound as SQL parameters; the name has
                # been passed through __validateTableName above
                query_result = pd.read_sql_query(
                    "select * from {}".format(table), self.connection)
            elif self.fileformat.filetype == "yxdb":
                # a non-positive batch size would never advance the read
                # position below, looping forever
                if batch_size < 1:
                    raise ValueError(
                        "batch_size must be at least 1, got: {}".format(
                            batch_size))

                colnames = [col["name"] for col in self.getMetadata()]
                # reset pointer back to first line
                self.openConnection()
                num_records = self.connection.get_num_records()

                if num_records > 0:
                    self.connection.go_record(0)

                # read the records, one at a time or in batches
                data = [None] * num_records
                position = 0
                while position < num_records:
                    if batch_size == 1:
                        data[position] = self.connection.read_record()
                        position += 1
                    else:
                        # the final batch may be shorter than batch_size
                        chunk = min(batch_size, num_records - position)
                        data[position:position +
                             chunk] = self.connection.read_records(chunk)
                        position += chunk

                query_result = pd.DataFrame(data, columns=colnames)
            else:
                self.__formatNotSupportedYet()

            if self.debug:
                print(
                    fileErrorMsg(
                        'Success reading input table "{}" '.format(table),
                        self.filepath))
            return query_result
        except Exception:
            print(
                fileErrorMsg(
                    'Error: unable to read input table "{}"'.format(table),
                    self.filepath,
                ))
            raise
コード例 #6
0
    def getMetadata(self, table=None):
        """Return column metadata for a table as a list of dicts.

        Each dict has the keys ``"name"``, ``"type"``, ``"source"`` and
        ``"description"``. Requires an open connection.

        Args:
            table: table to describe. Must be left as ``None`` for yxdb files;
                when ``None`` the datafile's single table is used.

        Returns:
            list of dict, one per column, in the order reported by the file.

        Raises:
            ValueError: if a table name is supplied for a yxdb file.
            Any error raised while reading; an error line is printed first and
            the exception is re-raised.
        """
        if self.debug:
            print('Attempting to get metadata from table "{}"'.format(table))

        self.__isConnectionOpen(error_if_closed=True)

        if self.fileformat.filetype == "yxdb" and table is not None:
            complaint = (
                "specifying a table name ({})".format(table),
                "for a yxdb file ({}) does not make sense".format(
                    self.filename),
            )
            raise ValueError(" ".join(complaint))

        # with no explicit table, fall back to the only table in the file
        if table is None:
            table = self.getSingularTable()

        self.__validateTableName(table)

        try:
            if self.fileformat.filetype == "sqlite":
                pragma_template = "pragma table_info({})"
                if self.debug:
                    print("getMetadata (sqlite query): {}".format(
                        pragma_template))
                pragma_rows = pd.read_sql_query(pragma_template.format(table),
                                                self.connection)
                if self.debug:
                    print("getMetadata (sqlite query result): {}".format(
                        pragma_rows))
                # reshape the pragma result into the expected format; "type"
                # is a string concatenation of "{type} {size}.{scale}", while
                # source and description are lost/unavailable with sqlite
                column_metadata_list = [{
                    "name": field["name"],
                    "type": field["type"],
                    "source": "",
                    "description": "",
                } for _, field in pragma_rows.iterrows()]
            elif self.fileformat.filetype == "yxdb":
                # reset pointer back to first line
                self.openConnection()

                # type, size and scale are concatenated into one string to
                # mirror what the sqlite branch yields; that value is parsed,
                # validated and transformed later. Metadata is read rarely
                # enough that reworking this has not seemed worthwhile.
                column_metadata_list = []
                for raw_column in self.connection.get_record_meta():
                    type_name = str(raw_column["type"])
                    size = raw_column["size"]
                    scale = raw_column["scale"]
                    length = ("{}.{}".format(size, scale)
                              if scale > 0 else str(size))
                    column_metadata_list.append({
                        "name": raw_column["name"],
                        "type": "{} ({})".format(type_name, length),
                        "source": raw_column["source"],
                        "description": raw_column["description"],
                    })
            else:
                self.__formatNotSupportedYet()

            if self.debug:
                print(
                    fileErrorMsg(
                        'Success reading metadata from table "{}" '.format(
                            table),
                        self.filepath,
                    ))
            return column_metadata_list
        except BaseException:
            print(
                fileErrorMsg(
                    'Error: unable to read metadata for table "{}"'.format(
                        table),
                    self.filepath,
                ))
            raise
コード例 #7
0
    def __createConnection(self, metadata=None):
        """Create a new datafile and store its connection on ``self.connection``.

        sqlite: connects to ``self.filepath`` and issues a small query against
        ``sqlite_master``; ``metadata`` must be ``None``.

        yxdb: ``metadata`` is required. It may be a list of per-column dicts,
        or a dict keyed by column name whose values are dicts of column
        attributes; the dict form is converted to the list form (each entry
        gaining a ``"name"`` key) before the file is created.

        Args:
            metadata: column metadata (yxdb only), see above.

        Raises:
            ValueError: if ``metadata`` is given for sqlite or missing for yxdb.
            TypeError: if the yxdb metadata is neither a dict nor a list.
            Any error raised while creating the file; the error and a summary
            line are printed and the exception is re-raised.
        """
        if self.debug:
            print("Attempting to create new data connection: {}".format(
                self.filepath))
        error_msg = "Unable to create new data connection"
        # bound up front so the error handler can close whichever connection
        # (sqlite or yxdb) was opened before the failure
        connection = None
        try:
            if self.fileformat.filetype == "sqlite":
                if metadata is not None:
                    raise ValueError(
                        "metadata not currently supported for creating an empty sqlite file"
                    )
                connection = sqlite3.connect(self.filepath)
                connection.execute("select * from sqlite_master limit 1")
                self.connection = connection
            elif self.fileformat.filetype == "yxdb":
                if metadata is None:
                    raise ValueError(
                        "metadata is currently required for creating an empty yxdb file"
                    )
                if self.debug:
                    print("DataFile.__createConnection() -- input metadata:")
                    print(metadata)
                # if given a dict, then convert it to list format
                if isinstance(metadata, dict):
                    metadata_list = []
                    if self.debug:
                        print("---")
                        print("metadata: {}".format(metadata))
                    for key, metadata_attr in metadata.items():
                        if self.debug:
                            print("key: {}".format(key))
                            print("metadata_attr: {}".format(metadata_attr))
                        metadata_d = {"name": key}
                        metadata_d.update(metadata_attr)
                        metadata_list.append(metadata_d)
                    if self.debug:
                        print("---")
                        print(
                            "DataFile.__createConnection() -- adjusted metadata:"
                        )
                        print(metadata_list)
                else:
                    metadata_list = metadata
                if not isinstance(metadata_list, list):
                    raise TypeError(
                        "AlteryxYXDB().create_from_dict() requires a list (ironically) of metadata, with each element being a dict representing a column"
                    )

                connection = pyxdb.AlteryxYXDB()
                # hand over the converted list, not the raw argument, so the
                # dict form documented above actually takes effect
                connection.create_from_dict(self.filepath, metadata_list)
                self.connection = connection
            else:
                self.__formatNotSupportedYet()

            if self.debug:
                print("Successfully created new data connection: {}".format(
                    self.filepath))

        except Exception as err:
            # best-effort cleanup; the original error is re-raised below
            if connection is not None:
                try:
                    connection.close()
                except Exception:
                    pass
            print(err)
            print(fileErrorMsg(error_msg, self.filepath))
            raise