예제 #1
0
    def from_query(self, query, append=False):
        """
        Populate this data frame with the result of a database query.

        Builds a "DataFrame.from_query" command containing the query, the
        name of this data frame, its column names per role and the append
        flag, and hands it to ``comm.send``.

        Args:
            query: The query used to retrieve the data.
            append (bool): If a DataFrame already exists, should the
                retrieved data be appended?

        Returns:
            The current instance.
        """

        # Keys appear in the order in which they are inserted.
        request = {
            "type_": "DataFrame.from_query",
            "name_": self.name,
            "query_": query,
            # Column names, one entry per role.
            "categoricals_": self.categorical_names,
            "discretes_": self.discrete_names,
            "join_keys_": self.join_key_names,
            "numericals_": self.numerical_names,
            "targets_": self.target_names,
            "time_stamps_": self.time_stamp_names,
            "append_": append,
        }

        comm.send(request)

        return self
예제 #2
0
    def group_by(self, join_key, name, aggregations):
        """
        Build a new DataFrame by grouping this one over a join key.

        Args:
            join_key (str): Name of the join key to group by.
            name (str): Name of the new DataFrame.
            aggregations: Iterable of aggregations; the ``thisptr`` of each
                element is placed into the command.

        Returns:
            :class:`~getml.engine.DataFrame`: The result of calling
            ``refresh()`` on a handle created for ``name``.
        """

        request = {
            "name_": name,
            "type_": "DataFrame.group_by",
            "join_key_name_": join_key,
            "df_name_": self.name,
            "aggregations_": [
                aggregation.thisptr for aggregation in aggregations
            ],
        }
        comm.send(request)

        # Handle for the data frame that was just requested.
        return DataFrame(name).refresh()
예제 #3
0
def connect_sqlite3(name=":memory:", time_formats=None):
    """
    Creates a new SQLite3 database connection.

    Args:
        name (str): Name of the sqlite3 file.  If the file does not exist, it
            will be created. Set to ":memory:" for a purely in-memory SQLite3
            database. Any other value is converted to an absolute path
            before it is sent.
        time_formats (List[str], optional): The formats tried when parsing
            time stamps. If None (the default), "%Y-%m-%dT%H:%M:%s%z",
            "%Y-%m-%d %H:%M:%S" and "%Y-%m-%d" are used.
            Check out https://pocoproject.org/docs/Poco.DateTimeFormatter.html#9946 for the options.
    """

    # A fresh list per call avoids sharing one mutable default object
    # between all invocations.
    if time_formats is None:
        time_formats = [
            "%Y-%m-%dT%H:%M:%s%z",
            "%Y-%m-%d %H:%M:%S",
            "%Y-%m-%d",
        ]

    # ":memory:" is a sentinel, not a file name: running it through
    # os.path.abspath would turn it into a path inside the current working
    # directory, so it is forwarded unchanged.
    if name == ":memory:":
        db_name = name
    else:
        db_name = os.path.abspath(name)

    # -------------------------------------------
    # Prepare command.

    cmd = dict()

    cmd["name_"] = db_name
    cmd["type_"] = "Database.new"

    cmd["db_"] = "sqlite3"
    cmd["time_formats_"] = time_formats

    # -------------------------------------------
    # Send JSON command to engine.

    comm.send(cmd)
예제 #4
0
    def from_db(self, table_name, append=False):
        """
        Populate this data frame from a table in the database.

        Builds a "DataFrame.from_db" command containing the table name, the
        name of this data frame, its column names per role and the append
        flag, and hands it to ``comm.send``.

        Args:
            table_name (str): Table from which we want to retrieve the data.
            append (bool): If a DataFrame already exists, should the table
                be appended?

        Returns:
            The current instance.
        """

        # Keys appear in the order in which they are inserted.
        request = {
            "type_": "DataFrame.from_db",
            "name_": self.name,
            "table_name_": table_name,
            # Column names, one entry per role.
            "categoricals_": self.categorical_names,
            "discretes_": self.discrete_names,
            "join_keys_": self.join_key_names,
            "numericals_": self.numerical_names,
            "targets_": self.target_names,
            "time_stamps_": self.time_stamp_names,
            "append_": append,
        }

        comm.send(request)

        return self
예제 #5
0
    def send(self):
        """
        Send this RelboostModel to the getml engine.

        Returns:
            The current instance.

        Raises:
            Exception: If the "population" or the "peripheral" entry of
                ``self.params`` is None.
        """

        # Both placeholders must be set before the command can be built;
        # they are checked in this order.
        required = (
            ("population", "Population cannot be None!"),
            ("peripheral", "Peripheral cannot be None!"),
        )
        for key, message in required:
            if self.params[key] is None:
                raise Exception(message)

        request = {
            "name_": self.name,
            "type_": "RelboostModel",
            "population_": self.params["population"].thisptr,
            # Only the "name_" entry of each peripheral's thisptr is sent.
            "peripheral_": [
                peripheral.thisptr["name_"]
                for peripheral in self.params["peripheral"]
            ],
            "hyperparameters_": self.__make_hyperparameters(),
        }

        comm.send(request)

        return self
예제 #6
0
    def where(self, name, condition):
        """
        Build a new DataFrame containing a subselection of this one.

        Args:
            name (str): Name of the new DataFrame.
            condition: Boolean column indicating the rows you want to
                select; its ``thisptr`` is placed into the command.

        Returns:
            The result of calling ``refresh()`` on a DataFrame handle
            created for ``name``.
        """

        request = {
            "type_": "DataFrame.where",
            "name_": self.name,
            "new_df_": name,
            "condition_": condition.thisptr,
        }
        comm.send(request)

        # Handle for the data frame that was just requested.
        return DataFrame(name).refresh()
예제 #7
0
    def set_unit(self, unit):
        """
        Assign a new unit to the column.

        The command is a copy of ``self.thisptr`` with "unit_" set to the
        new unit and ".set_unit" appended to its "type_" entry. After the
        command has been sent, the unit is also stored in ``self.thisptr``.

        Args:
            unit: The new unit.
        """

        # Work on a copy so that the "type_" suffix does not end up in
        # self.thisptr.
        request = dict(self.thisptr, unit_=unit)
        request["type_"] += ".set_unit"

        comm.send(request)

        # Remember the new unit locally.
        self.thisptr["unit_"] = unit
예제 #8
0
    def to_csv(self, fname, quotechar='"', sep=','):
        """
        Write the data frame into a newly created CSV file.

        Args:
            fname (str): The name of the CSV file. It is converted to an
                absolute path before it is sent.
            quotechar (str): The character used to wrap strings.
            sep (str): The separator used for separating fields.
        """

        absolute_fname = os.path.abspath(fname)

        request = {
            "type_": "DataFrame.to_csv",
            "name_": self.name,
            "fname_": absolute_fname,
            "quotechar_": quotechar,
            "sep_": sep,
        }

        comm.send(request)
예제 #9
0
    def join(
        self,
        name,
        other,
        join_key,
        other_join_key=None,
        cols=None,
        other_cols=None,
        how="inner",
        where=None):
        """
        Join this DataFrame with another one and return the new DataFrame.

        Args:
            name (str): The name of the new DataFrame.
            other (DataFrame): The other DataFrame.
            join_key (str): Name of the join key in this DataFrame.
            other_join_key (str, optional): Name of the join key in the other
                table. Falls back to ``join_key`` when left out (or falsy).
            cols (optional): List of columns from this DataFrame to be included.
                If left blank, all columns from this DataFrame will be included.
            other_cols (optional): List of columns from the other DataFrame to
                be included. If left blank, all columns from the other
                DataFrame will be included.
            how (str): Type of the join. Supports "left", "right" and "inner".
            where (optional): Boolean column that imposes WHERE conditions on
                the join. Only added to the command when it is not None.

        Returns:
            The result of calling ``refresh()`` on a DataFrame handle
            created for ``name``.
        """

        request = {
            "type_": "DataFrame.join",
            "name_": name,
            "df1_name_": self.name,
            "df2_name_": other.name,
            "join_key_used_": join_key,
            "other_join_key_used_": other_join_key or join_key,
        }

        # A missing column selection is sent as an empty list.
        own_cols = cols or []
        foreign_cols = other_cols or []

        request["cols1_"] = [column.thisptr for column in own_cols]
        request["cols2_"] = [column.thisptr for column in foreign_cols]

        request["how_"] = how

        if where is not None:
            request["where_"] = where.thisptr

        comm.send(request)

        return DataFrame(name=name).refresh()
예제 #10
0
    def read_csv(
            self,
            fnames,
            append=True,
            quotechar='"',
            sep=',',
            time_formats=None):
        """
        Read CSV file

        It is assumed that the first line of each CSV file contains the column
        names.

        Args:
            fnames (List[str]): CSV file names to be read. Each one is
                converted to an absolute path before it is sent.
            append (bool): If a DataFrame already exists, should the file be appended?
            quotechar (str): The character used to wrap strings.
            sep (str): The separator used for separating fields.
            time_formats (List[str], optional): The formats tried when parsing
                time stamps. If None (the default), "%Y-%m-%dT%H:%M:%s%z",
                "%Y-%m-%d %H:%M:%S" and "%Y-%m-%d" are used.
                Refer to https://pocoproject.org/docs/Poco.DateTimeFormatter.html#9946 for the options.

        Returns:
            The current instance.
        """

        # A fresh list per call avoids sharing one mutable default object
        # between all invocations.
        if time_formats is None:
            time_formats = [
                "%Y-%m-%dT%H:%M:%s%z",
                "%Y-%m-%d %H:%M:%S",
                "%Y-%m-%d",
            ]

        # -------------------------------------------
        # Transform paths

        fnames_ = [os.path.abspath(fname) for fname in fnames]

        # -------------------------------------------
        # Send JSON command to getml engine

        cmd = dict()
        cmd["type_"] = "DataFrame.read_csv"
        cmd["name_"] = self.name

        cmd["fnames_"] = fnames_

        cmd["append_"] = append
        cmd["quotechar_"] = quotechar
        cmd["sep_"] = sep
        cmd["time_formats_"] = time_formats

        cmd["categoricals_"] = self.categorical_names
        cmd["discretes_"] = self.discrete_names
        cmd["join_keys_"] = self.join_key_names
        cmd["numericals_"] = self.numerical_names
        cmd["targets_"] = self.target_names
        cmd["time_stamps_"] = self.time_stamp_names

        comm.send(cmd)

        # -------------------------------------------

        return self
예제 #11
0
    def __save(self):
        """
        Saves the model as a JSON file.

        This is done by sending a "RelboostModel.save" command carrying
        the name of this model.
        """

        comm.send({
            "type_": "RelboostModel.save",
            "name_": self.name,
        })
예제 #12
0
    def load(self):
        """
        Load the DataFrame object from the engine.

        Sends a "DataFrame.load" command carrying the name of this data
        frame.

        Returns:
            Whatever ``self.refresh()`` returns.
        """

        request = {
            "type_": "DataFrame.load",
            "name_": self.name,
        }
        comm.send(request)

        return self.refresh()
예제 #13
0
    def __rm_col(self, name, role):
        """
        Send a "DataFrame.remove_column" command and refresh this handle.

        Args:
            name: Sent as "name_" of the command; presumably the name of
                the column to remove.
            role: Sent as "role_" of the command.
        """

        request = {
            "type_": "DataFrame.remove_column",
            "name_": name,
            "df_name_": self.name,
            "role_": role,
        }
        comm.send(request)

        self.refresh()
예제 #14
0
    def load(self):
        """
        Loads the model from a JSON file.

        This is done by sending a "RelboostModel.load" command carrying
        the name of this model.

        Returns:
            Whatever ``self.refresh()`` returns.
        """

        request = {
            "type_": "RelboostModel.load",
            "name_": self.name,
        }
        comm.send(request)

        return self.refresh()
예제 #15
0
def delete_project(name):
    """
    Delete a project.

    All data and models contained in the project directory will be lost.

    Args:
        name (str): Name of your project.

    Raises:
        ConnectionRefusedError: If unable to connect to engine
    """

    comm.send({
        "type_": "delete_project",
        "name_": name,
    })
예제 #16
0
    def delete(self, mem_only=False):
        """
        Delete the model from the engine.

        Sends a "RelboostModel.delete" command carrying the name of this
        model and the ``mem_only`` flag.

        Args:
            mem_only (bool): If True, deletion is restricted to memory and
                does not touch the disk. Default: False.
        """

        request = {
            "type_": "RelboostModel.delete",
            "name_": self.name,
            "mem_only_": mem_only,
        }

        comm.send(request)
예제 #17
0
    def delete(self, mem_only=False):
        """
        Delete the data frame from the engine.

        Sends a "DataFrame.delete" command carrying the name of this data
        frame and the ``mem_only`` flag.

        Args:
            mem_only (bool): If True, the data frame will be deleted from
                memory only, but not from disk.
        """

        request = {
            "type_": "DataFrame.delete",
            "name_": self.name,
            "mem_only_": mem_only,
        }

        comm.send(request)
예제 #18
0
    def to_db(self, table_name):
        """
        Write the data frame into a newly created table in the database.

        Args:
            table_name (str): Name of the table to be created.
                If a table of that name already exists, it will be replaced.
        """

        request = {
            "type_": "DataFrame.to_db",
            "name_": self.name,
            "table_name_": table_name,
        }

        comm.send(request)
예제 #19
0
    def __add_column(self, col, name, role, unit):
        """
        Send a "DataFrame.add_column" command and refresh this handle.

        Args:
            col: Object whose ``thisptr`` is sent as "col_".
            name: Sent as "name_" of the command; presumably the name of
                the new column.
            role: Sent as "role_" of the command.
            unit: Sent as "unit_" of the command.
        """

        request = {
            "type_": "DataFrame.add_column",
            "name_": name,
            "col_": col.thisptr,
            "df_name_": self.name,
            "role_": role,
            "unit_": unit,
        }
        comm.send(request)

        self.refresh()
예제 #20
0
    def save(self):
        """Save the DataFrame on the engine.

        To be saved on the engine, it already has to be present
        there. You can use the :meth:`~getml.engine.DataFrame.send`
        function to upload it to the engine.

        Returns:
            :class:`~getml.engine.DataFrame`:
                The current instance of the DataFrame class.
        """

        request = {
            "type_": "DataFrame.save",
            "name_": self.name,
        }
        comm.send(request)

        return self
예제 #21
0
def connect_postgres(
        pg_host,
        pg_hostaddr,
        pg_port,
        dbname,
        user,
        password,
        time_formats=None):
    """
    Creates a new PostgreSQL database connection.

    Args:
        pg_host (str): Host of the PostgreSQL database.
        pg_hostaddr (str): IP address of the PostgreSQL database.
        pg_port(int): Port of the PostgreSQL database.
        dbname (str): Sent to the engine as "dbname_"; presumably the name
            of the PostgreSQL database to connect to.
        user (str): User name with which to log into the PostgreSQL database.
        password (str): Password with which to log into the PostgreSQL database.
        time_formats (List[str], optional): The formats tried when parsing
            time stamps. If None (the default), "%Y-%m-%dT%H:%M:%s%z",
            "%Y-%m-%d %H:%M:%S" and "%Y-%m-%d" are used.
            Check out https://pocoproject.org/docs/Poco.DateTimeFormatter.html#9946 for the options.
    """

    # A fresh list per call avoids sharing one mutable default object
    # between all invocations.
    if time_formats is None:
        time_formats = [
            "%Y-%m-%dT%H:%M:%s%z",
            "%Y-%m-%d %H:%M:%S",
            "%Y-%m-%d",
        ]

    # -------------------------------------------
    # Prepare command.

    cmd = dict()

    # The name is always sent as an empty string for this connection type.
    cmd["name_"] = ""
    cmd["type_"] = "Database.new"
    cmd["db_"] = "postgres"

    cmd["host_"] = pg_host
    cmd["hostaddr_"] = pg_hostaddr
    cmd["port_"] = pg_port
    cmd["dbname_"] = dbname
    cmd["user_"] = user
    cmd["password_"] = password
    cmd["time_formats_"] = time_formats

    # -------------------------------------------
    # Send JSON command to engine.

    comm.send(cmd)
예제 #22
0
def drop_table(name):
    """
    Drop a table from the database.

    Args:
        name (str): The table to be dropped.
    """

    comm.send({
        "name_": name,
        "type_": "Database.drop_table",
    })
예제 #23
0
    def copy(self, other):
        """
        Copy the parameters and hyperparameters from another model.

        Sends a "RelboostModel.copy" command naming this model and the
        other one, then calls ``self.refresh()``.

        Args:
            other (:class:`getml.models.RelboostModel`): The other model.
        """

        request = {
            "type_": "RelboostModel.copy",
            "name_": self.name,
            "other_": other.name,
        }
        comm.send(request)

        self.refresh()
예제 #24
0
def set_project(name):
    """
    Select a project.

    All data frames and models will be stored in the corresponding project
    directory. If a project of that name does not already exist, a new one will
    be created.

    Args:
        name (str): Name of your project.

    Raises:
        ConnectionRefusedError: If ``is_alive()`` reports that the engine
            cannot be reached.
    """
    # Fail early with a helpful message instead of sending the command.
    if not is_alive():
        raise ConnectionRefusedError(
            "Cannot connect to getML engine. Make sure the engine is running and you are logged in."
        )

    comm.send({
        "type_": "set_project",
        "name_": name,
    })
예제 #25
0
def read_csv(name, fnames, header=True, quotechar='"', sep=',', skip=0):
    """
    Read one or more CSV files into the database.

    Args:
        name (str): Name of the table in which the data is to be inserted.
        fnames (List[str]): The list of CSV file names to be read. Each one
            is converted to an absolute path before it is sent.
        header (bool, optional): Whether the CSV file contains a header with
            the column names. Default to True.
        quotechar (str, optional): The character used to wrap strings. Default:`"`
        sep (str, optional): The separator used for separating fields. Default:`,`
        skip (int, optional): Number of lines to skip at the beginning of each
            file (Default: 0). If *header* is True, the lines will be skipped
            before the header.
    """
    absolute_paths = list(map(os.path.abspath, fnames))

    request = {
        "name_": name,
        "type_": "Database.read_csv",
        "fnames_": absolute_paths,
        "header_": header,
        "quotechar_": quotechar,
        "sep_": sep,
        "skip_": skip,
    }

    comm.send(request)