def __fit(self, peripheral_data_frames, population_data_frame, s):
    """
    Sends the fit command for this model and waits for the engine's reply.

    Args:
        peripheral_data_frames: Iterable of objects exposing a ``name``
            attribute; their names are sent as the peripheral tables.
        population_data_frame: Object exposing a ``name`` attribute; its
            name is sent as the population table.
        s: Open socket connecting the Python API with the getML engine.

    Raises:
        Exception: If the engine's reply does not contain "Trained". The
            reply itself is used as the exception message.
    """

    # -----------------------------------------------------
    # Assemble and send the complete fit command.

    request = {
        "type_": "RelboostModel.fit",
        "name_": self.name,
        "peripheral_names_": [
            frame.name for frame in peripheral_data_frames
        ],
        "population_name_": population_data_frame.name,
    }

    comm.send_string(s, json.dumps(request))

    # -----------------------------------------------------
    # Block until the engine answers; the elapsed wall-clock time
    # is what gets reported as the training time.

    started = time.time()

    print("Loaded data. Features are now being trained...")

    reply = comm.recv_string(s)

    finished = time.time()

    # -----------------------------------------------------
    # Anything that does not mention "Trained" is treated as an error.

    if "Trained" not in reply:
        raise Exception(reply)

    print(reply)
    self.__print_time_taken(started, finished, "Time taken: ")
def is_alive():
    """
    Checks if engine is running.

    Opens a TCP connection to ``(getml.host, getml.port)`` and sends an
    "is_alive" command. The socket is always closed before returning,
    including when the connection is refused or sending fails.

    Returns:
        bool: True if the engine is running and accepting commands,
            False otherwise
    """

    ## ---------------------------------------------------------------

    cmd = {"type_": "is_alive", "name_": ""}

    # The ``with`` block guarantees the socket is released on every path;
    # previously a refused connection left it open.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        try:
            s.connect((getml.host, getml.port))
        except ConnectionRefusedError:
            return False

        comm.send_string(s, json.dumps(cmd))

    return True
def execute(query):
    """
    Executes an SQL query on the database.

    Args:
        query (str): The SQL query to be executed.

    Raises:
        Exception: If the engine replies with anything other than
            "Success!". The reply is used as the exception message.
    """

    # -------------------------------------------
    # Prepare command.

    cmd = {"name_": "", "type_": "Database.execute"}

    # -------------------------------------------
    # Send JSON command to engine.

    s = comm.send_and_receive_socket(cmd)

    # -------------------------------------------
    # Send the actual query and read the reply. The socket is closed
    # even if sending or receiving fails, so it cannot leak.

    try:
        comm.send_string(s, query)
        msg = comm.recv_string(s)
    finally:
        s.close()

    # -------------------------------------------
    # Make sure that everything went well.

    if msg != "Success!":
        raise Exception(msg)
def __close(self, s):
    """
    Sends the "RelboostModel.close" command for this model.

    Args:
        s: Open socket connecting the Python API with the getML engine.
            It is not closed by this method.

    Raises:
        Exception: If the engine replies with anything other than
            "Success!".
    """

    payload = json.dumps({
        "type_": "RelboostModel.close",
        "name_": self.name,
    })

    comm.send_string(s, payload)

    reply = comm.recv_string(s)

    if reply == "Success!":
        return

    raise Exception(reply)
def from_json(self, json_str, append=False,
              time_formats=["%Y-%m-%dT%H:%M:%s%z", "%Y-%m-%d %H:%M:%S", "%Y-%m-%d"]):
    """
    Fill from JSON

    Fills the data frame with data from a JSON string.

    Args:
        json_str (str): The JSON string containing the data.
        append (bool): If a DataFrame already exists, should json_str be
            appended?
        time_formats (List[str]): The formats tried when parsing time
            stamps. Refer to
            https://pocoproject.org/docs/Poco.DateTimeFormatter.html#9946
            for the options.

    Returns:
        The data frame itself, to allow chaining.

    Raises:
        Exception: If the engine replies with anything other than
            "Success!".
    """

    # NOTE: the default for time_formats is a shared list object. It is
    # only read here (never mutated), so the shared default is harmless.

    # -------------------------------------------
    # Send JSON command to getml engine

    cmd = {
        "type_": "DataFrame.from_json",
        "name_": self.name,
        "categoricals_": self.categorical_names,
        "discretes_": self.discrete_names,
        "join_keys_": self.join_key_names,
        "numericals_": self.numerical_names,
        "targets_": self.target_names,
        "time_stamps_": self.time_stamp_names,
        "append_": append,
        "time_formats_": time_formats,
    }

    s = comm.send_and_receive_socket(cmd)

    # -------------------------------------------
    # Send the JSON string and read the reply. The connection is closed
    # even if sending or receiving fails, so the socket cannot leak.

    try:
        comm.send_string(s, json_str)
        msg = comm.recv_string(s)
    finally:
        s.close()

    # -------------------------------------------
    # Make sure everything went well

    if msg != "Success!":
        raise Exception(msg)

    # -------------------------------------------

    return self
def send(self, data_frame, sock = None):
    """Send data to the getml engine.

    If sock is None, it will call a function to create a new socket,
    use it for the data transfer and close it afterwards. If, instead,
    a socket is provided, it just sends all the data but does not close
    it.

    Args:
        data_frame (pandas.DataFrame): Data Frame whose contents are
            sent to the engine. The plausibility check is skipped when
            it is None.
        sock (optional): Socket connecting the Python API with the
            getML engine.

    Returns:
        The data frame itself, to allow chaining.

    Raises:
        Exception: If the engine replies with anything other than
            "Success!" to the initial command.
    """

    # ------------------------------------------------------

    if data_frame is not None:
        self.__check_plausibility(data_frame)

    # ------------------------------------------------------
    # Send data frame itself

    cmd = {"type_": "DataFrame", "name_": self.name}

    owns_socket = sock is None

    if owns_socket:
        s = comm.send_and_receive_socket(cmd)
    else:
        s = sock
        comm.send_string(s, json.dumps(cmd))

    # A socket opened here must be released on every path, including
    # when the engine reports an error or a transfer fails. A socket
    # supplied by the caller is left open.
    try:
        msg = comm.recv_string(s)

        if msg != "Success!":
            raise Exception(msg)

        # --------------------------------------------------
        # Send individual columns to getml engine

        self.__send_data(data_frame, s)

        # --------------------------------------------------

        self.__close(s)
    finally:
        if owns_socket:
            s.close()

    return self
def __get_categorical(self, sock=None):
    """
    Transform column to numpy array

    Requests the categorical column described by ``self.thisptr`` from
    the engine and returns the received matrix flattened with
    ``ravel()``.

    Args:
        sock: Socket connecting the Python API with the getML engine.
            If None, a new connection is opened and closed again before
            returning.

    Raises:
        Exception: If the engine replies with anything other than
            "Found!". In that case the socket is closed, even when it
            was supplied by the caller.
    """

    # -------------------------------------------
    # Build command string

    cmd = {
        "name_": self.thisptr["df_name_"],
        "type_": "CategoricalColumn.get",
        "col_": self.thisptr,
    }

    # -------------------------------------------
    # Send command to engine

    owns_socket = sock is None

    if owns_socket:
        s = comm.send_and_receive_socket(cmd)
    else:
        s = sock
        comm.send_string(s, json.dumps(cmd))

    # -------------------------------------------
    # Make sure everything went well and receive data. The finally
    # clause releases a socket opened here even if receiving fails.

    try:
        msg = comm.recv_string(s)

        if msg != "Found!":
            # Existing behaviour: the error path closes the socket
            # regardless of who opened it.
            s.close()
            raise Exception(msg)

        mat = comm.recv_categorical_matrix(s)
    finally:
        if owns_socket:
            s.close()

    # -------------------------------------------

    return mat.ravel()
def append(self, data_frame, sock=None):
    """Appends data to tables that already exist on the getml engine.

    If sock is None, it will call a function to create a new socket,
    use it for the data transfer and close it afterwards. If, instead,
    a socket is provided, it just sends all the data but does not close
    it.

    Args:
        data_frame (pandas.DataFrame): Table that you want to be
            appended to the existing data.
        sock (optional): Socket connecting the Python API with the
            getML engine.

    Returns:
        The data frame itself, to allow chaining.
    """

    # ------------------------------------------------------

    self.__check_plausibility(data_frame)

    # ------------------------------------------------------
    # Create connection.

    cmd = {"type_": "DataFrame.append", "name_": self.name}

    owns_socket = sock is None

    if owns_socket:
        s = comm.send_and_receive_socket(cmd)
    else:
        s = sock
        comm.send_string(s, json.dumps(cmd))

    # A socket opened here must be released even if the transfer
    # fails. A socket supplied by the caller is left open.
    try:
        # --------------------------------------------------
        # Send individual matrices to getml engine

        self.__send_data(data_frame, s)

        # --------------------------------------------------

        self.__close(s)
    finally:
        if owns_socket:
            s.close()

    return self
def __transform(self, peripheral_data_frames, population_data_frame, s,
                score=False, predict=False, table_name=""):
    """
    Sends the transform command for this model and collects the result.

    Args:
        peripheral_data_frames: Iterable of objects exposing a ``name``
            attribute; their names are sent as the peripheral tables.
        population_data_frame: Object exposing a ``name`` attribute; its
            name is sent as the population table.
        s: Open socket connecting the Python API with the getML engine.
        score (bool): Forwarded to the engine as "score_".
        predict (bool): Forwarded to the engine as "predict_".
        table_name (str): Forwarded to the engine as "table_name_".

    Returns:
        The matrix received from the engine if ``table_name`` is the
        empty string, otherwise None (nothing further is read from the
        socket in that case).

    Raises:
        Exception: If the engine replies with anything other than
            "Success!".
    """

    # -----------------------------------------------------
    # Prepare the command for the getML engine

    request = {
        "type_": "RelboostModel.transform",
        "name_": self.name,
        "score_": score,
        "predict_": predict,
        "peripheral_names_": [
            frame.name for frame in peripheral_data_frames
        ],
        "population_name_": population_data_frame.name,
        "table_name_": table_name,
    }

    comm.send_string(s, json.dumps(request))

    # -----------------------------------------------------
    # Do the actual transformation

    reply = comm.recv_string(s)

    if reply != "Success!":
        raise Exception(reply)

    # -----------------------------------------------------
    # A matrix is only read back when no table name was given.

    return comm.recv_matrix(s) if table_name == "" else None
def send(self, numpy_array, s):
    """
    Sends the object to the engine, data taken from a numpy array.

    Args:
        numpy_array (:class:`numpy.ndarray`): Number of columns should
            match the number of columns of the object itself.
        s: Socket

    Raises:
        Exception: If the engine replies with anything other than
            "Success!".
    """

    # -------------------------------------------
    # Announce the transfer with this object's own JSON description

    comm.send_string(s, json.dumps(self.thisptr))

    # -------------------------------------------
    # Pick the wire format by column type; any other type sends no
    # payload at all.

    kind = self.thisptr["type_"]

    if kind == "CategoricalColumn":
        comm.send_categorical_matrix(s, numpy_array)
    elif kind == "Column":
        comm.send_matrix(s, numpy_array)

    # -------------------------------------------
    # Make sure everything went well

    reply = comm.recv_string(s)

    if reply != "Success!":
        raise Exception(reply)

    # -------------------------------------------
    # For arrays with more than one dimension, fall back to generated
    # names "column_1", "column_2", ... when no names are set yet.

    if len(numpy_array.shape) > 1:
        names = self.colnames

        if not names:
            names = [
                "column_" + str(position)
                for position in range(1, numpy_array.shape[1] + 1)
            ]

        self.colnames = names
def __get(self, sock=None):
    """
    Transform column to numpy array

    Requests the column described by ``self.thisptr`` from the engine
    and returns the received matrix flattened with ``ravel()``. If the
    column is a time stamp (role "time_stamp" or a unit containing
    "time stamp"), every value is converted to a ``pd.Timestamp`` using
    ``unit="D"``.

    Args:
        sock: Socket connecting the Python API with the getML engine.
            If None, a new connection is opened and closed again before
            returning.

    Returns:
        A one-dimensional numpy array.

    Raises:
        Exception: If the engine replies with anything other than
            "Found!". In that case the socket is closed, even when it
            was supplied by the caller.
    """

    # -------------------------------------------
    # Build command string

    cmd = {
        "name_": self.thisptr["df_name_"],
        "type_": "Column.get",
        "col_": self.thisptr,
    }

    # -------------------------------------------
    # Establish communication with getml engine

    owns_socket = sock is None

    if owns_socket:
        s = comm.send_and_receive_socket(cmd)
    else:
        s = sock
        comm.send_string(s, json.dumps(cmd))

    # -------------------------------------------
    # Make sure everything went well and receive data. The finally
    # clause releases a socket opened here even if receiving fails.

    try:
        msg = comm.recv_string(s)

        if msg != "Found!":
            # Existing behaviour: the error path closes the socket
            # regardless of who opened it.
            s.close()
            raise Exception(msg)

        mat = comm.recv_matrix(s)
    finally:
        if owns_socket:
            s.close()

    # -------------------------------------------
    # If this is a time stamp, then transform to
    # pd.Timestamp.
    #
    # The result is returned flattened, so no reshape is needed. The
    # former ``mat.reshape(...)`` call discarded its result (reshape is
    # not in-place) and has been removed.

    if self.thisptr["type_"] == "Column" and (
            self.thisptr["role_"] == "time_stamp"
            or "time stamp" in self.thisptr["unit_"]):
        mat = np.asarray(
            [pd.Timestamp(ts_input=ts, unit="D") for ts in mat.ravel()]
        )

    # -------------------------------------------

    return mat.ravel()