def create_w2v_embed(df):
    """
    Generate feature vectors for the review text using Frovedis Word2Vec.

    A single-process Frovedis server is started with VE_OMP_NUM_THREADS
    set to '8'; a Word2Vec model is built from df["Review"] and the same
    reviews are transformed with np.mean as the aggregation function.

    The thread setting is put back to '1' and the server is shut down even
    when training or transformation raises, so a failure does not leave a
    server running.

    Returns a pandas DataFrame built from the transformed embeddings.
    """
    os.environ["VE_OMP_NUM_THREADS"] = '8'
    FrovedisServer.initialize("mpirun -np 1 " + os.environ["FROVEDIS_SERVER"])
    try:
        frovedis_w2v = Frovedis_Word2Vec(sentences=list(df["Review"]),
                                         hiddenSize=512, minCount=2,
                                         n_iter=100)
        x_emb = frovedis_w2v.transform(list(df["Review"]), func=np.mean)
    finally:
        # same order as the success path: reset the env var, then shut down
        os.environ["VE_OMP_NUM_THREADS"] = '1'
        FrovedisServer.shut_down()
    return pd.DataFrame(x_emb)
def run_frovedis(params, X, nproc):
    """
    Time a Frovedis ARPACK SVD of X.

    params -> dict; params["n_components"] is passed to ARPACK.computeSVD
    X      -> input passed unchanged to ARPACK.computeSVD
    nproc  -> number of MPI processes used to start the Frovedis server

    Returns the wall-clock seconds spent inside ARPACK.computeSVD.
    The server is shut down even if the computation raises.
    """
    from frovedis.exrpc.server import FrovedisServer
    from frovedis.matrix.wrapper import ARPACK
    FrovedisServer.initialize("mpirun -np {nproc} {server}".format(
        nproc=nproc, server=os.environ['FROVEDIS_SERVER']))
    try:
        start = time.time()
        clf = ARPACK.computeSVD(X, params["n_components"])
        end = time.time()
        clf.release()
    finally:
        # never leave the server running after a failed run
        FrovedisServer.shut_down()
    return end - start
def forecast(self, steps=1, exog=None, alpha=0.05):
    """
    DESC: Perform out-of-sample forecasting
    PARAMS: steps -> TYPE: int, DEFAULT: 1, number of out-of-sample values
                     to be predicted; None is treated as 1
            exog  -> DEFAULT: None, not supported: any other value raises
                     ValueError
            alpha -> DEFAULT: 0.05, not supported: any other value raises
                     ValueError
    RETURNS: TYPE: ndarray of dtype float64 built from the values returned
             by the server
    RAISES: ValueError for unsupported arguments or non-positive steps,
            RuntimeError when the server reports an exception
    """
    if exog is not None:
        raise ValueError("Currently, exog is not supported by forecast()!")
    if alpha != 0.05:
        raise ValueError("Currently, alpha is not supported by forecast()!")

    n_steps = 1 if steps is None else steps
    if n_steps <= 0:
        raise ValueError("Prediction must have `end` after `start`!")

    (srv_host, srv_port) = FrovedisServer.getServerInstance()
    forecasted = rpclib.arima_forecast(srv_host, srv_port, n_steps,
                                       self.__mid, self.__mdtype)
    server_status = rpclib.check_server_exception()
    if server_status["status"]:
        raise RuntimeError(server_status["info"])
    return np.asarray(forecasted, dtype=np.float64)
def load_numpy_matrix(self, mat, dtype=None):
    """
    load_numpy_matrix

    Releases the currently held matrix, then creates a server-side dense
    matrix from `mat`.

    mat   -> matrix-like input; it is always passed through np.asmatrix so
             the flattened view (A1) and the 2D shape are available
    dtype -> optional target dtype; when given it replaces the dtype stored
             on this object. When neither is set, the dtype of `mat` is
             adopted.

    Returns the result of self.load_dummy() on the created matrix.
    Raises TypeError for an unsupported dtype and RuntimeError when the
    server reports an exception.
    """
    self.release()
    if dtype is not None:
        self.__dtype = dtype

    if self.__dtype is None:
        mat = np.asmatrix(mat)
        self.__dtype = mat.dtype
    else:
        mat = np.asmatrix(mat, self.__dtype)

    m_data = mat.A1
    (nrow, ncol) = mat.shape
    (host, port) = FrovedisServer.getServerInstance()
    data_type = self.get_dtype()
    if data_type == DTYPE.DOUBLE:
        dmat = rpclib.create_frovedis_double_dense_matrix(
            host, port, nrow, ncol, m_data, self.__mtype.encode('ascii'))
    elif data_type == DTYPE.FLOAT:
        dmat = rpclib.create_frovedis_float_dense_matrix(
            host, port, nrow, ncol, m_data, self.__mtype.encode('ascii'))
    elif data_type == DTYPE.LONG:
        dmat = rpclib.create_frovedis_long_dense_matrix(
            host, port, nrow, ncol, m_data, self.__mtype.encode('ascii'))
    elif data_type == DTYPE.INT:
        dmat = rpclib.create_frovedis_int_dense_matrix(
            host, port, nrow, ncol, m_data, self.__mtype.encode('ascii'))
    else:
        # str() is required: the stored dtype is not a string, and plain
        # concatenation would fail before the message could be built
        raise TypeError("Unsupported input type: " + str(self.__dtype))
    excpt = rpclib.check_server_exception()
    if excpt["status"]:
        raise RuntimeError(excpt["info"])
    return self.load_dummy(dmat)
def run_frovedis(params, X_train, y_train, X_test, y_test, nproc):
    """
    Train a Frovedis LogisticRegression and measure training time/accuracy.

    params          -> keyword arguments forwarded to LogisticRegression
    X_train/y_train -> data passed to fit()
    X_test/y_test   -> data used to compute the accuracy
    nproc           -> number of MPI processes used to start the server

    Returns (score, seconds) where score is the fraction of test labels
    equal to the predictions and seconds is the wall-clock time of fit().
    The server is shut down even if training or prediction raises.
    """
    from frovedis.exrpc.server import FrovedisServer
    from frovedis.mllib.linear_model import LogisticRegression
    FrovedisServer.initialize("mpirun -np {nproc} {server}".format(
        nproc=nproc, server=os.environ['FROVEDIS_SERVER']))
    try:
        start = time.time()
        clf = LogisticRegression(**params).fit(X_train, y_train)
        end = time.time()
        y_pred = clf.predict(X_test)
        score = 1.0 * sum(y_test == y_pred) / len(y_test)
        clf.release()
    finally:
        # never leave the server running after a failed run
        FrovedisServer.shut_down()
    return score, end - start
def generate_rules(self, confidence=0.8):
    """
    NAME: generate_rules

    Obtains a new model id and calls the fpgrowth_fpr RPC with this
    model's id, the new id and `confidence` (DEFAULT: 0.8).

    Returns an Fp_rules object wrapping the new model id.
    Raises RuntimeError when the server reports an exception.
    """
    midr = ModelID.get()
    (host, port) = FrovedisServer.getServerInstance()
    rpclib.fpgrowth_fpr(host, port, self.__mid, midr, confidence)
    # surface server-side failures instead of returning a handle to rules
    # that may not have been generated
    excpt = rpclib.check_server_exception()
    if excpt["status"]:
        raise RuntimeError(excpt["info"])
    return Fp_rules(midr)
def debug_print(self):
    """
    debug_print

    Calls the show_frovedis_dense_matrix RPC for this matrix.
    Raises RuntimeError when the server reports an exception.
    """
    (srv_host, srv_port) = FrovedisServer.getServerInstance()
    rpclib.show_frovedis_dense_matrix(
        srv_host, srv_port,
        self.get(),
        self.__mtype.encode('ascii'),
        self.get_dtype())
    server_status = rpclib.check_server_exception()
    if server_status["status"]:
        raise RuntimeError(server_status["info"])
def run_frovedis(params, X, nproc):
    """
    Time the training of a Frovedis KMeans model on X.

    params -> keyword arguments forwarded to KMeans
    X      -> input passed unchanged to fit()
    nproc  -> number of MPI processes used to start the Frovedis server

    Returns the wall-clock seconds spent inside KMeans(...).fit(X).
    The server is shut down even if training raises.
    """
    from frovedis.exrpc.server import FrovedisServer
    from frovedis.mllib.cluster import KMeans
    FrovedisServer.initialize(
        "mpirun -np {nproc} {server}".format(
            nproc=nproc, server=os.environ['FROVEDIS_SERVER']
        )
    )
    try:
        start = time.time()
        clf = KMeans(**params).fit(X)
        end = time.time()
        clf.release()
    finally:
        # never leave the server running after a failed run
        FrovedisServer.shut_down()
    return end - start
def save_binary(self, fname):
    """
    save_binary

    Calls the save_frovedis_dense_matrix RPC for this matrix with the
    ASCII-encoded `fname` and the flag argument set to True.
    Raises RuntimeError when the server reports an exception.
    """
    (srv_host, srv_port) = FrovedisServer.getServerInstance()
    rpclib.save_frovedis_dense_matrix(
        srv_host, srv_port,
        self.get(),
        fname.encode('ascii'),
        True,
        self.__mtype.encode('ascii'),
        self.get_dtype())
    server_status = rpclib.check_server_exception()
    if server_status["status"]:
        raise RuntimeError(server_status["info"])
def __release_server_heap(self):
    """
    Release this matrix on the server side.

    Calls the release_frovedis_dense_matrix RPC and raises RuntimeError
    when the server reports an exception.
    """
    (srv_host, srv_port) = FrovedisServer.getServerInstance()
    rpclib.release_frovedis_dense_matrix(
        srv_host, srv_port,
        self.get(),
        self.__mtype.encode('ascii'),
        self.get_dtype())
    server_status = rpclib.check_server_exception()
    if server_status["status"]:
        raise RuntimeError(server_status["info"])
def generate_rules(self, confidence=0.8):
    """
    NAME: generate_rules

    Obtains a new model id, calls the fpgrowth_fpr RPC with this model's
    id, the new id and `confidence` (DEFAULT: 0.8), and returns an
    Fp_rules object wrapping the new id.
    Raises RuntimeError when the server reports an exception.
    """
    rules_mid = ModelID.get()
    (srv_host, srv_port) = FrovedisServer.getServerInstance()
    rpclib.fpgrowth_fpr(srv_host, srv_port, self.__mid, rules_mid,
                        confidence)
    server_status = rpclib.check_server_exception()
    if server_status["status"]:
        raise RuntimeError(server_status["info"])
    return Fp_rules(rules_mid)
def transpose(self):
    """
    transpose

    Calls the transpose_frovedis_dense_matrix RPC for this matrix and
    wraps the result in a new FrovedisDenseMatrix with the same mtype and
    dtype as this one.
    Raises RuntimeError when the server reports an exception.
    """
    (srv_host, srv_port) = FrovedisServer.getServerInstance()
    transposed = rpclib.transpose_frovedis_dense_matrix(
        srv_host, srv_port,
        self.get(),
        self.__mtype.encode('ascii'),
        self.get_dtype())
    server_status = rpclib.check_server_exception()
    if server_status["status"]:
        raise RuntimeError(server_status["info"])
    return FrovedisDenseMatrix(mtype=self.__mtype, mat=transposed,
                               dtype=self.__dtype)
def inv(self):
    """
    inv

    Returns the inverse of self as a new FrovedisBlockcyclicMatrix.
    A new matrix is first constructed from self; pgetrf is called on it
    and its ipiv result is passed to pgetri, which operates on that new
    matrix.
    Raises RuntimeError when the server reports an exception.
    """
    inverse = FrovedisBlockcyclicMatrix(mat=self)  # inverse = self
    (srv_host, srv_port) = FrovedisServer.getServerInstance()
    factorization = GetrfResult(
        rpclib.pgetrf(srv_host, srv_port, inverse.get(),
                      inverse.get_dtype()))
    # inverse = inv(inverse)
    rpclib.pgetri(srv_host, srv_port, inverse.get(), factorization.ipiv(),
                  inverse.get_dtype())
    server_status = rpclib.check_server_exception()
    if server_status["status"]:
        raise RuntimeError(server_status["info"])
    return inverse
def __mul__(self, mat):
    """
    Returns (self * mat) as a new FrovedisBlockcyclicMatrix.

    `mat` is converted with FrovedisBlockcyclicMatrix.asBCM(); the product
    is computed by the pgemm RPC with both flag arguments False and the
    scalar arguments 1.0 and 0.0.
    Raises TypeError when the two dtypes differ and RuntimeError when the
    server reports an exception.
    """
    rhs = FrovedisBlockcyclicMatrix.asBCM(mat)
    if self.get_dtype() != rhs.get_dtype():
        raise TypeError("mul: input matrix types are not same!")
    (srv_host, srv_port) = FrovedisServer.getServerInstance()
    product = rpclib.pgemm(srv_host, srv_port, self.get(), rhs.get(),
                           False, False, 1.0, 0.0, self.get_dtype())
    server_status = rpclib.check_server_exception()
    if server_status["status"]:
        raise RuntimeError(server_status["info"])
    result_dtype = TypeUtil.to_numpy_dtype(self.get_dtype())
    return FrovedisBlockcyclicMatrix(mat=product, dtype=result_dtype)
def to_frovedis_rowmatrix(self):
    """
    to_frovedis_rowmatrix

    Returns self when the matrix type is already 'R'; otherwise calls the
    get_frovedis_rowmatrix RPC and wraps the result in a new
    FrovedisDenseMatrix of type 'R' with the same dtype.
    Raises RuntimeError when the server reports an exception.
    """
    if self.__mtype == 'R':
        return self
    (srv_host, srv_port) = FrovedisServer.getServerInstance()
    row_mat = rpclib.get_frovedis_rowmatrix(
        srv_host, srv_port,
        self.get(),
        self.numRows(),
        self.numCols(),
        self.__mtype.encode('ascii'),
        self.get_dtype())
    server_status = rpclib.check_server_exception()
    if server_status["status"]:
        raise RuntimeError(server_status["info"])
    return FrovedisDenseMatrix(mtype='R', mat=row_mat, dtype=self.__dtype)
def load(self, fname):
    """
    NAME: load

    Releases the current model, takes a new model id and calls the
    load_frovedis_model RPC with the ASCII-encoded `fname`, this model's
    kind and DTYPE.DOUBLE. Returns self.
    Raises RuntimeError when the server reports an exception.
    """
    self.release()
    self.__mid = ModelID.get()
    (srv_host, srv_port) = FrovedisServer.getServerInstance()
    rpclib.load_frovedis_model(
        srv_host, srv_port,
        self.__mid,
        self.__mkind,
        DTYPE.DOUBLE,
        fname.encode('ascii'))
    server_status = rpclib.check_server_exception()
    if server_status["status"]:
        raise RuntimeError(server_status["info"])
    return self
def __sub__(self, mat):
    """
    Returns (self - mat) as a new FrovedisBlockcyclicMatrix.

    A new matrix is constructed from `mat` and updated by the pgeadd RPC,
    so the result is held by that new matrix.
    Raises TypeError when the two dtypes differ and RuntimeError when the
    server reports an exception.
    """
    result = FrovedisBlockcyclicMatrix(mat=mat)  # copy of the right operand
    if self.get_dtype() != result.get_dtype():
        raise TypeError("sub: input matrix types are not same!")
    # geadd performs B = al*A + be*B; with A = self, B = result, al = 1.0
    # and be = -1.0 this leaves (self - mat) in result
    (srv_host, srv_port) = FrovedisServer.getServerInstance()
    rpclib.pgeadd(srv_host, srv_port, self.get(), result.get(),
                  False, 1.0, -1.0, self.get_dtype())
    server_status = rpclib.check_server_exception()
    if server_status["status"]:
        raise RuntimeError(server_status["info"])
    return result
def release(self):
    """
    release

    Does nothing when no server-side data is held. Otherwise calls the
    release_frovedis_dense_matrix RPC and then clears the stored data
    handle and resets the stored row/column counts to 0.
    Raises RuntimeError when the server reports an exception; in that case
    the local state is left untouched.
    """
    if self.__fdata is None:
        return
    (srv_host, srv_port) = FrovedisServer.getServerInstance()
    rpclib.release_frovedis_dense_matrix(
        srv_host, srv_port,
        self.get(),
        self.__mtype.encode('ascii'),
        self.get_dtype())
    server_status = rpclib.check_server_exception()
    if server_status["status"]:
        raise RuntimeError(server_status["info"])
    self.__fdata = None
    self.__num_row = 0
    self.__num_col = 0
def fittedvalues(self):
    """
    DESC: fittedvalues getter; the values are fetched from the server on
          first access and cached in self._fittedvalues afterwards
    RETURNS: TYPE: ndarray of dtype float64 holding the fitted values of
             the model
    RAISES: RuntimeError when the server reports an exception
    """
    if self._fittedvalues is not None:
        return self._fittedvalues
    (srv_host, srv_port) = FrovedisServer.getServerInstance()
    fitted = rpclib.get_fitted_vector(srv_host, srv_port, self.__mid,
                                      self.__mkind, self.__mdtype)
    server_status = rpclib.check_server_exception()
    if server_status["status"]:
        raise RuntimeError(server_status["info"])
    self._fittedvalues = np.asarray(fitted, dtype=np.float64)
    return self._fittedvalues
def copy(self, mat):
    """
    copy (self = mat)

    Releases the data currently held, adopts the dtype of `mat` when this
    object has none, and copies `mat` on the server through the
    copy_frovedis_dense_matrix RPC.

    Returns the result of self.load_dummy() on the copied matrix, or None
    when `mat` holds no server-side data.
    Raises TypeError when the matrix types or dtypes differ and
    RuntimeError when the server reports an exception.
    """
    self.release()
    if self.__dtype is None:
        self.__dtype = mat.__dtype
    same_kind = (mat.__mtype == self.__mtype)
    if not same_kind or mat.__dtype != self.__dtype:
        raise TypeError("Incompatible types for copy operation")
    if mat.__fdata is None:
        return None
    (srv_host, srv_port) = FrovedisServer.getServerInstance()
    copied = rpclib.copy_frovedis_dense_matrix(
        srv_host, srv_port,
        mat.get(),
        mat.__mtype.encode('ascii'),
        mat.get_dtype())
    server_status = rpclib.check_server_exception()
    if server_status["status"]:
        raise RuntimeError(server_status["info"])
    return self.load_dummy(copied)
def load_binary(self, fname, dtype=None):
    """
    load_binary

    Releases the data currently held and calls the
    load_frovedis_dense_matrix RPC with the ASCII-encoded `fname` and the
    flag argument set to True.

    dtype -> optional; when given it replaces the dtype stored on this
             object. When no dtype is set at all, np.float32 is used.

    Returns the result of self.load_dummy() on the loaded matrix.
    Raises RuntimeError when the server reports an exception.
    """
    self.release()
    if dtype is not None:
        self.__dtype = dtype
    (srv_host, srv_port) = FrovedisServer.getServerInstance()
    if self.__dtype is None:
        self.__dtype = np.float32  # default type: float
    loaded = rpclib.load_frovedis_dense_matrix(
        srv_host, srv_port,
        fname.encode("ascii"),
        True,
        self.__mtype.encode('ascii'),
        self.get_dtype())
    server_status = rpclib.check_server_exception()
    if server_status["status"]:
        raise RuntimeError(server_status["info"])
    return self.load_dummy(loaded)
def fit(self, data):
    """
    NAME: fit

    Releases the current model, takes a new model id, converts `data`
    with the private __convert_to_df helper and calls the fpgrowth_trainer
    RPC with minSupport and verbose. Returns self.
    Raises ValueError for a negative minSupport and RuntimeError when the
    server reports an exception.
    """
    if self.minSupport < 0:
        raise ValueError("Negative minsupport factor!")
    self.release()
    self.__mid = ModelID.get()
    frov_df = self.__convert_to_df(data)
    (srv_host, srv_port) = FrovedisServer.getServerInstance()
    rpclib.fpgrowth_trainer(srv_host, srv_port, frov_df.get(), self.__mid,
                            self.minSupport, self.verbose)
    server_status = rpclib.check_server_exception()
    if server_status["status"]:
        raise RuntimeError(server_status["info"])
    return self
def predict(self, start=None, end=None, dynamic=False, **kwargs):
    """
    DESC: Perform in-sample prediction and out-of-sample forecasting
    PARAMS: start -> TYPE: int, DEFAULT: None (treated as 0), index from
                     which values are to be predicted
            end -> TYPE: int, DEFAULT: None (treated as the last index of
                   the data), index till which values are to be predicted
            dynamic -> DEFAULT: False, not supported: a truthy value
                       raises ValueError
            **kwargs -> (Unused)
    NOTE: A negative start or end is interpreted relative to the length
          of the data, as long as it stays within range; otherwise a
          KeyError is raised.
    RETURNS: TYPE: ndarray of dtype float64 holding the predicted values
    RAISES: KeyError, ValueError for invalid arguments and RuntimeError
            when the server reports an exception
    """
    if start is None:
        start = 0
    elif start < 0:
        if abs(start) > self._endog_len:
            raise KeyError("The `start` argument could not be matched "
                           "to a location related to the index of "
                           "the data.")
        start = self._endog_len + start

    if end is None:
        end = self._endog_len - 1
    elif end < 0:
        if abs(end) > self._endog_len:
            raise KeyError("The `end` argument could not be matched to "
                           "a location related to the index of the data.")
        end = self._endog_len + end

    if end < start:
        raise ValueError("Prediction must have `end` after `start`!")
    if dynamic:
        raise ValueError("Currently, ARIMA.predict() does not support "
                         "dynamic = True!")

    (srv_host, srv_port) = FrovedisServer.getServerInstance()
    predicted = rpclib.arima_predict(srv_host, srv_port, start, end,
                                     self.__mid, self.__mdtype)
    server_status = rpclib.check_server_exception()
    if server_status["status"]:
        raise RuntimeError(server_status["info"])
    return np.asarray(predicted, dtype=np.float64)
import os
import numpy as np
from frovedis.exrpc.server import FrovedisServer
from frovedis.matrix.dense import FrovedisBlockcyclicMatrix
from frovedis.matrix.wrapper import PBLAS

# Start a two-process Frovedis server; FROVEDIS_SERVER must be set.
FrovedisServer.initialize("mpirun -np 2 {}".format(os.environ['FROVEDIS_SERVER']))

# numpy matrices creation
x = np.matrix([[1],[2],[3],[4]], dtype=np.float64) # 4x1
y = np.matrix([[5],[6],[7],[8]], dtype=np.float64) # 4x1
m = np.matrix([[1,0,0,0],[0,1,0,0],[0,0,1,0],[0,0,0,1]],
              dtype=np.float64) # 4x4: eye(I)
n = np.matrix([[1,2,3,4],[5,6,7,8],[8,7,6,5],[4,3,2,1]],
              dtype=np.float64) # 4x4

# Creating Frovedis server side blockcyclic matrices from numpy matrices
bcx = FrovedisBlockcyclicMatrix(x) # blockcyclic vector (x)
bcy = FrovedisBlockcyclicMatrix(y) # blockcyclic vector (y)
bcm = FrovedisBlockcyclicMatrix(m) # blockcyclic matrix (m)
bcn = FrovedisBlockcyclicMatrix(n) # blockcyclic matrix (n)

# --- print original data
print ("x:")
print (x)
print ("y:")
print (y)
print ("m:")
print (m)
print ("n:")  # label now matches the matrix printed below
print (n)
estimator_name.append(estimator_nm) start_time = time.time() estimator.fit(x_train, y_train) train_time.append(round(time.time() - start_time, 4)) start_time = time.time() train_score.append(estimator.score(x_train, y_train)) test_score.append(estimator.score(x_test, y_test)) test_time.append(round(time.time() - start_time, 4)) #3.1 LinearRegression TARGET = "lnr" FrovedisServer.initialize("mpirun -np 8 " + os.environ["FROVEDIS_SERVER"]) f_est = fLNR() E_NM = TARGET + "_frovedis_" + frovedis.__version__ evaluate(f_est, E_NM, x_train, y_train, x_test, y_test) f_est.release() FrovedisServer.shut_down() s_est = sLNR() E_NM = TARGET + "_sklearn_" + sklearn.__version__ evaluate(s_est, E_NM, x_train, y_train, x_test, y_test) #3.2 SGDRegressor TARGET = "sgd" FrovedisServer.initialize("mpirun -np 8 " + os.environ["FROVEDIS_SERVER"]) f_est = fSGDReg(loss="squared_loss", penalty="l2", eta0=0.00001)
def __del__(self):
    """
    NAME: __del__

    Calls self.release() on destruction, but only while
    FrovedisServer.isUP() reports true.
    """
    if not FrovedisServer.isUP():
        return
    self.release()
# Test covariances shape
import sys
import numpy as np
from frovedis.exrpc.server import FrovedisServer
from frovedis.matrix.dense import FrovedisRowmajorMatrix
from frovedis.mllib.gmm import GaussianMixture

# initializing the Frovedis server
argvs = sys.argv
argc = len(argvs)
if (argc < 2):
    print(
        'Please give frovedis_server calling command as the first argument \n(e.g. "mpirun -np 2 /opt/nec/frovedis/ve/bin/frovedis_server")'
    )
    # sys.exit() instead of quit(): quit is a helper installed by the site
    # module for interactive use; the exit status is unchanged
    sys.exit()
FrovedisServer.initialize(argvs[1])

train_mat = np.loadtxt("./input/gmm_data.txt")

# creating gaussian mixture object
n_components = 2
try:
    gmm_model = GaussianMixture(n_components=n_components)
    # fitting the training matrix on gaussian mixture object
    gmm_model.fit(train_mat)
    cov = gmm_model.covariances_
except Exception as e:
    print("status=Exception: " + str(e))
    sys.exit(1)
def __to_numpy_data_inplace(self, data, is_ndarray=True):
    """
    Non-returning function that overwrites the input numpy matrix/ndarray
    with the converted server-side matrix.

    self.size needs to match data.size; the dtype of `data` selects the
    output type of the conversion. Nothing happens when this object holds
    no server-side data.

    Raises ValueError on a size mismatch, TypeError when either dtype is
    not one of int/long/float/double and RuntimeError when the server
    reports an exception.
    """
    if self.__fdata is None:
        return

    # doesn't copy; needed to get the flattened array A1
    data = np.asmatrix(data)
    if data.size != self.size:
        raise ValueError(
            "input matrix/ndarray size is different than self size!")

    (srv_host, srv_port) = FrovedisServer.getServerInstance()
    flat = data.A1  # flattened array of the numpy matrix
    n_elems = self.size
    src_type = self.get_dtype()
    dst_type = TypeUtil.to_id_dtype(data.dtype)

    # every supported (source, target) pair has an rpc function named
    # get_<source>_rowmajor_array_as_<target>_array
    type_names = {DTYPE.DOUBLE: "double", DTYPE.FLOAT: "float",
                  DTYPE.LONG: "long", DTYPE.INT: "int"}
    if src_type not in type_names or dst_type not in type_names:
        raise TypeError("to_numpy_matrix/array: \
                Supported dtypes are int/long/float/double only!")

    fetch = getattr(rpclib, "get_{0}_rowmajor_array_as_{1}_array".format(
        type_names[src_type], type_names[dst_type]))
    # the rpc function overwrites the flat data at C level
    fetch(srv_host, srv_port, self.get(), self.__mtype.encode('ascii'),
          flat, n_elems)

    server_status = rpclib.check_server_exception()
    if server_status["status"]:
        raise RuntimeError(server_status["info"])
    if is_ndarray:
        # only rebinds the local name; nothing is returned to the caller
        data = np.asarray(data)
import sys
import numpy as np
from frovedis.exrpc.server import FrovedisServer
from frovedis.linalg import eigsh

desc = "Testing eigsh() for int32 numpy array: "

# initializing the Frovedis server
argvs = sys.argv
argc = len(argvs)
if argc < 2:
    print(
        'Please give frovedis_server calling command as the first argument \n'
        '(e.g. "mpirun -np 2 /opt/nec/frovedis/ve/bin/frovedis_server")')
    # sys.exit() instead of quit(): quit is a helper installed by the site
    # module for interactive use; the exit status is unchanged
    sys.exit()
FrovedisServer.initialize(argvs[1])

# sample numpy array square symmetric dense data (6x6)
mat = np.asarray(
    [[2, -1, 0, 0, -1, 0],
     [-1, 3, -1, 0, -1, 0],
     [0, -1, 2, -1, 0, 0],
     [0, 0, -1, 3, -1, -1],
     [-1, -1, 0, -1, 3, 0],
     [0, 0, 0, -1, 0, 1]],
    dtype=np.int32)

try:
    eigen_vals, eigen_vecs = eigsh(mat, k=3)
    print(desc, "Passed")
except Exception:
    # Exception rather than a bare except, so Ctrl-C and SystemExit are
    # not reported as a test failure
    print(desc, "Failed")

FrovedisServer.shut_down()
def fit(self):
    """
    DESC: Fit (estimate) the parameters of the model
    RETURNS: TYPE: the estimator itself (self), with isfitted set to True
    RAISES: ValueError for invalid order/seasonal/solver settings, for
            too few samples or for non-univariate `endog`; TypeError when
            `endog` is a Frovedis matrix; RuntimeError when the server
            reports an exception
    """
    self.reset_metadata()

    # --- hyper-parameter validation
    if len(self.order) != 3:
        raise ValueError("`order` argument must be an iterable with "
                         "three elements.")
    if self.ar_lag < 1:
        raise ValueError("Terms in the AR order cannot be less than 1.")
    if self.diff_order < 0:
        raise ValueError("Cannot specify negative differencing.")
    if self.ma_lag < 0:
        raise ValueError("Terms in the MA order cannot be negative.")
    if self.seasonal is None:
        self.seasonal = 0
    elif self.seasonal < 0:
        raise ValueError("The seasonal differencing interval cannot "
                         "be negative, given: " + str(self.seasonal))
    if self.auto_arima is True and self.ma_lag <= 0:
        raise ValueError("Currently, auto_arima cannot start with a "
                         "moving average component having value less "
                         "than 1!")
    if self.solver == 'sag':
        self.solver = 'sgd'
    elif self.solver not in ['lapack', 'lbfgs', 'scalapack']:
        raise ValueError("Unknown solver: " + self.solver + " for time "
                         "series analysis!")

    # smallest number of samples accepted for the configured orders
    min_samples = self.ar_lag + self.diff_order + self.ma_lag \
                  + self.seasonal + 2

    # --- input validation and conversion to FrovedisDvector
    if isinstance(self.endog, FrovedisDvector):
        self.__mdtype = self.endog.get_dtype()
        shape = np.shape(self.endog.to_numpy_array())
        if shape[0] < min_samples:
            raise ValueError("Number of samples in input is too less "
                             "for time series analysis!")
        self._endog_len = shape[0]
    elif isinstance(self.endog, (FrovedisCRSMatrix, FrovedisDenseMatrix,
                                 FrovedisRowmajorMatrix,
                                 FrovedisColmajorMatrix)):
        raise TypeError("endog can only be FrovedisDvector, " +
                        "not {}".format(self.endog))
    else:
        shape = np.shape(self.endog)
        is_univariate = len(shape) == 1 or \
                        (len(shape) == 2 and shape[1] == 1)
        if not is_univariate:
            raise ValueError("Frovedis ARIMA models require univariate " +
                             "`endog`. Got shape {0}".format(shape))
        if shape[0] < min_samples:
            raise ValueError("Number of samples in input is too "
                             "less for time series analysis!")
        self.endog = np.ravel(self.endog)
        self._endog_len = shape[0]
        self.endog = FrovedisDvector().as_dvec(self.endog)
        self.__mdtype = self.endog.get_dtype()

    # --- server-side training
    self.__mkind = M_KIND.ARM
    (srv_host, srv_port) = FrovedisServer.getServerInstance()
    rpclib.arima_fit(srv_host, srv_port, self.endog.get(), self.ar_lag,
                     self.diff_order, self.ma_lag, self.seasonal,
                     self.auto_arima, str_encode(self.solver),
                     self.verbose, self.__mid, self.__mdtype)
    server_status = rpclib.check_server_exception()
    if server_status["status"]:
        raise RuntimeError(server_status["info"])
    self.isfitted = True
    return self