def transform(self, X: dt.Frame):
    """Predict ``yhat`` for every row of ``X`` using the per-group models.

    The columns listed in ``self.tgc`` are extracted from ``X`` and converted
    to pandas, the time column is renamed to ``ds``, and the rows are split by
    the remaining (non-time) group columns.  Each group is scored with the
    model stored in ``self.models`` under its group hash.  Groups that have no
    entry in ``self.models``, or whose entry is ``None``, are filled with
    ``self.nan_value``.

    Args:
        X: Frame containing at least the columns in ``self.tgc``; it must
            support ``X[:, columns].to_pandas()``.

    Returns:
        A ``pandas.Series`` named ``yhat`` holding one value per input row,
        indexed and sorted by the row index of the pandas conversion of ``X``.
        Every group contributes a Series, so groups with and without a model
        concatenate into a single column.  An empty Series is returned when
        grouping produces no groups.
    """
    frame = X[:, self.tgc].to_pandas()
    frame.rename(columns={self.time_column: "ds"}, inplace=True)

    tgc_wo_time = list(np.setdiff1d(self.tgc, self.time_column))
    if tgc_wo_time:
        groups = frame.groupby(tgc_wo_time)
    else:
        # No grouping columns: score the whole frame as one group.
        groups = [([None], frame)]

    preds = []
    for key, grp in groups:
        # NOTE(review): this hash presumably has to match the way keys were
        # built when self.models was populated - verify before changing it.
        key = key if isinstance(key, list) else [key]
        grp_hash = '_'.join(map(str, key))
        print("prophet - transforming data of shape: %s for group: %s" % (str(grp.shape), grp_hash))

        # A missing entry (unseen group) and a None entry (invalid model)
        # are handled identically: fill with nan_value.
        model = self.models[grp_hash] if grp_hash in self.models else None

        if model is None:
            yhat = pd.Series(
                np.full(grp.shape[0], self.nan_value),
                index=grp.index,
                name='yhat',
            )
        else:
            # Per the original author's notes, the model returns predictions
            # ordered by time, and a date may repeat inside a group.  Collapse
            # to one prediction per date (max; repeated dates carry the same
            # value), then map each input date to its prediction so the
            # output follows the input row order.
            by_date = model.predict(grp)[['ds', 'yhat']].groupby('ds').max()
            yhat = pd.to_datetime(grp['ds']).map(by_date['yhat']).rename('yhat')

        preds.append(yhat)

    if not preds:
        # Grouping an empty frame yields no groups; pd.concat would raise.
        return pd.Series([], name='yhat', dtype='float64')

    return pd.concat(preds, axis=0).sort_index()
CREATE TABLE test.some_table ( id INTEGER NOT NULL, char1 VARCHAR(8) NOT NULL, char2 VARCHAR(8) NOT NULL, num1 INTEGER NOT NULL, num2 INTEGER NOT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; """) # With ~ 25% rows changed: DF_NEW = DF.copy() DF_NEW[f.id < NROW / 2, 4] = 1 with mysql.Database(URL) as db: db.send_data(DF, 'some_table', mode='truncate') time.sleep(200) with Stopwatch('Send with update'): db.send_data(DF_NEW, 'some_table', 'update') with mysql.Database(URL) as db: db.send_data(DF, 'some_table', mode='truncate') time.sleep(200) with Stopwatch('Send update diffs: 25%'): db.send_data(DF_NEW, 'some_table', 'update_diffs') # With ~5% changes