def __init__(self, rating_path, product_path, customer_path,
             checkpoint_dir='checkpoints/recommendation',
             checkpoint_name='recommendation_candidate_model.ckpt',
             load_checkpoint=True):
    """
    Read the three input csv files, prepare the checkpoint location and
    build the candidate model.

    @param rating_path: csv file read into self.rating_df
    @param product_path: csv file read into self.product_series
    @param customer_path: csv file read into self.customer_series
    @param checkpoint_dir: directory passed to make_dir and used as the
        parent of the checkpoint file
    @param checkpoint_name: file name joined onto checkpoint_dir
    @param load_checkpoint: stored on the instance and forwarded to
        load_weight_and_compile
    @raise Exception: when any of the three paths fails check_is_csv
    """
    csv_paths = (rating_path, product_path, customer_path)
    # Paths are checked in argument order; the check stops at the first
    # path that is rejected.
    if not all(check_is_csv(path) for path in csv_paths):
        raise Exception('Input path {}, {}, {} is not csv'.format(*csv_paths))

    self.rating_df = pd.read_csv(rating_path)
    self.product_series = pd.read_csv(product_path)
    self.customer_series = pd.read_csv(customer_path)

    # Checkpoint location
    self.checkpoint_dir = checkpoint_dir
    self.checkpoint_name = checkpoint_name
    self.checkpoint_path = os.path.join(checkpoint_dir, checkpoint_name)
    self.load_checkpoint = load_checkpoint
    make_dir(checkpoint_dir)

    # NOTE(review): .size is the total number of cells, so these counts
    # equal the row counts only if each csv has a single column -- confirm.
    self.n_of_products = self.product_series.size
    self.n_of_customers = self.customer_series.size

    self.train_ds, self.test_ds = train_test_split(self.rating_df)

    self.candidate_model = matrix_factorization_model(
        self.n_of_products,
        self.n_of_customers,
    )
    self.load_weight_and_compile(load_checkpoint)

    # Model artifacts, empty until something fills them in
    self.history = None
    self.product_embeddings = None
    self.customer_embeddings = None
def __init__(self, data_dir, checkpoint_dir="./checkpoint/deepNN_recommendation"):
    """
    Build the data processor and the deep NN model, and set up the
    checkpoint location.

    @param data_dir: passed to RecommendationDataProcessor
    @param checkpoint_dir: directory passed to make_dir; the checkpoint
        file 'deep_nn_recommendation.ckpt' is placed under it
    """
    processor = RecommendationDataProcessor(data_dir)
    self.data_processor = processor

    # The model is sized from the dimensions exposed by the processor
    self.model = deep_nn_model(
        processor.product_feature_dim,
        processor.customer_feature_dim,
        processor.features_depth,
    )

    self.checkpoint_dir = checkpoint_dir
    make_dir(checkpoint_dir)
    self.checkpoint_path = os.path.join(
        checkpoint_dir, 'deep_nn_recommendation.ckpt')
def compile_and_fit(
        model: tf.keras.Model,
        window: WindowGenerator,
        model_name: str,
        patience=cfg.EARLY_STOPPING['patience'],
):
    """
    Compile the model (Adam optimizer, mean squared error loss, mean
    absolute error metric) and fit it on the window's datasets.

    @param model: keras model to compile and train
    @param window: provides window.train for fitting and window.val for
        validation
    @param model_name: sub-directory of cfg.CHECKPOINT_PATH that holds this
        model's checkpoints
    @param patience: patience of the EarlyStopping callback; only used when
        cfg.EARLY_STOPPING['enabled'] is True
    @return: the value returned by model.fit
    """
    ckpt_dir = os.path.join(cfg.CHECKPOINT_PATH, model_name)
    make_dir(ckpt_dir)

    if not is_dir_empty(ckpt_dir):
        # presumably restores weights saved by an earlier run -- confirm
        # against load_weight
        load_weight(model, ckpt_dir)

    # Weights-only checkpoints; the file name carries the epoch number
    callbacks = [
        tf.keras.callbacks.ModelCheckpoint(
            os.path.join(ckpt_dir, '{epoch:04d}.ckpt'),
            save_weights_only=True,
            verbose=1,
        ),
    ]

    if cfg.EARLY_STOPPING['enabled'] is True:
        callbacks.append(
            tf.keras.callbacks.EarlyStopping(
                monitor='val_loss',
                patience=patience,
                mode='min',
            )
        )

    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=tf.losses.MeanSquaredError(),
        metrics=[tf.metrics.MeanAbsoluteError()],
    )

    return model.fit(
        window.train,
        epochs=cfg.MAX_EPOCH,
        validation_data=window.val,
        callbacks=callbacks,
        verbose=2,
    )
def visualize_embedding(self):
    """
    Save the customer embeddings as a checkpoint under 'logs/embeddings'
    and write a projector config that references the saved tensor.

    Calls self._set_embeddings() first and then reads
    self.customer_embeddings. Only the customer embeddings are exported;
    the product embeddings are not written.
    """
    log_dir = 'logs/embeddings'
    embedding_name = 'customer_embeddings'
    make_dir(log_dir)

    self._set_embeddings()
    customer_embeddings_tensor = tf.Variable(self.customer_embeddings)

    # The keyword passed to Checkpoint is the name the tensor is stored
    # under, so it is taken from embedding_name to stay in sync with the
    # tensor_name set below.
    checkpoint = tf.train.Checkpoint(
        **{embedding_name: customer_embeddings_tensor})
    checkpoint.save(os.path.join(log_dir, '{}.ckpt'.format(embedding_name)))

    config = projector.ProjectorConfig()
    embedding = config.embeddings.add()
    embedding.tensor_name = '{}/.ATTRIBUTES/VARIABLE_VALUE'.format(
        embedding_name)
    projector.visualize_embeddings(log_dir, config)
# Boundaries of the generated period, parsed with ARROW_DATE_TIME_FORMAT
START_DATE_STR = '2017-01-01 00:00:00'
END_DATE_STR = '2020-01-01 00:00:00'
start_date_obj = arrow.get(START_DATE_STR, ARROW_DATE_TIME_FORMAT)
end_date_obj = arrow.get(END_DATE_STR, ARROW_DATE_TIME_FORMAT)
# Length of the period in hours (total seconds / 3600)
diff_date = end_date_obj - start_date_obj
diff_hours = diff_date.total_seconds() / 3600
# Output location: data.csv inside OUTPUT_DIR
OUTPUT_DIR = '../output'
OUTPUT_PATH = os.path.join(OUTPUT_DIR, 'data.csv')
make_dir(OUTPUT_DIR)
obj = {}
# Just random pattern for every 50 time steps
TIME_STEPS = 50
# NOTE(review): 50 is hard-coded here instead of using TIME_STEPS -- keep
# the two in sync if either changes.
time_steps_range = np.arange(50)
# Steps below 10 take step**3; the remaining steps take (step - 9)**2
random_pattern = np.where(time_steps_range < 10, time_steps_range**3, (time_steps_range - 9)**2)
# add seasonality to dataset according to input_hour
def add_seasonality(input_hour):
    # Add non-stationary property to data
    input_hour = 0 if random.choice([0, 1, 2]) == 0 else input_hour
    # 50 is equal to time steps declared above