## combining the genre tags and the user tags
movies2_df = movies2_df.merge(grouped_df, on='movieId', how='left')

# The left merge can leave 'tag' missing (NaN) for some rows. Fill those with
# an empty string *before* concatenating. The previous approach stringified
# NaN to 'nan' and then deleted every 'nan' substring, which also mangled real
# words that merely contain it (e.g. 'Conan' -> 'Co', 'financial' -> 'fiial').
user_tags = movies2_df['tag'].fillna('').astype(str)

# strip() removes the leading space left behind when a row has no user tags.
movies2_df['tag'] = (user_tags + ' ' + movies2_df['genres']).str.strip()

# Explicit copy so downstream processing never writes through to movies2_df.
tags_grouped_df = movies2_df[['movieId', 'tag']].copy()
print(tags_grouped_df.head())
print(tags_grouped_df.shape)

## calculating the TFIDF matrix
tfidf_df = Preprocess.createTFIDFMatrix(tags_grouped_df)
print(tfidf_df.shape)

## dumping the tfidf matrix
Util.saveObj(tfidf_df, 'tfidf_df')

# ## loading the TFIDF matrix
# tfidf_df = Util.loadObj('tfidf_df')
# print(tfidf_df.shape)

## loading the reduced TFIDF matrix
# NOTE(review): 'tfidf_reduced_df' is not produced by this script; it must
# already have been saved by a separate step before this line runs.
tfidf_reduced_df = Util.loadObj('tfidf_reduced_df')
print(tfidf_reduced_df.shape)

## creating vector df with spacy sentence vector
vector_df = createSentenceVector(imdb_df)
print(vector_df.shape)

## dumping the vector df
Util.saveObj(vector_df, 'vector_df')
self.BottleNeckDense(encoder_out_2)) decoder_out_1 = self.dropout_layer(self.DecoderDense1(bottleneck_out)) decoder_out_2 = self.dropout_layer(self.DecoderDense2(decoder_out_1)) final_out = self.dropout_layer(self.FinalDense(decoder_out_2)) return final_out NUM_EPOCHS = 100 BATCH_SIZE = 64 tfidf_matrix = Util.loadObj('tfidf_df') X = tfidf_matrix.to_numpy() features = X.shape[1] model = AutoEncoder(features) optimizer = keras.optimizers.Adam(lr=0.000003) loss = lambda x, x_hat: tf.reduce_sum(keras.losses.mean_squared_error( x, x_hat)) model.compile(loss=loss, optimizer=optimizer, metrics=['mse']) model.fit(x=X, y=X, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS) reduced = model.BottleNeckDense(model.EncoderDense2(model.EncoderDense1(X))) reduced_np = reduced.numpy() indices = tfidf_matrix.index.tolist() tfidf_reduced_df = pd.DataFrame(reduced_np) tfidf_reduced_df['movieId'] = indices Util.saveObj(tfidf_reduced_df, 'tfidf_reduced_df') print(tfidf_reduced_df['movieId'])