preprocessor=get_col('title')))
            ])

        vectorizer.fit(df_reduced.to_dict('records'))
        df_text_processed = vectorizer.transform(df_reduced.to_dict('records'))
        vocab = vectorizer.get_feature_names()

with timer('Lda and nmf on title / description'):
    # Topic features are cached on disk so the (slow) decompositions only run
    # once.  The caches are written with ndarray.dump(), i.e. as pickles, so
    # np.load() must be told to accept pickled data: since NumPy 1.16.3 the
    # default is allow_pickle=False and loading would raise ValueError.
    # NOTE: unpickling executes arbitrary code -- only load cache files that
    # this script produced itself.
    desc_nmf_cache = 'desc_top_50_nmf.pkl'
    title_lda_cache = 'title_top_50_lda.pkl'

    # Description features: NMF over the first `desc_features` columns of the
    # vectorized text matrix.
    if os.path.exists(desc_nmf_cache):
        desc_top_50 = np.load(desc_nmf_cache, allow_pickle=True)
        # Keep at most the first 50 components of the cached matrix.
        desc_top_50 = desc_top_50[:, :50]
        print('desc nmf features loaded')
    else:
        print('fit desc nmf features . . . ')
        desc_top_50 = NMF(n_components=50, max_iter=1000).fit_transform(
            df_text_processed[:, :desc_features])
        desc_top_50.dump(desc_nmf_cache)

    # Title features: online LDA over the remaining columns
    # (from `desc_features` onwards).
    if os.path.exists(title_lda_cache):
        title_top_50 = np.load(title_lda_cache, allow_pickle=True)
        print('title lda features loaded')
    else:
        print('fit title lda features . . .')
        title_top_50 = LDA(n_components=50, learning_method='online').fit_transform(
            df_text_processed[:, desc_features:])
        title_top_50.dump(title_lda_cache)

with timer('Add title lda and desc nmf to data'):
    # One column name per topic component, numbered 1..50.
    desc_nmf_features = ['desc_nmf_' + str(i) for i in range(1, 51)]
    title_lda_features = ['title_lda_' + str(i) for i in range(1, 51)]

    # Snapshot of the column names before the topic features are appended.
    df_reduced_columns_before = df_reduced.columns.tolist()

    # Pass the flat list of names as `columns`.  Wrapping it in another list
    # (columns=[names]) makes pandas build a one-level MultiIndex, so the
    # concatenated frame would end up with tuple labels like ('desc_nmf_1',)
    # instead of plain string column names.
    # NOTE(review): concat(axis=1) aligns rows on the index and the new frame
    # gets a default RangeIndex -- this assumes df_reduced also has a default
    # 0..n-1 index; confirm against where df_reduced is built.
    df_reduced = pd.concat(
        [df_reduced, pd.DataFrame(desc_top_50, columns=desc_nmf_features)],
        axis=1,
    )