def main():
    """Load a saved summarizer and print 20 randomly sampled predictions.

    Reads ``dcr Man_Cleaned.csv`` from the current working directory,
    restores the ``Seq2SeqGloVeSummarizerV2`` configuration and weights from
    ``./models``, loads GloVe data from ``./very_large_data``, and prints the
    generated text next to the reference text for 20 rows picked by a seeded
    random permutation.
    """
    np.random.seed(42)  # fixed seed so the same 20 rows are sampled each run
    very_large_data_dir_path = './very_large_data'
    model_dir_path = './models'

    print('loading csv file ...')
    df = pd.read_csv('dcr Man_Cleaned.csv')
    # NOTE(review): the model input and the reference are both the 'Title'
    # column, so each title is summarized and then compared with itself.
    # 'Article' and 'Joined' were previously tried as the input column --
    # confirm which one is intended.
    X = df['Title']
    Y = df['Title']

    # The config file holds a single Python object (unwrapped by .item()).
    # NumPy >= 1.16.3 defaults to allow_pickle=False and raises ValueError on
    # such files, so it has to be enabled explicitly.
    # SECURITY: unpickling executes arbitrary code -- only load config files
    # produced by your own training run.
    config_file_path = Seq2SeqGloVeSummarizerV2.get_config_file_path(
        model_dir_path=model_dir_path)
    config = np.load(config_file_path, allow_pickle=True).item()

    summarizer = Seq2SeqGloVeSummarizerV2(config)
    summarizer.load_glove(very_large_data_dir_path)
    summarizer.load_weights(
        weight_file_path=Seq2SeqGloVeSummarizerV2.get_weight_file_path(
            model_dir_path=model_dir_path))

    print('start predicting ...')
    for i in np.random.permutation(np.arange(len(X)))[0:20]:
        x = X[i]
        actual_headline = Y[i]
        headline = summarizer.summarize(x)

        print('Generated Headline: ', headline)
        print('Original Headline: ', actual_headline)
def main():
    """Load a saved summarizer and print 20 randomly sampled predictions.

    Reads ``./data/Papers.csv``, using the ``PaperText`` column as model
    input and the ``Abstract`` column as the reference text. The
    ``Seq2SeqGloVeSummarizerV2`` configuration and weights are restored from
    ``./models`` and GloVe data is loaded from ``./very_large_data``. For 20
    rows picked by a seeded random permutation, the generated text is printed
    next to the reference text.
    """
    np.random.seed(42)  # fixed seed so the same 20 rows are sampled each run
    data_dir_path = './data'
    very_large_data_dir_path = './very_large_data'
    model_dir_path = './models'

    print('loading csv file ...')
    df = pd.read_csv(data_dir_path + "/Papers.csv")
    Y = df['Abstract']
    X = df['PaperText']

    # The config file holds a single Python object (unwrapped by .item()).
    # NumPy >= 1.16.3 defaults to allow_pickle=False and raises ValueError on
    # such files, so it has to be enabled explicitly.
    # SECURITY: unpickling executes arbitrary code -- only load config files
    # produced by your own training run.
    config_file_path = Seq2SeqGloVeSummarizerV2.get_config_file_path(
        model_dir_path=model_dir_path)
    config = np.load(config_file_path, allow_pickle=True).item()

    summarizer = Seq2SeqGloVeSummarizerV2(config)
    summarizer.load_glove(very_large_data_dir_path)
    summarizer.load_weights(
        weight_file_path=Seq2SeqGloVeSummarizerV2.get_weight_file_path(
            model_dir_path=model_dir_path))

    print('start predicting ...')
    for i in np.random.permutation(np.arange(len(X)))[0:20]:
        x = X[i]
        actual_headline = Y[i]
        headline = summarizer.summarize(x)

        print('Generated Headline: ', headline)
        print('Original Headline: ', actual_headline)
# Example #3
# 0
def main():
    """Fit the GloVe seq2seq summarizer on the fake-or-real-news data set.

    Reads ``./data/fake_or_real_news.csv``, derives the model configuration
    from the ``text`` (input) and ``title`` (target) columns via
    ``fit_text``, optionally restores existing weights when
    ``LOAD_EXISTING_WEIGHTS`` is truthy, trains for 20 epochs on an 80/20
    split, and hands the returned history to ``plot_and_save_history`` with a
    target path under ``./reports``.
    """
    np.random.seed(42)

    data_dir_path = './data'
    very_large_data_dir_path = './very_large_data'
    report_dir_path = './reports'
    model_dir_path = './models'

    print('loading csv file ...')
    csv_file_path = data_dir_path + "/fake_or_real_news.csv"
    frame = pd.read_csv(csv_file_path)

    print('extract configuration from input texts ...')
    titles = frame.title
    texts = frame['text']
    config = fit_text(texts, titles)

    print('configuration extracted from input texts ...')

    summarizer = Seq2SeqGloVeSummarizerV2(config)
    summarizer.load_glove(very_large_data_dir_path)

    if LOAD_EXISTING_WEIGHTS:
        weight_file_path = Seq2SeqGloVeSummarizerV2.get_weight_file_path(
            model_dir_path=model_dir_path)
        summarizer.load_weights(weight_file_path=weight_file_path)

    split = train_test_split(texts, titles, test_size=0.2, random_state=42)
    train_texts, test_texts, train_titles, test_titles = split

    print('demo size: ', len(train_texts))
    print('testing size: ', len(test_texts))

    print('start fitting ...')
    history = summarizer.fit(
        train_texts,
        train_titles,
        test_texts,
        test_titles,
        epochs=20,
        batch_size=16,
    )

    # Runs that continue from existing weights get a version suffix on the
    # plot file name.
    plot_path_stem = report_dir_path + '/' + Seq2SeqGloVeSummarizerV2.model_name
    if LOAD_EXISTING_WEIGHTS:
        history_plot_file_path = (
            plot_path_stem + '-history-v' + str(summarizer.version) + '.png')
    else:
        history_plot_file_path = plot_path_stem + '-history.png'

    plot_and_save_history(
        history,
        summarizer.model_name,
        history_plot_file_path,
        metrics={'loss', 'acc'},
    )