Code example #1
def load_ESIM_model(folder_name, file_name='best', path_override=None):
    """Instantiates and loads ESIM model"""
    hp = HyperParams()

    pretrained_emb = load_pretrained_emb(
        os.path.join(config.saved_ESIM_model_path, pretrained_emb_file))
    ESIM_model = ESIM(hp.dim_word, hp.n_classes, hp.n_words, hp.dim_word,
                      pretrained_emb).to(DEVICE)

    if path_override is not None:
        data.load_model(ESIM_model, os.path.join(path_override, file_name))

    else:
        if file_name == 'best':
            file_name = 'ESIM_{:.3f}.pt'.format(
                data.get_top_n_models(os.path.join(
                    config.saved_ESIM_model_path, folder_name),
                                      'ESIM',
                                      n=1,
                                      descending=True)[0])

        data.load_model(
            ESIM_model,
            os.path.join(config.saved_ESIM_model_path, folder_name, file_name))

    return ESIM_model
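
Several of these snippets resolve file_name='best' by asking data.get_top_n_models for the best score embedded in checkpoint filenames such as ESIM_0.873.pt or actor_195.0.pt. The helper itself does not appear in any snippet; the sketch below is only a guess at its contract, assuming the <prefix>_<score>.pt naming convention implied above (the default for descending is also an assumption):

import os
import re


def get_top_n_models(folder, prefix, n=1, descending=True):
    """Hypothetical sketch: collect the scores embedded in
    '<prefix>_<score>.pt' checkpoint names and return the top n."""
    pattern = re.compile(r'^%s_(-?\d+(?:\.\d+)?)\.pt$' % re.escape(prefix))
    scores = [
        float(m.group(1)) for m in map(pattern.match, os.listdir(folder)) if m
    ]
    return sorted(scores, reverse=descending)[:n]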
Code example #2
def load_CartPole_models():
    """Instantiate supervised model and load saved weights"""
    supervised_encoder = None
    supervised_model = GeneralDecoderRNN(input_size=4,
                                         hidden_size=128,
                                         output_size=2).to(DEVICE)

    data.load_model(
        supervised_model,
        os.path.join(config.saved_RL_model_path, args.CARTPOLE_DECODER))

    return supervised_encoder, supervised_model
Code example #3
def load_FrozenLake_models():
    """Instantiate supervised model and load saved weights (CNN model)"""
    supervised_encoder = CNNStateEncoder(128).to(DEVICE)
    supervised_model = GeneralDecoderRNN(input_size=2,
                                         hidden_size=128,
                                         output_size=4).to(DEVICE)

    data.load_model(
        supervised_encoder,
        os.path.join(config.saved_RL_model_path, args.FROZENLAKE_ENCODER))
    data.load_model(
        supervised_model,
        os.path.join(config.saved_RL_model_path, args.FROZENLAKE_DECODER))

    return supervised_encoder, supervised_model
Code example #4
def load_pretrained_critic(env_name):
    """Instantiate RL critic and load trained weights"""
    # `critic_model` is assumed to be defined at module scope;
    # data.load_model populates its weights in place.
    if env_name == 'CartPole':
        data.load_model(
            critic_model,
            os.path.join(config.saved_RL_model_path, env_name,
                         args.CP_PRETRAINED_CRITIC))
    elif env_name == 'FrozenLake':
        data.load_model(
            critic_model,
            os.path.join(config.saved_RL_model_path, env_name,
                         args.FL_PRETRAINED_CRITIC))
    else:
        raise ValueError(
            "Please select one of the following environments: "
            "['FrozenLake', 'CartPole']")
Code example #5
def main(model_dump_file):
    model = data.load_model(model_dump_file)
    testing_set = data.preprocess_testing_set()
    fpath = os.path.join("submit", "submit.csv")
    with open(fpath, 'w') as f:
        f.write("ImageId,Label")
        for i in range(len(testing_set)):
            nn.forward(model, testing_set[i], is_test_time=True)
            f.write("\n%d,%d" % (i + 1, np.argmax(model['score'])))
Code example #6
def load_RL_models(folder_name,
                   actor_file_name='best',
                   critic_file_name='best'):
    """Instantiate RL models and load trained weights"""
    actor_model = RLActor(input_size=env.state_space,
                          hidden_size=128,
                          output_size=env.action_space).to(DEVICE)

    if actor_file_name == 'best':
        actor_file_name = 'actor_{:.1f}.pt'.format(
            data.get_top_n_models(os.path.join(config.saved_RL_model_path,
                                               args.env_name, folder_name),
                                  'actor',
                                  n=1)[0])

    data.load_model(
        actor_model,
        os.path.join(config.saved_RL_model_path, args.env_name, folder_name,
                     actor_file_name))

    if args.init_critic:
        critic_model = RLCritic(input_size=(env.state_space +
                                            actor_model.hidden_size),
                                hidden_size=128).to(DEVICE)

        if critic_file_name == 'best':
            critic_file_name = 'critic_{:.1f}.pt'.format(
                data.get_top_n_models(os.path.join(config.saved_RL_model_path,
                                                   args.env_name, folder_name),
                                      'critic',
                                      n=1)[0])

        data.load_model(
            critic_model,
            os.path.join(config.saved_RL_model_path, args.env_name,
                         folder_name, critic_file_name))

        return actor_model, critic_model

    else:
        return actor_model, None
Code example #7
def main(epoch, rate, reg, decay, continue_at, batch_size):
    input_size = 784
    hidden_layer_size = 200
    output_size = 10
    if (continue_at and os.path.exists(continue_at)):
        model = data.load_model(continue_at)
    else:
        model = nn.init_model(input_size, hidden_layer_size, output_size, reg)

    # epoch = 20;
    # base_learning_rate = 1e-5;
    base_learning_rate = rate
    decay_schedule = nn.decay_schedule(epoch, decay)
    learning_rate = base_learning_rate * decay_schedule
    print(learning_rate)

    precision_curve = plot.plot()
    loss_curve = plot.plot()
    for ep in range(epoch):
        lr = learning_rate[ep]
        training_set = data.preprocess_training_set()
        print("training epoch %d/%d with learning rate %g" %
              (ep + 1, epoch, lr))
        batches = nn.sample_batches(training_set, batch_size)
        yes = 0
        cnt = 0
        for batch in batches:
            for item in batch:
                label, img = item
                nn.forward(model, img, is_test_time=False)
                prob = model['score'].copy()
                prob -= np.max(prob)
                prob = np.exp(prob) / np.sum(np.exp(prob))
                dz = prob.copy()
                dz[label] -= 1
                nn.sgd_backward(model, dz, batch_size)

                predict = np.argmax(model['score'])
                yes += (predict == label)
                cnt += 1
                if (cnt % 1000 == 0):
                    loss_curve.append(-np.log(prob[label]))
                    print("[%d/%d]: %0.2f%%" % (yes, cnt, yes / cnt * 100),
                          end='\r')
            nn.update_weights(model, lr)
        precision_curve.append(yes / cnt)
        precision_curve.save("precision.jpg")
        loss_curve.save("loss.jpg")
        data.save_model(model)
        print("\nmodel saved\n")
Code example #8
def main(model_dump_file, batch_size):
    model = data.load_model(model_dump_file)
    test = np.array(data.preprocess_testing_set())
    if (not os.path.exists("submit")):
        os.makedirs("submit")
    fpath = os.path.join("submit", "submit.csv")
    with open(fpath, 'w') as f:
        f.write("ImageId,Label")
        X = data.sample_batches_test(test, batch_size)
        cnt = 0
        for x in X:
            nn.forward(model, x, is_test_time=True)
            predictions = np.argmax(model['output'], axis=1).ravel()
            for i in predictions:
                cnt += 1
                if (cnt % 1000 == 0):
                    print("validating %d/%d" % (cnt, len(test)))
                f.write("\n%d,%d" % (cnt, i))
Code example #9
def load_supervised_models(folder_name, encoder_file_name='best', decoder_file_name='best'):
    """Initializes the encoder and decoder models and loads the weights from the trained models"""
    hidden_size = 256
    embedding_size = 256
    if encoder_file_name in encoder_models.pretrained_models_list:
        encoder = define_encoder(encoder_file_name)
        decoder = DecoderRNN(embedding_size=embedding_size, hidden_size=encoder.hidden_size,
                                 output_size=vocab_index.n_words).to(DEVICE)
        if decoder_file_name == 'best':
            decoder_file_name = 'decoder_{:.3f}.pt'.format(data.get_top_n_models(
                    os.path.join(config.saved_supervised_model_path, folder_name), 'decoder', n=1, descending=False)[0])
            
        data.load_model(decoder, os.path.join(config.saved_supervised_model_path, folder_name, decoder_file_name))
        return encoder, decoder
        
    else:
        if folder_name in ['Baseline_Test', 'VanillaEncoder', 'VanillaEncoder_Adam',
                           'VanillaEncoder_Switch', 'VanillaEncoder_TF']:
            pass
        
        elif folder_name in ['Attention_SGD', 'Attention_Adam']:
            decoder = AttnDecoderRNN(embedding_size=embedding_size, hidden_size=hidden_size,
                                     output_size=vocab_index.n_words).to(DEVICE)
        
        else:
            raise SystemExit('Please correct test folder name')
        
        # Fall back to the vanilla encoder/decoder if no branch above defined one
        if 'encoder' not in locals():
            encoder = encoder_models.EncoderRNN(input_size=vocab_index.n_words,
                                                embedding_size=embedding_size, hidden_size=hidden_size).to(DEVICE)
        if 'decoder' not in locals():
            decoder = DecoderRNN(embedding_size=embedding_size, hidden_size=hidden_size,
                                 output_size=vocab_index.n_words).to(DEVICE)
        
        if encoder_file_name == 'best':
            encoder_file_name = 'encoder_{:.3f}.pt'.format(data.get_top_n_models(
                    os.path.join(config.saved_supervised_model_path, folder_name), 'encoder', n=1, descending=False)[0])
        
        if decoder_file_name == 'best':
            decoder_file_name = 'decoder_{:.3f}.pt'.format(data.get_top_n_models(
                    os.path.join(config.saved_supervised_model_path, folder_name), 'decoder', n=1, descending=False)[0])
        
        data.load_model(encoder, os.path.join(config.saved_supervised_model_path, folder_name, encoder_file_name))
        data.load_model(decoder, os.path.join(config.saved_supervised_model_path, folder_name, decoder_file_name))
        
        return encoder, decoder
Code example #10
File: check.py  Project: blurgyy/kaggle
def main(epoch, rate, continue_at, batch_size, channels):
    train_size = 64
    learning_rate = rate
    if continue_at and os.path.exists(continue_at):
        model = data.load_model(continue_at)
    else:
        model = nn.init_model(*channels)
    train = data.preprocess_training_set(aug=False)[0:train_size]
    loss_curve = plot.plot()
    acc_curve = plot.plot()
    for ep in range(epoch):
        lr = learning_rate
        np.random.shuffle(train)
        X, Y = data.sample_batches_train(train, batch_size)
        yes, cnt, epoch_loss = 0, 0, 0
        stime = time.perf_counter()
        for i in range(len(X)):
            x, y = X[i], Y[i]
            nn.forward(model, x, is_test_time=False)
            dz, loss = nn.grad(model, y)
            nn.backward(model, dz)

            epoch_loss += loss
            prediction = np.argmax(model['output'], axis=1)
            score = prediction.reshape(-1, 1) == y.reshape(-1, 1)
            yes += np.sum(score)
            cnt += len(y)
            nn.adam_update(model, lr)
            # nn.momentum_update(model, lr)
            # nn.sgd_update(model, lr)
        etime = time.perf_counter()
        acc = yes / cnt * 100
        loss_curve.append(epoch_loss)
        acc_curve.append(acc)
        loss_curve.save("loss.png")
        acc_curve.save("acc.png")
        print("ep %d/%d, acc %0.2f%%, overall loss %.2f, time elapsed %.2f second(s)"
              % (ep + 1, epoch, acc, epoch_loss, etime - stime))
Code example #11
tf.flags.DEFINE_string("MODEL_NAME", None, "Name of a saved model")

FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()

if FLAGS.MODEL_NAME is None:
	raise ValueError("Model name not specified!")

tokens = FLAGS.MODEL_NAME.split("_")
dataset = tokens[0]
model = "_".join(["CNN"]+tokens[1:-2])

with tf.Session() as sess:

	neural_network = data.load_model(sess, model=model, model_name=FLAGS.MODEL_NAME)
	input_x = neural_network.input_x
	input_y = neural_network.input_y

	max_sentence_length = input_x.get_shape()[1]
	num_classes = input_y.get_shape()[1]

	class_dict=data.get_dataset_classes(dataset)

	print("Classes:")
	for c in class_dict:
		print(c)
	print()

	reverse_class_dict=dict()
	for k in class_dict.keys():
Code example #12
File: test_model.py  Project: nhthung/comp551
import time

from data import load_test, predictions_to_csv, load_model

if __name__ == '__main__':
    print('Loading data...')
    start = time.time()
    data_test = load_test()
    print(f'Time to load data: {time.time()-start}')

    # Replace with the name of the model you want to load
    pipeline = load_model('sgd_bigram_tfidf.joblib')

    # Generate predictions
    pred = pipeline.predict(data_test)

    # Name of csv to save predictions in
    predictions_to_csv(pred, 'sgd_bigram_tfidf.csv')
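
The data helpers imported here are not shown in the snippet. A minimal sketch of what they plausibly do, assuming load_model wraps joblib.load and predictions_to_csv writes a two-column submission file (the column names below are guesses):

import joblib
import pandas as pd


def load_model(filename):
    # joblib round-trips fitted sklearn pipelines such as sgd_bigram_tfidf
    return joblib.load(filename)


def predictions_to_csv(pred, filename):
    # hypothetical column names; adjust to the target submission format
    pd.DataFrame({'Id': range(len(pred)), 'Category': pred}).to_csv(
        filename, index=False)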
Code example #13
    supervised_encoder, supervised_decoder = load_supervised_models(
        args.SM_FOLDER,
        encoder_file_name=args.SM_ENCODER_FILE_NAME,
        decoder_file_name=args.SM_DECODER_FILE_NAME)
    actor_model, critic_model, actor_optimizer, critic_optimizer = init_actor_critic_models(
        supervised_decoder,
        init_critic=args.init_critic,
        transfer_weights=args.transfer_weights)

    # Create folder if saving models
    if args.save_models:
        saved_RL_model_results.init_folder(args, actor_model, critic_model)

    # Load pretrained critic
    if args.use_pretrained_critic and critic_model is not None:
        data.load_model(
            critic_model,
            os.path.join(config.saved_RL_model_path, args.env_name,
                         args.reward_function, args.PRETRAINED_CRITIC))
    # Optionally load trained models
    if args.load_models:
        actor_model, critic_model = data.load_RL_models(
            args.env_name,
            args.load_model_folder_name,
            actor_model,
            critic_model,
            actor_file_name='best',
            critic_file_name='best')

    # Instantiate teacher model if using policy distillation
    if args.use_policy_distillation:
        teacher_model = TeacherRNN(
            embedding_size=supervised_decoder.embedding_size,
Code example #14
def run_tester(single=False, file=None, model_str='transe'):
    if model_str == 'transe':
        model_path = './checkpoint/mapper.pt'
        if not os.path.exists(model_path):
            exit("Train mapper first!")

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = mapper().to(device)
        model.load_state_dict(torch.load(model_path))
        model.eval()

        filter = target_filter()

        if single:
            with open(file, 'r') as f:
                data = f.readlines()
                usr_r = data[0].strip()
                text = data[1].strip()

            nlp = SentenceTransformer('distilbert-base-nli-mean-tokens',
                                      device=device)
            this_x = nlp.encode(text)
            output = model(torch.from_numpy(this_x).float().to(device))
            output = output.detach().cpu().numpy()

            e, r = load_model(model_str)

            i_j = {}
            for j, ee in enumerate(e):
                d = score_transe(output, r[int(usr_r)], ee)
                i_j[j] = d

            sorted_d = {
                k: v
                for k, v in sorted(i_j.items(), key=lambda item: item[1])
            }

            entities = load_entities()
            for i in list(sorted_d)[:10]:
                print(entities[str(i)])

        else:
            hs, ts, rs, hs_name = load_open_word_test(device,
                                                      deep_filtered=True)

            r_t = relation_tail_train()

            count_1 = 0
            count_3 = 0
            count_10 = 0

            print('Testing...')

            for i in tqdm(range(len(hs))):
                this_desc_embedding = hs[i]

                output = model(
                    torch.from_numpy(this_desc_embedding).float().to(device))
                output = output.detach().cpu().numpy()

                e, r = load_model(model_str)

                gt = ts[i]
                this_relation = rs[i]
                this_name = hs_name[i]

                this_filter = []
                for tail in tuple(filter[this_name + ':' + this_relation]):
                    if tail != gt:
                        this_filter.append(tail)

                i_j = {}
                for j, ee in enumerate(e):
                    d = score_transe(output, r[int(this_relation)], ee)
                    if str(j) not in this_filter:
                        i_j[str(j)] = d

                sorted_d = {
                    k: v
                    for k, v in sorted(i_j.items(), key=lambda item: item[1])
                }

                if gt in list(sorted_d)[:10]:
                    count_10 += 1
                if gt in list(sorted_d)[:3]:
                    count_3 += 1
                if gt in list(sorted_d)[:1]:
                    count_1 += 1

            print('hits@1: %.1f' % (count_1 / len(hs) * 100))
            print('hits@3: %.1f' % (count_3 / len(hs) * 100))
            print('hits@10: %.1f' % (count_10 / len(hs) * 100))

    elif model_str == 'complex':
        model_r_path = './checkpoint/mapper_complex_r.pt'
        model_i_path = './checkpoint/mapper_complex_i.pt'
        if not (os.path.exists(model_r_path)
                and os.path.exists(model_i_path)):
            exit("Train mapper first!")

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model_r = mapper().to(device)
        model_i = mapper().to(device)
        model_r.load_state_dict(torch.load(model_r_path))
        model_i.load_state_dict(torch.load(model_i_path))
        model_r.eval()
        model_i.eval()

        if single:
            with open(file, 'r') as f:
                data = f.readlines()
                usr_r = int(data[0].strip())
                text = data[1].strip()

            nlp = SentenceTransformer(
                'bert-base-wikipedia-sections-mean-tokens', device=device)
            this_x = nlp.encode(text)
            output_r = model_r(torch.from_numpy(this_x).float().to(device))
            output_i = model_i(torch.from_numpy(this_x).float().to(device))
            output_r = output_r.detach().cpu().numpy()
            output_i = output_i.detach().cpu().numpy()
            e, r = load_model(model_str)
            e_r = e[0]
            e_i = e[1]
            r_r = r[0]
            r_i = r[1]

            i_j = {}
            for j in range(len(e_r)):
                d = score_complex(output_r, output_i, e_r[j], e_i[j],
                                  r_r[usr_r], r_i[usr_r])
                i_j[j] = d

            sorted_d = {
                k: v
                for k, v in sorted(i_j.items(), key=lambda item: item[1])
            }

            entities = load_entities()
            for i in list(sorted_d)[:10]:
                print(entities[str(i)])

        else:
            hs, ts, rs, _ = load_open_word_test(device, deep_filtered=True)

            count_1 = 0
            count_3 = 0
            count_10 = 0

            print('Testing...')

            filter = target_filter()

            for i in tqdm(range(len(hs))):
                this_desc_embedding = hs[i]

                output_r = model_r(
                    torch.from_numpy(this_desc_embedding).float().to(device))
                output_i = model_i(
                    torch.from_numpy(this_desc_embedding).float().to(device))
                output_r = output_r.detach().cpu().numpy()
                output_i = output_i.detach().cpu().numpy()

                e, r = load_model(model_str)
                e_r = e[0]
                e_i = e[1]
                r_r = r[0]
                r_i = r[1]

                gt = ts[i]
                this_relation = rs[i]
                this_filter = []
                for tail in tuple(filter[this_relation]):
                    if tail != gt:
                        this_filter.append(tail)

                i_j = {}
                for j in range(len(e_r)):
                    d = score_complex(output_r, output_i, e_r[j], e_i[j],
                                      r_r[int(this_relation)],
                                      r_i[int(this_relation)])
                    if str(j) not in this_filter:
                        i_j[str(j)] = d

                sorted_d = {
                    k: v
                    for k, v in sorted(i_j.items(), key=lambda item: item[1])
                }

                if gt in list(sorted_d)[:10]:
                    count_10 += 1
                if gt in list(sorted_d)[:3]:
                    count_3 += 1
                if gt in list(sorted_d)[:1]:
                    count_1 += 1

            print('hits@1: %.1f' % (count_1 / len(hs) * 100))
            print('hits@3: %.1f' % (count_3 / len(hs) * 100))
            print('hits@10: %.1f' % (count_10 / len(hs) * 100))
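
Both branches rank every candidate tail entity by score_transe or score_complex and sort ascending, so lower scores must mean better matches. The repo's definitions are not shown; below are minimal sketches consistent with the standard TransE and ComplEx formulations and with the call signatures above (the negation in score_complex, so that smaller is better, is an assumption):

import numpy as np


def score_transe(h, r, t):
    # TransE: plausibility is the distance ||h + r - t||; smaller is better
    return np.linalg.norm(h + r - t)


def score_complex(h_r, h_i, t_r, t_i, r_r, r_i):
    # ComplEx: Re(<r, h, conj(t)>), negated so that, as with score_transe,
    # smaller values rank higher in the ascending sort
    score = np.sum(r_r * (h_r * t_r + h_i * t_i) +
                   r_i * (h_r * t_i - h_i * t_r))
    return -score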
Code example #15
        print(datetime.datetime.now())
        print('{} samples read'.format(stacks.shape[0]))

        prob = model.predict_proba(stacks, batch_size=128, verbose=1)
        print(datetime.datetime.now())
    else:
        prob = np.zeros((0, model.output_shape[1]))
        for chunkNum in range(1, nChunks + 1):
            print('Reading chunk ' + str(chunkNum) + '/' + str(nChunks) +
                  ' from ' + inFile)
            stacks, labels = data.load_mat_chunk(inFile, chunkNum=chunkNum)
            stacks = np.float32(stacks) / 255
            print(stacks.shape[0], 'samples read')

            prob = np.append(prob,
                             model.predict_proba(stacks,
                                                 batch_size=128,
                                                 verbose=1),
                             axis=0)

    return prob


model = data.load_model(cnnModelDef, cnnModelWeights)

for inFile in inFiles:
    outFile = inFile.replace('.mat', '_cnnClass.mat')
    p = process_file(inFile, outFile, model)
    print('Saving to ' + outFile)
    savemat(outFile, {'p': p, 'clab': clab})
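
The data.load_model(cnnModelDef, cnnModelWeights) call above takes separate architecture and weights files, which matches the classic Keras JSON + HDF5 split (the predict_proba call also points at the old Keras Sequential API). A plausible sketch of the helper, an assumption rather than the repo's actual code:

from keras.models import model_from_json


def load_model(model_def_path, weights_path):
    # Hypothetical: rebuild a Keras model from a JSON architecture file,
    # then attach the trained weights stored in the HDF5 file
    with open(model_def_path) as f:
        model = model_from_json(f.read())
    model.load_weights(weights_path)
    return model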
Code example #16
class tf_idf():

    nlp = load_model('en_core_web_md')

    def word_freq(self, text) -> dict:
        """
        Create document word frequency table {w1:f1, ..., wN:fN}.
        Remove stop words, punct, etc. and lowercase
        :rtype: dict
        """
        doc = self.nlp(text)
        word_freq_table = {}
        for token in doc:
            ignore = token.is_stop or token.is_punct or token.is_quote or token.is_oov or token.text in [
                '.', ',', ';', ':', '%', '-'
            ]
            if not ignore and token.lower_ in word_freq_table:
                word_freq_table[token.lower_] += 1
            elif not ignore:
                word_freq_table[token.lower_] = 1

        return word_freq_table

    def sent_word_freq(self, text) -> dict:
        """
        Create sentence word frequency table {s1:{w1:f1, ..., wN:fN}, ..., sN:{w1:f1, ..., wN:fN} }.
        :rtype: dict
        """
        doc = self.nlp(text)
        sent_word_freq_table = {}
        for sent in doc.sents:
            word_freq_table = self.word_freq(sent.lower_)
            sent_word_freq_table[sent.lower_[:15]] = word_freq_table

        return sent_word_freq_table

    def tf_matrix(self, sent_word_freq_table) -> dict:
        tf_matrix = {}
        for sent, word_freq_table in sent_word_freq_table.items():
            tf_table = {}
            sent_word_count = len(word_freq_table)
            for word, freq in word_freq_table.items():
                tf_table[word] = freq / sent_word_count
            tf_matrix[sent] = tf_table

        return tf_matrix

    def global_word_freq(self, tf_matrix) -> dict:
        tf_global_matrix = {}
        for sent, f_table in tf_matrix.items():
            for word, count in f_table.items():
                if word in tf_global_matrix:
                    tf_global_matrix[word] += count
                else:
                    tf_global_matrix[word] = count

        return tf_global_matrix

    def idf(self, tf_matrix, tf_global_matrix) -> dict:
        total_documents = len(tf_matrix)
        idf_matrix = {}
        for sent, f_table in tf_matrix.items():
            idf_table = {}
            for word in f_table.keys():
                idf_table[word] = math.log10(total_documents /
                                             float(tf_global_matrix[word]))
            idf_matrix[sent] = idf_table

        return idf_matrix

    def tf_idf(self, tf_matrix, idf_matrix) -> dict:
        tf_idf_matrix = {}
        # both matrices share the same sentence keys and, per sentence,
        # the same word keys
        for (sent1, f_table1), (sent2, f_table2) in zip(tf_matrix.items(),
                                                        idf_matrix.items()):
            tf_idf_table = {}
            for (word1, value1), (word2, value2) in zip(f_table1.items(),
                                                        f_table2.items()):
                tf_idf_table[word1] = float(value1 * value2)
            tf_idf_matrix[sent1] = tf_idf_table

        return tf_idf_matrix

    def score_sentences(self, tf_idf_matrix) -> dict:
        # Score sentences by their word TF-IDF values
        # Algorithm: normalise each word's score by the sentence's max score,
        # sum the normalised scores, and divide by the sentence's word count
        sentenceScores = {}
        for sent, f_table in tf_idf_matrix.items():
            sent_word_count = len(f_table)
            scores = [score for _word, score in f_table.items()]
            maxScore = max(scores)
            normScores = [score / maxScore for score in scores]
            total_sent_score = sum(normScores)
            sentenceScores[sent] = total_sent_score / sent_word_count

        return sentenceScores

    def average_score(self, sentenceScores) -> float:
        sumScores = sum([sentenceScores[entry] for entry in sentenceScores])
        # Average score of a sentence from original summary_text
        average = sumScores / len(sentenceScores)

        return average

    def generate_summary(self, sents, sentenceScores, threshold) -> str:
        summary = ' '.join([
            sent.text.strip() for sent in sents
            if ((sent.lower_[:15] in sentenceScores) and (
                sentenceScores[sent.lower_[:15]] <= (threshold)))
        ])
        return summary

    def summarize(self, text, threshold: float) -> str:
        doc = self.nlp(text)
        sents = doc.sents
        '''
        Term frequency (TF) is how often a word appears in the document, divided by how many words there are in the document.
        '''
        # 1 Calculate the term frequency matrix, by sentence
        tf_matrix = self.sent_word_freq(text)
        #st.write(pd.DataFrame(tf_matrix))

        # 2 Calculate the term frequency matrix, global (all sentences)
        tf_global_matrix = self.global_word_freq(tf_matrix)
        #st.write(pd.DataFrame({'tf_global_matrix':tf_global_matrix}))
        '''
        Inverse document frequency (IDF) is how unique or rare a word is.
        '''
        # 3 Calculate IDF
        idf_matrix = self.idf(tf_matrix, tf_global_matrix)
        #st.write(pd.DataFrame(idf_matrix))

        # 4 Calculate TF-IDF
        tf_idf_matrix = self.tf_idf(tf_matrix, idf_matrix)
        #st.write(pd.DataFrame(tf_idf_matrix))

        # 5 Score sentences
        sentence_scores = self.score_sentences(tf_idf_matrix)
        #st.write(pd.DataFrame({'sentence_scores':sentence_scores}))

        # 6 Generate summary
        summary = self.generate_summary(sents, sentence_scores, threshold)

        return summary
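
For completeness, a short driver for the class above. It assumes load_model at the top of the snippet is an alias for spacy.load (the 'en_core_web_md' argument and the token attributes used strongly suggest spaCy) and that the model package is installed:

# hypothetical usage, assuming `load_model` resolves to spacy.load
summarizer = tf_idf()
text = ("Term frequency counts how often a word appears in a document. "
        "Inverse document frequency measures how rare a word is across "
        "documents. TF-IDF multiplies the two to weight words by how "
        "informative they are.")
# keeps sentences whose normalised TF-IDF score is at or below the threshold
print(summarizer.summarize(text, threshold=1.0))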