コード例 #1
0
def handle_arguments():
    """Parse the command line and print the most popular words or sentences.

    Reads ``argv`` as ``[script, file, parameter, quantity]``:

    * ``file`` is handed to ``extractor`` to obtain words and sentences.
    * ``parameter`` must be ``-s`` (sentences) or ``-w`` (words).
    * ``quantity`` must be a non-negative decimal integer.

    On invalid input a message (or the usage text) is printed and the
    function returns without doing any further work. Returns None.
    """
    # Indexing argv[3] with too few arguments would raise IndexError, so
    # show the usage text instead.
    if len(argv) < 4:
        print_usage()
        return

    file, parameter, raw_quantity = argv[1:4]

    # isdecimal() only accepts characters that int() can parse. isdigit()
    # also accepts e.g. superscript digits, on which int() raises ValueError.
    if not raw_quantity.isdecimal():
        print("The quantity parameter must be an integer.")
        return
    quantity = int(raw_quantity)

    if parameter not in ('-s', '-w'):
        print_usage()
        return

    # Word scores are needed in both modes (sentence scores build on them).
    words = extractor.get_words(file)
    words_scores = get_word_scores(words)

    if parameter == '-w':
        if quantity > len(words):
            print("Quantity specified is greater than the number of words.")
        else:
            print_popular(words_scores, sort_dictionary(words_scores),
                          quantity)
        return

    # Sentence mode: only now pay for sentence extraction and scoring.
    sentences = extractor.get_sentences(file)
    if quantity > len(sentences):
        print(
            "Quantity specified is greater than the number of sentences.")
        return

    sentences_scores = get_sentence_scores_dict(sentences, words_scores)
    print_popular(sentences_scores, sort_dictionary(sentences_scores),
                  quantity)
コード例 #2
0
def summarize(filename, num_of_sentences):
    """Print a summary of a file built from its highest-scoring sentences.

    Args:
        filename: Passed to ``extractor`` to read the words and sentences.
        num_of_sentences: Number of sentences requested for the summary.

    The selected sentences are printed on one line, separated by single
    spaces. If more sentences are requested than remain after transition
    sentences are omitted, a message is printed instead. Returns None in
    both cases.
    """
    # Extract the words and sentences; transition sentences are dropped
    # before anything is counted or scored.
    all_words = extractor.get_words(filename)
    word_scores = scoring.get_word_scores(all_words)
    all_sentences = filter.omit_transition_sentences(
        extractor.get_sentences(filename))

    # Reject impossible requests before doing the per-sentence scoring.
    if num_of_sentences > len(all_sentences):
        print("The summary cannot be longer than the text.")
        return

    sentence_scores = scoring.get_sentence_scores_list(all_sentences,
                                                       word_scores)

    # Pick the score cut-off for the requested count, then the sentences
    # that reach it (presumably kept in their original order -- confirm
    # against scoring.top_sentences).
    threshold = scoring.x_highest_score(sentence_scores, num_of_sentences)
    top_sentences = scoring.top_sentences(all_sentences, sentence_scores,
                                          threshold)

    # join() yields the same text as appending "<sentence> " repeatedly and
    # trimming the final space, without the repeated string copies.
    print(" ".join(top_sentences))
コード例 #3
0
def summarize(filename, topics, input_words, num_of_sentences):
    """Build one summary section per topic group from a file's sentences.

    Args:
        filename: Passed to ``extractor`` to read the words and sentences.
        topics: Passed to ``topic_sent`` to split the sentences into groups.
        input_words: Section headings; ``input_words[i]`` labels group ``i``.
        num_of_sentences: Maximum number of sentences per section.

    Returns:
        A list with one string per group: the heading, a colon, a space and
        a newline, then every selected sentence followed by a space, then a
        closing newline.

    Raises:
        IndexError: If ``input_words`` is a sequence with fewer entries
            than ``topic_sent`` produced groups.
    """
    # Extract the words and sentences; transition sentences are dropped
    # before the remaining ones are grouped by topic.
    all_words = extractor.get_words(filename)
    word_scores = scoring.get_word_scores(all_words)
    all_sentences = filter.omit_transition_sentences(
        extractor.get_sentences(filename))
    sentence_groups = topic_sent(all_sentences, topics)

    complete_summary = []
    for index, group in enumerate(sentence_groups):
        group_scores = scoring.get_sentence_scores_list(group, word_scores)
        group, group_scores = additional_filter.remove_duplicates(
            group, group_scores)

        # Never ask for more sentences than the group still holds.
        wanted = min(num_of_sentences, len(group))

        if wanted > 0:
            threshold = scoring.x_highest_score(group_scores, wanted)
            top_sentences = scoring.top_sentences(group, group_scores,
                                                  threshold)
            # top_sentences may hand back more than `wanted` entries
            # (presumably on score ties -- confirm); keep the last ones.
            top_sentences = top_sentences[-wanted:]
        else:
            # A slice of [-0:] is the whole list, not an empty one, so a
            # zero (or negative) count is handled explicitly and the
            # scoring helpers are skipped.
            top_sentences = []

        body = "".join(sentence + " " for sentence in top_sentences)
        complete_summary.append(input_words[index] + ": \n" + body + '\n')

    return complete_summary
コード例 #4
0
    for phrase in transition_phrases:
        if lower.startswith(phrase):
            return True
    return False


def omit_transition_sentences(sentences):
    """Return the sentences that is_transition_phrase does not flag.

    The transition phrases are fetched once and the input order is kept.
    """
    phrases = get_transition_phrases()
    return [
        candidate for candidate in sentences
        if not is_transition_phrase(phrases, candidate)
    ]


if __name__ == "__main__":
    # Script mode: list every sentence of the given file that starts with a
    # transition phrase, followed by the total number of such sentences.
    if len(argv) != 2:
        print_usage()
    else:
        phrases = get_transition_phrases()
        omitted = 0
        for candidate in extractor.get_sentences(argv[1]):
            # Compare case-insensitively: lower-case the sentence once.
            lowered = candidate.lower()
            if any(lowered.startswith(prefix) for prefix in phrases):
                print("Omitted: " + candidate)
                omitted += 1
        print("Omitted", omitted, "sentence(s).")