def get_word_frequencies():
    """
    Count every (token, tag) pair in the corpus returned by unpickle_cds().

    Each POS tag is passed through collapse_function_tags() before counting,
    and tokens are counted exactly as stored (no case folding is applied here).

    Returns a list of ((token, tag), frequency) tuples ordered from most to
    least frequent.
    """
    corpus = unpickle_cds()
    tally = Counter()
    for file_name in corpus:
        for tagged_sentence in corpus[file_name]:
            tally.update(
                (token, collapse_function_tags(pos_tag))
                for token, pos_tag in tagged_sentence
            )
    # Sort ascending and then flip, rather than sorting with reverse=True:
    # the two differ in how entries with equal counts are ordered, and this
    # keeps the established tie order.
    ascending = sorted(tally.items(), key=operator.itemgetter(1))
    return ascending[::-1]
def get_cds_words(collapse_function_words=True):
    """
    Return the CDS corpus (as provided by unpickle_cds()) as a list of sentences.

    Each sentence is a list of strings of the form 'word-pos_tag', where the
    word is lower-cased. When collapse_function_words is true, every POS tag is
    first passed through collapse_function_tags(); otherwise the tag is used
    unchanged.
    """
    corpus = unpickle_cds()
    # The flag does not change during the run, so choose the tag mapping once.
    relabel = collapse_function_tags if collapse_function_words else (lambda tag: tag)
    result = []
    for file_name in corpus:
        for tagged_sentence in corpus[file_name]:
            result.append(
                [word.lower() + "-" + relabel(tag) for word, tag in tagged_sentence]
            )
    return result