Code example #1
File: generation.py Project: soywalker/prosaic
def extract_rule(conn, corpus_id, letter_sound_map, raw_pair):
    # raw_pair is a (rule_key, value) template pair; map it to a dogma rule
    rule_key = first(raw_pair)
    value = second(raw_pair)
    rule = None

    if rule_key == 'rhyme': rule = dogma.RhymeRule(letter_sound_map.get(value))
    elif rule_key == 'blank': rule = dogma.BlankRule()
    elif rule_key == 'alliteration': rule = dogma.AlliterationRule(value)
    elif rule_key == 'keyword': rule = dogma.KeywordRule(value, conn, corpus_id)
    elif rule_key == 'fuzzy': rule = dogma.FuzzyKeywordRule(value, conn, corpus_id)
    elif rule_key == 'syllables': rule = dogma.SyllableCountRule(value)

    return rule
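To make the dispatch concrete, here is a hypothetical usage sketch; the rule keys come from the if-chain above, but the template pair and letter_sound_map values are invented for illustration, and conn and corpus_id would come from the caller:

# Hypothetical sketch: one (rule_key, value) template pair becomes one dogma rule.
letter_sound_map = {'A': 'EY'}  # invented letter-to-sound mapping
rule = extract_rule(conn, corpus_id, letter_sound_map, ('rhyme', 'A'))
# rule is now dogma.RhymeRule('EY'); an unrecognized rule_key leaves it None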
Code example #2
def words(sentence):
    tagged_sentence = tag(sentence)
    tagged_words = filter(lambda tu: match(word_tag_re, second(tu)), tagged_sentence)
    ws = map(first, tagged_words)
    return list(ws)
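Example #4 below shows that tag wraps nltk.word_tokenize and nltk.pos_tag, so words keeps only the tokens whose POS tag consists entirely of capital letters, dropping punctuation. A rough sketch of the intent (the exact tags depend on the tagger model nltk loads):

# tag("Hello, world.") would yield pairs like
# [('Hello', 'NNP'), (',', ','), ('world', 'NN'), ('.', '.')]
words("Hello, world.")  # -> ['Hello', 'world']; the ',' and '.' tags fail word_tag_re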
Code example #3
@lru_cache(maxsize=2056)  # memoize so a repeated sentence is tokenized and tagged only once
def words(sentence):
    tagged_sentence = tag(sentence)
    tagged_words = filter(lambda tu: match(word_tag_re, second(tu)),
                          tagged_sentence)
    ws = map(first, tagged_words)
    return list(ws)


def stem_sentence(sentence):
    stemmed = map(stem_word, words(sentence))
    return list(stemmed)


is_divider = lambda tu: DIVIDER_TAG == second(tu)


def split_multiclause(sentence, tagged_sentence):
    # extract the text the divider tag represents
    # guard: find_first comes back empty when no divider tag is present
    found = find_first(is_divider, tagged_sentence)
    divider = first(found) if found else None
    if divider is not None:
        first_clause = sentence[0:sentence.index(divider)].rstrip()
        second_clause = sentence[sentence.index(divider) + 1:].lstrip()
        return [first_clause, second_clause]
    else:
        return [sentence]


def expand_multiclauses(sentences):
    # TODO consider itertools
    split = []
    for sentence in sentences:
        # (loop body reconstructed from the truncated source: split each
        # sentence on its divider and collect the resulting clauses)
        split += split_multiclause(sentence, tag(sentence))
    return split
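A usage sketch for the clause handling, assuming DIVIDER_TAG holds the POS tag nltk assigns to the divider punctuation (',' for a comma, for instance; the constant's real value is defined elsewhere in the project):

sentence = "I came home, the fog was thick."
split_multiclause(sentence, tag(sentence))
# -> ['I came home', 'the fog was thick.']  (cut at the first divider, whitespace trimmed)
# A sentence with no divider comes back unchanged as a one-element list.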
Code example #4
File: nlp.py Project: merlinschumacher/prosaic
import re
from functools import lru_cache
from re import match

import nltk

# first, second, find_first, stem_word, and DIVIDER_TAG are defined
# elsewhere in the project (the excerpt started mid-file)

def tag(sentence_string):  # def line restored; the excerpt began inside this function
    tokenized_words = nltk.word_tokenize(sentence_string)
    return nltk.pos_tag(tokenized_words)

word_tag_re = re.compile("^[A-Z]+$")  # matches POS tags made only of capital letters, i.e. words rather than punctuation
@lru_cache(maxsize=2056)
def words(sentence):
    tagged_sentence = tag(sentence)
    tagged_words = filter(lambda tu: match(word_tag_re, second(tu)), tagged_sentence)
    ws = map(first, tagged_words)
    return list(ws)

def stem_sentence(sentence):
    stemmed = map(stem_word, words(sentence))
    return list(stemmed)

is_divider = lambda tu: DIVIDER_TAG == second(tu)

def split_multiclause(sentence, tagged_sentence):
    # extract the text the divider tag represents
    # guard: find_first comes back empty when no divider tag is present
    found = find_first(is_divider, tagged_sentence)
    divider = first(found) if found else None
    if divider is not None:
        first_clause = sentence[0:sentence.index(divider)].rstrip()
        second_clause = sentence[sentence.index(divider)+1:].lstrip()
        return [first_clause, second_clause]
    else:
        return [sentence]

def expand_multiclauses(sentences):
    # TODO consider itertools
    split = []
    for sentence in sentences:
        # (loop body reconstructed past the truncation: split each sentence
        # on its divider and collect the resulting clauses)
        split += split_multiclause(sentence, tag(sentence))
    return split
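Putting the pieces together, a minimal end-to-end sketch (it assumes the nltk tokenizer and tagger data are installed, and that stem_word is defined elsewhere in nlp.py):

sentences = ["It was cold, the rain had stopped.", "Nothing moved."]
clauses = expand_multiclauses(sentences)
# -> ['It was cold', 'the rain had stopped.', 'Nothing moved.']
stems = [stem_sentence(clause) for clause in clauses]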