Example #1
0
def p2():
    """Generate a long descriptive paragraph from the module word lists.

    Fills a fixed template with random picks from ``ns`` (nouns) and
    ``adjs`` (adjectives), then optionally wraps the result with one of
    the mood helpers.

    Returns:
        str: the capitalized paragraph.

    Fix: corrected the template spelling "admist" -> "amidst".
    """
    p = "the {place} was filled with the {adjective1} {noun1} of {nouns1}, and when the {adjective2} {adjective3} {noun2} stirred amidst the {nouns2} of the {noun3}, there came through the {adjective4} {noun4} the {adjective5} {noun5} of the {noun6}, or the more {adjective6} {noun7} of the {adjective7} {noun8}.".format(
        place = random.choice(ns),
        adjective1 = random.choice(adjs),
        noun1 = random.choice(ns),
        nouns1 = pluralize(random.choice(ns)),
        adjective2 = random.choice(adjs),
        adjective3 = random.choice(adjs),
        noun2 = random.choice(ns),
        nouns2 = pluralize(random.choice(ns)),
        noun3 = random.choice(ns),
        adjective4 = random.choice(adjs),
        noun4 = random.choice(ns),
        adjective5 = random.choice(adjs),
        noun5 = random.choice(ns),
        noun6 = random.choice(ns),
        adjective6 = random.choice(adjs),
        noun7 = random.choice(ns),
        adjective7 = random.choice(adjs),
        noun8 = random.choice(ns)
    )

    # NOTE(review): random.choice evaluates both wrappers before picking
    # one, and the elif rolls a fresh random.random() -- kept as in the
    # original.
    if random.random() > 0.6:
        p = random.choice([look_around_you(p), get_lost(p)])
    elif random.random() > 0.8:
        p = suddenly(p)

    return capitalize(p)
Example #2
0
        def make_noun_string(np, plural=False):
            """Render a (modifier, noun) pair ``np`` as a noun-phrase string.

            Chooses at random among plural, possessive, indefinite-article
            (via ``referenced``) and definite ("the ...") renderings.
            ``plural=True`` forces the plural rendering.
            """
            # random chance of removing modifier
            #if random.random() < 0.5:
            #    np[0] == ''

            # common mass nouns

            # Mass/irregular nouns are emitted unmodified.
            if np[1] in ['data', 'information', 'children', 'people', 'stuff', 'equipment']:
                return ' '.join(np).strip()

            # Indefinite pronouns ("everything", "anybody", ...) stand alone.
            elif any(np[1].lower().startswith(x) for x in ('every', 'any', 'some')) or np[1] in ('nothing', 'nobody'):
                return np[1]

            quantifiers = ['many', 'few', 'several', 'various', 'multiple', 'fewer', 'more']
            # A quantifier modifier always takes the plural head noun.
            if np[0] in quantifiers:
                return np[0] + ' ' + pluralize(np[1])

            else:
                # Weighted random choice of surface form.  Note the elif at
                # 0.45 below draws a *fresh* random.random(), independent of
                # die_roll.
                die_roll = random.random()
                if die_roll < 0.15 or plural:
                    return ' '.join((np[0], pluralize(np[1]))).strip()
                elif die_roll < 0.25:
                    return random.choice(('his', 'her', 'their', 'your')) + ' ' + ' '.join(np).strip()
                elif random.random() < 0.45:
                    return referenced(' '.join(np).strip())
                else:
                    return 'the ' + ' '.join(np).strip()
Example #3
0
 def getKeywords(self):
     """
     Extract keywords using POS tagging.

     Single-sentence queries go through the RAKE extractor (``self.r``);
     one-word ranked phrases are kept only when POS-tagged as nouns and
     not already present in singular or plural form.  Multi-sentence
     input is POS-tagged sentence by sentence instead.

     :return: flat list of keywords for a single sentence, or a list of
              per-sentence noun lists for multi-sentence input
     """
     nouns = []
     if len(self.sentences) == 1:
         # Strip punctuation before keyword extraction.
         s = re.sub('[' + string.punctuation + ']', '', self.sentences[0])
         self.r.extract_keywords_from_text(s)
         rp = self.r.get_ranked_phrases()
         for n in rp:
             tokens = nltk.tokenize.word_tokenize(n)
             if len(tokens) == 1:
                 item, tag = nltk.pos_tag(tokens)[0]
                 # 'NN' matches all noun tags (NN, NNS, NNP, NNPS).
                 if 'NN' in tag:
                     if len(item) > 1:
                         # Skip words already collected in either number.
                         if singularize(item) not in nouns and pluralize(
                                 item) not in nouns:
                             nouns.append(item)
             else:
                 # Multi-word phrases are kept verbatim.
                 nouns.append(n)
         return nouns
     for s in self.sentences:
         s = re.sub('[' + string.punctuation + ']', '', s)
         tokens = nltk.tokenize.word_tokenize(s)
         tagged = nltk.pos_tag(tokens)
         final_nouns = []
         for item, t in tagged:
             if 'NN' in t:
                 if len(item) > 1:
                     if singularize(item) not in final_nouns and pluralize(
                             item) not in final_nouns:
                         final_nouns.append(item)
         # NOTE: multi-sentence mode returns a list of lists.
         nouns.append(final_nouns)
     return nouns
Example #4
0
    def generate_refexs(self, answer_list):
        """
        Given all of the possible answers, generate the referring expressions to
        store in dictionary.

        Populates ``self._refex_lookup`` (answer -> set of lower-cased
        surface forms) and ``self._refex_count`` (surface form ->
        frequency).  Both are assumed to be defaultdict-like containers.
        """

        # TODO: Make referring expression data-driven

        for aa in answer_list:
            # Strip a Wikipedia-style disambiguation suffix,
            # e.g. "Mercury_(planet)" -> "Mercury".
            ans = aa.split("_(")[0]
            for jj in ans.split():
                # each word and plural form of each word
                self._refex_lookup[aa].add(jj.lower())
                self._refex_lookup[aa].add(pluralize(jj).lower())
                # NOTE(review): counts use the original casing while the
                # lookup stores lower-case -- confirm this asymmetry is
                # intended.
                self._refex_count[jj] += 1
                self._refex_count[pluralize(jj)] += 1

            # answer and plural form
            self._refex_count[ans.lower()] += 1
            self._refex_count[pluralize(ans).lower()] += 1
            self._refex_lookup[aa].add(ans.lower())
            self._refex_lookup[aa].add(pluralize(ans).lower())

            # THE answer
            self._refex_count["the %s" % ans.lower()] += 1
            self._refex_lookup[aa].add("the %s" % ans.lower())
Example #5
0
    def pluralize(self, word):
        '''
        Given base-form of the word, return back plural form of the word
        (For Noun only)
        Args:
            word (str): base-form of the word

        Raises:
            ValueError: The vocabulary does not contain the base-form
            ValueError: Can not find the base-form of the given word

        Returns:
            str: plural form of the word
        '''
        if word in self._word2index:
            # Delegates to the module-level pluralize() helper.
            return pluralize(word)

        # Fall back to the lemma.  Fix: the original wrapped this whole
        # branch in a bare ``except``, which also caught the
        # "base-form not in vocabulary" ValueError raised below and
        # re-raised it with the wrong message.  Only lemma() is guarded now.
        try:
            base_form_word = lemma(word)
        except Exception:
            raise ValueError(
                "Can not found base-form for '{}'".format(word))
        if base_form_word in self._word2index:
            return pluralize(base_form_word)
        raise ValueError(
            "Found the base-form for '{}': '{}'. But even the base-form not in vocabulary"
            .format(word, base_form_word))
Example #6
0
def make_thesaurus(file_path):
    """
    Returns dict of counters 'thesaurus', where
    thesaurus[word] = { synonym1: 4, syn2: 8, syn3: 1, ... }

    Reads the corpus at ``file_path`` line by line, POS-tags it with
    ``parse`` and counts WordNet synonyms per word, inflecting noun and
    verb synonyms to match the original word's form.
    (Python 2 code: relies on ``str.decode``.)
    """
    thesaurus = defaultdict(lambda: Counter())

    with open(file_path, "r") as f:
        for line in f:

            # Ignore repeated book title headers
            if _is_title(line):
                continue

            parsed = parse(line)

            # parsed.split()[0] is the first (tagged) sentence of the line.
            for tagged_word in parsed.split()[0]:
                word = tagged_word[0].strip().lower()
                pos = tagged_word[1][0]  # get pos for word

                # Reject non-ASCII characters
                try:
                    word = word.decode("ascii")
                except (UnicodeDecodeError, UnicodeEncodeError):
                    continue

                # Reject whitespace character
                if re.match("^[\s]*$", word):
                    continue

                # Increment word count of word w
                thesaurus[word].update([word])

                # Retrieve syn = synonym[w], add to thesaurus[syn]
                for syn in wn.get_synonyms(word):
                    syn = syn.name().split(".")[0]

                    # if noun, add plural form if word is plural, else add singular
                    if pos == "N":
                        # NOTE(review): "word == pluralize(word)" treats a
                        # word that is its own plural as plural -- confirm
                        # this is the intended plurality test.
                        if word == pluralize(word):
                            thesaurus[pluralize(syn)].update([word])
                        else:
                            thesaurus[syn].update([word])
                    # if verb, conjugate synonyms to the right form before adding them to thes
                    elif pos == "V":
                        word_tenses = tenses(word)
                        if word_tenses:
                            thesaurus[conjugate(syn, tense=word_tenses[0][0])].update([word])
                        else:
                            thesaurus[syn].update([word])
                    else:
                        thesaurus[syn].update([word])

    # Update thesaurus with mappings, if map_file exists
    file_path = file_path.replace(config.CORPUS_FOLDER, config.MAPPING_FOLDER)
    map_file = file_path.replace(config.CORP_TAG, config.MAP_TAG)
    thesaurus = _add_mappings(map_file, thesaurus)

    return thesaurus
Example #7
0
def find_noun_form(original_form, original_lemma, new_lemma):
    """
    Figure out whether original form was singular or plural.
    If plural, then return new_lemma in plural form
    """
    was_plural = pluralize(original_lemma) == original_form
    return pluralize(new_lemma) if was_plural else new_lemma
Example #8
0
def p3():
    """Generate a short 'seated in ...' sentence from the word lists."""
    # Slots are drawn in the same order as the original template call.
    subject = capitalize(PRONOUN) + " " + VERB
    seat = referenced(random.choice(ns))
    group_a = pluralize(random.choice(ns))
    group_b = pluralize(random.choice(ns))

    sentence = "{pronoun} seated in {noun}, surrounded by {nouns1} and {nouns2}.".format(
        pronoun=subject,
        noun=seat,
        nouns1=group_a,
        nouns2=group_b,
    )
    return capitalize(sentence)
def fix_rejects(rejects, ingredient_book):
    """Attempt to recover a known ingredient name for each rejected line.

    rejects: ingredient strings that failed earlier matching.
    ingredient_book: dict keyed by known ingredient names.
    Returns a list with one recovered name string per reject.
    """
    fixed = []
    for r in rejects:
        r_split = r.split()
        # r_split = [w for w in r_split if not re.search(fraction_match, w)] # take out nums
        # print r_split
        # get bbc ingredients
        # all_i = open("new_ing.txt", "r")
        # all_i = all_i.read()

        all_ingredients = ingredient_book.keys()

        # lst = all_i.split(',')
        # all_ingredients = []
        # for i in lst:
        #     i = i.replace('u\'', '').replace('\'', '').strip()
        #     all_ingredients.append(i)

        t = []
        for true_ing in all_ingredients:  # real ings from bbc
            ing_regex_single = r'\b{0}\b'.format(true_ing)
            ing_regex_plural = r'\b{0}\b'.format(pluralize(true_ing))
            # NOTE(review): both searches run against true_ing itself, so
            # the singular branch effectively always matches and the plural
            # branch is unreachable -- possibly meant to search ``r``
            # instead; confirm before changing.
            match_single = re.search(ing_regex_single, true_ing.lower())
            match_plural = re.search(ing_regex_plural, true_ing.lower())
            if match_single:
                t.append(true_ing.lower())
            elif match_plural:
                t.append(pluralize(true_ing.lower()))

        full_name = []
        for s in r_split:  # word in rejected ingredient
            match_flag = False
            for i in t:  # i in ing_book_keys
                s_regex_single = r'\b{0}\b'.format(s)
                s_regex_plural = r'\b{0}\b'.format(pluralize(s))
                match_single = re.search(s_regex_single, i.lower())
                match_plural = re.search(s_regex_plural, i.lower())
                if match_single:
                    match_flag = True
                    full_name.append(i)
                    # fixed.append(i)
                elif match_plural:
                    match_flag = True
                    full_name.append(i)
                    # fixed.append(i)
                else:
                    if s not in full_name:
                        full_name.append(s)

            # Stop at the first word that matched a known ingredient.
            if match_flag:
                break
        f = ' '.join(full_name)
        # Collapse duplicate words while preserving their order.
        full_name_result = ' '.join(unique_list(f.split()))
        fixed.append(full_name_result)
    return fixed
Example #10
0
def make_thesaurus_lesk(file_path):
    """
    Returns dict of counters 'thesaurus', where
    thesaurus[synset] = { word1: 4, word2: 8, word3: 1, ... }

    Disambiguates each corpus word to a WordNet synset with the Lesk
    algorithm over a +/-WINDOW context window before counting it.
    (Python 2 code: relies on ``str.decode``.)
    """
    thesaurus = defaultdict(lambda: Counter())

    with open(file_path, "r") as f:

        # Corpus tokens have the form "word_TAG", whitespace-separated.
        f = f.read().split()
        for i, word_and_tag in enumerate(f):

            word, tag = word_and_tag.rsplit("_", 1)

            # Reject non-ASCII characters
            try:
                word = word.decode("ascii")
            except (UnicodeDecodeError, UnicodeEncodeError):
                continue

            # look at a window of 9 words each time lesk is called
            window = [i - WINDOW, i + WINDOW]
            if i < WINDOW:
                # Too close to the start: shift the window right.
                window = [i, i + 2 * WINDOW]
            elif i >= len(f) - WINDOW:
                # Too close to the end: shift the window left.
                window = [i - 2 * WINDOW, i]

            synset = lesk.my_lesk(f[window[0] : window[1]], word)

            # if lesk can decide on a meaning for that word, add
            # that meaning, i.e., that synset, to thesaurus
            if not synset:
                continue

            # if word is verb, only add present tense to thesaurus
            if tag[0] == "V":
                word_tenses = tenses(word.lower())
                if "inf" in word_tenses or "1sg" in word_tenses or "2sg" in word_tenses or "3sg" in word_tenses:
                    thesaurus[str(synset)].update([word.lower()])
            elif tag[0] == "N":
                synset_name = synset.name().split(".")[0]
                # NOTE(review): the synset's own name being invariant under
                # pluralize() decides whether the word is stored plural or
                # singular -- confirm this matches the intended
                # normalization.
                if synset_name == pluralize(synset_name):
                    thesaurus[str(synset)].update([pluralize(word.lower())])
                else:
                    thesaurus[str(synset)].update([singularize(word.lower())])
            else:
                thesaurus[str(synset)].update([word.lower()])
    # Update thesaurus with mappings, if map_file exists
    file_path = file_path.replace(config.CORPUS_FOLDER, config.MAPPING_FOLDER)
    map_file = file_path.replace(config.CORP_TAG, config.MAP_TAG)

    thesaurus = _add_mappings(map_file, thesaurus)
    return thesaurus
def pluralizationError(text, nlp, correctFlag=False):
    '''
    Purpose: To check for pluralization error.
             Additionally, it returns corrected sentence.

    Parameters: text: string
                    A string of text-single or a paragraph.

                nlp: callable
                    Parser (e.g. a stanza pipeline) returning a document
                    with .sentences, each carrying .words whose items
                    expose .text and .xpos (Penn Treebank tag).

                correctFlag:boolean
                   True or False

    Returns: count: integer
             text: Corrected sentence. (If correctFlag is True)

    Fix: in the numeral (CD) branch the numeral itself was emitted only
    when a correction was made, silently dropping it from the corrected
    sentence otherwise.
    '''

    doc = nlp(text)
    count = 0
    text = ""
    for s in doc.sentences:
        last = len(s.words) - 1
        for i in range(len(s.words)):
            word = s.words[i]
            nxt = s.words[i + 1] if i != last else None
            # Singular noun followed by a plural verb: pluralize the noun.
            if nxt is not None and word.xpos in ("NN", "NNP"):
                if nxt.xpos in ["VB", "VBP"]:
                    count += 1
                    text += pluralize(word.text) + " "
                else:
                    text += word.text + " "
            # Plural noun followed by a singular verb: singularize it.
            # NOTE(review): the original does not increment count here;
            # behavior kept as-is.
            elif nxt is not None and word.xpos in ("NNS", "NNPS"):
                if nxt.xpos == "VBZ":
                    text += singularize(word.text) + " "
                else:
                    text += word.text + " "
            # Numeral: make the following noun agree in number.
            elif nxt is not None and word.xpos == "CD":
                if word.text == "1" or word.text == "one":
                    if nxt.xpos in ("NNS", "NNPS"):
                        count += 1
                        nxt.text = singularize(nxt.text)
                else:
                    if nxt.xpos in ("NN", "NNP"):
                        count += 1
                        nxt.text = pluralize(nxt.text)
                # Fix: always emit the numeral itself.
                text += word.text + " "
            else:
                text += word.text + " "
    if correctFlag:
        return count, text
    else:
        return count
def get_all_names(ings, ingredient_book):
    """Split raw ingredient strings into known names and desc/prep parts.

    ings: raw ingredient lines from a recipe.
    ingredient_book: dict keyed by known ingredient names.
    Returns [names, desc_and_prep, rejected].
    """
    #scrape
    # recipe = scrapeRecipe(url)
    # ings = recipe[0]
    ings = map(lambda x: x.lower(), ings)

    # get bbc ingredients
    # all_i = open("new_ing.txt", "r")
    # all_i = all_i.read()

    all_ingredients = ingredient_book.keys()

    # lst = all_i.split(',')
    # all_ingredients = []
    # for i in lst:
    #     i = i.replace('u\'', '').replace('\'', '').strip()
    #     all_ingredients.append(i)

    #get names
    t = []
    rejected = []
    for true_ing in all_ingredients:  # real ings from bbc
        ing_regex_single = r'\b{0}\b'.format(true_ing)
        ing_regex_plural = r'\b{0}\b'.format(pluralize(true_ing))
        # NOTE(review): both regexes are searched against true_ing itself,
        # so the singular branch effectively always matches and the plural
        # branch is unreachable; the net effect is lower-casing the name
        # list.  Confirm whether a reject string was meant to be searched.
        match_single = re.search(ing_regex_single, true_ing.lower())
        match_plural = re.search(ing_regex_plural, true_ing.lower())
        if match_single:
            t.append(true_ing.lower())
        elif match_plural:
            t.append(pluralize(true_ing.lower()))

    # De-duplicate while preserving first-seen order.
    seen = set()
    seen_add = seen.add
    t = [x for x in t if not (x in seen or seen_add(x))]

    # Prefer longer (multi-word) names in the substring match below.
    t.sort(key=lambda x: len(x.split()), reverse=True)

    names = []
    desc_and_prep = []

    for i in ings:
        for db_ingredient in t:
            if db_ingredient in i.lower():
                names.append(db_ingredient)
                # db_ingredient is now the name of the Ingredient
                desc_and_prep.append(i.split(db_ingredient))
                break
        else:
            # for/else: no known name matched this line.
            rejected.append(i)

    return [names, desc_and_prep,
            rejected]  #[ ['white sugar'], [[u'1 cup', u'']]]
Example #13
0
def p10():
    """Build a sentence describing the ground beneath someone's feet."""
    # Slots are drawn in the same order as the original template call.
    owner = random.choice([POS_PRONOUN, NAME + "\'s"])
    first_adj = random.choice(adjs)
    surface = random.choice(ns)
    action = random.choice(verbed)
    second_adj = random.choice(adjs)
    trait = random.choice(qualities)
    group_a = pluralize(random.choice(ns))
    group_b = pluralize(random.choice(ns))

    sentence = "beneath {pronoun2} feet, the {adjective1} {noun1} {verbed} with the {adjective2} {quality} of {nouns1} and {nouns2}.".format(
        pronoun2=owner,
        adjective1=first_adj,
        noun1=surface,
        verbed=action,
        adjective2=second_adj,
        quality=trait,
        nouns1=group_a,
        nouns2=group_b,
    )
    return capitalize(sentence)
Example #14
0
def p15():
    """Generate a sentence describing a structure's composition.

    Fix: corrected the template spelling "interupted" -> "interrupted".
    """
    p = "the {noun1} consisted of {noun2}-like {adjective} {nouns1}, interrupted at {number} points by {nouns2}.".format(
        noun1 = random.choice(ns),
        noun2 = random.choice(ns),
        adjective = random.choice(adjs),
        nouns1 = pluralize(random.choice(ns)),
        number = str(random.randint(3, 9)),
        nouns2 = pluralize(random.choice(ns))
    )
    # 25% chance of wrapping the sentence with the helper.
    if random.random() < 0.25:
        p = look_around_you(p)

    return capitalize(p)
Example #15
0
def p16():
    """Generate a sentence describing a divided structure.

    Fix: the {adjective3} slot was drawn from the noun list ``ns``;
    it now draws from ``adjs`` like every other adjective slot.
    """
    p = "the {noun1} was {adjective1} {noun2}, divided into {adjective2} {nouns1} by {quant} of {adjective3} {color} {nouns2}.".format(
        noun1 = random.choice(ns),
        adjective1 = referenced(random.choice(adjs)),
        noun2 = random.choice(ns),
        adjective2 = random.choice(adjs),
        nouns1 = pluralize(random.choice(ns)),
        quant = referenced(random.choice(quants)),
        adjective3 = random.choice(adjs),
        color = random.choice(colors),
        nouns2 = pluralize(random.choice(ns))
    )

    return capitalize(p)
Example #16
0
def generate_phrase_2():
    '''Return a phrase and its entropy (in bits) of the form
       (# adj noun) (adverb verb) (adjective noun punctuation)

    E.g.,
       17 MODERATE TRAYS At once live outed wORTH bOSSES
    '''
    # Word pools for each slot, in phrase order.
    selections = [ADJECTIVES, NOUNS,
                  ADVERBS, TRANSITIVE_VERBS,
                  ADJECTIVES, NOUNS, TERMINAL_PUNCTUATION]
    # Entropy contributed by the word choices themselves.
    entropy = sum([log(len(item), 2) for item in selections])
    # Only the verb slot is conjugated: random past/present, 3rd person plural.
    conjugations = [None, None,
                    None, [random_item_from_list([PAST, PRESENT]), 3, PLURAL],
                    None, None,
                    None]
    sub_list = [random_item_from_list(item) for item in selections]
    for idx, word in enumerate(sub_list):
        if conjugations[idx]:
            sub_list[idx] = conjugate(word, *conjugations[idx])
    # One bit for the past/present coin flip above.
    entropy += 1

    sub_list[1] = pluralize(sub_list[1])
    sub_list[5] = pluralize(sub_list[5])

    # Leading number is drawn from 997 possible values (randint below).
    entropy += log(997, 2)

    # Randomly recase each word: one of 4 equally likely treatments
    # (rnd == 0 leaves the word unchanged), hence 2 bits per word.
    # NOTE(review): randint takes a single bound here, so this is not
    # random.randint -- presumably a numpy-style helper; confirm.
    for idx, item in enumerate(sub_list):
        rnd = randint(4)
        if rnd == 1:
            sub_list[idx] = item.capitalize()
        if rnd == 2:
            sub_list[idx] = item.upper()
        if rnd == 3:
            sub_list[idx] = item[0] + item[1:].upper()

        entropy += 2

    phrase = ('%i %s %s %s %s %s %s%s' % tuple([randint(997) + 2] + sub_list)).replace('_', ' ')

    # Insert a random symbol into the sentence
    insert_point = randint(len(phrase) + 1)
    entropy += log(len(phrase) + 1, 2) + log(len(SYMBOLS), 2)
    phrase = phrase[:insert_point] + random_item_from_list(SYMBOLS) + phrase[insert_point:]

    # ...and a second random symbol.
    insert_point = randint(len(phrase) + 1)
    entropy += log(len(phrase) + 1, 2) + log(len(SYMBOLS), 2)
    phrase = phrase[:insert_point] + random_item_from_list(SYMBOLS) + phrase[insert_point:]

    return phrase, entropy
Example #17
0
def p8():
    """Describe the walls of a place via a random simile."""
    # Slots are drawn in the same order as the original template call.
    enclosure = random.choice(ns)
    wall_units = pluralize(random.choice(quants))
    first_adj = random.choice(adjs)
    material = random.choice(ns)
    second_adj = random.choice(adjs)
    features = pluralize(random.choice(ns))
    owner = random.choice(ns)

    sentence = "the walls of the {noun1} were metre-high {quants} of {adjective1} {noun2}, like the {adjective2} {nouns} of a {noun3}.".format(
        noun1=enclosure,
        quants=wall_units,
        adjective1=first_adj,
        noun2=material,
        adjective2=second_adj,
        nouns=features,
        noun3=owner,
    )

    # 25% chance of wrapping with one of the two mood helpers.
    if random.random() < 0.25:
        sentence = random.choice([look_around_you(sentence), get_lost(sentence)])

    return capitalize(sentence)
Example #18
0
def tryPOS(word, p, target):
    """Try to convert ``word`` from POS tag ``p`` to POS tag ``target``.

    Uses the module-level pronoun maps PRPD/WPD and pattern-style
    inflection helpers.  Returns the converted word, or None when no
    single-word conversion is available.
    """
    # Word already carries (a superstring of) the target tag: normalize
    # instead of converting.  RB/DT/RP are excluded from this shortcut.
    if target in p and target not in ['RB', 'DT', 'RP']:
        if target == 'PRP' or target == 'WP':
            d = WPD
            if target == 'PRP':
                d = PRPD
            # Reverse lookup: find the key that maps to this pronoun.
            for k in d:
                if d[k] == word:
                    return k
            return None
        # Reduce to the base form via WordNet morphy.
        return wn.morphy(word)

    #else
    if target == 'PRP$' and p == 'PRP':
        return PRPD.get(word)
    if target == 'WP$':
        return WPD.get(word)
    if p == 'NN':
        if target == 'NNP':
            return word
        else:
            return pluralize(word)
    if p == 'NNP':
        return pluralize(word)
    if 'VB' in p:
        t = ''
        if target == 'VBD':
            t = PAST
        if target == 'VBP':
            t = INFINITIVE
        if target == 'VBZ':
            t = PRESENT
        if target == 'VBN':
            # NOTE(review): PAST + PARTICIPLE concatenates two constants --
            # confirm conjugate() accepts the combined value.
            t = PAST + PARTICIPLE
        if target == 'VBG':
            t = PARTICIPLE
        if t:
            return conjugate(word, tense=t)

    ret = ''
    if target == 'JJR' or target == 'RBR':
        ret = comparative(word)
    if target == 'JJS' or target == 'RBS':
        ret = superlative(word)
    # Reject empty or multi-word results (e.g. "more pretty").
    if not ret or ' ' in ret:
        return None  #default
    else:
        return ret
Example #19
0
def pos_all(word):
    rlist = []
    _rtense = ('infinitive', 'present', 'past', 'future')
    _rperson = (1, 2, 3)
    _rnumber = ('singular', 'plural')
    _rmood = ('indicative', 'imperitive', 'conditional', 'subjuntive')
    _raspect = ('imperfective', 'perfective', 'progressive')
    for rtense in _rtense:
        for rperson in _rperson:
            for rnumber in _rnumber:
                for rmood in _rmood:
                    for raspect in _raspect:
                        item = conjugate(word,
                                         tense=rtense,
                                         person=rperson,
                                         number=rnumber,
                                         mood=rmood,
                                         aspect=raspect,
                                         negated=False)
                        if item not in rlist:
                            rlist.append(item)

    print bcolors.Magenta + "All pos of " + word
    print_list(rlist, 4)
    print "Singluar    : " + singularize(
        word) + "			Plural      : " + pluralize(word)
    print "Comparative : " + comparative(
        word) + " 			Superlative : " + superlative(word)
Example #20
0
    def do_flower(self, i, j):
        """Process finding a flower and possibly doing something with it

        i, j: grid coordinates of the current map cell (cells are 15px
        squares on self.IMAGE).  Appends narration to self.TEMP and may
        record the flower in self.FLOWERS.
        """

        # Get a random color and flower name
        color = random.choice(self.JSON['colors'])['color']
        flower = singularize(random.choice(self.JSON['flowers']))

        # Print them
        self.TEMP += "There was a beautiful " + color + " " + flower + " there. "
        self.TEMP += "It smelled like " + pluralize(
            random.choice(self.JSON['fruits'])) + "."

        # Put a square on the map to mark the flower
        self.IMAGE.filledRectangle((i * 15 + 4, j * 15 + 4),
                                   (i * 15 + 11, j * 15 + 10),
                                   self.COLORS['purple'])

        # Is the narrator keeping this flower? (10% chance)
        if random.randrange(100) < 10:
            self.TEMP += " I picked it"

            if self.FLOWERS:
                self.TEMP += " and added it to the rest of my bouquet"

            self.TEMP += "."

            self.FLOWERS.append({'color': color, 'flower': flower})

        # Does the narrator eat this flower instead? (5% chance)
        elif random.randrange(100) < 5:
            self.TEMP += " For some reason I ate it. It tasted " + random.choice(
                self.TASTES) + "."

        self.TEMP += "\n"
        # Reset the narrative "then" connector for the next event.
        self.THEN = False
def conceptnet_relatedness(subject, candidates, object):
    """Score ``subject`` against ``object`` via ConceptNet and check
    whether any candidate label is more related.

    candidates: mapping of label -> confidence.
    Returns (best_subject_with_dashes, best_score).
    """
    base_score = call_cp_api(subject, object)

    pred_subject = subject

    # print(base_score)
    # Is there any other label in the ranking making more sense?

    for o_class, confidence in candidates.items():

        f_class, _ = formatlabel(o_class)

        if f_class == subject:
            continue  # Skip the object itself

        score = call_cp_api(f_class, object)

        # Keep the highest-scoring candidate (note: the raw o_class label
        # is kept, not the formatted f_class).
        if score > base_score:
            base_score = score
            pred_subject = o_class

    print("CONCEPTNET: Within the ranking, the most likely subject is %s" %
          pred_subject)
    # NOTE(review): a prediction that is already singular is pluralized
    # here -- presumably the evaluation labels are plural; confirm.
    if singularize(pred_subject) == pred_subject:
        # Re-format back for evaluation
        pred_subject = pluralize(pred_subject)

    pred_subject = reverse_map(pred_subject)

    return pred_subject.replace('_', '-'), base_score
Example #22
0
def get_plural_box(cat, nodes, max_boxes=False, new_label=None):
	"""Get plural box for given category/label.
	max_boxes: used for limiting number of nodes used per category."""

	# Restrict to the first max_boxes nodes (by ID) when requested.
	if max_boxes:
		selected = [node for node in nodes if int(node.ID) <= max_boxes]
	else:
		selected = nodes

	# Merged bounding box spanning every selected node.
	ymax = max(node.ymax for node in selected)
	ymin = min(node.ymin for node in selected)
	xmax = max(node.xmax for node in selected)
	xmin = min(node.xmin for node in selected)

	merged_coords = [xmin, ymin, xmax, ymax]

	# Get plural label (pluralize -- not used here!)
	plural_label = pat.pluralize(cat)

	# Emit "<label>-pl ; (x1, y1, x2, y2)", preferring the override label.
	label = new_label if new_label else cat
	bbox_line = label + '-pl ; (' + ", ".join(str(c) for c in merged_coords) + ')'

	return bbox_line
Example #23
0
File: faq.py Project: VRDate/twss
def process(statement,database_name = DATABASE_NAME):
  ''' Allows us to create entities via statements like "There is a course CSCI4702 called Mobile Programming" 
      and modify entities with statements like "CSCI4702 has a start date of Jan 31st 2013"
      
      already encountering a statement like "There is a game engine Unity3d" gives us trouble
      seems like we need named entity recognition to be able to extract types like that ... or perhaps rely on capitalization
      which doesn't really work for things like CTO as a category of items, hmm
      
      >>> sent = "There is a game engine Unreal Engine".split()
      >>> print nltk.ne_chunk(nltk.pos_tag(sent))
      '''
  # this runs real fast, but it doesn't quite get the NN/NNP combination I hoped for from "There is a game engine Unity3D"
  # although it does now with light=True setting, but now it doesn't get the NNP in "There is a game engine Source"

  # Shallow-parse the statement (pattern-style parse/split).
  s = parse(statement, relations=True, lemmata=True, light=True) 
  s = split(s)

  #result = search('There be DT NN+ (DT) (RB) (JJ) NNP+ (call) (DT) (RB) (JJ) (NNPS|NNP)+', s)
  s, result = extract(statement)
  if result:
    #try:
      # Plural of the head noun becomes the table name,
      # e.g. "game engine" -> "game_engines".
      noun = search('(NN)+', s)[0].string
      table = pluralize(noun.replace(' ','_'))
      result = search('(JJ|NNPS|NNP)+', s) # this pulls in adjectives, but there's supposed to be a better fix coming
      ident = result[0].string
      name = result[1].string if len(result) > 1 else ident
      #raise Exception(table+"; "+ident+"; "+name)
      return newTable(table,ident,name,database_name)
    #except:
      #return regexMatch(statement,database_name)
  else:
    # No entity-creation pattern matched: fall back to regex handling.
    return regexMatch(statement,database_name)
Example #24
0
   def do_flower(self, i, j):
      """Process finding a flower and possibly doing something with it

      i, j: grid coordinates of the current map cell (cells are 15px
      squares on self.IMAGE).  Appends narration to self.TEMP and may
      record the flower in self.FLOWERS.
      """

      # Get a random color and flower name
      color  = random.choice(self.JSON['colors'])['color']
      flower = singularize(random.choice(self.JSON['flowers']))

      # Print them
      self.TEMP += "There was a beautiful " + color + " " + flower + " there. "
      self.TEMP += "It smelled like " + pluralize(random.choice(self.JSON['fruits'])) + "."

      # Put a square on the map to mark the flower
      self.IMAGE.filledRectangle((i * 15 + 4, j * 15 + 4), (i * 15 + 11, j * 15 + 10), self.COLORS['purple'])

      # Is the narrator keeping this flower? (10% chance)
      if random.randrange(100) < 10:
         self.TEMP += " I picked it"

         if self.FLOWERS:
            self.TEMP += " and added it to the rest of my bouquet"

         self.TEMP += "."

         self.FLOWERS.append({'color': color, 'flower': flower})

      # Does the narrator eat this flower instead? (5% chance)
      elif random.randrange(100) < 5:
         self.TEMP += " For some reason I ate it. It tasted " + random.choice(self.TASTES) + "."

      self.TEMP += "\n"
      # Reset the narrative "then" connector for the next event.
      self.THEN = False
Example #25
0
def process_agent_output(answer_template, noun, nouns, noun_topics,
                         answer_sentiment):
    """Fill an answer template's wildcard slots and return the reply.

    answer_template: object with ``answer`` (template string),
        ``fetch_count``, ``use_noun`` and ``use_sentiment`` attributes.
    noun: the noun from the user's question.
    nouns: default nouns for the numbered noun slots.
    noun_topics: optional topic list; when non-empty, nouns come from the
        topic preference tables instead.
    answer_sentiment: string substituted for the agent-sentiment slot.

    NOTE(review): relies on module globals defined elsewhere in this
    module (question_sentiment, sentiment_opt_pos/neg, topic_favorites,
    topic_dislike, wildcards).

    Fix: compare noun_topics against None with identity (``is not
    None``) rather than equality.
    """
    agent_output = answer_template.answer
    temp_nouns = nouns
    #print(agent_output, nouns, noun_topics, (nouns))
    if answer_template.fetch_count > 0 and noun_topics is not None and len(
            noun_topics) > 0:
        #print(noun_topics)
        if question_sentiment in sentiment_opt_pos:
            temp_nouns = topic_favorites[noun_topics[0]]
            #like_memory.loc[like_memory['sentiment'] > 0.5 && like_memory['topic'] == noun_topics[0]].sample().subject
        elif question_sentiment in sentiment_opt_neg:
            temp_nouns = topic_dislike[noun_topics[0]]
        # Drop the asked-about noun (in either number) so the agent does
        # not echo it back.  NOTE(review): this mutates the shared
        # preference list in place -- kept as in the original.
        sing_noun = singularize(noun)
        plural_noun = pluralize(noun)
        if sing_noun in temp_nouns: temp_nouns.remove(sing_noun)
        elif plural_noun in temp_nouns: temp_nouns.remove(plural_noun)

    #replace nouns
    for i in range(1, answer_template.fetch_count + 1):
        temp = "noun_" + str(i)

        agent_output = agent_output.replace(wildcards[temp], temp_nouns[i - 1])

    if answer_template.use_noun:
        agent_output = agent_output.replace(wildcards["noun"], noun)
    if answer_template.use_sentiment:
        agent_output = agent_output.replace(wildcards["sentiment"],
                                            question_sentiment)
    agent_output = agent_output.replace(wildcards["agent_sentiment"],
                                        answer_sentiment)
    #print(agent_output)
    return agent_output
Example #26
0
def p13():
    """Describe the sound made by a group of random things."""
    # Slots are drawn in the same order as the original template call.
    source = pluralize(random.choice(ns))
    tone_a = random.choice(adjs)
    tone_b = random.choice(adjs)
    tone_c = random.choice(adjs)
    tone_d = random.choice(adjs)
    echoes = pluralize(random.choice(ns))
    tone_e = random.choice(adjs)
    action_a = random.choice(verbed)
    action_b = random.choice(verbed)

    sentence = "the sound that the {nouns1} made was {adjective1} and {adjective2}; {adjective3} {adjective4} {nouns2} so {adjective5} that they were almost {verbed1} rather than {verbed2}.".format(
        nouns1=source,
        adjective1=tone_a,
        adjective2=tone_b,
        adjective3=tone_c,
        adjective4=tone_d,
        nouns2=echoes,
        adjective5=tone_e,
        verbed1=action_a,
        verbed2=action_b,
    )

    # 25% chance of wrapping with the helper.
    if random.random() < 0.25:
        sentence = look_around_you(sentence)

    return capitalize(sentence)
def conjugate_noun(noun, pos):
    """Inflect *noun* to match the Penn Treebank tag *pos*.

    Plural tags (NNS/NNPS) pluralize, singular tags (NN/NNP) singularize;
    any other tag returns the noun unchanged.
    """
    if pos in ("NNS", "NNPS"):
        return str(ptn.pluralize(noun))
    if pos in ("NN", "NNP"):
        return str(ptn.singularize(noun))
    return noun
def conjugate_noun(noun, pos):
    """Return *noun* in the grammatical number implied by its POS tag."""
    plural_tags = ("NNS", "NNPS")
    singular_tags = ("NN", "NNP")
    if pos in plural_tags:
        return str(pluralize(noun))
    elif pos in singular_tags:
        return str(singularize(noun))
    else:
        # non-noun tags pass through untouched
        return noun
Example #29
0
def synonyms(data):
    """Augment disease fact lists by swapping non-stopwords for WordNet lemmas.

    data: iterable of (facts, key, _) triples where facts is a list of
    tokenized sentences (lists of words).
    Returns a dict mapping key -> [original facts, augment_n mutated copies].
    """
    augment_n = 10
    data_dict = dict((key, [val]) for val, key, _ in data)

    # FIX: the '<>' operator was removed in Python 3; '!=' is equivalent
    # and valid in both Python 2 and 3.
    is_plural = lambda word: singularize(word) != word
    stops = set(stopwords.words('english') + ['l'])

    for disease in data:
        for _ in range(augment_n):
            new_facts_list = []
            for fact in disease[0]:
                new_fact = fact[:]
                for k, word in enumerate(fact):
                    if word not in stops:
                        syn = wordnet.synsets(word)
                        if syn:
                            # NOTE(review): despite the name, this always
                            # takes the FIRST synset; only the lemma within
                            # it is chosen at random.
                            random_syn = syn[0]
                            random_lemma = random.choice(random_syn.lemma_names())
                            random_lemma = pluralize(random_lemma) if is_plural(word)\
                                                else random_lemma
                            random_lemma = random_lemma.lower()
                            random_lemma = random_lemma.replace('_', ' ')
                            random_lemma = random_lemma.replace('-', ' ')
                            if ' ' in random_lemma:
                                # multi-word lemmas would break tokenization
                                continue
                            new_fact[k] = random_lemma
                new_facts_list.append(new_fact)
            data_dict[disease[1]].append(new_facts_list[:])
    return data_dict
Example #30
0
def getInflections(key):
    """Return the inflected forms of *key* that appear in the global wordlist.

    Combines all lexeme() inflections with the plural form, then keeps only
    forms present in `wordlist`. Non-alphabetic keys yield an empty set.
    """
    inflections = set()
    # print('"%s"' % key)

    if key.isalpha():
        try:
            # BUG FIX: lexeme() returns a *list*; set.add() raised
            # TypeError (unhashable list), which the bare except swallowed,
            # so no inflections were ever collected. update() adds each
            # inflected form individually.
            inflections.update(lexeme(key))  # all lexeme inflections
            inflections.add(pluralize(key))  # plus the plural inflection

            inflections.intersection_update(wordlist)

            print(inflections)

        except Exception:
            # pattern can raise on odd inputs; return what we gathered
            pass

    return inflections
def getSynonyms(word, part):
    """Look up single-word synonyms for *word*, inflected to match tag *part*.

    Verbs are looked up by lemma; results are de-duplicated. When a verb
    synonym has too few lexeme forms for the requested tense, the raw
    synonym falls through to the default branch (original behavior).
    """
    lookup = lemma(word) if part[0] == 'V' else word
    found = dictionary.synonym(lookup)
    if found is None:
        return [word]
    results = []
    for candidate in found:
        if " " in candidate:
            continue  # skip multi-word synonyms
        if part in ("VB", "VBP"):
            results.append(lemma(candidate))
        elif part == "VBD" and len(lexeme(candidate)) > 3:
            results.append(lexeme(candidate)[3])
        elif part == "VBG" and len(lexeme(candidate)) > 0:
            results.append(lexeme(candidate)[0])
        elif part == "VBN" and len(lexeme(candidate)) > 3:
            results.append(lexeme(candidate)[-1])
        elif part == "VBZ" and len(lexeme(candidate)) > 1:
            results.append(lexeme(candidate)[1])
        elif part == "NN" and not candidate.endswith("ss"):
            results.append(singularize(candidate))
        elif part == "NNS":
            results.append(pluralize(candidate))
        else:
            results.append(candidate)
    return list(set(results))
def change_pluralization(token):
    """Toggle *token* between singular and plural forms."""
    singular = singularize(token)
    plural = pluralize(token)
    # A token already in singular form gets pluralized; otherwise singularized.
    return plural if token == singular else singular
Example #33
0
def process(statement, database_name=DATABASE_NAME):
    """Create or modify database entities from natural-language statements.

    Handles statements such as "There is a course CSCI4702 called Mobile
    Programming"; falls back to regex matching when the parse-based
    extraction yields nothing.

    NOTE(review): statements like "There is a game engine Unity3d" still
    cause trouble — named-entity recognition (or capitalization heuristics,
    which fail for categories like "CTO") would be needed.
    """
    # Parse with relations/lemmata; light=True improves NN/NNP splitting for
    # some inputs but not others (see original author's notes). The parse
    # result is discarded once extract() provides its own tree.
    parsed = parse(statement, relations=True, lemmata=True, light=True)
    parsed = split(parsed)

    s, result = extract(statement)
    if not result:
        return regexMatch(statement, database_name)

    # First NN run names the table; pluralize it, underscores for spaces.
    noun = search('(NN)+', s)[0].string
    table = pluralize(noun.replace(' ', '_'))
    # Pulls adjectives in as well — first hit is the identifier, the
    # second (when present) is the display name.
    hits = search('(JJ|NNPS|NNP)+', s)
    ident = hits[0].string
    name = hits[1].string if len(hits) > 1 else ident
    return newTable(table, ident, name, database_name)
Example #34
0
def inject_sub_nn(sent_i, e_config):
    """Flip the grammatical number of one random noun in *sent_i*, in place.

    sent_i: sequence of token dicts with 'form', 'tag' and 'ctag' keys.
    e_config: unused here; kept for interface compatibility.
    Returns sent_i (unchanged when it contains no NN/NNS token).
    """
    target_indices = [i for i, w_i in enumerate(sent_i)
                      if w_i['tag'] in ('NN', 'NNS')]
    if target_indices:
        # random.choice is the idiomatic form of indices[randint(0, len-1)]
        target_index = random.choice(target_indices)
        target_token = sent_i[target_index]['form']
        target_tag = sent_i[target_index]['tag']

        if target_tag == "NN":
            new_token = pluralize(target_token)
            new_tag = "NNS"
        else:
            # Only 'NN'/'NNS' reach here, so this branch is 'NNS'. The
            # original's bare `raise` fallback was unreachable (and would
            # itself have raised RuntimeError: no active exception).
            new_token = singularize(target_token)
            new_tag = "NN"

        sent_i[target_index]['form'] = str(new_token)
        sent_i[target_index]['tag'] = new_tag
        sent_i[target_index]['ctag'] = new_tag
    return sent_i
Example #35
0
    def _transform_word(self, word, pos, less, more):
        """transforms a word to be less less and more more

        :param word: word to transform
        :type word: str

        :param pos: part of speech of the word
        :type pos: str

        :param less: list of 'less' words
        :type less: list

        :param more: list of 'more' words
        :type more: list

        :returns: transformed word
        :rtype: str
        """

        new_word = self._get_similar_word(word, less, more)
        new_pos = en.tag(new_word)[0][1]

        # keep the original when the replacement changes word class
        # or is no replacement at all
        if (pos[:2] != new_pos[:2]) or word == new_word:
            return word

        # handle noun
        if pos.startswith('NN'):

            # pluralization: match the grammatical number of the original
            if pos.endswith('S') and not new_pos.endswith('S'):
                new_word = en.pluralize(new_word)

            elif not pos.endswith('S') and new_pos.endswith('S'):
                new_word = en.singularize(new_word)

            # capitalization: preserve the original casing
            if word[0].isupper():
                new_word = new_word[0].upper() + new_word[1:]
            else:
                new_word = new_word.lower()

        # handle verb
        elif pos.startswith('VB'):

            # FIX: en.tenses() returns an empty list for forms it cannot
            # analyze; the original indexed [0] unguarded and raised
            # IndexError. Skip conjugation in that case.
            tenses = en.tenses(word)
            if tenses:
                tense, person, number = tenses[0][:3]

                # conjugate the replacement like the original verb
                conjugated = en.conjugate(new_word,
                                        tense=tense,
                                        person=person,
                                        number=number,
                                        parse=False)

                if conjugated is not None:
                    new_word = conjugated

        # remove underscores for joint words
        new_word = new_word.replace('_', ' ')

        return new_word
Example #36
0
File: faq.py Project: VRDate/twss
def regexMatch(statement, database_name=DATABASE_NAME):
    """Create a table from "There is a/an <type> <ident> called <name>" statements.

    Non-matching statements are delegated to processNewAspect().
    """
    m = re.search(r'There is an? ([\w]+) ([\s\w]+) called ([\s\w]+)\.?', statement)
    if m is None:
        return processNewAspect(statement, database_name)
    table = pluralize(m.group(1))
    ident = m.group(2)
    name = m.group(3)
    return newTable(table, ident, name, database_name)
Example #37
0
 def inject(self, title, word_pair):
     """Fill the title's NP slots with word_pair[0] (inflected per slot
     category) and its ADJ slots with word_pair[1], all capitalized."""
     noun, adjective = word_pair[0], word_pair[1]
     for i, cat in title.get_slots('NP'):
         inflect = pluralize if cat == 'plural' else singularize
         title.inject(inflect(noun).capitalize(), 'NP')
     for i, cat in title.get_slots('ADJ'):
         title.inject(adjective.capitalize(), 'ADJ')
Example #38
0
def is_plural(word):
    """Heuristically decide whether *word* is plural.

    Known pronouns are resolved via the PRONOUN_PLURAL / PRONOUN_SINGULAR
    tables; anything else is treated as plural when pluralizing its lemma
    reproduces the word itself.
    """
    lowered = word.lower()  # hoist the repeated word.lower() calls
    if lowered in PRONOUN_PLURAL:
        return True

    if lowered in PRONOUN_SINGULAR:
        return False

    # 'True if X else False' is redundant — the comparison is already bool.
    return pluralize(lemma(lowered)) == lowered
Example #39
0
    def _nouns_from_sentence(self, s):
        """Run RAKE on one sentence and collect tags from its ranked phrases.

        Single-token phrases are kept only when POS-tagged as noun-like,
        longer than one character, and not already present in another
        grammatical number; multi-word phrases are kept verbatim.
        Also appends the phrase scores to self.phraseScore as a side effect.
        """
        s = re.sub('[' + string.punctuation + ']', '', s)
        self.r.extract_keywords_from_text(s)
        rp = self.r.get_ranked_phrases()
        self.phraseScore.append(self.r.get_ranked_phrases_with_scores())
        final_nouns = []
        for n in rp:
            tokens = nltk.tokenize.word_tokenize(n)
            if len(tokens) == 1:
                item, tag = nltk.pos_tag(tokens)[0]
                if 'NN' in tag and len(item) > 1:
                    # skip near-duplicates differing only in number
                    if (singularize(item) not in final_nouns
                            and pluralize(item) not in final_nouns):
                        final_nouns.append(item)
            else:
                final_nouns.append(n)
        return final_nouns

    def getTags(self):
        """
        Extract possible tags from the text using RAKE
        :return: Tag set — a flat list for a single-sentence text, otherwise
                 one list per sentence.

        The original duplicated the extraction logic verbatim in both
        branches; it now lives once in _nouns_from_sentence().
        """
        if len(self.sentences) == 1:
            return self._nouns_from_sentence(self.sentences[0])
        return [self._nouns_from_sentence(s) for s in self.sentences]
def getPluralSingular(w):
    """Return the opposite grammatical number of *w*."""
    if isplural(w):
        return singularize(w)
    return pluralize(w)
Example #41
0
def pluralize_singularize(word,prev_word):
    if "thing" in word:
        print word,prev_word
    if "these" in prev_word:
        return pluralize(word)
    elif "this" in prev_word:
        return singularize(word)
    else:
        return word
Example #42
0
def pluralize_singularize(word,prev_word):
    if "thing" in word:
        print word,prev_word
    if "these" in prev_word:
        return pluralize(word)
    elif "this" in prev_word:
        return singularize(word)
    else:
        return word
Example #43
0
def pl(word, num=2):
    """Pluralize *word* unless num == 1, honoring PLURAL_EXCEPTIONS."""
    if num == 1:
        return word
    if word in PLURAL_EXCEPTIONS:
        return PLURAL_EXCEPTIONS[word]
    return pluralize(word, pos='NOUN')
Example #44
0
def shapeNoun(noun, posTag):
	"""Pluralize a singular base *noun* when its POS tag is plural
	(NNS/NNPS); otherwise return it unchanged."""
	return pt.pluralize(noun) if posTag in ('NNS', 'NNPS') else noun
Example #45
0
def key_set(full_word):
    """Return a sorted tuple of lookup keys: number variants of *full_word*.

    Words that are already singular (or end in 'ss', e.g. 'class') keep
    their own generated plural; otherwise the singular form anchors the
    set. Naive 's'/'es' plurals are appended for non-plural-looking
    variants as a fallback for words the pluralizer mishandles.
    """
    # hack for class etc.
    if singularize(full_word) == full_word or full_word.endswith('ss'):
        words = [full_word, pluralize(full_word)]
    else:
        singular = singularize(full_word)
        words = [singular, full_word, pluralize(singular)]

    for w in words[:]:
        # if not already plural-like
        if not w.endswith('s'):
            # str.endswith accepts a tuple — replaces the any()+listcomp
            suffix = 'es' if w.endswith(('x', 'z', 'ch', 'sh')) else 's'
            words.append('%s%s' % (w, suffix))
    # sorted() takes any iterable; the intermediate list() was redundant
    return tuple(sorted(set(words)))
Example #46
0
def pl(word, num=2):
    """Return *word* pluralized when num != 1; PLURAL_EXCEPTIONS wins
    over the automatic pluralizer."""
    if num != 1:
        try:
            word = PLURAL_EXCEPTIONS[word]
        except KeyError:
            word = pluralize(word, pos='NOUN')
    return word
Example #47
0
def word_denormalize(word, part_of_speech, word_form):
    """Convert a normalized noun back to its surface (plural) form.

    Only noun tags with word_form == '?' are handled; any other input
    falls through and returns None.
    NOTE(review): looks incomplete — the original author's comments say
    other parts of speech were still to be added; confirm intended
    behavior for verbs/adjectives.
    """
    if part_of_speech in Nouns_tags and word_form == '?':
        return pluralize(word, pos=NOUN, classical=True)
    return None
Example #48
0
def shapeNoun(noun, posTag):
    """Inflect a singular base *noun* to plural when posTag is NNS or NNPS."""
    if posTag in ('NNS', 'NNPS'):
        return pt.pluralize(noun)
    return noun
Example #49
0
def xproto_pluralize(field):
    """Plural name for *field*: an explicit options['plural'] override
    (unquoted) wins; otherwise pattern's automatic pluralizer is used."""
    try:
        return unquote(field['options']['plural'])
    except KeyError:
        return en.pluralize(field['name'])
Example #50
0
def p7():
    """Generate a random sentence about things detaching; sometimes wrapped
    by meanwhiler() or suddenly(). Returns the capitalized sentence."""
    # Dict entries evaluate left-to-right, preserving the original draw order.
    slots = {
        "adjective1": random.choice(adjs),
        "nouns1": pluralize(random.choice(ns)),
        "adjective2": random.choice(adjs),
        "nouns2": pluralize(random.choice(ns)),
        "nouns3": pluralize(random.choice(ns)),
        "verbing": random.choice(verbing),
        "adjective3": random.choice(adjs),
        "quants": pluralize(random.choice(quants)),
    }
    p = "{adjective1} {nouns1} were detaching from the {adjective2} {nouns2} and {nouns3}, {verbing} in {adjective3} {quants}.".format(**slots)

    if random.random() < 0.25:
        p = meanwhiler(p)
    elif random.random() < 0.5:
        p = suddenly(p)

    return capitalize(p)
Example #51
0
 def inject(self, title, word_pair):
     """Place each word of *word_pair* into its matching title slot,
     inflected by slot type (NOUN -> singular, NOUNS -> plural)."""
     for (i, slot), word in zip(title.slots, word_pair):
         cleaned = word.replace("_", " ").title()
         if slot == 'NOUN':
             title.inject(singularize(cleaned), slot, i)
         elif slot == 'NOUNS':
             title.inject(pluralize(singularize(cleaned)), slot, i)
         else:
             title.inject(cleaned, slot, i)
Example #52
0
def regexMatch(statement, database_name=DATABASE_NAME):
    """Fallback parser: build a table from 'There is a/an X Y called Z'
    statements; anything else goes to processNewAspect()."""
    m = re.search(r'There is an? ([\w]+) ([\s\w]+) called ([\s\w]+)\.?',
                  statement)
    if m:
        kind, ident, name = m.group(1), m.group(2), m.group(3)
        return newTable(pluralize(kind), ident, name, database_name)
    return processNewAspect(statement, database_name)
Example #53
0
File: base.py Project: vpramo/xos-1
def xproto_pluralize(field):
    """Return the plural form of field['name'], unless the model declares
    an explicit 'plural' option (which cannot be derived automatically)."""
    options = field.get('options', {})
    if 'plural' in options:
        return unquote(options['plural'])
    return en.pluralize(field['name'])
Example #54
0
def build_from_clause(tables):
    """Assembles a FROM clause from a list of tables.

    Each table contributes the pluralized first hypernym of its noun;
    results are comma-joined (e.g. 'users, orders'). Returns '' for an
    empty list.
    """
    # str.join replaces the quadratic +=-in-a-loop string build; the bare
    # format() call in the original was a no-op str() conversion.
    return ', '.join(pluralize(s.hypernym[0]) for s in tables)
Example #55
0
def p11():
    """Generate a random 'studded with' sentence; 25% chance of a
    look_around_you() wrapper. Returns the capitalized sentence."""
    template = "the {noun1} was studded with an enormous number of {nouns}, flooding the {noun2} with {adjective} {noun3}."
    p = template.format(
        noun1=random.choice(ns),
        nouns=pluralize(random.choice(ns)),
        noun2=random.choice(ns),
        adjective=random.choice(adjs),
        noun3=random.choice(ns),
    )
    if random.random() < 0.25:
        p = look_around_you(p)
    return capitalize(p)
Example #56
0
def p12():
    """Generate a random 'stood in the middle of' sentence; 25% chance of
    a look_around_you() wrapper. Returns the capitalized sentence.

    Fix: the original also drew a random 'verbed' word and passed it to
    .format() although the template has no {verbed} placeholder — dead
    work that silently consumed an extra RNG draw.
    """
    p = "{noun1} stood in the middle of the {noun2}, surrounded by {noun3} of {adjective} {nouns}.".format(
        noun1 = referenced(random.choice(ns)),
        noun2 = random.choice(ns),
        noun3 = referenced(random.choice(ns)),
        adjective = random.choice(adjs),
        nouns = pluralize(random.choice(ns))
    )
    if random.random() < 0.25:
        p = look_around_you(p)

    return capitalize(p)
Example #57
0
def inflate(s):
    
    """ Returns an exaggerated string:
        inflate("I'm eating a burger") => "I'm eating hundreds of burgers".
    """
    
    # Build a parse tree so the sentence can be searched by POS tag.
    # http://www.clips.ua.ac.be/pages/pattern-en#tree
    tree = parsetree(s)
    
    # Match "determiner, optional adjective, one or more nouns";
    # the curly braces make each part retrievable via match.group().
    # http://www.clips.ua.ac.be/pages/pattern-search
    for match in search("{DT} {JJ?} {NN+}", tree):
        # Draw the quantifier first — kept before the a/an test below to
        # preserve the original's RNG consumption order.
        quantity = choice(["dozens of ", "hundreds of ", "thousands of "])
        
        # Only "a"/"an" phrases denote countable things of which many can
        # exist. A "the" phrase may be unique ("the capital of Nairobi")
        # and should not become "hundreds of capitals of Nairobi".
        if match.group(1).string.lower() not in ("a", "an"):
            continue
        
        # Keep the adjective, if one matched.
        if match.group(2):
            quantity += match.group(2).string + " "
            
        # Pluralize the noun chunk and splice the phrase back in.
        quantity += pluralize(match.group(3).string)
        s = s.replace(match.group(0).string, quantity)
    return s