Example #1
0
    def create_claims(self):
        """Generate patent-style claims from self.prefixes and self.artifacts.

        Builds one independent claim per prefix, each followed by one
        dependent claim per sampled artifact term.  Results are stored in
        self.claims; claim numbers run sequentially across all claims.
        """
        # Templates: {0}=claim number, {1}=article-prefixed prefix, {2}=title,
        # {3}=claimed term, {4}=illustration text.
        independent = '{0}. {1} for {2}, comprising:'
        dependent = '{0}. {1} of claim {2}, wherein said {3} comprises {4}.'

        self.claims = []
        claim_number = 0
        for prefix in self.prefixes:
            claim_number += 1
            claim = independent.format(
                claim_number, referenced(prefix).capitalize(), self.partial_title)
            # Pick 2-5 distinct artifact terms.
            # NOTE(review): assumes len(self.artifacts) >= 2 — random.randint
            # raises ValueError otherwise; confirm upstream guarantees this.
            terms = random.sample(list(self.artifacts),
                                  random.randint(2, min(len(self.artifacts), 5)))
            for term in terms[:-1]:
                claim += '\n\t' + referenced(term) + '; '
            claim += 'and \n\t' + referenced(terms[-1]) + '.'
            self.claims.append(claim)
            independent_claim_number = claim_number  # anchor for the dependents

            # One dependent claim per term, referring back to the independent.
            for term in terms:
                claim_number += 1
                claim = dependent.format(
                    claim_number,
                    referenced(prefix, article=DEFINITE).capitalize(),
                    independent_claim_number, term,
                    random.choice(self.unformatted_illustrations))
                self.claims.append(claim)
Example #2
0
def processArt(t):
    """Rewrite indefinite articles in the token list *t*.

    Wherever a token is "a" or "an", replace it with a joinlist() of the
    indefinite article appropriate for the FOLLOWING word plus "the",
    letting a later stage choose between them.  pattern.en's referenced()
    decides whether the next word takes "a" or "an".

    Input:  t - list of word tokens
    Output: single space-joined string
    """
    l = []
    cnt = 0
    for ele in t:
        cnt = cnt + 1  # cnt now indexes the *next* token
        if ele in ("a", "an"):
            # Bug fixes: referenced() returns e.g. "an hour", never a bare
            # article, so the old equality test ('referenced(t[cnt]) == "a"')
            # could never match; also guard against the article being the
            # final token (the old code raised IndexError there).
            if cnt < len(t) and referenced(t[cnt]).startswith("an "):
                l = l + [joinlist(["an", "the"])]
            else:
                l = l + [joinlist(["a", "the"])]
        else:
            l = l + [ele]
    return " ".join(l)
    def create_claims(self):
        """Populate self.claims with sequentially numbered patent claims.

        For each prefix: one independent claim built from 2-5 sampled
        artifact terms, then one dependent claim per term that refers back
        to the independent claim's number.
        """
        independent = '{0}. {1} for {2}, comprising:'
        dependent = '{0}. {1} of claim {2}, wherein said {3} comprises {4}.'

        self.claims = []
        claim_number = 0
        for prefix in self.prefixes:
            claim_number += 1
            claim = independent.format(claim_number,
                                       referenced(prefix).capitalize(),
                                       self.partial_title)
            # NOTE(review): random.randint(2, ...) assumes at least two
            # artifacts are available — confirm callers guarantee this.
            terms = random.sample(
                list(self.artifacts),
                random.randint(2, min(len(self.artifacts), 5)))
            for term in terms[:-1]:
                claim += '\n\t' + referenced(term) + '; '
            claim += 'and \n\t' + referenced(terms[-1]) + '.'
            self.claims.append(claim)
            independent_claim_number = claim_number  # anchor for dependents

            for term in terms:
                claim_number += 1
                claim = dependent.format(
                    claim_number,
                    referenced(prefix, article=DEFINITE).capitalize(),
                    independent_claim_number, term,
                    random.choice(self.unformatted_illustrations))
                self.claims.append(claim)
Example #4
0
def testBasic():
    """Smoke-test pattern.en basics (Python 2 print statements).

    Demonstrates referenced() article choice plus the lexeme/lemma/
    conjugate inflection helpers.
    """
    from pattern.en import referenced
    print referenced('hour')

    from pattern.en import conjugate, lemma, lexeme
    print lexeme('purr')
    print lemma('purring')
    print conjugate('purred', '3sg') # he / she / it
Example #5
0
def testBasic():
    """Smoke-test pattern.en helpers (Python 2 syntax).

    Prints the article-prefixed form of "hour", then the lexeme, lemma,
    and third-person-singular past conjugation of "purr".
    """
    from pattern.en import referenced
    print referenced('hour')

    from pattern.en import conjugate, lemma, lexeme
    print lexeme('purr')
    print lemma('purring')
    print conjugate('purred', '3sg')  # he / she / it
Example #6
0
        def _on_match_rewrite(matcher, doc, i, matches, pattern):
            """spaCy Matcher on-match callback that rewrites the matched span.

            Copies tokens from the matched 'before' span into the 'after'
            pattern slots via the recorded edit ops (self.ops), fills any
            remaining DET slots with an article chosen by referenced(), and
            returns (match_id, generated_text) — or None when some slot
            could not be filled.
            """
            match_id, start, end = matches[i]
            # get meta information for this on-match function
            rule_pattern = PatternMeta(
                before=self.normalize_pattern(pattern.before),
                after=self.normalize_pattern(pattern.after)
            )
            prev_text, after_text = doc[:start].text.strip(), doc[end:].text.strip()
            # keep all the orth in the pattern.
            between_tokens = [get_str_from_pattern(r) for r in rule_pattern.after]

            # fill in all the other None
            for idx, _ in enumerate(between_tokens):
                # if between_tokens[idx]:  # already filled in
                #    continue
                # check what OP it is.
                for op in self.ops:
                    # idx is the idx in the toIdxes
                    # make sure this is in the right count of words. 
                    # Should be because we are using one word a time
                    if op.toIdxes[0] <= idx and op.toIdxes[1] > idx and \
                        op.fromIdxes[1] - op.fromIdxes[0] == op.toIdxes[1] - op.toIdxes[0]:
                        # before, it is from the start to the fromIdx 0
                        # offset of the idx w.r.t op.toIdxes[0], then add the offset to the fromIdx
                        before_idx = idx - op.toIdxes[0] + op.fromIdxes[0]
                        before_doc_idx = before_idx + start
                        # then the after idx.
                        after_idx = idx #+ op.toIdxes[0]

                        # Skip ops whose mapped indices fall outside either
                        # pattern or outside the document.
                        if before_idx < 0 or before_idx >= op.fromIdxes[1] or \
                            after_idx < 0 or after_idx >= op.toIdxes[1] or \
                            before_doc_idx < 0 or before_doc_idx >= len(doc):
                            continue
                        between_tokens[idx] = match_super(doc[before_doc_idx].text, 
                            change_matched_token_form(  # form change
                                a_token=doc[before_doc_idx],
                                a_pattern=rule_pattern.before[before_idx],
                                b_pattern=rule_pattern.after[after_idx]))
                        break
            # fill in all the other None
            for idx, _ in enumerate(between_tokens):
                if between_tokens[idx]:  # already filled in
                    continue
                # add appropriate DET
                if 'POS' in rule_pattern.after[idx] and rule_pattern.after[idx]['POS'] == 'DET':
                    # Prefer the word following the DET slot; fall back to the
                    # corresponding document token.
                    if idx < len(between_tokens) - 1 and between_tokens[idx+1]:
                        between_tokens[idx] = referenced(between_tokens[idx+1]).split()[0]
                    elif (start + idx - 1 < len(doc)) and \
                         (start + idx - 1 >= 0):
                        between_tokens[idx] = referenced(doc[start + idx - 1].text).split()[0]  # noqa: E501
            if not None in between_tokens:
                generated_text = ' '.join([prev_text] + between_tokens + [after_text]).strip()
                return (match_id, generated_text)
            return None
Example #7
0
def p12():
    """Generate a random 'stood in the middle of' sentence.

    Draws from the module word pools (ns, adjs), occasionally wraps the
    sentence with look_around_you(), and returns it capitalized.
    """
    # Bug fix: dropped the unused 'verbed' keyword argument — the template
    # contains no {verbed} placeholder, so the draw was dead weight.
    p = "{noun1} stood in the middle of the {noun2}, surrounded by {noun3} of {adjective} {nouns}.".format(
        noun1 = referenced(random.choice(ns)),
        noun2 = random.choice(ns),
        noun3 = referenced(random.choice(ns)),
        adjective = random.choice(adjs),
        nouns = pluralize(random.choice(ns))
    )
    if random.random() < 0.25:
        p = look_around_you(p)

    return capitalize(p)
Example #8
0
def p16():
    """Generate a 'the X was a Y Z, divided into ...' sentence."""
    # NOTE(review): adjective3 draws from ns (the noun pool), not adjs —
    # looks like a slip, but intent can't be confirmed from here, so the
    # code is left unchanged.
    p = "the {noun1} was {adjective1} {noun2}, divided into {adjective2} {nouns1} by {quant} of {adjective3} {color} {nouns2}.".format(
        noun1 = random.choice(ns),
        adjective1 = referenced(random.choice(adjs)),
        noun2 = random.choice(ns),
        adjective2 = random.choice(adjs),
        nouns1 = pluralize(random.choice(ns)),
        quant = referenced(random.choice(quants)),
        adjective3 = random.choice(ns),
        color = random.choice(colors),
        nouns2 = pluralize(random.choice(ns))
    )

    return capitalize(p)
Example #9
0
def p5():
    """Build a random 'X was surrounded by ...' sentence.

    Sometimes decorates the result with look_around_you()/get_lost() or
    suddenly() before capitalizing it.
    """
    subject = random.choice(ns)
    quantity = referenced(random.choice(quants))
    modifier = random.choice(ns)
    plural_noun = pluralize(random.choice(ns))
    descriptor = random.choice(adjs)
    comparison = referenced(random.choice(ns))
    p = "the {noun1} was surrounded by {quant} of {noun2} {nouns}, {adjective} as {noun3}.".format(
        noun1=subject, quant=quantity, noun2=modifier,
        nouns=plural_noun, adjective=descriptor, noun3=comparison)

    # Each branch draws its own fresh random number, as in the original.
    if random.random() > 0.6:
        p = random.choice([look_around_you(p), get_lost(p)])
    elif random.random() > 0.8:
        p = suddenly(p)

    return capitalize(p)
Example #10
0
def indef(word):
    """Prefix *word* with its indefinite article.

    A leading character in SYMB_FOR_INDEF_AN forces "an"; words listed in
    REFERENCED_EXCEPTIONS use their hard-coded article; everything else
    defers to pattern.en's referenced().
    """
    if word[0] in SYMB_FOR_INDEF_AN:
        return 'an {0}'.format(word)
    if word in REFERENCED_EXCEPTIONS:
        return '{0} {1}'.format(REFERENCED_EXCEPTIONS[word], word)
    return referenced(word)
 def prefix(self):
     """Pick two distinct claim prefixes and build an article-prefixed title.

     Stores the chosen pair on self.prefixes and returns a title such as
     "a system and method for " (20% of the time "a web-based ...").
     """
     pool = ["system", "method", "apparatus", "device"]
     self.prefixes = random.sample(pool, 2)
     first, second = self.prefixes
     title = first + " and " + second + " for "
     if random.random() < .2:
         title = "web-based " + title
     return referenced(title)
Example #12
0
        def make_noun_string(np, plural=False):
            """Render the (modifier, noun) pair *np* as a noun-phrase string.

            Mass nouns and every-/any-/some- words pass through unchanged;
            quantifier modifiers force a plural; otherwise a die roll picks
            a plural, possessive, indefinite, or definite form.
            """
            # random chance of removing modifier
            #if random.random() < 0.5:
            #    np[0] == ''

            # common mass nouns

            if np[1] in ['data', 'information', 'children', 'people', 'stuff', 'equipment']:
                return ' '.join(np).strip()

            elif any(np[1].lower().startswith(x) for x in ('every', 'any', 'some')) or np[1] in ('nothing', 'nobody'):
                return np[1]

            quantifiers = ['many', 'few', 'several', 'various', 'multiple', 'fewer', 'more']
            if np[0] in quantifiers:
                return np[0] + ' ' + pluralize(np[1])

            else:
                die_roll = random.random()
                if die_roll < 0.15 or plural:
                    return ' '.join((np[0], pluralize(np[1]))).strip()
                elif die_roll < 0.25:
                    return random.choice(('his', 'her', 'their', 'your')) + ' ' + ' '.join(np).strip()
                elif random.random() < 0.45:
                    # NOTE(review): this branch uses a FRESH random.random(),
                    # not die_roll — confirm that was intentional.
                    return referenced(' '.join(np).strip())
                else:
                    return 'the ' + ' '.join(np).strip()
Example #13
0
 def prefix(self):
     """Choose two distinct prefixes and return an article-prefixed title.

     Side effect: stores the chosen pair on self.prefixes.  20% of titles
     gain a "web-based " qualifier before referenced() adds "a"/"an".
     """
     prefixes = ["system", "method", "apparatus", "device"]
     self.prefixes = random.sample(prefixes, 2)
     title = self.prefixes[0] + " and " + self.prefixes[1] + " for "
     if random.random() < .2:
         title = "web-based " + title
     return referenced(title)
Example #14
0
def get_random_word(t, ref=False):
   """Return a random word from the POS table, optionally article-prefixed.

   t:   POS tag key into the module-level POS table.
   ref: when True, prefix the chosen word with "a"/"an" via referenced().
   """

   # If there are entries in the lexnames list for the given POS tag, limit results to that,
   # otherwise just return a random word for that POS
   word = None
   if len(POS[t]['lexnames']):

      lexname = ''
      # NOTE(review): this loop retries until a word's first synset lexname
      # matches — it never terminates if no candidate can match; confirm
      # the word pools guarantee at least one hit.
      while lexname not in POS[t]['lexnames']:
         word = random.choice(POS[t]['words'])[0]

         synset = wordnet.synsets(get_singular(word), pos=t)

         if synset:
            lexname = synset[0].lexname

   else:
      word = random.choice(POS[t]['words'])[0]

   # If required, prefix with an article 
   if ref:
      word = referenced(word)

   return word.lower()
Example #15
0
def ct(word, num, use_a=False):
    """Format *num* *word*(s), pluralizing when necessary.

    With use_a=True, a count of exactly 1 is rendered as "a"/"an"
    (chosen automatically by pattern.en's referenced()) instead of "1".
    """
    if use_a and num == 1:
        # Auto-choose "a" or "an" based on word
        return referenced(word)
    return '{0} {1}'.format(num, pl(word, num))
Example #16
0
def opening():
    """Compose the opening line: 'It was a <adj> and <adverbial> <noun>.'"""
    adjective = referenced(random.choice(adjs))
    adverbial = random.choice(adjeys)
    noun = random.choice(ns)
    p = "it was {adjective} and {adjectively} {noun}.".format(
        adjective=adjective,
        adjectively=adverbial,
        noun=noun,
    )
    return capitalize(p)
def describe(synset):
    """Render a one-sentence definition of *synset*, listing synonyms.

    The first lemma name gets an indefinite article; remaining lemma
    names are appended as an "also known as" clause.
    """
    names = synset.lemma_names
    head = referenced(names[0])
    definition = synset.definition.strip()
    text = "{0} is {1}".format(head, definition)
    if len(names) > 1:
        text += ', also known as {0}'.format(enum_or(names[1:]))
    return sentence(text)
Example #18
0
def ct(word, num, use_a=False):
    ''' Return a string counting + pluralizing, if necessary, the word.

    word:  the noun to count
    num:   the count
    use_a: when True and num == 1, use "a"/"an" instead of the numeral
    '''
    # Switch the numeral 1 to 'a' if the use_a flag is set
    if num == 1 and use_a:
        # Auto-choose "a" or "an" based on word
        return referenced(word)

    return '{0} {1}'.format(num, pl(word, num))
def specify(synset, hyponyms):
    """Describe *synset* in terms of its more specific hyponyms.

    One hyponym:  "<name> can, more specifically, be a <hyponym>".
    Several:      "<name> is either <h1>, <h2> or <h3>. ".
    """
    name = referenced(synset.lemma_names[0])
    string = name
    hyp_names = [s.lemma_names[0] for s in hyponyms]
    if len(hyp_names) == 1:
        # NOTE(review): hard-coded "a" here ignores a/an selection — confirm
        # whether referenced() was intended for the hyponym too.
        string += ' can, more specifically, be a {0}'.format(hyp_names[0])
    else:
        # NOTE(review): trailing ". " before sentence() may double-punctuate.
        string += ' is either {0}. '.format(enum_or(hyp_names))
    return sentence(string)
Example #20
0
def p3():
    """Generate '<Pronoun> <verb> seated in a <noun>, surrounded by ...'."""
    subject = capitalize(PRONOUN) + " " + VERB
    place = referenced(random.choice(ns))
    group_a = pluralize(random.choice(ns))
    group_b = pluralize(random.choice(ns))
    p = "{pronoun} seated in {noun}, surrounded by {nouns1} and {nouns2}.".format(
        pronoun=subject, noun=place, nouns1=group_a, nouns2=group_b)
    return capitalize(p)
Example #21
0
def add_sentence(noun, adjective, nutsness=10):
    '''
    Create a new sentence. Nutsness will define the chance on generating
    strange additions with pattern's drivel(). This is awesome.

    Input: String noun, String adjective, integer nutsness
    Output: String sentence
    '''
    nuts = 10 - nutsness
    n = noun.split()

    # NOTE(review): with the default nutsness=10, nuts is 0 and
    # randint(0, 0) is always 0, so the "boring" branch always runs —
    # this looks inverted relative to the docstring; confirm intent.
    if randint(0, nuts) != 0:
        # return a ridiculous sentence
        # NOTE(review): adjective and drivel(...) are concatenated with no
        # separating space — verify drivel() output begins with one.
        sentence = '. It is {}'.format(referenced(adjective + drivel(n[-1])))
    else:
        # return a boring sentence
        sentence = '. This is a {} place'.format(referenced(adjective))

    return sentence
Example #22
0
 def create_abstract(self):
     """Build self.abstract from artifact phrases mined from the source text.

     Side effects: sets self.artifacts (deduplicated set of matches) and
     self.abstract (title plus a comma-joined, article-prefixed list).
     """
     artifacts = search.hypernym_combo(self.source_text, 'artifact', "JJ NN|NNS")
     #artifacts +=search.hypernym_combo(self.source_text, 'material', "JJ NN|NNS")
     artifacts = set(artifacts)
     self.artifacts = artifacts
     # Bug fix: dropped the redundant 'words = []' that was immediately
     # overwritten by the comprehension below.
     words = [referenced(w) for w in artifacts]  # e.g. "a red chair"
     self.abstract = self.title + ". "
     # NOTE(review): "devices comprises" is runtime output and reads like a
     # grammar slip, but it is left unchanged deliberately.
     self.abstract += "The devices comprises "
     self.abstract += ", ".join(words) 
Example #23
0
 def test_indefinite_article(self):
     """Assert indefinite_article()/article()/referenced() choose "a" vs "an"
     correctly (Python 2 test; note the trailing print statement)."""
     # Assert "a" or "an".
     for article, word in (("an", "hour"), ("an", "FBI"), ("a", "bear"),
                           ("a", "one-liner"), ("a", "European"),
                           ("a", "university"), ("a", "uterus"),
                           ("an", "owl"), ("an", "yclept"), ("a", "year")):
         self.assertEqual(en.inflect.indefinite_article(word), article)
     self.assertEqual(en.article("heir", function=en.INDEFINITE), "an")
     self.assertEqual(en.referenced("ewe"), "a ewe")
     print "pattern.en.article()"
 def create_abstract(self):
     """Build self.abstract from artifact noun phrases in the source text.

     Sets self.artifacts (set of "JJ NN|NNS" matches under the 'artifact'
     hypernym) and self.abstract (title plus a comma-joined list of
     article-prefixed artifacts).
     """
     artifacts = search.hypernym_combo(self.source_text, 'artifact',
                                       "JJ NN|NNS")
     #artifacts +=search.hypernym_combo(self.source_text, 'material', "JJ NN|NNS")
     artifacts = set(artifacts)
     self.artifacts = artifacts
     words = []
     words = [referenced(w) for w in artifacts]  # e.g. "a red chair"
     self.abstract = self.title + ". "
     self.abstract += "The devices comprises "
     self.abstract += ", ".join(words)
 def clean_text(self, words):
     """Normalize a phrase based on the POS of its first token (via self.nlp).

     Leading verb    -> converted to its participle (gerund) form.
     Leading noun/adj-> prefixed with "a"/"an" unless an article is already
                        the first token.
     Leading number  -> the following word is pluralized.

     Input:  words - space-separated phrase
     Output: the cleaned phrase string
     """
     new_words = words.split(' ')
     doc = self.nlp(words)
     first_word_POS = doc[0].pos_
     if first_word_POS == 'VERB':
         new_words[0] = conjugate(new_words[0], tense=PARTICIPLE)
     if first_word_POS == 'NOUN' or first_word_POS == 'ADJ':
         # Bug fix: the original tested "!= 'a' or != 'an'", which is a
         # tautology (always True), so referenced() ran even when an
         # article was already present.  'and' expresses the intent.
         if new_words[0] != 'a' and new_words[0] != 'an':
             new_words[0] = referenced(new_words[0])
     elif first_word_POS == 'NUM' and len(new_words) > 1:
         new_words[1] = pluralize(new_words[1])
     return ' '.join(new_words)
Example #26
0
def processArt(t):
    """Replace "a"/"an" tokens with a joinlist of the correct indefinite
    article (for the following word) plus "the".

    Bug fixes versus the original:
    - 'count' was undefined (NameError on first "a"/"an"); the loop index
      variable is 'cnt';
    - the comparison was accidentally passed INTO referenced()
      ('referenced(t[count] == "a")') instead of comparing its result;
    - referenced() returns e.g. "an hour", never a bare article, so the
      first word of its result is what must be inspected;
    - indexing past the end of t (article as final token) is now guarded.
    """
    l = []
    cnt = 0
    for ele in t:
        cnt = cnt + 1  # cnt indexes the following token
        if ele in ("a", "an"):
            if cnt < len(t) and referenced(t[cnt]).startswith("an "):
                l = l + [joinlist(["an", "the"])]
            else:
                l = l + [joinlist(["a", "the"])]
        else:
            l = l + [ele]
    return " ".join(l)
Example #27
0
    def formats(self, phrase):
        """Return a list of surface variants of *phrase*.

        Always includes the original; depending on the POS of the first
        token (from self.nlp), adds indefinite- and definite-article forms
        for leading nouns/adjectives, gerund forms for leading verbs, and
        a pluralized variant after a leading number.
        """
        doc = self.nlp(phrase)
        first_word_POS = doc[0].pos_

        tokens = phrase.split(' ')
        new_tokens = tokens.copy()

        new_phrases = []
        # original
        new_phrases.append(' '.join(new_tokens))

        # with indefinite article
        if first_word_POS == 'NOUN' or first_word_POS == 'ADJ':
            new_tokens[0] = referenced(tokens[0])
            new_phrases.append(' '.join(new_tokens))
        # with definite article
        if first_word_POS == 'NOUN' or first_word_POS == 'ADJ':
            new_tokens[0] = "the "+tokens[0]
            new_phrases.append(' '.join(new_tokens))
        # as gerund
        if first_word_POS == 'VERB':
            new_tokens[0] = conjugate(tokens[0], tense=PARTICIPLE)
            new_phrases.append(' '.join(new_tokens))
            # also article-ize the verb's object (skipping an infinitive "to")
            if len(tokens) > 1:
                if tokens[1] == 'to' and len(tokens) > 2:
                    new_tokens[2] = referenced(tokens[2])
                else:
                    new_tokens[1] = referenced(tokens[1])
            new_phrases.append(' '.join(new_tokens))
            new_tokens[0] = tokens[0]  # restore the bare verb for a last variant
            new_phrases.append(' '.join(new_tokens))

        # account for numbers
        if first_word_POS == 'NUM' and len(tokens) > 1:
            new_tokens[1] = pluralize(tokens[1])
            new_phrases.append(' '.join(new_tokens))
        return new_phrases
Example #28
0
def p6():
    """Generate 'the <adj> sound of <noun> pushed itself into the room ...'.

    May decorate the sentence with one of the framing helpers before
    capitalizing it.
    """
    first_adj = random.choice(adjs)
    first_noun = random.choice(ns)
    second_adj = referenced(random.choice(adjs))
    second_noun = random.choice(ns)
    p = "the {adjective1} sound of {noun1} pushed itself into the room, disturbing the silence like {adjective2} {noun2}.".format(
        adjective1=first_adj, noun1=first_noun,
        adjective2=second_adj, noun2=second_noun)

    # Each elif draws a fresh random number, matching the original flow.
    if random.random() < 0.25:
        p = meanwhiler(p)
    elif random.random() < 0.4:
        p = random.choice([look_around_you(p), get_lost(p)])
    elif random.random() < 0.6:
        p = suddenly(p)

    return capitalize(p)
Example #29
0
def p4():
    """Generate 'the X was creeping closer, like a Y of Z W.' with extras."""
    subject = random.choice(ns)
    motion = random.choice(["creeping", "crawling", "sneaking", "slithering"])
    proximity = random.choice(["closer", "nearer"])
    quantity = referenced(random.choice(quants))
    descriptor = random.choice(adjs)
    object_noun = random.choice(ns)
    p = "the {noun1} was {verbing} {closer}, like {quant} of {adjective} {noun2}.".format(
        noun1=subject, verbing=motion, closer=proximity,
        quant=quantity, adjective=descriptor, noun2=object_noun)

    # Fresh random draw per branch, exactly as before.
    if random.random() < 0.25:
        p = meanwhiler(p)
    elif random.random() < 0.4:
        p = random.choice([look_around_you(p), get_lost(p)])

    return capitalize(p)
Example #30
0
   def get_flowers(self):
      """Return a sentence listing the bouquet held in self.FLOWERS.

      With no flowers the result is just a period plus newline, matching
      the original behavior.
      """
      if not self.FLOWERS:
         return ".\n"

      parts = ["I left with a beautiful bouquet of flowers that contained: "]
      for f in self.FLOWERS:
         # Oxford-comma style: ", and " before the last of two-plus flowers.
         if f == self.FLOWERS[-1] and len(self.FLOWERS) > 1:
            parts.append(", and ")
         elif f != self.FLOWERS[0]:
            parts.append(", ")
         parts.append(referenced(f['color'] + " " + f['flower']))
      return "".join(parts) + ".\n"
Example #31
0
    def get_flowers(self):
        """Returns a textual representation of carried flowers.

        An empty self.FLOWERS yields only a period and newline; otherwise
        an "I left with ..." sentence listing each flower, each prefixed
        with its indefinite article.
        """
        temp = ""
        if self.FLOWERS:
            temp += "I left with a beautiful bouquet of flowers that contained: "

            for f in self.FLOWERS:
                # ", and " before the last of two-plus flowers
                if f == self.FLOWERS[-1] and len(self.FLOWERS) > 1:
                    temp += ", and "

                elif f != self.FLOWERS[0]:
                    temp += ", "

                temp += referenced(f['color'] + " " + f['flower'])

        return temp + ".\n"
Example #32
0
 def test_indefinite_article(self):
     """Assert indefinite_article()/article()/referenced() pick "a" vs "an"
     for a range of tricky words (Python 2 test)."""
     # Assert "a" or "an".
     for article, word in (
      ("an", "hour"),
      ("an", "FBI"),
       ("a", "bear"),
       ("a", "one-liner"),
       ("a", "European"),
       ("a", "university"),
       ("a", "uterus"),
      ("an", "owl"),
      ("an", "yclept"),
       ("a", "year")):
         self.assertEqual(en.inflect.indefinite_article(word), article)
     self.assertEqual(en.article("heir", function=en.INDEFINITE), "an")
     self.assertEqual(en.referenced("ewe"), "a ewe")
     print "pattern.en.article()"
Example #33
0
def optionByproducts(opt):
    """Given a concept node that represents a verb phrase, figure out what
    new items will be established in the scene after carrying out that
    phrase.  Print each item and return the list of new things.
    """
    dirObj = findDirectObject(opt['label'])
    new_things = [dirObj] if dirObj else []

    # Relations for what this action causes or creates.
    effects = query(opt['term'], 'Causes')
    created = query(opt['term'], 'CreatedBy', reverse=True)
    # Only keep terms that themselves have concept options.
    if effects and conceptOptions(effects['term']):
        new_things.append(effects['term'])
    if created and conceptOptions(created['term']):
        new_things.append(created['term'])

    new_things = [attemptSingularization(t) for t in new_things]
    for t in new_things:
        # en.referenced() prefixes "a"/"an" as appropriate.
        print("There is now {} in the scene.".format(
            en.referenced(termToReadable(t))))
    return new_things
Example #34
0
    def generate_phrase(self):
        """Produce a phrase from the grammar, substituting a random
        adjective/noun comparison pair from the blackboard pool.

        Returns a list of Word objects, or None when a POS substitution
        fails.
        """
        # Only adjectives that actually have comparison nouns recorded.
        adj = choice([a for a in self.blackboard.pool.comparisons if len(self.blackboard.pool.comparisons[a]) > 0])
        parser = ChartParser(self.grammar)
        gr = parser.grammar()
        phrase = self.produce(gr, gr.start())
        noun = choice(list(self.blackboard.pool.comparisons[adj]))
        noun.name = en.singularize(noun.name)
        # Indefinite article matching the noun ("a"/"an").
        article = en.referenced(noun.name).split(" ")[0]
        replace_words = {'adj': adj, 'n': noun, 'det': article}

        for pos in replace_words:
            while pos in phrase:
                try:
                    phrase = self.replace_pos(
                        pos, replace_words[pos], phrase)
                except Exception:
                    # Bug fix: narrowed the bare 'except:', which also
                    # swallowed SystemExit and KeyboardInterrupt.
                    return
        for w in phrase:
            if not isinstance(w, Word):
                phrase[phrase.index(w)] = Word(w)
        return phrase
Example #35
0
   def do_animal(self, i, j):
      """Process finding an animal at map cell (i, j).

      Appends narration to self.TEMP, draws a red marker on self.IMAGE,
      and with 10% probability records the animal as a follower.
      """
      # Get a random animal and give it a name
      animal = random.choice(self.JSON['animals'])
      name = random.choice(self.JSON['names'])

      # Print that info; referenced() supplies the "a"/"an" article
      self.TEMP += "There was " + referenced(animal) + " there. "
      self.TEMP += "I named it " + name + "."

      # Put a square on the map to denote finding an animal here
      self.IMAGE.filledRectangle((i * 15 + 4, j * 15 + 4), (i * 15 + 11, j * 15 + 10), self.COLORS['red'])

      # Did the animal follow the narrator? (10% chance)
      if random.randrange(100) < 10:
         self.ANIMALS.append({'name': name, 'animal': animal})
         self.get_animal_concepts(animal)
         self.TEMP += " It started following me."

      self.TEMP += "\n"
      self.THEN = False
Example #36
0
        def make_noun_string(np, plural=False):
            """Render the (modifier, noun) pair *np* as a noun-phrase string.

            Mass nouns and every-/any-/some- words pass through; quantifier
            modifiers force a plural; otherwise a die roll picks a plural,
            possessive, indefinite (referenced), or definite form.
            """
            # random chance of removing modifier
            #if random.random() < 0.5:
            #    np[0] == ''

            # common mass nouns

            if np[1] in [
                    'data', 'information', 'children', 'people', 'stuff',
                    'equipment'
            ]:
                return ' '.join(np).strip()

            elif any(np[1].lower().startswith(x)
                     for x in ('every', 'any', 'some')) or np[1] in ('nothing',
                                                                     'nobody'):
                return np[1]

            quantifiers = [
                'many', 'few', 'several', 'various', 'multiple', 'fewer',
                'more'
            ]
            if np[0] in quantifiers:
                return np[0] + ' ' + pluralize(np[1])

            else:
                die_roll = random.random()
                if die_roll < 0.15 or plural:
                    return ' '.join((np[0], pluralize(np[1]))).strip()
                elif die_roll < 0.25:
                    return random.choice(
                        ('his', 'her', 'their',
                         'your')) + ' ' + ' '.join(np).strip()
                elif random.random() < 0.45:
                    # NOTE(review): this branch rolls a FRESH random.random(),
                    # not die_roll — confirm that was intentional.
                    return referenced(' '.join(np).strip())
                else:
                    return 'the ' + ' '.join(np).strip()
Example #37
0
    def do_animal(self, i, j):
        """Process finding an animal at map cell (i, j).

        Appends narration to self.TEMP, marks the map cell on self.IMAGE,
        and with 10% probability records the animal as a follower in
        self.ANIMALS.
        """
        # Get a random animal and give it a name
        animal = random.choice(self.JSON['animals'])
        name = random.choice(self.JSON['names'])

        # Print that info; referenced() supplies the "a"/"an" article
        self.TEMP += "There was " + referenced(animal) + " there. "
        self.TEMP += "I named it " + name + "."

        # Put a square on the map to denote finding an animal here
        self.IMAGE.filledRectangle((i * 15 + 4, j * 15 + 4),
                                   (i * 15 + 11, j * 15 + 10),
                                   self.COLORS['red'])

        # Did the animal follow the narrator? (10% chance)
        if random.randrange(100) < 10:
            self.ANIMALS.append({'name': name, 'animal': animal})
            self.get_animal_concepts(animal)
            self.TEMP += " It started following me."

        self.TEMP += "\n"
        self.THEN = False
Example #38
0
def indef(word):
    """Return *word* preceded by its indefinite article.

    Precedence: a leading character in SYMB_FOR_INDEF_AN forces "an";
    words in REFERENCED_EXCEPTIONS use their stored article; otherwise
    pattern.en's referenced() decides.
    """
    if word[0] in SYMB_FOR_INDEF_AN:
        return 'an {0}'.format(word)
    if word in REFERENCED_EXCEPTIONS:
        return '{0} {1}'.format(REFERENCED_EXCEPTIONS[word], word)
    return referenced(word)
def extractFeaturesAndWriteBio(READ_PATH,file_type):
    
    

    global ALL_poems,bio,cnt

    for subdir, dirs, files in os.walk(READ_PATH):
        for file in files:
            
            num_of_files = len(files)-1 # deduct the DS_store
            #print (num_of_files,'readDirectory',READ_PATH)
            
            if file_type in file  and 'readme' not in file:

                # ID
                id=file.split(".")[0]
                #print "\nID:",id.split("_")[1]

                filenames.append(id)
                cnt+=1

                # print('')
                # print('')
                # print('OPENED:',id)
                # print('')
                # print('')

                poem_replaced = ""
                replacement_word = ""
                previous_replacement_word = ""
                
                author=""
                titles=""
                title=""
                new_title=""

                replaced_ls =[]
                new_titles_ls = []
                quit_language=0
                oscillator=0

                word_cnt=0

                # if EXCEPTION is raised... do not add to html
                SKIP_bool=False

                ##########################
                # Load  POEM TEXT FILE     #
                ##########################

                ##
                # PAUSE
                ##
                #time.sleep(5)

                txt_fn_path = DATA_DIR + READ_TXT_PATH + id.split("_")[1]+".txt"
                #print "txt_fn_path:",txt_fn_path

                if os.path.isfile(txt_fn_path) and cnt>0:
                    txt_data=open(txt_fn_path).read()

                    # http://blog.webforefront.com/archives/2011/02/python_ascii_co.html
                    # txt_data.decode('ISO-8859-2') .decode('utf-8')
                    # unicode(txt_data)

                    author=txt_data.split("****!****")[0].strip(' \t\n\r')
                    
                    title=txt_data.split("****!****")[1].strip(' \t\n\r')
                    
                    bio=txt_data.split("****!****")[2]#.strip(' \t\n\r')

                    ######  CLEAN BIO
                    bio.replace("\t","&#9;")
                    bio.replace("\n"," <br>")
                    bio.replace("\r"," <br>")
                    poem_replaced=bio
                    #print poem_replaced

                    ###############################
                    # REPLACE AUTHOR NAME in poem
                    ##############################
                    author_ln=author.split(" ")[-1].lstrip()
                    author_fn=author.split(" ")[:-1]
                    author = " ".join(n for n in author_fn)+author_ln
                    #
                    #poem_replaced = poem_replaced.replace(author_ln,"Jhave")

                    #######################
                    # replace BOOK TITLES
                    #######################
                    #print "TITLES"]
                    new_title = getNewTitle("title").encode('utf-8')

                    #######################
                    # fake AUTHOR
                    #######################
                    
                    new_author= " ".join(random.choice(authors).split(" ")[1:-2])+" "+random.choice(authors).split(" ")[-2]
                    #print "new AUTHOR",new_author                           

                    ############################
                    # replace years with another
                    ############################
                    for w1 in poem_replaced.split("("):
                        for w2 in w1.split(")"):
                            if w2 is not None and w2.isdigit():
                                new_num = random.randint(int(w2)-5,int(w2)+5)
                                #print "REPLACING #:",w2,new_num
                                poem_replaced = poem_replaced.replace(w2,str(new_num))
                                replaced_ls.append(new_num)                            
                                               

                    #################
                    # Load JSON     #
                    #################
                    response = loadJSONfile(READ_JSON_PATH+"poetryFoundation_"+id.split("_")[1]+"_Alchemy_JSON.txt")

                    if response != "failed":

                        if response.get('entities') is not None:
                            for idx,entity in enumerate(response['entities']):

                                #print idx
                                ce = entity['text'].replace("0xc2"," ")
                                ce = ce.replace("0xe2","'")
                                ce = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, ce)
                                ce = ce.encode('utf-8')

                                try:
                                    content = ce.decode('utf-8').encode('ascii', 'xmlcharrefreplace')
                                except UnicodeDecodeError:
                                    "AAAARGGGGHHH!!!!"

                                if content in poem_replaced:
                                                       
                                    ################################################
                                    # Replace similar entities from other JSON     #
                                    ################################################
                                    replacement_entity = findSimilarEntityinRandomJSON(content,entity['type'])

                                    cr = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, replacement_entity)

                                    poem_replaced = poem_replaced.replace(content,replacement_entity)
                                    replaced_ls.append(replacement_entity)
                    

                    ##########################
                    #   POS REPLACMENT       #
                    ##########################

                    token_tuples = nltk.word_tokenize(poem_replaced)
                    tt = nltk.pos_tag(token_tuples)

                    #################
                    #  ADJECTIVES   #
                    #################
                    for i in tt:
                        if "/i" not in i[0] and len(i[0])>3 and i[0] != "died":
                            origw =  re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, i[0])
                            origw =import_utilities.strip_punctuation(origw) 
                            if i[1]=='JJ' :
                                JJr = random.choice(JJ)
                                # # JJr =  re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, JJr)
                                # JJr = import_utilities.strip_punctuation(JJr)
                                JJr = import_utilities.moveBeginAndEndPunctuationFromStrToString(i[0],JJr.lstrip().lstrip())
                                
                                if i[0].istitle():
                                    JJr = JJr.title()

                                poem_replaced = re.sub(r'\b' + import_utilities.strip_punctuation(i[0]) + r'\b', JJr, poem_replaced,1)#poem_replaced.replace(i[0],JJr,1)
                                replaced_ls.append(JJr)
                            if i[1]=='RB':
                                RBr = random.choice(RB)
                                RBr = import_utilities.moveBeginAndEndPunctuationFromStrToString(i[0],RBr.lstrip().lstrip())

                                if i[0].istitle():
                                    RBr = RBr.title()
                                poem_replaced = re.sub(r'\b' + import_utilities.strip_punctuation(i[0])  + r'\b', RBr, poem_replaced,1)
                                replaced_ls.append(RBr)


                    ########################
                    # IS IT ENGLISH?       #
                    ########################
                    for line  in poem_replaced.split('\n\r'):
                        if len(line)>0 :
                            if "english" not in import_utilities.get_language(line):
                                quit_language+=1
                                #print "NOT english:",quit_language,line
                            else:
                                quit_language-=1

                    
                    #########################
                    #   SYNSET REPLACE      #
                    #########################
                    for idx,word in enumerate(poem_replaced.split(' ')):




                        if "<br>" not in word and "&#9;" not in word and len(word)>0:




                            #########################
                            #   PRONOUN ' VERB      #
                            #########################
                            if len(word.split("'"))>1:
                                if word.split("'")[0] in personal_pronouns:
                                    replacement_word = random.choice(personal_pronouns)+"'"+word.split("'")[1]+' '
                                poem_replaced.replace(word,replacement_word)             
                                #print "word,",word,"replacement_word:",replacement_word
                           
                            ####################################################
                            # Replacement of OTHERs                            #
                            ####################################################

                            elif not word.lower().strip(" \n\t\r") in stopwords.words('english'):

                                # take off leading brackets, commas etc...
                                word_punct_nopunct = import_utilities.strip_punctuation_bool(word)
                                word_nopunct = word_punct_nopunct['word'].strip(" \n\t\r")
                                word_punct = word_punct_nopunct['punct']
                                punct_bool = word_punct_nopunct['punct_bool']

                             

                                #######################################################
                                # MAIN EXCHANGE PROCESS CALL >>>>>>>   GET THE SYNSET #
                                #######################################################    
                                if word_nopunct[-4:].lower()=="here":
                                    similarterm=random.choice(import_utilities.heres)
                                else:
                                    #print "WORD:",word_nopunct
                                    if len(word_nopunct)>3:

                                        oscillator  = oscillator+1
                                        
                                        ############################################
                                        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                                        # STYLE SWITCH..... should in future use POS
                                        # ... i.e. if noun & oscillator%3, do...
                                        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                                        ############################################
                                        # synset
                                        similarterm = import_utilities.synset_creeley(word_nopunct)
                                        #print "synset", similarterm

                                        if similarterm is not None and similarterm == word_nopunct and len(word_nopunct)>5:
                                            RESERVOIR.sort(key=len)
                                            similarterm= RESERVOIR[idx%len(RESERVOIR)]
                                            #print idx,len(RESERVOIR),similarterm,word_nopunct,"PRE>>>>>>>>LAST CHANGE STOP: ", word, "~",similarterm

                                            
                                                          
                                ############################################
                                # manually get rid of some terrible choices
                                ############################################
                                naw_terms=["mind","lonely"]
                                if similarterm == "ilk":
                                    ##print "like"
                                    similarterm = "like"
                                if similarterm == "Nox":
                                    ##print "like"
                                    similarterm = "nite"
                                if similarterm == "ope":
                                    ##print "doth"
                                    similarterm = "does"
                                if similarterm == "information technology":
                                    ##print "doth"
                                    similarterm = "it"
                                if similarterm == "velleity":
                                    ##print "doth"
                                    similarterm = "want"
                                if similarterm == "Crataegus laevigata":
                                    ##print "doth"
                                    similarterm = "may"
                                if similarterm == "eff":
                                    ##print "doth"
                                    similarterm = "know"
                                if similarterm == "naw":
                                    ##print "doth"
                                    similarterm = "mind"
                                #######################################                      
                                # abbreviations for f*****g states!   #
                                #######################################
                                if word_nopunct.upper() in import_utilities.state_abbrev and word_nopunct.lower() not in stopwords.words('english') and "me," not in word:
                                    tmp = similarterm
                                    if word_nopunct == "oh": 
                                        similarterm = random.choice(import_utilities.exclaims)
                                    else:

                                        similarterm = random.choice(RESERVOIR)
                                    #print word_nopunct," replaced by", tmp, "replaced with:",similarterm, "in:",line

                                ##############
                                # hyphenated #
                                ##############
                                hyp =word.split("-")
                                #print word,len(hyp)
                                if len(hyp) >1:
                                    similarterm=""
                                    for w in hyp:
                                        if len(w) > 2:
                                            if import_utilities.synset_creeley(w) is not None:
                                                similarterm +=  import_utilities.synset_creeley(w)+"-"
                                            else:
                                                similarterm += w+"-"
                                    similarterm = import_utilities.strip_underscore(similarterm[:-1])
                                    #print "hyphenated:",word,"replaced by: "+similarterm
                                        


                                
                                # #########################################################    
                                # # is it a TRUNCATED VERB slang as in singin or wishin   #
                                # #########################################################
                                # if similarterm == word_nopunct and len(word)>2 and 'in' in word_nopunct[-2:]:
                                #     similarterm = import_utilities.synset_creeley(word_nopunct+'g')
                                #     ## #print "TRUNCATED SLANG word: '"+word+"'",similarterm
                                #     interim = import_utilities.lemma(similarterm)
                                #     ## #print interim
                                #     similarterm = import_utilities.conjugate(interim, tense=import_utilities.PARTICIPLE, parse=True)[:-1] 
                                #     # # # #print word,"widx:",widx," line_pos_tags[widx][0]:",line_pos_tags[widx][0]," line_pos_tags[widx][1]:",line_pos_tags[widx][1]
                                   

                                #################      
                                # SWEAR WORD    #
                                #################
                                ##print "at the garden of if:", word
                                if word_nopunct in import_utilities.curses:
                                    similarterm = random.choice(import_utilities.curses)
                                    ##print "SWEAR WORD word: '"+word+"'",similarterm


                                #print "SIMILAR:",similarterm

                                if similarterm is not None:
                                    if len(hyp) >1:
                                        replacement_word = similarterm
                                    else:
                                        replacement_word = word.replace(word_nopunct, similarterm)
                                        replacement_word = import_utilities.strip_underscore(replacement_word)
                                        replacement_word = import_utilities.replaceNumbers(replacement_word)

                                #########################
                                # RESERVOIR_OF_WEIRDNESS  #
                                #########################  

                                if word_nopunct.lower() in import_utilities.impera:
                                    replacement_word=random.choice(import_utilities.impera)
                                    #print word,"IMPERA:",replacement_word
                                elif word_nopunct.lower() in import_utilities.conjuncts:
                                    replacement_word=random.choice(import_utilities.conjuncts)
                                    #print word," CONJUNCTION replaced with",replacement_word
                                elif word_nopunct.lower() in import_utilities.indef_prono:
                                    replacement_word=random.choice(import_utilities.indef_prono)
                                    #print word," INDEF_prono replaced with",replacement_word
                                elif word_nopunct.lower() in import_utilities.prepo:
                                    replacement_word=random.choice(import_utilities.prepo)
                                    #print word," prepo replaced with",replacement_word
                                elif word_nopunct.lower() in import_utilities.rel_prono:
                                    replacement_word=word
                                    #print word," rel_prono LEAVE alone: ",replacement_word
                                elif word_nopunct.lower()[-2:] =="ly":
                                    if import_utilities.synset_creeley(word) is not None:
                                        replacement_word=import_utilities.strip_underscore(import_utilities.synset_creeley(word))#(word[:-2])
                                    #print word," ADVERB: ",replacement_word
                                    # if replacement_word[-2:] !="ly":
                                    #     replacement_word +="ly"
                                                                            
                                else:
                                    if len(hyp) <2 and "like" not in word_nopunct and import_utilities.singularize(word_nopunct) ==  import_utilities.singularize(replacement_word) and word_nopunct.lower() not in import_utilities.stopwords_ls:

                                        if word not in RESERVOIR and quit_language<0 and import_utilities.countPunctuation(word)<1 and len(word_nopunct)>3 and not word_nopunct.istitle(): 
                                            
                                            #print "ADDING",word,"to reservoir"
                                            ############################
                                            # ADDING ONLY SMALL WORDS
                                            ############################
                                            if len(word)<7:
                                                RESERVOIR.append(word)
                                            
                                            replacement_word = random.choice(rap_mouth)# RESERVOIR)
                                            #print word_nopunct,"replaced from reservoir with", replacement_word
                                       # print "'"+word_nopunct+"'  vs RESERVOIR  replacement_word:",replacement_word #,"    new_line:",new_line
                                if quit_language>1 and not word_nopunct.istitle():
                                    #print quit_language, "Probably foreign language: make a word salad in english"
                                    replacement_word = random.choice(rap_mouth)#RESERVOIR)
                                    #print word_nopunct,"OTHER replaced from reservoir with", replacement_word
                                
                                ###################################################
                                # MOST REPLACEMENT occurs here...                 #
                                ###################################################
                                poem_ls = poem_replaced.split(' ')
                                idx =  poem_ls.index(word)

                                # print idx,",", poem_ls[idx],",", word ,",",replacement_word
                                #print word ," --- ",previous_replacement_word,replacement_word
                                
                                if len(word)>3 and replacement_word.lstrip().rstrip() == word_nopunct.lstrip().rstrip():
                                    # try alchemy?

                                    # a 
                                    RESERVOIR.sort(key=len)
                                    replacement_word = RESERVOIR[idx%len(RESERVOIR)]
                                    #print idx,len(RESERVOIR),"LAST CHANGE STOP: ", word, "~",replacement_word

                                try:

                                    if poem_ls[idx]==word and "****" not in word and "." != word and "\n" not in word:
                                        poem_ls[idx]=replacement_word#.encode('utf-8')
                                    poem_replaced = " ".join(poem_ls)

                                    # store this word so that conjugation can be checked 
                                    previous_replacement_word=replacement_word
                                except Exception, e:
                                    #print "PENULTIMATE SKIP_bool replace FAIL",e
                                    SKIP_bool=True
                                    continue

                    ###########################################################################
                    # testing Pattern.en as parser for conjugation and article replacement    #
                    # much more robust than my hand-coded hacks                               #        
                    ###########################################################################
                    
                    # correct CONJUGATion of paticiple verbs with pattern.en
                    parsed = parse(poem_replaced,tags = True) 
                    pre_verbal = ["'m","'s","'re"]
                    for idx,p in enumerate(parsed.split(" ")):
                        tok =p.split("/")[0]
                        typ=p.split("/")[1]
                        #print idx,tok,typ
                        if tok in pre_verbal:
                            #print "pre_verbal:",tok
                            next_word= parsed.split(" ")[idx+1].split("/")

                            # try try try
                            for ix,n in enumerate(next_word): 
                                next_word[ix] = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, n).encode('utf-8')
                            try:
                                #print  next_word,next_word[0],next_word[1][:2]
                                # if it's a verb that follows
                                if next_word[1][:2] =="VB":
                                    before_verb = " ".join(w for w in poem_replaced.split(" ")[:idx])#.encode('utf-8')
                                    after_verb = " ".join(w for w in poem_replaced.split(" ")[idx+1:])#.encode('utf-8') 
                                    new_verb = conjugate(next_word[0], tense=PARTICIPLE, parse=True).encode('utf-8')
                                    # insert new
                                    #print "CONJUGATION needed, changing:",poem_replaced.split(" ")[idx],"to",parsed.split(" ")[idx],poem_replaced.split(" ")[idx-1]+" "+new_verb
                                    poem_replaced = before_verb+" "+new_verb+" "+after_verb
                            except Exception, e:
                                # print "INside parsed COnjugation loop",e
                                continue


                    # correct ARTICLES
                    for idx,word in enumerate(poem_replaced.split(" ")):
                        if len(word)>0 and idx != 0 and " " not in word:
                            # A or AN
                            if poem_replaced.split(" ")[idx-1].lower() =="a" or poem_replaced.split(" ")[idx-1].lower() =="an":
                                #print word,"---",article(word)+" "+word
                                before_article = " ".join(w for w in poem_replaced.split(" ")[:idx-1])
                                after_article = " ".join(w for w in poem_replaced.split(" ")[idx+1:])
                                new_conj = referenced(word)
                                # capitalize
                                if poem_replaced.split(" ")[idx-1].istitle():
                                    new_conj = new_conj.split(" ")[0].title()+" "+new_conj.split(" ")[1]
                                poem_replaced = before_article+" "+new_conj+" "+after_article


                    #########################
                    #   WRITE SINGLE POEM   #
                    #########################
                    if not SKIP_bool:

                        tmp_poem=""   

                        # poem_replaced.replace("\t","&#9;")
                        # poem_replaced.replace("\n"," <br>")
                        # poem_replaced.replace("\r"," <br>")

                        HTML_poem=""
                        for line in poem_replaced.split("\n"):
                            #print "LINE", line
                            HTML_poem += line+"<br>"

                        if len(response) >0 and len(id.split("_"))>1:
                            # ALL_poems = ALL_poems_intro + " ".join(i for i in ALL_poems.split("</h2>.")[0:])+"<br><br>~~~~~~~~~~~~~~~~~~~~~~~~~~<br>[ A poem generated from template : <b>"+ author+"</b>, <i>"+ title +"</i> ]<br><br><b>"+new_title+"<br><br></b>"+HTML_poem

                    # try:
                            ALL_poems = "<br>[ A  generated-poem based upon: <i>"+ title +"</i> by <b>"+ author+"</b>]<br><br><i>"+new_title+"</i><br> by <b>"+ new_author   +"</b><br>"+HTML_poem+ALL_poems.split("</h2>")[1].replace("  ","&nbsp")

                            tmp_poem= "[A generated-poem based upon: '"+ title+"' by "+ author +"]\n\n"+new_title+ "\nby "+new_author+"\n"+poem_replaced

                            print "\n~~~\n"  #+tmp_poem
                            # SLOW TYPEWRITER PRESENTATION
                            for line in tmp_poem:
                               for c in line:
                                    time.sleep(0.04)
                                    sys.stdout.write(c)#(c.encode("utf8"))
                                    sys.stdout.flush()
# 
                            #sys.stdout.write("\n")

                            txt_fn = id.split("_")[1]+"_POEMs.txt"

                            WRITE_BIO_PATH = DATA_DIR+"generated/POEMS/POEMS_"+datetime.datetime.now().strftime('%Y-%m-%d_%H')+"/"
                            if not os.path.exists(WRITE_BIO_PATH):
                                    os.makedirs(WRITE_BIO_PATH)

                            txt_fn_path = WRITE_BIO_PATH+txt_fn
                            f_txt=open(txt_fn_path,'w')
                            f_txt.write(tmp_poem)#.encode('utf-8'))       
                            f_txt.close();   
                            #print "\nTXT file created at:",txt_fn_path

                            
                            #######
                            #   write them all.... wasteful... but useful if run is interrupted....
                            ###########  

                            # if cnt==1:
                            #     ALL_poems = ALL_poems_intro+ALL_poems
                            # else:
                            ALL_poems = ALL_poems_intro+ALL_poems.replace("  ","&nbsp")
                            ALL_poems = ALL_poems.replace("$$datetime$$",datetime.datetime.now().strftime('%Y-%m-%d at %H:%M'))
                            ALL_poems = ALL_poems.replace("$$cnt$$",str(cnt))
                            #print "cnt",cnt
                            ALL_poems = ALL_poems.replace("$$gentime$$",str(time.time() - start_time))

                            # ALL POEMS
                            txt_fn = datetime.datetime.now().strftime('%Y-%m-%d_%H')+"_poetryFoundation_generatedPOEMS_CREELEYstyle_"+type_of_run+".html"
                            txt_fn_path = DATA_DIR+"generated/POEMS/"+txt_fn
                            f_txt=open(txt_fn_path,'w')
                            f_txt.write(ALL_poems+"</hmtl>")       
                            f_txt.close();   
                            #print "\nTXT file created at:",txt_fn_path
                        # except Exception, e:
                        #         print "At the final LOOP",e
                        #         #continue
                        #         pass


                        else:
                            pass
                            #print "~! EMPTY response:", author

                    else:
                        cnt = cnt-1
Example #40
0
# The en module has a range of tools for word inflection:
# guessing the indefinite article of a word (a/an?),
# pluralization and singularization, comparative and superlative adjectives, verb conjugation.

# All imports hoisted above first use: the original imported pattern.en
# halfway down the script, so the article()/referenced()/pluralize() calls
# before the imports raised NameError at runtime.  ADJECTIVE is also
# imported, since pluralize("my", pos=ADJECTIVE) needs it.
from pattern.en import referenced, article
from pattern.en import pluralize, singularize, ADJECTIVE
from pattern.en import conjugate, lemma, lexeme, tenses, PAST, PL
from pattern.en import quantify
from pattern.en import ngrams
from pattern.en import parse, tag, pprint
from pattern.en import sentiment, polarity, subjectivity, modality
from pattern.en import Sentence

# INDEFINITE ARTICLE
# ------------------
# The article() function returns the indefinite article (a/an) for a given noun.
# The definitive article is always "the". The plural indefinite is "some".
print(article("bear") + " bear")
print("")

# The referenced() function returns a string with article() prepended to the given word.
# The referenced() function is non-trivial, as demonstrated with the exception words below:
for word in ["hour", "one-liner", "European", "university", "owl", "yclept", "year"]:
    print(referenced(word))
print("")

# PLURALIZATION
# -------------
# The pluralize() function returns the plural form of a singular noun (or adjective).
# The algorithm is robust and handles about 98% of exceptions correctly:
for word in ["part-of-speech", "child", "dog's", "wolf", "bear", "kitchen knife"]:
    print(pluralize(word))
print(pluralize("octopus", classical=True))
print(pluralize("matrix", classical=True))
print(pluralize("matrix", classical=False))
print(pluralize("my", pos=ADJECTIVE))
print("")

# SINGULARIZATION
# ---------------
# Indefinite article
print(article('university'))
print(article('hour'))

print(referenced('university'))
print(referenced('hour'))

# Pluralization / singularization
print(pluralize('child'))
print(singularize('wolves'))

# Verb lexeme, lemma and conjugation
print("")
print(lexeme('run'))
print(lemma('running'))
print(conjugate('purred', '3sg'))
print(PAST in tenses('purred'))  # 'p' in tenses() also works.
print((PAST, 1, PL) in tenses('purred'))

print('Quantification')
Example #42
0
def indef(lemma):
    """Return *lemma* prefixed with its indefinite article.

    Indefinite pronouns ('someone', 'something') already read as
    indefinite, so they pass through unchanged.
    """
    passthrough = ('someone', 'something')
    return lemma if lemma in passthrough else referenced(lemma, article=INDEFINITE)
Example #43
0
    def do_animal_conversation(self, i, j):
        """Make two animals talk to one another.

        Picks two distinct animals from self.ANIMALS, appends their
        exchange (built from ConceptNet relations in self.ANIMAL_CONCEPTS)
        to self.TEMP, and records the pairing in self.CONVOS so the same
        initiator/responder pair is not replayed.  The i/j parameters are
        unused here but kept for caller compatibility.
        """

        # Pick two animals and make sure they're not the same one
        to = random.choice(self.ANIMALS)
        fro = random.choice(self.ANIMALS)
        while fro == to:
            fro = random.choice(self.ANIMALS)

        # Check to make sure these two animals didn't have a conversation already
        # (or at least the "to" animal didn't already initiate a conversation
        # with the "fro" animal)
        already = False
        if to['name'] + to['animal'] in self.CONVOS.keys():
            if fro['name'] + fro['animal'] in self.CONVOS[to['name'] +
                                                          to['animal']]:
                already = True

        # If this is a new conversation, continue
        if not already:
            self.TEMP += "\n" + to['name'] + ' asked ' + fro[
                'name'] + ', "What exactly are you?"\n'
            self.TEMP += "\"Well, I'm " + referenced(fro['animal'])

            # If the "fro" animal has some properties in ConceptNet, print one randomly
            if "HasProperty" in self.ANIMAL_CONCEPTS[fro['animal']].keys():
                self.TEMP += " and I'm " + self.clean_phrase(
                    singularize(
                        random.choice(self.ANIMAL_CONCEPTS[fro['animal']]
                                      ['HasProperty'])))

            self.TEMP += "."

            # If the "fro" animal has a "HasA" relationship in ConceptNet, print one randomly
            hasa = False
            if "HasA" in self.ANIMAL_CONCEPTS[fro['animal']].keys():
                has = referenced(
                    singularize(
                        random.choice(
                            self.ANIMAL_CONCEPTS[fro['animal']]['HasA'])))
                self.TEMP += " I have " + self.clean_phrase(has)
                hasa = True

            # If the "fro" animal is capable of something, talk about it
            capable = False
            if "CapableOf" in self.ANIMAL_CONCEPTS[fro['animal']].keys():
                capable = True
                ability = random.choice(
                    self.ANIMAL_CONCEPTS[fro['animal']]['CapableOf'])

                # Sometimes the "CapableOf" relationship in ConceptNet is negated,
                # so make sure we have consistent logic
                can = "can"
                if ability.find("cannot ") == 0:
                    can = "cannot"
                    # BUG FIX: str.replace() returns a new string; the original
                    # discarded the result, so the output read "I cannot cannot ...".
                    ability = ability.replace("cannot ", "")

                if hasa:
                    self.TEMP += " and"

                # State the ability and ask the "to" animal if they can do the same thing
                self.TEMP += " I " + can + " " + self.clean_phrase(
                    ability) + ", can you?"

            if hasa and not capable:
                self.TEMP += "."

            self.TEMP += "\"\n"

            # If there was a stated ability for the "fro" animal
            if capable:

                # Check to see if the "to" animal also has the same ability, and if so say so
                canto = False
                if 'CapableOf' in self.ANIMAL_CONCEPTS[to['animal']].keys():
                    if ability in self.ANIMAL_CONCEPTS[
                            to['animal']]['CapableOf']:
                        canto = True
                        self.TEMP += '"Yes I can!"'

                # If not, say so
                if not canto:
                    self.TEMP += '"No I can' + "'t"

                    # If they have other abilities, though, pick one and print it
                    if 'CapableOf' in self.ANIMAL_CONCEPTS[
                            to['animal']].keys():
                        self.TEMP += ", but I do know how to " + self.clean_phrase(
                            random.choice(self.ANIMAL_CONCEPTS[to['animal']]
                                          ['CapableOf'])) + "!"
                    else:
                        self.TEMP += ","

                self.TEMP += '" replied ' + to['name'] + ".\n"

            # Add the conversation to the list.
            # BUG FIX: the original tested `to not in self.CONVOS.keys()`, but
            # `to` is a dict while the keys are name+animal strings, so the test
            # was always True and each call clobbered the existing history list.
            key = to['name'] + to['animal']
            if key not in self.CONVOS:
                self.CONVOS[key] = []

            self.CONVOS[key].append(fro['name'] + fro['animal'])

            self.THEN = False
Example #44
0
# The en module has a range of tools for word inflection:
# guessing the indefinite article of a word (a/an?),
# pluralization and singularization, comparative and superlative adjectives, verb conjugation.

# INDEFINITE ARTICLE
# ------------------
# The article() command returns the indefinite article (a/an) for a given noun.
# The definitive article is always "the". The plural indefinite is "some".
print article("bear"), "bear"
print

# The referenced() command returns a string with article() prepended to the given word.
# The referenced() command is non-trivial, as demonstrated with the exception words below:
for word in ["hour", "one-liner", "European", "university", "owl", "yclept", "year"]:
    print referenced(word)
print
print

# PLURALIZATION
# -------------
# The pluralize() command returns the plural form of a singular noun (or adjective).
# The algorithm is robust and handles about 98% of exceptions correctly:
for word in ["part-of-speech", "child", "dog's", "wolf", "bear", "kitchen knife"]:
    print pluralize(word)
print pluralize("octopus", classical=True)
print pluralize("matrix", classical=True)
print pluralize("matrix", classical=False)
print pluralize("my", pos=ADJECTIVE)
print
Example #45
0
# Tokenization demo: tokenize() splits a string into word/punctuation tokens.
f = """this’s pattern word tokenize"""
print "tokens:", tokenize(f)
# A longer passage: tokenize() also handles multi-sentence input.
sent_tokenize_test = """Tokenization is the process of breaking a stream of text up into words, phrases, symbols, or other meaningful elements called tokens. The list of tokens becomes input for further processing such as parsing or text mining. Tokenization is useful both in linguistics (where it is a form of text segmentation), and in computer science, where it forms part of lexical analysis."""
print "sentence:",tokenize(sent_tokenize_test)


from pattern.en import tag

# Part-of-speech tagging demo: tag() returns (token, POS-tag) pairs.
g = """In corpus linguistics, part-of-speech tagging (POS tagging or POST), also called grammatical tagging or word-category disambiguation, is the process of marking up a word in a text (corpus) as corresponding to a particular part of speech, based on both its definition, as well as its context—i.e. relationship with adjacent and related words in a phrase, sentence, or paragraph. A simplified form of this is commonly taught to school-age children, in the identification of words as nouns, verbs, adjectives, adverbs, etc."""
tagged_result = tag(g)

print tagged_result


# NOTE(review): the expression results below are discarded -- this reads
# like a transcribed interactive session, so the calls only illustrate the
# API and produce no output when run as a script.
from pattern.en import referenced
referenced('book')

from pattern.en import singularize
singularize('wolves')

from pattern.en import comparative
comparative('bad')
# -> 'worse'

from pattern.en import superlative


from pattern.en import pluralize


from pattern.en import sentiment
def enum_or(words):
    """Join *words* into an "a X, a Y, or a Z" style enumeration.

    Each word is prefixed with its indefinite article via referenced().
    Returns '' for an empty list (the original raised IndexError on r[-1]).
    """
    if not words:
        return ''
    if len(words) == 1:
        return referenced(words[0])

    r = [referenced(w) for w in words]
    return '{0}, or {1}'.format(', '.join(r[:-1]), r[-1])
# the Free Software Foundation; either version 3, or (at your option)
# any later version.

# This file is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with GNU Emacs; see the file COPYING.  If not, write to
# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301 USA.

from pattern.en import referenced

# Indefinite articles, including tricky cases: 'university' takes "a"
# (consonant sound) while 'hour' takes "an" (silent h).
print(referenced('university'))
print(referenced('hour'))

from pattern.en import pluralize, singularize

# Irregular plural/singular forms.
print(pluralize('child'))
print(singularize('wolves'))

from pattern.en import comparative, superlative

# Irregular comparison: 'bad' -> 'worse' / 'worst'.
print(comparative('bad'))
print(superlative('bad'))

from pattern.en import conjugate, lemma, lexeme

# lexeme() lists a verb's inflected forms; conjugate() and lemma() are
# imported but not demonstrated here.
print(lexeme('purr'))
Example #48
0
def article_check(sentence):  #sentence is a list of words
    """Suggest determiner corrections for a tokenized sentence.

    Returns a list with one slot per input word. Each slot holds candidate
    corrections for that position:
      - for an existing "a"/"an"/"the", the determiners aORthe() suggests
        for the word that follows it;
      - for a noun phrase with no determiner, "<determiner> <word>" strings
        to insert at that position.
    Noun phrases governed by a possessive (PRP$/POS) or "i" are skipped.
    """
    s = " ".join(sentence)
    words = s.split()
    tags = tag(s)

    pos1 = []  # [determiner index, index of first NN/JJ/RB after it, its POS]
    pos2 = []  # [index, POS] of nouns/adjectives seen with no determiner
    ans1 = []  # replacement candidates for existing determiners
    ans2 = []  # insertion candidates for missing determiners

    cnt = 0
    b = False        # a determiner has been seen and not yet matched to a noun
    f = False        # first NN/JJ/RB after that determiner already recorded
    pronoun = False  # a possessive/"i" suppresses the next noun phrase

    for i in range(len(tags)):
        word, pos = tags[i]
        if pos == "PRP$" or pos == "POS" or word == "i":
            pronoun = True

        elif word == "the" or word == "a" or word == "an":
            pos1.append([i, 0, 0])
            b = True
        elif (pos == "NN" or pos == "NNS" or pos == "NNP" or pos == "NNPS"
              or pos == "JJ" or pos == "JJR" or pos == "JJS" or pos == "RR"
              or pos == "RBR" or pos == "RBS"):
            if not pronoun:
                if b:
                    if not f:
                        pos1[cnt][1] = i
                        pos1[cnt][2] = pos
                        cnt += 1
                        f = True
                    if pos == "NN" or pos == "NNS" or pos == "NNP" or pos == "NNPS":
                        b = False
                        f = False
                else:
                    pos2.append([i, pos])
            else:
                pronoun = False

    # Existing determiners: replace with whatever aORthe() suggests for the
    # first noun/adjective that follows them.
    for indDT, indN, pos in pos1:
        word = tags[indN][0]
        dt = referenced(word).split()[0]
        DT = aORthe(dt, word)
        ans1.append([indDT, indN, DT])

    adj = False
    adject = False
    first = True
    insertPos = False

    for indN, pos in pos2:
        word = tags[indN][0]
        if pos == "JJ" or pos == "JJR" or pos == "JJS":
            if first:
                insertPos = indN
                adj = pos
                adject = word
                first = False

        elif pos == "NN":
            if adj == "JJS" or adj == "RBS":
                # Superlatives always take "the".
                # FIX: wrap "the" in a list — the insertion loop below iterates
                # the candidate container (aORthe() returns a list); a bare
                # string was iterated character by character.
                ans2.append([insertPos, indN, ["the"]])
            elif adj == "JJ" or adj == "JJR" or adj == "RBR" or adj == "RR":
                dt = referenced(adject).split()[0]
                DT = aORthe(dt, adject)
                ans2.append([insertPos, indN, DT])
            else:
                dt = referenced(word).split()[0]
                DT = aORthe(dt, word)
                ans2.append([insertPos, indN, DT])
            # Reset the phrase state after each noun.
            # FIX: insertPos is reset too; the original left it stale, so a
            # later bare noun reused the previous adjective's position.
            adj = False
            first = True
            adject = False
            insertPos = False

    # One slot per word; fill replacement determiners.
    aans1 = [[] for word in sentence]
    for insertPos, nounPOS, dt in ans1:
        aans1[insertPos] = dt

    # Fill insertion candidates ("<determiner> <word>").
    aans2 = [[] for word in sentence]
    for insertPos, nounPOS, dt in ans2:
        # FIX: compare with False explicitly — "not insertPos" also fired for
        # a valid insertion position of 0 (adjective at sentence start).
        if insertPos is False:
            aans2[nounPOS] = [dt1 + " " + words[nounPOS] for dt1 in dt]
        else:
            aans2[insertPos] = [dt1 + " " + words[insertPos] for dt1 in dt]

    f_ans = []
    for i in range(len(aans2)):
        f_ans.append(aans2[i] + aans1[i])

    return f_ans
Example #49
0
def indef(lemma):
    """Prefix *lemma* with its indefinite article, except for generic
    placeholders which never take one."""
    no_article = ('someone', 'something')
    return lemma if lemma in no_article else referenced(lemma, article=INDEFINITE)
Example #50
0
from pattern.en import referenced
import random
import subprocess

# Source material: one person / one action per line.
with open('persons.txt') as f:
    persons = f.read().split('\n')

with open('actions.txt') as f:
    actions = f.read().split('\n')

random.shuffle(persons)
random.shuffle(actions)

# Assemble a one-sentence "novel" in Markdown (with a pandoc title block).
text = '''% CONSIDER A NOVEL
# Chapter 1
Consider '''
text += ', or '.join(referenced(p) for p in persons)
text += ' who is '
text += ', '.join(actions)
text += '.'

with open('novel.md', 'w') as f:
    f.write(text)

# FIX: pass the command as an argument list — a single string without
# shell=True makes subprocess look for a program literally named
# "pandoc -o novel.pdf novel.md".
subprocess.call(['pandoc', '-o', 'novel.pdf', 'novel.md'])
Example #51
0
   def do_animal_conversation(self, i, j):
      """Make two animals talk to one another.

      Picks two distinct animals from self.ANIMALS and, unless this pair has
      already spoken (tracked in self.CONVOS, keyed by name+animal), appends
      a generated dialogue to self.TEMP built from the animals' ConceptNet
      relations in self.ANIMAL_CONCEPTS (HasProperty / HasA / CapableOf).
      The parameters i and j are unused.
      """

      # Pick two animals and make sure they're not the same one
      to = random.choice(self.ANIMALS)
      fro = random.choice(self.ANIMALS)
      while fro == to:
         fro = random.choice(self.ANIMALS)

      # Check to make sure these two animals didn't have a conversation already
      # (or at least the "to" animal didn't already initiate one with "fro")
      already = False
      if to['name']+to['animal'] in self.CONVOS.keys():
         if fro['name']+fro['animal'] in self.CONVOS[to['name']+to['animal']]:
            already = True

      # If this is a new conversation, continue
      if not already:
         self.TEMP += "\n" + to['name'] + ' asked ' + fro['name'] + ', "What exactly are you?"\n'
         self.TEMP += "\"Well, I'm " + referenced(fro['animal'])

         # If the "fro" animal has some properties in ConceptNet, print one randomly
         if "HasProperty" in self.ANIMAL_CONCEPTS[fro['animal']].keys():
            self.TEMP += " and I'm " + self.clean_phrase(singularize(random.choice(self.ANIMAL_CONCEPTS[fro['animal']]['HasProperty'])))

         self.TEMP += "."

         # If the "fro" animal has a "HasA" relationship in ConceptNet, print one randomly
         hasa = False
         if "HasA" in self.ANIMAL_CONCEPTS[fro['animal']].keys():
            has = referenced(singularize(random.choice(self.ANIMAL_CONCEPTS[fro['animal']]['HasA'])))
            self.TEMP += " I have " + self.clean_phrase(has)
            hasa = True

         # If the "fro" animal is capable of something, talk about it
         capable = False
         if "CapableOf" in self.ANIMAL_CONCEPTS[fro['animal']].keys():
            capable = True
            ability = random.choice(self.ANIMAL_CONCEPTS[fro['animal']]['CapableOf'])

            # Sometimes the "CapableOf" relationship in ConceptNet is negated,
            # so make sure we have consistent logic
            can = "can"
            if ability.find("cannot ") == 0:
               can = "cannot"
               # FIX: str.replace returns a new string; the original discarded
               # the result, so the "cannot " prefix was never removed.
               ability = ability.replace("cannot ", "")

            if hasa:
               self.TEMP += " and"

            # State the ability and ask the "to" animal if they can do the same thing
            self.TEMP += " I " + can + " " + self.clean_phrase(ability) + ", can you?"

         if hasa and not capable:
            self.TEMP += "."

         self.TEMP += "\"\n"

         # If there was a stated ability for the "fro" animal
         if capable:

            # Check to see if the "to" animal also has the same ability, and if so say so
            canto = False
            if 'CapableOf' in self.ANIMAL_CONCEPTS[to['animal']].keys():
               if ability in self.ANIMAL_CONCEPTS[to['animal']]['CapableOf']:
                  canto = True
                  self.TEMP += '"Yes I can!"'

            # If not, say so
            if not canto:
               self.TEMP += '"No I can' + "'t"

               # If they have other abilities, though, pick one and print it
               if 'CapableOf' in self.ANIMAL_CONCEPTS[to['animal']].keys():
                  self.TEMP += ", but I do know how to " + self.clean_phrase(random.choice(self.ANIMAL_CONCEPTS[to['animal']]['CapableOf'])) + "!"
               else:
                  self.TEMP += ","

            self.TEMP += '" replied ' + to['name'] + ".\n"

         # Record the conversation so this pair won't talk again.
         # FIX: the original tested `to not in self.CONVOS.keys()`, comparing
         # the animal dict itself against the string keys, so the guard was
         # broken and the history list could be clobbered on every call.
         convo_key = to['name'] + to['animal']
         if convo_key not in self.CONVOS:
            self.CONVOS[convo_key] = []

         self.CONVOS[convo_key].append(fro['name'] + fro['animal'])

         self.THEN = False
def articleError(text, nlp, correctFlag=False):
    '''
    Purpose: To check if text has errors due to wrong article usage.
             Additionally, it returns corrected sentence.

    Parameters: text: string
                    A string of text-single or a paragraph.

                nlp: callable
                    A parser (e.g. a stanza pipeline) whose result exposes
                    .sentences, each holding .words with .text and .xpos.

                correctFlag: boolean
                    True or False

    Returns: count: integer
             text: Corrected sentence. (If correctFlag is True)
    '''
    # Uncountable nouns never take "a"/"an"; load the known list once.
    path = "uncNouns.txt"
    unc_text = read_file(path)
    unc_words = []
    for line in unc_text:
        tokens = word_tokenize(line)
        unc_words.append(tokens[0].lower())

    doc = nlp(text)
    count = 0
    ntext = ""

    for s in doc.sentences:
        n = len(s.words)
        for t in range(n):
            word = s.words[t].text
            # FIX: guard t + 1 < n — the original indexed past the end and
            # raised IndexError when a sentence finished with "a"/"an".
            if word in ('a', 'an') and t + 1 < n:
                nxt = s.words[t + 1]
                if nxt.text in unc_words or nxt.xpos == "NNS" or nxt.xpos == "NNPS":
                    # "a"/"an" before an uncountable or plural noun is wrong;
                    # drop the article from the corrected text.
                    count += 1
                elif (t < n - 2) and (nxt.xpos in ["JJ", "JJR"]) and (
                        s.words[t + 2].xpos in ['NNP', 'NN']):
                    # Article + adjective + noun: choose a/an by the adjective.
                    if word == 'a' and referenced(nxt.text) == ('an ' + nxt.text):
                        ntext += 'an'
                        count += 1
                    elif word == 'an' and referenced(nxt.text) == ('a ' + nxt.text):
                        ntext += 'a'
                        count += 1
                    else:
                        ntext += word
                elif nxt.xpos not in ["NNP", "NN"]:
                    # Article not followed by a singular noun at all; drop it.
                    count += 1
                elif word == 'a' and referenced(nxt.text) == ('an ' + nxt.text):
                    ntext += 'an'
                    count += 1
                elif word == 'an' and referenced(nxt.text) == ('a ' + nxt.text):
                    ntext += 'a'
                    count += 1
                else:
                    ntext += word
                ntext += " "
            else:
                ntext += word
                ntext += " "
    if correctFlag:
        return count, ntext
    else:
        return count
def extractFeaturesAndWriteBio(READ_PATH, file_type):

    global ALL_poems, bio, cnt

    for subdir, dirs, files in os.walk(READ_PATH):
        for file in files:

            num_of_files = len(files) - 1  # deduct the DS_store
            #print (num_of_files,'readDirectory',READ_PATH)

            if file_type in file and 'readme' not in file:

                # ID
                id = file.split(".")[0]
                print "\nID:", id.split("_")[1]

                filenames.append(id)
                cnt += 1

                # print('')
                # print('')
                # print('OPENED:',id)
                # print('')
                # print('')

                poem_replaced = ""
                replacement_word = ""
                previous_replacement_word = ""

                author = ""
                titles = ""
                title = ""
                new_title = ""

                replaced_ls = []
                new_titles_ls = []
                quit_language = 0
                oscillator = 0

                # if EXCEPTION is raised... do not add to html
                SKIP_bool = False

                ##########################
                # Load  POEM TEXT FILE     #
                ##########################

                txt_fn_path = DATA_DIR + READ_TXT_PATH + id.split(
                    "_")[1] + ".txt"
                #print "txt_fn_path:",txt_fn_path

                if os.path.isfile(txt_fn_path) and cnt > 0:
                    txt_data = open(txt_fn_path).read()

                    # http://blog.webforefront.com/archives/2011/02/python_ascii_co.html
                    # txt_data.decode('ISO-8859-2') .decode('utf-8')
                    # unicode(txt_data)

                    author = txt_data.split("****!****")[0].strip(' \t\n\r')

                    title = txt_data.split("****!****")[1].strip(' \t\n\r')

                    bio = txt_data.split("****!****")[2]  #.strip(' \t\n\r')

                    ######  CLEAN BIO
                    bio.replace("\t", "&#9;")
                    bio.replace("\n", " <br>")
                    bio.replace("\r", " <br>")
                    poem_replaced = bio
                    #print poem_replaced

                    ###############################
                    # REPLACE AUTHOR NAME in poem
                    ##############################
                    author_ln = author.split(" ")[-1].lstrip()
                    author_fn = author.split(" ")[:-1]
                    author = " ".join(n for n in author_fn) + author_ln
                    #
                    #poem_replaced = poem_replaced.replace(author_ln,"Jhave")

                    #######################
                    # replace BOOK TITLES
                    #######################
                    #print "TITLES"]
                    new_title = getNewTitle("title").encode('utf-8')

                    #######################
                    # fake AUTHOR
                    #######################

                    new_author = " ".join(
                        random.choice(authors).split(" ")
                        [1:-2]) + " " + random.choice(authors).split(" ")[-2]
                    #print "new AUTHOR",new_author

                    ############################
                    # replace years with another
                    ############################
                    for w1 in poem_replaced.split("("):
                        for w2 in w1.split(")"):
                            if w2 is not None and w2.isdigit():
                                new_num = random.randint(
                                    int(w2) - 5,
                                    int(w2) + 5)
                                #print "REPLACING #:",w2,new_num
                                poem_replaced = poem_replaced.replace(
                                    w2, str(new_num))
                                replaced_ls.append(new_num)

                    #################
                    # Load JSON     #
                    #################
                    response = loadJSONfile(READ_JSON_PATH +
                                            "poetryFoundation_" +
                                            id.split("_")[1] +
                                            "_Alchemy_JSON.txt")

                    if response != "failed":

                        if response.get('entities') is not None:
                            for idx, entity in enumerate(response['entities']):

                                #print idx
                                ce = entity['text'].replace("0xc2", " ")
                                ce = ce.replace("0xe2", "'")
                                ce = re.sub(
                                    '(' +
                                    '|'.join(import_utilities.chars.keys()) +
                                    ')', import_utilities.replace_chars, ce)
                                ce = ce.encode('utf-8')

                                try:
                                    content = ce.decode('utf-8').encode(
                                        'ascii', 'xmlcharrefreplace')
                                except UnicodeDecodeError:
                                    "AAAARGGGGHHH!!!!"

                                if content in poem_replaced:

                                    ################################################
                                    # Replace similar entities from other JSON     #
                                    ################################################
                                    replacement_entity = findSimilarEntityinRandomJSON(
                                        content, entity['type'])

                                    cr = re.sub(
                                        '(' + '|'.join(
                                            import_utilities.chars.keys()) +
                                        ')', import_utilities.replace_chars,
                                        replacement_entity)

                                    poem_replaced = poem_replaced.replace(
                                        content, replacement_entity)
                                    replaced_ls.append(replacement_entity)

                    ##########################
                    #   POS REPLACMENT       #
                    ##########################

                    token_tuples = nltk.word_tokenize(poem_replaced)
                    tt = nltk.pos_tag(token_tuples)

                    #################
                    #  ADJECTIVES   #
                    #################
                    for i in tt:
                        if "/i" not in i[0] and len(
                                i[0]) > 2 and i[0] != "died":
                            origw = re.sub(
                                '(' + '|'.join(import_utilities.chars.keys()) +
                                ')', import_utilities.replace_chars, i[0])
                            origw = import_utilities.strip_punctuation(origw)
                            if i[1] == 'JJ':
                                JJr = random.choice(JJ)
                                # # JJr =  re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, JJr)
                                # JJr = import_utilities.strip_punctuation(JJr)
                                JJr = import_utilities.moveBeginAndEndPunctuationFromStrToString(
                                    i[0],
                                    JJr.lstrip().lstrip())

                                if i[0].istitle():
                                    JJr = JJr.title()

                                poem_replaced = re.sub(
                                    r'\b' +
                                    import_utilities.strip_punctuation(i[0]) +
                                    r'\b', JJr, poem_replaced,
                                    1)  #poem_replaced.replace(i[0],JJr,1)
                                replaced_ls.append(JJr)
                            if i[1] == 'RB':
                                RBr = random.choice(RB)
                                RBr = import_utilities.moveBeginAndEndPunctuationFromStrToString(
                                    i[0],
                                    RBr.lstrip().lstrip())

                                if i[0].istitle():
                                    RBr = RBr.title()
                                poem_replaced = re.sub(
                                    r'\b' +
                                    import_utilities.strip_punctuation(i[0]) +
                                    r'\b', RBr, poem_replaced, 1)
                                replaced_ls.append(RBr)
                                #print "RBr=",RBr,"repaced",i[0]

                    ########################
                    # IS IT ENGLISH?       #
                    ########################
                    for line in poem_replaced.split('\n\r'):
                        if len(line) > 0:
                            if "english" not in import_utilities.get_language(
                                    line):
                                quit_language += 1
                                #print "NOT english:",quit_language,line
                            else:
                                quit_language -= 1

                    #########################
                    #   SYNSET REPLACE      #
                    #########################
                    for idx, word in enumerate(poem_replaced.split(' ')):

                        if "<br>" not in word and "&#9;" not in word and len(
                                word) > 0:

                            #########################
                            #   PRONOUN ' VERB      #
                            #########################
                            if len(word.split("'")) > 1:
                                if word.split("'")[0] in personal_pronouns:
                                    replacement_word = random.choice(
                                        personal_pronouns) + "'" + word.split(
                                            "'")[1] + ' '
                                poem_replaced.replace(word, replacement_word)
                                #print "word,",word,"replacement_word:",replacement_word

                            ####################################################
                            # Replacement of OTHERs                            #
                            ####################################################

                            else:
                                # elif not word.lower().strip(" \n\t\r") in stopwords.words('english'):

                                # take off leading brackets, commas etc...
                                word_punct_nopunct = import_utilities.strip_punctuation_bool(
                                    word)
                                word_nopunct = word_punct_nopunct[
                                    'word'].strip(" .\n\t\r")
                                word_punct = word_punct_nopunct['punct']
                                punct_bool = word_punct_nopunct['punct_bool']

                                #print "word_nopunct:",word_nopunct

                                #######################################################
                                # MAIN EXCHANGE PROCESS CALL >>>>>>>   GET THE SYNSET #
                                #######################################################
                                similarterm = ""
                                if word_nopunct[-4:].lower() == "here":
                                    similarterm = random.choice(
                                        import_utilities.heres)
                                else:
                                    #print "WORD:",word_nopunct
                                    if len(word_nopunct) > 3:

                                        oscillator = oscillator + 1

                                        ############################################
                                        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                                        # STYLE SWITCH..... should in future use POS
                                        # ... i.e. if noun & oscillator%3, do...
                                        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                                        ############################################

                                        similarterm = import_utilities.find_synset_word(
                                            word_nopunct)

                                        # synset
                                        # if oscillator%4==0:
                                        #     # SYNSET
                                        #     similarterm = import_utilities.find_synset_word(word_nopunct)
                                        #     #print "synset", similarterm

                                        # elif oscillator%3==0:
                                        #     # RAP MOUTH
                                        #     similarterm = random.choice(rap_mouth)
                                        #     #print "rap",similarterm

                                        # # elif oscillator%2==0:
                                        # else:
                                        #     similarterm = import_utilities.find_synset_word(word_nopunct)
                                        # # SCIENCE MOUTH
                                        # similarterm = random.choice(science_mouth)
                                        # if similarterm.endswith('logy'):
                                        #         similarterm = similarterm[:-4]
                                        # if similarterm.endswith('o'):
                                        #         similarterm = similarterm[:-1]
                                        #print "science_mouth",similarterm
                                        # if len(similarterm)<6:
                                        #     similarterm = random.choice(import_utilities.curses)

                                        # else:
                                        #     # FILTH
                                        #     print "filth"
                                        #     similarterm = random.choice(import_utilities.curses)

                                ############################################
                                # manually get rid of some terrible choices
                                ############################################
                                if similarterm == "ilk":
                                    ##print "like"
                                    similarterm = "like"
                                if similarterm == "ope":
                                    ##print "doth"
                                    similarterm = "does"
                                if similarterm == "information technology":
                                    ##print "doth"
                                    similarterm = "it"
                                if similarterm == "velleity":
                                    ##print "doth"
                                    similarterm = "want"
                                if similarterm == "Crataegus laevigata":
                                    ##print "doth"
                                    similarterm = "may"
                                if similarterm == "brunet" or similarterm == "ot":
                                    ##print "doth"
                                    similarterm = random.choice(
                                        import_utilities.curses)
                                if similarterm == "ge":
                                    ##print "doth"
                                    similarterm = random.choice(science_mouth)
                                if similarterm.lower() == "nox":
                                    ##print "doth"
                                    similarterm = random.choice(science_mouth)
                                if similarterm.lower() == "paunited":
                                    print "################### paUnited ###################"
                                    similarterm = word

                                #######################################
                                # abbreviations for f*****g states!   #
                                #######################################
                                if word_nopunct.upper(
                                ) in import_utilities.state_abbrev and word_nopunct.lower(
                                ) not in stopwords.words(
                                        'english') and "me," not in word:
                                    tmp = similarterm
                                    if word_nopunct == "oh":
                                        similarterm = random.choice(
                                            import_utilities.exclaims)
                                    else:
                                        similarterm = random.choice(
                                            rap_mouth)  # RESERVOIR)RESERVOIR)
                                    #print word_nopunct," replaced by", tmp, "replaced with:",similarterm, "in:",line

                                ##############
                                # hyphenated #
                                ##############
                                hyp = word.split("-")
                                #print word,len(hyp)
                                if len(hyp) > 1:
                                    similarterm = ""
                                    for w in hyp:
                                        if len(w) > 2:
                                            similarterm += import_utilities.find_synset_word(
                                                w) + "-"
                                    similarterm = import_utilities.strip_underscore(
                                        similarterm[:-1])
                                    #print "hyphenated:",word,"replaced by: "+similarterm

                                #########################################################
                                # is it a TRUNCATED VERB slang as in singin or wishin   #
                                #########################################################
                                if similarterm == word_nopunct and len(
                                        word
                                ) > 2 and 'in' in word_nopunct[-2:]:
                                    similarterm = import_utilities.find_synset_word(
                                        word_nopunct + 'g')
                                    #print "TRUNCATED SLANG word: '"+word+"'",similarterm
                                    interim = import_utilities.lemma(
                                        similarterm)
                                    ## #print interim
                                    similarterm = import_utilities.conjugate(
                                        interim,
                                        tense=import_utilities.PARTICIPLE,
                                        parse=True)[:-1]
                                    # # # #print word,"widx:",widx," line_pos_tags[widx][0]:",line_pos_tags[widx][0]," line_pos_tags[widx][1]:",line_pos_tags[widx][1]

                                #################
                                # SWEAR WORD    #
                                #################
                                ##print "at the garden of if:", word
                                if word_nopunct in import_utilities.curses:
                                    similarterm = random.choice(
                                        import_utilities.curses)
                                    #print "SWEAR WORD word: '"+word+"'",similarterm

                                if len(hyp) > 1:
                                    replacement_word = similarterm
                                else:
                                    replacement_word = word.replace(
                                        word_nopunct, similarterm)
                                    replacement_word = import_utilities.strip_underscore(
                                        replacement_word)
                                    replacement_word = import_utilities.replaceNumbers(
                                        replacement_word)

                                #print "replacement_word:",replacement_word

                                #########################
                                # RESERVOIR_OF_WEIRDNESS  #
                                #########################

                                if word_nopunct.lower(
                                ) in import_utilities.impera:
                                    replacement_word = random.choice(
                                        import_utilities.impera)
                                    #print word,"IMPERA:",replacement_word
                                elif word_nopunct.lower(
                                ) in import_utilities.conjuncts:
                                    replacement_word = random.choice(
                                        import_utilities.conjuncts)
                                    #print word," CONJUNCTION replaced with",replacement_word
                                elif word_nopunct.lower(
                                ) in import_utilities.indef_prono:
                                    replacement_word = random.choice(
                                        import_utilities.indef_prono)
                                    #print word," INDEF_prono replaced with",replacement_word
                                elif word_nopunct.lower(
                                ) in import_utilities.prepo:
                                    replacement_word = random.choice(
                                        import_utilities.prepo)
                                    #print word," prepo replaced with",replacement_word
                                elif word_nopunct.lower(
                                ) in import_utilities.rel_prono:
                                    replacement_word = word
                                    #print word," rel_prono LEAVE alone: ",replacement_word
                                elif word_nopunct.lower(
                                )[-2:] == "ly" or word_nopunct.lower(
                                )[-3:] == "ly.":
                                    replacement_word = import_utilities.strip_underscore(
                                        import_utilities.find_synset_word(
                                            word))  #(word[:-2])
                                    #print word," ADVERB: ",replacement_word
                                    # if replacement_word[-2:] !="ly":
                                    #     replacement_word +="ly"

                                else:
                                    if len(
                                            hyp
                                    ) < 2 and "like" not in word_nopunct and import_utilities.singularize(
                                            word_nopunct
                                    ) == import_utilities.singularize(
                                            replacement_word
                                    ) and word_nopunct.lower(
                                    ) not in import_utilities.stopwords_ls:

                                        if word not in RESERVOIR and import_utilities.countPunctuation(
                                                word
                                        ) < 1 and len(
                                                word_nopunct
                                        ) > 3 and not word_nopunct.istitle():

                                            if len(
                                                    word
                                            ) > 4 and english_dict.check(word):
                                                #print "ADDING",word,"to reservoir"
                                                RESERVOIR.append(word)
                                                #RESERVOIR = list(set())

                                            replacement_word = random.choice(
                                                RESERVOIR)
                                            #print word_nopunct,"replaced from reservoir with", replacement_word
                                    # print "'"+word_nopunct+"'  vs RESERVOIR  replacement_word:",replacement_word #,"    new_line:",new_line
                                if quit_language > 1 and not word_nopunct.istitle(
                                ):
                                    #print quit_language, "Probably foreign language: make a word salad in english"
                                    replacement_word = random.choice(
                                        rap_mouth)  #RESERVOIR)
                                    #print word_nopunct,"OTHER replaced from reservoir with", replacement_word

                                ###################################################
                                # MOST REPLACEMENT occurs here...                 #
                                ###################################################
                                poem_ls = poem_replaced.split(' ')
                                idx = poem_ls.index(word)

                                #print idx,",", poem_ls[idx],",", word ,",",replacement_word
                                #print word ," --- ",previous_replacement_word,replacement_word

                                try:
                                    #print "poem_ls[idx]",poem_ls[idx],"word",word
                                    if poem_ls[
                                            idx] == word and "****" not in word and "." != word and "\n" not in word:
                                        # if "\n" in word:
                                        #     replacement_word=replacement_word+"\n"
                                        # if replacement_word=="":
                                        #     replacement_word=random.choice(RESERVOIR)
                                        poem_ls[
                                            idx] = replacement_word  #.encode('utf-8')
                                        "REPLACE", word, "with", replacement_word
                                    poem_replaced = " ".join(poem_ls)

                                    # store this word so that conjugation can be checked
                                    previous_replacement_word = replacement_word
                                except Exception, e:
                                    print "PENULTIMATE SKIP_bool replace FAIL", e
                                    SKIP_bool = True
                                    continue

                    ###########################################################################
                    # testing Pattern.en as parser for conjugation and article replacement    #
                    # much more robust than my hand-coded hacks                               #
                    ###########################################################################

                    # correct CONJUGATion of paticiple verbs with pattern.en
                    parsed = parse(poem_replaced, tags=True)
                    pre_verbal = ["'m", "'s", "'re"]
                    for idx, p in enumerate(parsed.split(" ")):
                        tok = p.split("/")[0]
                        typ = p.split("/")[1]
                        #print idx,tok,typ
                        if tok in pre_verbal:
                            #print "pre_verbal:",tok
                            next_word = parsed.split(" ")[idx + 1].split("/")

                            # try try try
                            for ix, n in enumerate(next_word):
                                next_word[ix] = re.sub(
                                    '(' +
                                    '|'.join(import_utilities.chars.keys()) +
                                    ')', import_utilities.replace_chars,
                                    n).encode('utf-8')
                            try:
                                #print  next_word,next_word[0],next_word[1][:2]
                                # if it's a verb that follows
                                if next_word[1][:2] == "VB":
                                    before_verb = " ".join(
                                        w for w in poem_replaced.split(" ")
                                        [:idx])  #.encode('utf-8')
                                    after_verb = " ".join(
                                        w for w in poem_replaced.split(" ")
                                        [idx + 1:])  #.encode('utf-8')
                                    new_verb = conjugate(
                                        next_word[0],
                                        tense=PARTICIPLE,
                                        parse=True).encode('utf-8')
                                    # insert new
                                    #print "CONJUGATION needed, changing:",poem_replaced.split(" ")[idx],"to",parsed.split(" ")[idx],poem_replaced.split(" ")[idx-1]+" "+new_verb
                                    poem_replaced = before_verb + " " + new_verb + " " + after_verb
                            except Exception, e:
                                #print "INside parsed COnjugation loop",e
                                continue

                    # correct ARTICLES
                    for idx, word in enumerate(poem_replaced.split(" ")):
                        if len(word) > 0 and idx != 0 and " " not in word:
                            # A or AN
                            if poem_replaced.split(" ")[idx - 1].lower(
                            ) == "a" or poem_replaced.split(" ")[
                                    idx - 1].lower() == "an":
                                #print word,"---",article(word)+" "+word
                                before_article = " ".join(
                                    w for w in poem_replaced.split(" ")[:idx -
                                                                        1])
                                after_article = " ".join(
                                    w for w in poem_replaced.split(" ")[idx +
                                                                        1:])
                                new_conj = referenced(word)
                                # capitalize
                                if poem_replaced.split(" ")[idx - 1].istitle():
                                    new_conj = new_conj.split(" ")[0].title(
                                    ) + " " + new_conj.split(" ")[1]
                                poem_replaced = before_article + " " + new_conj + " " + after_article

                    #########################
                    #   WRITE SINGLE POEM   #
                    #########################
                    if not SKIP_bool:

                        tmp_poem = ""

                        # poem_replaced.replace("\t","&#9;")
                        # poem_replaced.replace("\n"," <br>")
                        # poem_replaced.replace("\r"," <br>")

                        HTML_poem = ""
                        for line in poem_replaced.split("\n"):
                            #print "LINE", line
                            HTML_poem += line + "<br>"

                        if len(response) > 0 and len(id.split("_")) > 1:
                            # ALL_poems = ALL_poems_intro + " ".join(i for i in ALL_poems.split("</h2>.")[0:])+"<br><br>~~~~~~~~~~~~~~~~~~~~~~~~~~<br>[ A poem generated from template : <b>"+ author+"</b>, <i>"+ title +"</i> ]<br><br><b>"+new_title+"<br><br></b>"+HTML_poem

                            try:
                                ALL_poems = "<br>[ A  generated-poem based upon: <i>" + title + "</i> by <b>" + author + "</b>]<br><br><i>" + new_title + "</i><br> by <b>" + new_author + "</b><br>" + HTML_poem + ALL_poems.split(
                                    "</h2>")[1].replace("  ", "&nbsp")

                                tmp_poem = "[A generated-poem based upon: '" + title + "' by " + author + "]\n\n" + new_title + "\nby " + new_author + "\n" + poem_replaced

                                print "\n~~~\n\n" + tmp_poem
                                #print "\nORIGINAL:",bio

                                txt_fn = id.split("_")[1] + "_POEMs.txt"

                                WRITE_BIO_PATH = DATA_DIR + "generated/POEMS/POEMS_" + datetime.datetime.now(
                                ).strftime('%Y-%m-%d_%H') + "/"
                                if not os.path.exists(WRITE_BIO_PATH):
                                    os.makedirs(WRITE_BIO_PATH)

                                txt_fn_path = WRITE_BIO_PATH + txt_fn
                                f_txt = open(txt_fn_path, 'w')
                                f_txt.write(tmp_poem)  #.encode('utf-8'))
                                f_txt.close()
                                #print "\nTXT file created at:",txt_fn_path

                                #######
                                #   write them all.... wasteful... but useful if run is interrupted....
                                ###########

                                # if cnt==1:
                                #     ALL_poems = ALL_poems_intro+ALL_poems
                                # else:
                                ALL_poems = ALL_poems_intro + ALL_poems.replace(
                                    "  ", "&nbsp")
                                ALL_poems = ALL_poems.replace(
                                    "$$datetime$$",
                                    datetime.datetime.now().strftime(
                                        '%Y-%m-%d at %H:%M'))
                                ALL_poems = ALL_poems.replace(
                                    "$$cnt$$", str(cnt))
                                #print "cnt",cnt
                                ALL_poems = ALL_poems.replace(
                                    "$$gentime$$",
                                    str(time.time() - start_time))

                                # ALL POEMS
                                txt_fn = datetime.datetime.now().strftime(
                                    '%Y-%m-%d_%H'
                                ) + "_poetryFoundation_generatedPOEMS_" + type_of_run + ".html"
                                txt_fn_path = DATA_DIR + "generated/POEMS/" + txt_fn
                                f_txt = open(txt_fn_path, 'w')
                                f_txt.write(ALL_poems + "</hmtl>")
                                f_txt.close()
                                #print "\nTXT file created at:",txt_fn_path
                            except Exception, e:
                                print "At the final LOOP", e
                                continue

                        else:
                            print "~! EMPTY response:", author

                    else:
                        cnt = cnt - 1
Example #54
0
# guessing the indefinite article of a word (a/an?),
# pluralization and singularization, comparative and superlative adjectives, verb conjugation.

# INDEFINITE ARTICLE
# ------------------
# The article() function returns the indefinite article (a/an) for a given noun.
# The definitive article is always "the". The plural indefinite is "some".
print article("bear"), "bear"
print

# The referenced() function returns a string with article() prepended to the given word.
# The referenced() funtion is non-trivial, as demonstrated with the exception words below:
for word in [
        "hour", "one-liner", "European", "university", "owl", "yclept", "year"
]:
    print referenced(word)
print
print

# PLURALIZATION
# -------------
# The pluralize() function returns the plural form of a singular noun (or adjective).
# The algorithm is robust and handles about 98% of exceptions correctly:
for word in [
        "part-of-speech", "child", "dog's", "wolf", "bear", "kitchen knife"
]:
    print pluralize(word)
print pluralize("octopus", classical=True)
print pluralize("matrix", classical=True)
print pluralize("matrix", classical=False)
print pluralize("my", pos=ADJECTIVE)
def extractFeaturesAndWritePoem(READ_PATH,file_type):
    
    

    global ALL_poems,bio,cnt,SMALL_POEM,SMALL_POEM_ALL

    inp=0
    sub_cnt=0
    words_total=0
    lines_total=0

    pause_every = 0

    for subdir, dirs, files in os.walk(READ_PATH):

        #print "randomizing",datetime.datetime.now()
        random.seed(datetime.datetime.now())
        random.shuffle(files)

        for file in files:


            
            num_of_files = len(files)-1 # deduct the DS_store
            #print (num_of_files,'readDirectory',READ_PATH)
            
            if file_type in file  and 'readme' not in file:

                JSON_alchemy_loaded = False

                # ID
                id=file.split(".")[0]
                #print "\nID:",id.split("_")[1]

                filenames.append(id)
                cnt+=1

                # print('')
                # print('')
                # print('OPENED:',id)
                # print('')
                # print('')

                ##############
                #  HOW MANY? #
                ##############
                sub_cnt+=1
                if sub_cnt>=int(inp):
                    if int(inp) != 0:
                        end_time = time.time()
                        es = end_time-start_time
                        print "\n",sub_cnt, "poems,\n",lines_total,"lines,\n",words_total,"words \ngenerated in\n",("%.2f" % es),"seconds"
                        
                    words_total=0
                    lines_total=0

                    # RESTART

                    sub_cnt=0
                    inp = raw_input("\n\n^^^^^^^^^^^^^^\n\nHow many poems do u want? ")

                    if not inp:
                        print "You entered nothing! 10 poems will be generated."
                        inp=10
                        
                    pause_every = raw_input("\nPause every 1 or 2 or ... poems?")
                    if not pause_every:
                        print "You entered nothing! Pause will occur every 10 poems."
                        pause_every=10

                    sleep_time = raw_input("\nPause for how many seconds?")
                    if not sleep_time:
                        print "You entered no time! 10 second wait assigned."
                        sleep_time=10

                    print "\n\n^^^^^^^^^^^^^^^"
                    start_time = time.time()

                print 'Poem #',sub_cnt

                poem_replaced = ""
                replacement_word = ""
                previous_replacement_word = ""
                
                author=""
                titles=""
                title=""
                new_title=""

                replaced_ls =[]
                new_titles_ls = []
                quit_language=0
                oscillator=0

                word_cnt=0

                # if EXCEPTION is raised... do not add to html
                SKIP_bool=False

                ##########################
                # Load  POEM TEXT FILE   #
                ##########################

                ##
                # PAUSE
                ##
                #time.sleep(5)

                txt_fn_path = DATA_DIR + READ_TXT_PATH + id.split("_")[1]+".txt"
                #print "txt_fn_path:",txt_fn_path

                if os.path.isfile(txt_fn_path) and cnt>0:
                    txt_data=open(txt_fn_path).read()

                    # http://blog.webforefront.com/archives/2011/02/python_ascii_co.html
                    # txt_data.decode('ISO-8859-2') .decode('utf-8')
                    # unicode(txt_data)


                    author=txt_data.split("****!****")[0].strip(' \t\n\r')
                    
                    title=txt_data.split("****!****")[1].strip(' \t\n\r')
                    
                    bio=txt_data.split("****!****")[2]#.strip(' \t\n\r')

                    ######  CLEAN BIO
                    bio.replace("\t","&#9;")
                    bio.replace("\n"," <br>")
                    bio.replace("\r"," <br>")
                    bio.replace("","~~~~!~~~")
                    poem_replaced=bio
                    #print poem_replaced

                    ###############################
                    # REPLACE AUTHOR NAME in poem #
                    ###############################
                    author_ln=author.split(" ")[-1].lstrip()
                    author_fn=author.split(" ")[:-1]
                    author = " ".join(n for n in author_fn)+author_ln
                    #
                    #poem_replaced = poem_replaced.replace(author_ln,"Jhave")

                    #######################
                    # replace BOOK TITLES #
                    #######################
                    #print "TITLES"]
                    new_title = getNewTitle("title").encode('utf-8')

                    #######################
                    # fake AUTHOR         #
                    #######################
                    
                    new_author= " ".join(random.choice(authors).split(" ")[1:-2])+" "+random.choice(authors).split(" ")[-2]
                    #print "new AUTHOR",new_author                           

                    ############################
                    # replace years with another
                    ############################
                    for w1 in poem_replaced.split("("):
                        for w2 in w1.split(")"):
                            if w2 is not None and w2.isdigit():
                                new_num = random.randint(int(w2)-5,int(w2)+5)
                                #print "REPLACING #:",w2,new_num
                                poem_replaced = poem_replaced.replace(w2,str(new_num))
                                replaced_ls.append(new_num)                            
                                               

                    #################
                    # Load JSON     #
                    #################
                    response = loadJSONfile(READ_JSON_PATH+"poetryFoundation_"+id.split("_")[1]+"_Alchemy_JSON.txt")

                    if response != "failed":

                        JSON_alchemy_loaded = True

                        if response.get('entities') is not None:
                            for idx,entity in enumerate(response['entities']):

                                #DATA clean the original words (redundant duplicate but for some reason it works... and is necessary... a kludge of crowbars and bleach)
                                ce = entity['text'].replace("0xc2"," ")
                                ce = ce.replace("0xe2","'")
                                ce = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, ce)
                                ce = ce.encode('utf-8')

                                try:
                                    content = ce.decode('utf-8').encode('ascii', 'xmlcharrefreplace')
                                except UnicodeDecodeError:
                                    "AAAARGGGGHHH!!!!"

                                if content in poem_replaced:
                                                       
                                    #################################################
                                    #                                               #
                                    # Replace similar entities from other JSON      #
                                    # Using data from ALCHEMY API                   #
                                    #                                               #
                                    #################################################
                                    replacement_entity = findSimilarEntityinRandomJSON(content,entity['type'])

                                    cr = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, replacement_entity)

                                    poem_replaced = poem_replaced.replace(content,replacement_entity)

                                    replaced_ls.append(replacement_entity)
                    

                    ##########################
                    #   POS REPLACMENT       #
                    ##########################

                    token_tuples = nltk.word_tokenize(poem_replaced)
                    tt = nltk.pos_tag(token_tuples)

                    #################
                    #  ADJECTIVES   #
                    #################
                    for i in tt:
                        if "/i" not in i[0] and len(i[0])>3 and i[0] != "died":
                            origw =  re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, i[0])
                            origw =import_utilities.strip_punctuation(origw) 
                            if i[1]=='JJ' :
                                JJr = random.choice(JJ)
                                # # JJr =  re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, JJr)
                                # JJr = import_utilities.strip_punctuation(JJr)
                                JJr = import_utilities.moveBeginAndEndPunctuationFromStrToString(i[0],JJr.lstrip().lstrip())
                                
                                if i[0].istitle():
                                    JJr = JJr.title()

                                poem_replaced = re.sub(r'\b' + import_utilities.strip_punctuation(i[0]) + r'\b', JJr, poem_replaced,1)#poem_replaced.replace(i[0],JJr,1)
                                replaced_ls.append(JJr)
                            if i[1]=='RB':
                                RBr = random.choice(RB)
                                RBr = import_utilities.moveBeginAndEndPunctuationFromStrToString(i[0],RBr.lstrip().lstrip())

                                if i[0].istitle():
                                    RBr = RBr.title()
                                poem_replaced = re.sub(r'\b' + import_utilities.strip_punctuation(i[0])  + r'\b', RBr, poem_replaced,1)
                                replaced_ls.append(RBr)


                    ########################
                    # IS IT ENGLISH?       #
                    ########################
                    for line  in poem_replaced.split('\n\r'):
                        if len(line)>0 :
                            if "english" not in import_utilities.get_language(line):
                                quit_language+=1
                                #print "NOT english:",quit_language,line
                            else:
                                quit_language-=1

                    
                    #########################
                    #   SYNSET REPLACE      #
                    #########################
                    for idx,word in enumerate(poem_replaced.split(' ')):

                        similarterm=""

                        if "<br>" not in word and "&#9;" not in word and len(word)>0:


                            words_total+=1


                            #########################
                            #   PRONOUN ' VERB      #
                            #########################
                            if len(word.split("'"))>1:
                                if word.split("'")[0] in personal_pronouns:
                                    replacement_word = random.choice(personal_pronouns)+"'"+word.split("'")[1]+' '
                                poem_replaced.replace(word,replacement_word)             
                                #print "word,",word,"replacement_word:",replacement_word
                           
                            ####################################################
                            # Replacement of OTHERs                            #
                            ####################################################

                            elif not word.lower().strip(" \n\t\r") in stopwords.words('english'):

                                # take off leading brackets, commas etc...
                                word_punct_nopunct = import_utilities.strip_punctuation_bool(word)
                                word_nopunct = word_punct_nopunct['word'].strip(" \n\t\r")
                                word_punct = word_punct_nopunct['punct']
                                punct_bool = word_punct_nopunct['punct_bool']

                             

                                #######################################################
                                # MAIN EXCHANGE PROCESS CALL >>>>>>>   GET THE SYNSET #
                                #######################################################    
                                # Words ending in "here" get a canned substitute; words longer
                                # than 3 chars go through the synset lookup.
                                # NOTE(review): when word_nopunct is 3 chars or fewer (and does
                                # not end in "here"), similarterm keeps whatever value it had
                                # from the previous iteration -- confirm that is intentional.
                                if word_nopunct[-4:].lower()=="here":
                                    similarterm=random.choice(import_utilities.heres)
                                else:
                                    #print "WORD:",word_nopunct
                                    if len(word_nopunct)>3:

                                        oscillator  = oscillator+1
                                        
                                        ############################################
                                        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                                        # STYLE SWITCH..... should in future use POS
                                        # ... i.e. if noun & oscillator%3, do...
                                        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                                        ############################################
                                        # synset
                                        similarterm = import_utilities.synset_creeley(word_nopunct)
                                        #print "synset", similarterm

                                        # If the lookup just echoed the word back, fall back to a
                                        # length-sorted pick from the poetry reservoir instead.
                                        if similarterm is not None and similarterm == word_nopunct and len(word_nopunct)>4:
                                            #RESERVOIR.sort(key=len)
                                            poetry_mouth.sort(key=len)
                                            similarterm= poetry_mouth[idx%len(poetry_mouth)]#RESERVOIR[idx%len(RESERVOIR)]
                                            #print "NEW",idx,len(RESERVOIR),similarterm,word_nopunct,"PRE>>>>>>>>LAST CHANGE STOP: ", word, "~",similarterm

                                            

                                #######################################                      
                                # abbreviations for f*****g states!   #
                                #######################################
                                # US state abbreviations would otherwise be expanded literally,
                                # so swap them for an exclamation ("oh") or a random reservoir
                                # word.  NOTE(review): tmp is only used by the commented-out
                                # print below -- dead store.
                                if word_nopunct.upper() in import_utilities.state_abbrev and word_nopunct.lower() not in stopwords.words('english') and "me," not in word:
                                    tmp = similarterm
                                    if word_nopunct == "oh": 
                                        similarterm = random.choice(import_utilities.exclaims)
                                    else:

                                        similarterm = random.choice(poetry_mouth)#RESERVOIR)
                                    #print word_nopunct," replaced by", tmp, "replaced with:",similarterm, "in:",line

                                ##############
                                # hyphenated #
                                ##############
                                # Rebuild hyphenated words piece by piece, replacing each part
                                # longer than 2 chars via the synset lookup.
                                # NOTE(review): parts of 1-2 chars are dropped entirely (no text,
                                # no hyphen), and synset_creeley is called twice per part.
                                hyp =word.split("-")
                                #print word,len(hyp)
                                if len(hyp) >1:
                                    similarterm=""
                                    for w in hyp:
                                        if len(w) > 2:
                                            if import_utilities.synset_creeley(w) is not None:
                                                similarterm +=  import_utilities.synset_creeley(w)+"-"
                                            else:
                                                similarterm += w+"-"
                                    # [:-1] trims the trailing hyphen added by the loop above
                                    similarterm = import_utilities.strip_underscore(similarterm[:-1])
                                    #print "hyphenated:",word,"replaced by: "+similarterm

                                
                                #########################################################    
                                # is it a TRUNCATED VERB slang as in singin or wishin   #
                                #########################################################
                                # (disabled experiment: re-conjugate g-dropped participles)
                                # if similarterm == word_nopunct and len(word)>2 and 'in' in word_nopunct[-2:]:
                                #     similarterm = import_utilities.synset_creeley(word_nopunct+'g')
                                #     ## #print "TRUNCATED SLANG word: '"+word+"'",similarterm
                                #     interim = import_utilities.lemma(similarterm)
                                #     ## #print interim
                                #     similarterm = import_utilities.conjugate(interim, tense=import_utilities.PARTICIPLE, parse=True)[:-1] 
                                #     # # # #print word,"widx:",widx," line_pos_tags[widx][0]:",line_pos_tags[widx][0]," line_pos_tags[widx][1]:",line_pos_tags[widx][1]
                                   

                                #################      
                                # SWEAR WORD    #
                                #################
                                ##print "at the garden of if:", word
                                # curses are only ever replaced with other curses
                                if word_nopunct in import_utilities.curses:
                                    similarterm = random.choice(import_utilities.curses)
                                    ##print "SWEAR WORD word: '"+word+"'",similarterm


                                                          
                                ############################################
                                # manually get rid of some terrible choices
                                ############################################
                                # Hand-curated overrides for synonym lookups that read badly in
                                # verse (e.g. "information technology" for "it", "Crataegus
                                # laevigata" for "may") -- apparently synset artifacts.
                                # NOTE(review): naw_terms is assigned but never used in this span.
                                naw_terms=["mind","lonely"]
                                if similarterm == "ilk":
                                    similarterm = "like"

                                if similarterm == "Nox":
                                    similarterm = "oil"

                                if similarterm == "ope":
                                    similarterm = "does"

                                if similarterm == "information technology":
                                    similarterm = "it"

                                if similarterm == "velleity":
                                    similarterm = "want"

                                if similarterm == "Crataegus laevigata":
                                    similarterm = "may"

                                if similarterm == "eff":
                                    similarterm = "know"

                                if similarterm == "naw":
                                    similarterm = "mind"

                                if similarterm == "lento":
                                    similarterm = "slow"

                                #print "SIMILAR:",similarterm

                                # Build the candidate replacement: hyphenated words take the
                                # rebuilt term verbatim; otherwise splice the synonym into the
                                # original token so surrounding punctuation survives.
                                if similarterm is not None:
                                    if len(hyp) >1:
                                        replacement_word = similarterm
                                    else:
                                        replacement_word = word.replace(word_nopunct, similarterm)
                                        replacement_word = import_utilities.strip_underscore(replacement_word)
                                        replacement_word = import_utilities.replaceNumbers(replacement_word)
                                else:
                                    replacement_word = random.choice(poetry_mouth)#RESERVOIR)

                                ################################
                                # RESERVOIR_OF_WEIRDNESS       #
                                # create a large pool of words #
                                ################################  

                                # Closed word classes are swapped within their own class (an
                                # imperative for an imperative, a conjunction for a conjunction,
                                # etc.); relative pronouns are left untouched.
                                if word_nopunct.lower() in import_utilities.impera:
                                    replacement_word=random.choice(import_utilities.impera)
                                    #print word,"IMPERA:",replacement_word
                                elif word_nopunct.lower() in import_utilities.conjuncts:
                                    replacement_word=random.choice(import_utilities.conjuncts)
                                    #print word," CONJUNCTION replaced with",replacement_word
                                elif word_nopunct.lower() in import_utilities.indef_prono:
                                    replacement_word=random.choice(import_utilities.indef_prono)
                                    #print word," INDEF_prono replaced with",replacement_word
                                elif word_nopunct.lower() in import_utilities.prepo:
                                    replacement_word=random.choice(import_utilities.prepo)
                                    #print word," prepo replaced with",replacement_word
                                elif word_nopunct.lower() in import_utilities.rel_prono:
                                    replacement_word=word
                                    #print word," rel_prono LEAVE alone: ",replacement_word
                                elif word_nopunct.lower()[-2:] =="ly":
                                    # adverbs: re-run the synset lookup on the full word
                                    # NOTE(review): synset_creeley is called twice here; the
                                    # first call's result could be reused.
                                    if import_utilities.synset_creeley(word) is not None:
                                        replacement_word=import_utilities.strip_underscore(import_utilities.synset_creeley(word))#(word[:-2])
                                    #print word," ADVERB: ",replacement_word
                                    # if replacement_word[-2:] !="ly":
                                    #     replacement_word +="ly"
                                                                            
                                else:
                                    # Replacement still equals the original (after
                                    # singularizing): harvest the word into the reservoir and
                                    # pick a substitute from poetry_mouth instead.
                                    if len(hyp) <2 and "like" not in word_nopunct and import_utilities.singularize(word_nopunct) ==  import_utilities.singularize(replacement_word) and word_nopunct.lower() not in import_utilities.stopwords_ls:

                                        if word not in RESERVOIR and quit_language<0 and import_utilities.countPunctuation(word)<1 and len(word_nopunct)>3 and not word_nopunct.istitle(): 
                                            
                                            #print "ADDING",word,"to reservoir"
                                            #################################################
                                            # ADDING ONLY SMALL WORDS 
                                            # & MAKING A POEM OUT OF THEM
                                            #################################################
                                            if len(word)<7 and len(word)>0:
                                                small_word = word
                                                # ~1-in-5 chance of a line break in the side-poem
                                                if random.randint(0,4)==3:
                                                    small_word +="\n"
                                                #print small_word
                                                small_word +=" "
                                                SMALL_POEM+=small_word

                                                RESERVOIR.append(word)
                                                #SMALL_POEM_ALL.append(small_word)
                                            
                                            replacement_word = random.choice(poetry_mouth)#RESERVOIR)#rap_mouth)# RESERVOIR)
                                            #print word_nopunct,"replaced from reservoir with", replacement_word
                                       # print "'"+word_nopunct+"'  vs RESERVOIR  replacement_word:",replacement_word #,"    new_line:",new_line
                                # quit_language>1 signals the poem is probably not in English:
                                # fall back to a random word salad from the reservoir
                                if quit_language>1 and not word_nopunct.istitle():
                                    #print quit_language, "Probably foreign language: make a word salad in english"
                                    replacement_word = random.choice(poetry_mouth)#RESERVOIR)#science_mouth)#RESERVOIR)
                                    #print word_nopunct,"OTHER replaced from reservoir with", replacement_word
                                
                                ###################################################
                                # MOST REPLACEMENT occurs here...                 #
                                ###################################################
                                # Locate the current word in the space-split poem.
                                # NOTE(review): list.index returns the FIRST occurrence, so a
                                # repeated word always updates its first instance, and raises
                                # ValueError if the token was already rewritten upstream.
                                poem_ls = poem_replaced.split(' ')
                                idx =  poem_ls.index(word)

                                # print idx,",", poem_ls[idx],",", word ,",",replacement_word
                                #print word ," --- ",previous_replacement_word,replacement_word

                                # NOTE(review): idx_2 is computed identically to idx above and
                                # is always equal to it.
                                idx_2 =  poem_ls.index(word)

                                # BUG test: is potential replacement a comma or period or empty?
                                if replacement_word.lstrip().rstrip() =="," or replacement_word.lstrip().rstrip() =="" or replacement_word.lstrip().rstrip() ==".":
                                    #print "found a comma/empty why?",replacement_word.lstrip().rstrip()
                                    replacement_word=random.choice(poetry_mouth)
                                    #print "line633 REPLACING with ",replacement_word

                                # If the replacement is still identical to the original token,
                                # try a part-of-speech-matched alternative.
                                if poem_ls[idx]==word and poem_ls[idx]==replacement_word:
                                    #print "SAME idx-2 replacement_word=",replacement_word
                                    # search for same grammatical type the NLTK lists
                                    replacement_word= findSamePOS(replacement_word)
                                    #print "after findSamePOS replacement_word=",replacement_word

                                #print idx,idx_2,"  poem_ls[idx_2]=", poem_ls[idx_2],"  poem_ls[idx]=", poem_ls[idx]," word=", word ,"    replacement=",replacement_word


                                # "~~~~!~~~" is an internal sentinel: never write it into the poem
                                if replacement_word == "~~~~!~~~" or poem_ls[idx]==  "~~~~!~~~": 
                                    print "~~~~!~~~ FOUND ******"

                                else:
                                    if poem_ls[idx]==word:
                                        poem_ls[idx]=replacement_word
                                    if poem_ls[idx_2]==word:
                                        poem_ls[idx_2]=replacement_word
                                    poem_replaced = " ".join(poem_ls)

                                # still the same? try another game
                                # For longer words whose replacement is still the original,
                                # make one more POS-matched attempt.
                                if len(word)>5 and replacement_word.lstrip().rstrip() == word_nopunct.lstrip().rstrip():

                                    #################################################
                                    # since word is same as replacement, try alchemy?  
                                    #################################################
                                    
                                    #replacement_entity = findSimilarEntityinRandomJSON(content,entity['type'])

                                    # a last ditch pseudo random select 
                                    # TODO USE THE NLTK LISTS TO SELECT POS WORD
                                    # RESERVOIR.sort(key=len)
                                    # replacement_word = RESERVOIR[idx%len(RESERVOIR)]
                                    #poetry_mouth.sort(key=len)


                                    #INSERTION usi
                                    #replacement_word = random.choice(poetry_mouth)#[idx%len(poetry_mouth)]
                                    replacement_word= findSamePOS(replacement_word)
                                    #print "NEWEST POS",idx,len(poetry_mouth),"LAST CHANGE STOP: ", word, "~",replacement_word

                                # check again
                                # last resort: pure random pick if the POS match also failed
                                if poem_ls[idx]==word and poem_ls[idx]==replacement_word:
                                    #print "AGAIN SAME idx replacement_word=",replacement_word
                                    replacement_word=random.choice(poetry_mouth)
                                    #print "line663 AGAIN NEW rand pf=",replacement_word

                        
                                # REPLACE (but catch for weird chars)
                                # Final write-back into the token list, skipping separator
                                # tokens ("****", a bare ".", or anything holding a newline).
                                try:

                                    if poem_ls[idx]==word and "****" not in word and "." != word and "\n" not in word:

                                        # INSERTION
                                        poem_ls[idx]=replacement_word
                                        #print "line673 REPLACING",poem_ls[idx]," with ",replacement_word


                                    # REASSEMBLE the poem    
                                    poem_replaced = " ".join(poem_ls)

                                    # store this word so that conjugation can be checked 
                                    previous_replacement_word=replacement_word

                                # Python 2 except syntax; any failure flags the poem to be
                                # skipped rather than crashing the batch run.
                                except Exception, e:
                                    #print "PENULTIMATE SKIP_bool replace FAIL",e
                                    SKIP_bool=True
                                    continue

                    ###########################################################################
                    # testing Pattern.en as parser for conjugation and article replacement    #
                    # much more robust than my hand-coded hacks                               #        
                    ###########################################################################
                    
                    # correct CONJUGATion of paticiple verbs with pattern.en
                    # parse() tags each token as "token/POS/..."; after a contracted
                    # auxiliary ('m / 's / 're) the following verb is re-conjugated into
                    # its participle form and spliced back into poem_replaced.
                    parsed = parse(poem_replaced,tags = True) 
                    pre_verbal = ["'m","'s","'re"]
                    for idx,p in enumerate(parsed.split(" ")):
                        tok =p.split("/")[0]
                        typ=p.split("/")[1]
                        #print idx,tok,typ
                        if tok in pre_verbal:
                            #print "pre_verbal:",tok
                            next_word= parsed.split(" ")[idx+1].split("/")

                            # try try try
                            # normalize odd characters in each tag field before inspecting it
                            for ix,n in enumerate(next_word): 
                                next_word[ix] = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, n).encode('utf-8')
                            try:
                                #print  next_word,next_word[0],next_word[1][:2]
                                # if it's a verb that follows
                                if next_word[1][:2] =="VB":
                                    before_verb = " ".join(w for w in poem_replaced.split(" ")[:idx])#.encode('utf-8')
                                    after_verb = " ".join(w for w in poem_replaced.split(" ")[idx+1:])#.encode('utf-8') 
                                    new_verb = conjugate(next_word[0], tense=PARTICIPLE, parse=True).encode('utf-8')
                                    # insert new
                                    #print "CONJUGATION needed, changing:",poem_replaced.split(" ")[idx],"to",parsed.split(" ")[idx],poem_replaced.split(" ")[idx-1]+" "+new_verb
                                    poem_replaced = before_verb+" "+new_verb+" "+after_verb
                            except Exception, e:
                                # print "INside parsed COnjugation loop",e
                                continue


                    # correct ARTICLES
                    # Re-derive "a"/"an" before each word with pattern.en's referenced(),
                    # preserving the capitalization of the original article.
                    for idx,word in enumerate(poem_replaced.split(" ")):
                        if len(word)>0 and idx != 0 and " " not in word:
                            # A or AN
                            if poem_replaced.split(" ")[idx-1].lower() =="a" or poem_replaced.split(" ")[idx-1].lower() =="an":
                                #print word,"---",article(word)+" "+word
                                # splice out the old article (idx-1) and the word (idx), then
                                # re-insert referenced(word), i.e. "a word" / "an word"
                                before_article = " ".join(w for w in poem_replaced.split(" ")[:idx-1])
                                after_article = " ".join(w for w in poem_replaced.split(" ")[idx+1:])
                                new_conj = referenced(word)
                                # capitalize
                                if poem_replaced.split(" ")[idx-1].istitle():
                                    new_conj = new_conj.split(" ")[0].title()+" "+new_conj.split(" ")[1]
                                poem_replaced = before_article+" "+new_conj+" "+after_article

                    #########################
                    #  bug check ,,         #
                    #########################
                    # collapse punctuation artifacts introduced by the joins above
                    poem_replaced = poem_replaced.replace(",,", ",")
                    poem_replaced = poem_replaced.replace(",.", ",")
                    poem_replaced = poem_replaced.replace(".,", ".")

                    #########################
                    #   WRITE SINGLE POEM   #
                    #########################
                    # Emit the finished poem: console print, one .txt per poem, a
                    # side-file of "small poem" reservoir words, and a cumulative HTML
                    # document of every poem generated so far.
                    if not SKIP_bool:

                        tmp_poem=""   

                        # poem_replaced.replace("\t","&#9;")
                        # poem_replaced.replace("\n"," <br>")
                        # poem_replaced.replace("\r"," <br>")

                        HTML_poem=""
                        for line in poem_replaced.split("\n"):
                            #print "LINE", line
                            lines_total+=1
                            HTML_poem += line+"<br>"

                        if len(response) >0 and len(id.split("_"))>1:

                            # Prepend this poem to the running HTML document.
                            # NOTE(review): "&nbsp" is missing its trailing semicolon, so
                            # browsers may not render it as a non-breaking space entity.
                            ALL_poems = "<br>[ A  generated-poem based upon: <i>"+ title +"</i> by <b>"+ author+"</b>]<br><br><i>"+new_title+"</i><br> by <b>"+ new_author   +"</b><br>"+HTML_poem+ALL_poems.split("</h2>")[1].replace("  ","&nbsp")

                            tmp_poem= "\n[A generated-poem based upon: '"+ title+"' by "+ author +"]\n\n"+new_title+ "\nby "+new_author+"\n"+poem_replaced
  
                            #####################
                            #                   #
                            #                   #
                            #     PAUSE IT      #
                            #                   #
                            #                   #
                            #####################

                            # throttle: sleep after every pause_every poems
                            if (int(sub_cnt)%int(pause_every) == 0 and int(sub_cnt) !=0):
                                time.sleep(int(sleep_time))

                            #####################
                            #                   #
                            #                   #
                            #       PRINT       #
                            #                   #
                            #                   #
                            #####################

                            print "\n~~~\n"  +tmp_poem+"\n~~~\n" 

                            # SLOW TYPEWRITER PRESENTATION
                            # for line in tmp_poem:
                            #    for c in line:
                            #         time.sleep(0.04)
                            #         sys.stdout.write(c)#(c.encode("utf8"))
                            #         sys.stdout.flush()
# 
                            #sys.stdout.write("\n")

                            txt_fn = id.split("_")[1]+"_POEMs.txt"

                            # per-run output directory, stamped to the hour
                            WRITE__PATH = "../../generated/poetryFoundation/"+poem_style+datetime.datetime.now().strftime('%Y-%m-%d_%H')+"/"
                            if not os.path.exists(WRITE__PATH):
                                    os.makedirs(WRITE__PATH)

                            txt_fn_path = WRITE__PATH+txt_fn
                            f_txt=open(txt_fn_path,'w')
                            f_txt.write(tmp_poem)#.encode('utf-8'))       
                            f_txt.close();   
                            #print "\nTXT file created at:",txt_fn_path

                            # parallel directory for the reservoir "small poems"
                            WRITE__PATH = "../../generated/poetryFoundation/"+poem_style+"_SMALL_POEMS"+datetime.datetime.now().strftime('%Y-%m-%d_%H')+"/"
                            if not os.path.exists(WRITE__PATH):
                                    os.makedirs(WRITE__PATH)
                            txt_fn_path = WRITE__PATH+txt_fn
                            f_txt=open(txt_fn_path,'w')
                            f_txt.write("[A generated-poem based upon: '"+ title+"' by "+ author +"]\n\n"+SMALL_POEM)#.encode('utf-8'))       
                            f_txt.close(); 
                            SMALL_POEM=""  
                            
                            #######
                            #   write them all.... wasteful... but useful if run is interrupted....
                            ###########  

                            # if cnt==1:
                            #     ALL_poems = ALL_poems_intro+ALL_poems
                            # else:
                            # fill in the $$...$$ template placeholders
                            ALL_poems = ALL_poems_intro+ALL_poems.replace("  ","&nbsp")
                            ALL_poems = ALL_poems.replace("$$datetime$$",datetime.datetime.now().strftime('%Y-%m-%d at %H:%M'))
                            ALL_poems = ALL_poems.replace("$$cnt$$",str(cnt))
                            ALL_poems = ALL_poems.replace("$$style$$",poem_style)
                            ALL_poems = ALL_poems.replace("$$gentime$$",str(time.time() - start_time))

                            # ALL POEMS
                            txt_fn = datetime.datetime.now().strftime('%Y-%m-%d')+"_BDP_generated_"+poem_style+"_POEMS_"+str(poem_id)+".html"
                            

                            GEN_PATH = GENERATED_DIR+type_of_run+"_html/"
                            if not os.path.exists(GEN_PATH):
                                    os.makedirs(GEN_PATH)

                            txt_fn_path = GEN_PATH+txt_fn

                            f_txt=open(txt_fn_path,'w')
                            # BUG(review): closing tag is misspelled "</hmtl>" -- should be
                            # "</html>".
                            f_txt.write(ALL_poems+"</hmtl>")       
                            f_txt.close();   
                            #print "\nTXT file created at:",txt_fn_path
                        # except Exception, e:
                        #         print "At the final LOOP",e
                        #         #continue
                        #         pass


                        # pairs with "if len(response) >0 ..." above: nothing to write
                        else:
                            pass
                            #print "~! EMPTY response:", author

                    # pairs with "if not SKIP_bool": a skipped poem is not counted
                    else:
                        cnt = cnt-1