Ejemplo n.º 1
0
 def generate(self):
     """
     Interactively generate text from the configured source data.

     Tagged tokens come from one of two places:

     * ``self.data`` -- a raw text file that is tagged with
       ``StanfordTagger``; when ``self.save_tagged_data`` is truthy the
       tagged rows are also written as CSV to ``self.data + ".tags"``.
     * ``self.tagged_data`` -- a CSV file of previously saved rows, each
       loaded as a tuple.

     The method then loops on stdin, asking for a word count and a
     generation method key ('b', 'n' or 't'), and prints the detokenized
     result. Entering ``q`` as the word count ends the loop; any other
     invalid input simply re-prompts.

     Raises:
         ValueError: if neither ``self.data`` nor ``self.tagged_data`` is set.
     """
     if self.data is not None:
         with open(self.data, 'r') as data_file:
             text = data_file.read()
         # Tag after the file is closed so the handle is not held during tagging.
         tagged = StanfordTagger(verbose=True).tag(text)
         if self.save_tagged_data:
             # newline='' lets the csv module control line endings itself.
             with open(self.data + ".tags", 'w', newline='') as save_file:
                 csv.writer(save_file).writerows(tagged)
     elif self.tagged_data is not None:
         with open(self.tagged_data, 'r', newline='') as data_file:
             tagged = [tuple(row) for row in csv.reader(data_file)]
     else:
         # Previously this fell through and failed later with an unbound
         # local; fail early with an explicit message instead.
         raise ValueError("either 'data' or 'tagged_data' must be set")

     detok = StanfordDetokenizer()
     model = LVGNgramGenerator(tagged, self.n_gram)
     methods = {'b': model.generate_without_pos, 'n': model.generate, 't': model.generate_alternative}
     while True:
         num_words = input('\nEnter the length in words to generate (or "q" to exit): ')
         if num_words == 'q':
             break
         # isdecimal() (unlike isdigit()) only accepts characters int() can parse.
         if not num_words.isdecimal():
             continue
         method = input('Enter a generation method {b: baseline, n: normal, t: tuned}: ')
         if method in methods:
             print("\n\t" + detok.detokenize(' '.join(methods[method](int(num_words)))))
Ejemplo n.º 2
0
def yelp_or_gutenberg(file: str, n: int, words: int, method: str):
    """Return generated sentences built from a pre-tagged CSV file.

    Args:
        file: Path of the CSV file; every row is loaded as a tuple.
        n: Value forwarded to ``LVGNgramGenerator`` (presumably the n-gram
            order -- confirm against that class).
        words: Number of generation attempts. The returned list can be
            shorter because outputs starting with "forced" are discarded.
        method: Generation method key: 'b', 'n' or 't'.

    Returns:
        List of detokenized generated strings; empty when ``method`` is not
        one of the known keys.
    """
    tagged = []
    # The context manager guarantees the handle is closed even on error;
    # newline='' is what the csv module expects for its file objects.
    with open(file, 'r', newline='') as tagged_file:
        try:
            for row in csv.reader(tagged_file):
                tagged.append(tuple(row))
        except (csv.Error, UnicodeDecodeError):
            # Best effort, as before: keep the rows read before the
            # malformed one, but no longer swallow unrelated exceptions
            # such as KeyboardInterrupt.
            pass

    detok = StanfordDetokenizer()
    model = LVGNgramGenerator(tagged, n)
    methods = {'b': model.generate_without_pos, 'n': model.generate, 't': model.generate_alternative}

    generate = methods.get(method)
    if generate is None:
        # Unknown method: nothing can be generated, same result as before.
        return []

    final = []
    for _ in range(words):
        # Each attempt uses a random target length between 20 and 65 words.
        sentence = detok.detokenize(' '.join(generate(random.randint(20, 65))))
        if not sentence.startswith("forced"):
            final.append(sentence)
    return final
        
Ejemplo n.º 3
0
def twitter(file: str, n: int, words: int, method: str):
    """Return generated tweets built from a raw text file.

    Args:
        file: Path of the text file; its full content is tagged with
            ``StanfordTagger``.
        n: Value forwarded to ``LVGNgramGenerator`` (presumably the n-gram
            order -- confirm against that class).
        words: Number of generation attempts. The returned list can be
            shorter because outputs starting with "forced" are discarded.
        method: Generation method key: 'b', 'n' or 't'.

    Returns:
        List of detokenized generated strings; empty when ``method`` is not
        one of the known keys.
    """
    # Read inside a context manager so the handle is released even if
    # reading fails, and is not held open while the tagger runs.
    with open(file, 'r') as text_file:
        text = text_file.read()
    tagged = StanfordTagger(verbose=True).tag(text)

    detok = StanfordDetokenizer()
    model = LVGNgramGenerator(tagged, n)
    methods = {'b': model.generate_without_pos, 'n': model.generate, 't': model.generate_alternative}

    generate = methods.get(method)
    if generate is None:
        # Unknown method: nothing can be generated, same result as before.
        return []

    final = []
    for _ in range(words):
        # Each attempt uses a random target length between 20 and 65 words.
        tweet = detok.detokenize(' '.join(generate(random.randint(20, 65))))
        if not tweet.startswith("forced"):
            final.append(tweet)
    return final
Ejemplo n.º 4
0
def yelp_or_gutenberg(file: str, n: int, words: int, method: str):
    """Return generated sentences built from a pre-tagged CSV file.

    Args:
        file: Path of the CSV file; every row is loaded as a tuple.
        n: Value forwarded to ``LVGNgramGenerator`` (presumably the n-gram
            order -- confirm against that class).
        words: Number of generation attempts. The returned list can be
            shorter because outputs starting with "forced" are discarded.
        method: Generation method key: 'b', 'n' or 't'.

    Returns:
        List of detokenized generated strings; empty when ``method`` is not
        one of the known keys.
    """
    tagged = []
    # The context manager guarantees the handle is closed even on error;
    # newline='' is what the csv module expects for its file objects.
    with open(file, 'r', newline='') as tagged_file:
        try:
            for row in csv.reader(tagged_file):
                tagged.append(tuple(row))
        except (csv.Error, UnicodeDecodeError):
            # Best effort, as before: keep the rows read before the
            # malformed one, but no longer swallow unrelated exceptions
            # such as KeyboardInterrupt.
            pass

    detok = StanfordDetokenizer()
    model = LVGNgramGenerator(tagged, n)
    methods = {'b': model.generate_without_pos, 'n': model.generate, 't': model.generate_alternative}

    generate = methods.get(method)
    if generate is None:
        # Unknown method: nothing can be generated, same result as before.
        return []

    final = []
    for _ in range(words):
        # Each attempt uses a random target length between 20 and 65 words.
        sentence = detok.detokenize(' '.join(generate(random.randint(20, 65))))
        if not sentence.startswith("forced"):
            final.append(sentence)
    return final