# --- Example 1 (scraped snippet separator) ---
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-

from math import log


#import corpus
from corpus import getTextFromFile, makeFrequencyProfile, tokenize, relativizeFP

# Build a relative-frequency profile for each training corpus.
mydict = makeFrequencyProfile(tokenize(getTextFromFile("pg873.txt")))
relativizeFP(mydict)

mysportsdict = makeFrequencyProfile(tokenize(getTextFromFile("sports.txt")))
relativizeFP(mysportsdict)

# Unknown text to classify against the two profiles.
unktokens = tokenize("""
The young King was eating pomegranates and talking about his soul and other emotional issues.
""")

# Floor probability for tokens unseen in a profile: log(0) is undefined,
# so unseen tokens contribute a large negative (but finite) log-probability.
SMOOTH = 0.00000000000001

# Sum log-probabilities of the unknown tokens under each profile
# (log space avoids underflow from multiplying many small numbers).
probpomeg = 0.0
probsports = 0.0
for token in unktokens:
    probpomeg += log(mydict.get(token, SMOOTH))
    probsports += log(mysportsdict.get(token, SMOOTH))

# BUG FIX: the original `else:` branch was missing its body, which is a
# SyntaxError; report the alternative classification there.
if probpomeg > probsports:
    print("This text is probably House of Pomeg.")
else:
    print("This text is probably sports.")
# --- Example 2 (scraped snippet separator) ---
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from corpus import relativizeFP, getTextFromFile, tokenize, removeJunk
from operator import itemgetter


mytokens = tokenize(getTextFromFile("pg873.txt"))

# BUG FIX: `junk` was referenced below but only defined in commented-out
# code, so the comprehension raised NameError; define it before use.
# Whitespace-only tokens are treated as junk.
junk = " \n\t"

# Keep every token that is not a junk character.
mytokens = [tok for tok in mytokens if tok not in junk]
def getMeTheNGramModel(tokens, n):
   # Presumably builds an n-gram frequency profile from `tokens` — only the
   # initial setup is visible; NOTE(review): this definition is truncated by
   # the snippet boundary, so the counting logic is missing here.
   mydict = {}
   position = 0
# --- Example 3 (scraped snippet separator; the opening of the stopwordsEN list is missing) ---
    'but', 'by', 'can', 'cannot', 'could', 'dear', 'did', 'do', 'does',
    'either', 'else', 'ever', 'every', 'for', 'from', 'get', 'got', 'had',
    'has', 'have', 'he', 'her', 'hers', 'him', 'his', 'how', 'however', 'i',
    'if', 'in', 'into', 'is', 'it', 'its', 'just', 'least', 'let', 'like',
    'likely', 'may', 'me', 'might', 'most', 'must', 'my', 'neither', 'no',
    'nor', 'not', 'of', 'off', 'often', 'on', 'only', 'or', 'other', 'our',
    'own', 'rather', 'said', 'say', 'says', 'she', 'should', 'since', 'so',
    'some', 'than', 'that', 'the', 'their', 'them', 'then', 'there', 'these',
    'they', 'this', 'tis', 'to', 'too', 'twas', 'us', 'wants', 'was', 'we',
    'were', 'what', 'when', 'where', 'which', 'while', 'who', 'whom', 'why',
    'will', 'with', 'would', 'yet', 'you', 'your'
]
# Add capitalized variants so sentence-initial stop words are matched too.
stopwordsEN = stopwordsEN + [word.capitalize() for word in stopwordsEN]
#print(stopwordsEN)

mytokens = tokenize(getTextFromFile("pg873.txt"))

# Drop empty-string tokens, then drop stop words.
mytokens = [tok for tok in mytokens if tok]
#print(mytokens)

mytokens = [tok for tok in mytokens if tok not in stopwordsEN]
#print(mytokens)

# Frequency profiles over single tokens and over adjacent token pairs.
unigrams = getNGramModel(mytokens, 1)
bigrams = getNGramModel(mytokens, 2)

#print(unigrams)

# prettyPrintFRP(bigrams, myreverse=False)
#!/usr/bin/env python3


from corpus import getTextFromFile, tokenize, makeFrequencyProfile, removeJunk, prettyPrintFRP


# Split the spam corpus into 5 cross-validation folds.
# NOTE(review): loadSpam, spamPath and spamList are defined elsewhere in the
# project — confirm they are in scope before this runs.
for fold in range(1, 6):
    loadSpam.split_data(fold, 5, spamPath)

# BUG FIX: the original loop rebound `mytokens` on every file, so the
# frequency profile below only covered the LAST file in spamList;
# accumulate tokens across all files instead.
mytokens = []
for fname in spamList:
    mytokens.extend(tokenize(getTextFromFile(fname)))

mydict = makeFrequencyProfile(mytokens)

# Punctuation and separator characters to strip from the profile.
junk = " ,;:-+=()[]'\"?!%.<>"

removeJunk(mydict, junk)

# Remove the empty-string token if it survived junk removal.
mydict.pop("", None)

prettyPrintFRP(mydict)