Ejemplo n.º 1
0
    def parse_file(self, fname):
        """Feed every line of ``fname`` to the parser, then call ``measure()``.

        ``fname`` is treated as a remote resource when it contains ``'://'``
        and is then fetched through ``open_as_mozilla``; otherwise it is
        opened as a local file with ``open``.  Each line is passed, in order,
        to ``self.parse_una_cadena``, and ``self.measure()`` runs once after
        the whole source has been consumed.

        The handle is closed even when processing a line raises; in that
        case the exception propagates and ``measure()`` is not called.
        """
        if '://' in fname:
            # Single-argument print(...) emits the same text whether print
            # is a statement or a function.
            print("Recuperando archivo, puede tardar...")
            arch = open_as_mozilla(fname)
        else:
            arch = open(fname)

        # try/finally rather than ``with``: the object returned by
        # open_as_mozilla is only relied on for iteration and close().
        try:
            for linea in arch:
                self.parse_una_cadena(linea)
        finally:
            arch.close()

        self.measure()
Ejemplo n.º 2
0
def corpus_compiler(lista):
    """Fetch every source in ``lista`` and return their contents concatenated.

    Each entry is opened with ``open_as_mozilla`` and read in full; the
    pieces are concatenated in list order with no separator.  A progress
    line naming the part of the entry after its last ``/`` is printed
    before each fetch.  An empty ``lista`` yields ``''``.
    """
    partes = []
    for fn in lista:
        # rfind returns -1 when there is no '/', so the slice then keeps the
        # whole entry.
        print("abriendo %s" % (fn[fn.rfind("/") + 1:]))
        arch = open_as_mozilla(fn)
        try:
            partes.append(arch.read())
        finally:
            # Release the handle even if read() fails.
            arch.close()
    # Join once instead of growing a string with += on every iteration.
    return ''.join(partes)


#esfiles = ['http://www.gutenberg.org/cache/epub/33885/pg33885.txt', 
#           'http://www.gutenberg.org/cache/epub/26508/pg26508.txt', 
#           'http://www.gutenberg.org/cache/epub/26231/pg26231.txt']
#
#
#defiles = ['http://www.gutenberg.org/files/14225/14225-0.txt', 
#           'http://www.gutenberg.org/files/16880/16880-0.txt',
#           'http://www.gutenberg.org/cache/epub/39669/pg39669.txt']
#
#enfiles = ['http://www.gutenberg.org/dirs/etext05/cfgsh10.txt',
#           'http://www.gutenberg.org/cache/epub/76/pg76.txt', 
#           'http://www.gutenberg.org/cache/epub/1661/pg1661.txt']