Ejemplo n.º 1
0
    def __init__(self, s=None):
        """ @param[in] s optional SpellSuggestion object to use
                         for the spell checking; a fresh
                         SpellSuggestion() is created if omitted
        """
        WordCleanupModule.__init__(self)

        # "is None" (identity test) per PEP 8 -- "== None" could be
        # hijacked by a custom __eq__ on the passed object
        if s is None:
            self.s = SpellSuggestion()
        else:
            self.s = s
Ejemplo n.º 2
0
Archivo: text.py Proyecto: k3njiy/ewrt
    def __init__(self, s=None):
        """ @param[in] s optional SpellSuggestion object to use
                         for the spell checking; a fresh
                         SpellSuggestion() is created if omitted
        """
        WordCleanupModule.__init__(self)

        # use an identity test against None (PEP 8); "s==None" is both
        # non-idiomatic and unreliable if __eq__ is overridden
        self.s = SpellSuggestion() if s is None else s
Ejemplo n.º 3
0
class FixSpelling(WordCleanupModule):
    """ @class FixSpelling
        fixes spelling mistakes using a SpellSuggestion backend """

    def __init__(self, s=None):
        """ @param[in] s optional SpellSuggestion object to use
                         for the spell checking; a fresh
                         SpellSuggestion() is created if omitted
        """
        WordCleanupModule.__init__(self)

        # identity comparison per PEP 8 ("is None", not "== None")
        if s is None:
            self.s = SpellSuggestion()
        else:
            self.s = s

    def __call__(self, l):
        """ @param[in] l a list of words
            @returns the list with each word replaced by the suggested
                     correction (element [1] of SpellSuggestion.correct)
        """
        return [self.s.correct(w)[1] for w in l]

    def numMistakesFixed(self, l):
        """ @param[in] l a list of words
            @returns the number of mistakes fixed by the
                     spelling module """
        # count directly instead of materializing a throw-away list
        return sum(1 for w in l if self.s.correct(w)[1] != w)
Ejemplo n.º 4
0
Archivo: text.py Proyecto: k3njiy/ewrt
class FixSpelling(WordCleanupModule):
    """ @class FixSpelling
        fixes spelling mistakes using a SpellSuggestion backend """

    def __init__(self, s=None):
        """ @param[in] s optional SpellSuggestion object to use
                         for the spell checking; a fresh
                         SpellSuggestion() is created if omitted
        """
        WordCleanupModule.__init__(self)

        # "is None" identity test per PEP 8 (was "s==None")
        self.s = SpellSuggestion() if s is None else s

    def __call__(self, l):
        """ @param[in] l a list of words
            @returns the list with each word replaced by the suggested
                     correction (element [1] of SpellSuggestion.correct)
        """
        return [self.s.correct(w)[1] for w in l]

    def numMistakesFixed(self, l):
        """ @param[in] l a list of words
            @returns the number of mistakes fixed by the
                     spelling module """
        # count matches lazily instead of building an intermediate list
        return sum(1 for w in l if self.s.correct(w)[1] != w)
Ejemplo n.º 5
0
from rdflib import Namespace
from collections import defaultdict
from itertools import izip_longest
from operator import itemgetter
from csv import writer

# a directory containing all cxl ontology files
ONTOLOGY_DIR = "/home/albert/data/ac/research/inwork/pakdd2011-ontology-evaluation/data/ontologies/risk/week2"

# required namespaces
NS_RDFS = Namespace("http://www.w3.org/2000/01/rdf-schema#")
NS_WL   = Namespace("http://www.weblyzard.com/2005/03/31/wl#")

# cleanup pipeline
# bzip2-compressed text corpus used to train the domain-specific spell
# checker (NOTE(review): path is relative to the current working dir)
CUSTOM_RISK_CORPUS = "risk-corpus.text.bz2"
s = SpellSuggestion()
s.verbose=True
# train the spell checker on the word list extracted from the corpus
s.train( SpellSuggestion.words( BZ2File( CUSTOM_RISK_CORPUS ).read() ) )

# compile cleanup queue

# string-level cleanup: replace the multiplication sign (u'\xd7') with a
# space, then lower-case (Python 2: relies on the `unicode` builtin),
# strip possessives and fix dash spacing
strCleanupPipe = (lambda s:s.replace(u'\xd7', " "), unicode.lower, RemovePossessive(), FixDashSpace() )
# phrase-level cleanup: split enumerations, multi-terms and bracketed
# explanations into separate phrases
phrCleanupPipe = (SplitEnumerations(), SplitMultiTerms(), SplitBracketExplanations() )
# word-level cleanup: spelling fixes (using the corpus-trained checker
# above) followed by punctuation/bracket removal
fs = FixSpelling(s)
wrdCleanupPipe = (fs, RemovePunctationAndBrackets(),)
phraseCleanup = PhraseCleanup(strCleanupPipe, phrCleanupPipe, wrdCleanupPipe )


def extractSPO(rdfOntology):
    """ extracts a set of all relations present in the given ontology
        @param[in] rdfOntology    the rdflib.Graph object representing the ontology
Ejemplo n.º 6
0
from collections import defaultdict
from itertools import izip_longest
from operator import itemgetter
from csv import writer

# a directory containing all cxl ontology files
ONTOLOGY_DIR            = "/home/albert/data/ac/research/inwork/pakdd2011-ontology-evaluation/data/ontologies/risk/week2"
# text file listing the concepts considered most important
# (presumably one term per line -- verify against the consumer)
IMPORTANT_CONCEPTS_LIST = "top-terms.text"

# required namespaces
NS_RDFS = Namespace("http://www.w3.org/2000/01/rdf-schema#")
NS_WL   = Namespace("http://www.weblyzard.com/2005/03/31/wl#")

# cleanup pipeline
# bzip2-compressed text corpus used to train the domain-specific spell
# checker (NOTE(review): path is relative to the current working dir)
CUSTOM_RISK_CORPUS = "risk-corpus.text.bz2"
s = SpellSuggestion()
s.verbose=True
# train the spell checker on the word list extracted from the corpus
s.train( SpellSuggestion.words( BZ2File( CUSTOM_RISK_CORPUS ).read() ) )

# compile cleanup queue

# string-level cleanup: replace the multiplication sign (u'\xd7') with a
# space, then lower-case (Python 2: relies on the `unicode` builtin),
# strip possessives and fix dash spacing
strCleanupPipe = (lambda s:s.replace(u'\xd7', " "), unicode.lower, RemovePossessive(), FixDashSpace() )
# phrase-level cleanup: split enumerations, multi-terms and bracketed
# explanations into separate phrases
phrCleanupPipe = (SplitEnumerations(), SplitMultiTerms(), SplitBracketExplanations() )
# word-level cleanup: spelling fixes (using the corpus-trained checker
# above) followed by punctuation/bracket removal
fs = FixSpelling(s)
wrdCleanupPipe = (fs, RemovePunctationAndBrackets(),)
phraseCleanup = PhraseCleanup(strCleanupPipe, phrCleanupPipe, wrdCleanupPipe )


def extractSPO(rdfOntology):
    """ extracts a set of all relations present in the given ontology
        @param[in] rdfOntology    the rdflib.Graph object representing the ontology
Ejemplo n.º 7
0
from csv import writer

from eWRT.input.conv.cxl import XCL2RDF
from eWRT.input.clean.text import *
from eWRT.stat.string.spelling import SpellSuggestion

# a directory containing all cxl ontology files
# ONTOLOGY_DIR = "/home/albert/data/ac/research/inwork/pakdd2011-ontology-evaluation/data/ontologies/risk/week2"

# required namespaces
NS_RDFS = Namespace("http://www.w3.org/2000/01/rdf-schema#")
NS_WL = Namespace("http://www.weblyzard.com/2005/03/31/wl#")

# cleanup pipeline
# bzip2-compressed text corpus used to train the domain-specific spell
# checker (NOTE(review): path is relative to the current working dir)
CUSTOM_RISK_CORPUS = "risk-corpus.text.bz2"
s = SpellSuggestion()
s.verbose = True
# train the spell checker on the word list extracted from the corpus
s.train(SpellSuggestion.words(BZ2File(CUSTOM_RISK_CORPUS).read()))

# compile cleanup queue

# string-level cleanup: replace the multiplication sign (u'\xd7') with a
# space, lower-case (Python 3 flavour of this snippet: uses str.lower),
# strip possessives and fix dash spacing
strCleanupPipe = (lambda s: s.replace(u'\xd7', " "), str.lower,
                  RemovePossessive(), FixDashSpace())
# phrase-level cleanup: split enumerations, multi-terms and bracketed
# explanations into separate phrases
phrCleanupPipe = (SplitEnumerations(), SplitMultiTerms(),
                  SplitBracketExplanations())
# word-level cleanup: spelling fixes (using the corpus-trained checker
# above) followed by punctuation/bracket removal
fs = FixSpelling(s)
wrdCleanupPipe = (
    fs,
    RemovePunctationAndBrackets(),
)
phraseCleanup = PhraseCleanup(strCleanupPipe, phrCleanupPipe, wrdCleanupPipe)
Ejemplo n.º 8
0
import logging
# log everything (DEBUG and up) for the term-test run to a fixed file
logging.basicConfig(filename="/tmp/termtest.log",level=logging.DEBUG)
log = logging.getLogger("geoTEF.examples.ontology.termTest")

# required namespaces
NS_RDFS = Namespace("http://www.w3.org/2000/01/rdf-schema#")
NS_WL   = Namespace("http://www.weblyzard.com/2005/03/31/wl#")

SOURCE_DIR         = "./source"
RESULT_DIR         = "./result"
# bzip2-compressed text corpus used to train the spell checker
CUSTOM_RISK_CORPUS = "risk-corpus.text.bz2"

# basic concept cleanup
# strip possessive "'s", collapse whitespace runs to single spaces and
# lower-case the concept label
cleanup = lambda c: " ".join(c.replace("'s", "").split() ).lower()

# compile customized spelling suggestions
s = SpellSuggestion()
s.verbose=True
# train the spell checker on the word list extracted from the corpus
s.train( SpellSuggestion.words( BZ2File( CUSTOM_RISK_CORPUS ).read() ) )

# compile cleanup queue

# string-level cleanup (Python 2: relies on the `unicode` builtin)
strCleanupPipe = (unicode.lower, RemovePossessive(), FixDashSpace() )
# phrase-level cleanup: split enumerations, multi-terms and bracketed
# explanations into separate phrases
phrCleanupPipe = (SplitEnumerations(), SplitMultiTerms(), SplitBracketExplanations() )
# word-level cleanup; NOTE(review): uses an untrained FixSpelling()
# instead of the corpus-trained `s` above -- possibly unintended
wrdCleanupPipe = (FixSpelling(), RemovePunctationAndBrackets(),)
phraseCleanup = PhraseCleanup(strCleanupPipe, phrCleanupPipe, wrdCleanupPipe )

def extractConceptSet(rdfOntology):
    """ extracts a set of all concepts present in the given ontology
        @param[in] rdfOntology    the rdflib.Graph object representing the ontology
        @returns a set of all concepts present in the given ontology 
    """
# logging
import logging
# overwrite (filemode="w") the evaluator log on every run, DEBUG and up
logging.basicConfig(filename="/tmp/evaluator.log", filemode="w", level=logging.DEBUG)
log = logging.getLogger("geoTEF.examples.ontology.evaluator")

# required namespaces
NS_RDFS = Namespace("http://www.w3.org/2000/01/rdf-schema#")
NS_WL   = Namespace("http://www.weblyzard.com/2005/03/31/wl#")

SOURCE_DIR = "./source"
RESULT_DIR = "./result"
# bzip2-compressed text corpus used to train the spell checker
CUSTOM_RISK_CORPUS = "risk-corpus.text.bz2"
PMI_CUTOFF_LEVEL   = 0.5

# compile customized spelling suggestions
s = SpellSuggestion()
s.verbose=True
# train the spell checker on the word list extracted from the corpus
s.train( SpellSuggestion.words( BZ2File( CUSTOM_RISK_CORPUS ).read() ) )

# compile cleanup queue

# cleans up unicode characters used in the concept names
# FIX(review): the original source contained a raw character broken
# across two lines inside the string literal (a syntax error); given
# the helper's name "cleanXA" it is restored here as the non-breaking
# space u'\xa0' -- TODO confirm against the upstream file
cleanXA = lambda x: x.replace(u"\xa0", " ")

# string-level cleanup (Python 2: relies on the `unicode` builtin)
strCleanupPipe = (unicode.lower, cleanXA, RemovePossessive(), FixDashSpace() )
# phrase-level cleanup: split enumerations, multi-terms and bracketed
# explanations into separate phrases
phrCleanupPipe = (SplitEnumerations(), SplitMultiTerms(), SplitBracketExplanations() )
# word-level cleanup; NOTE(review): uses an untrained FixSpelling()
# instead of the corpus-trained `s` above -- possibly unintended
wrdCleanupPipe = (FixSpelling(), RemovePunctationAndBrackets(),)
phraseCleanup = PhraseCleanup(strCleanupPipe, phrCleanupPipe, wrdCleanupPipe )


def extractSPO(rdfOntology):
    """ extracts a set of all relations present in the given ontology