Example #1
 def __init__(self, sentenceStr, position):
     self.string = sentenceStr
     # Lowercase the first character so sentence-initial
     # capitalisation doesn't produce a distinct term.
     if sentenceStr and sentenceStr[0].isupper():
         letters = list(sentenceStr)
         letters[0] = letters[0].lower()
         sentenceStr = "".join(letters)
     tokens = word_tokenise(sentenceStr)
     # Build a bag-of-words keyed on the hash of each standardised
     # term. Python's built-in hash() is randomised per process, so
     # these keys only line up with weights hashed in the same run.
     bow = defaultdict(int)
     for token in tokens:
         term = standardise(token)
         if term:
             hashed = hash(term)
             bow[hashed] += 1
     self.bagOfWords = bow
     self.position = position
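The constructor above depends on word_tokenise and standardise, which live elsewhere in the project and aren't shown. A minimal, self-contained sketch of the same bag-of-words construction, with stand-in versions of both helpers (hypothetical stubs, not the project's real implementations):

 from collections import defaultdict

 def word_tokenise(text):
     # Stand-in: plain whitespace split; the real tokeniser
     # presumably handles punctuation and contractions properly.
     return text.split()

 def standardise(token):
     # Stand-in: strip surrounding punctuation and lowercase.
     # Tokens that normalise to nothing come back as "" and are
     # skipped by the caller's truthiness check.
     return token.strip(".,;:!?\"'").lower()

 bow = defaultdict(int)
 for token in word_tokenise("the cat sat on the mat ."):
     term = standardise(token)
     if term:
         bow[hash(term)] += 1
 # bow now maps hash("the") -> 2, hash("cat") -> 1, and so on.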
Example #2
 def loadWeights(n):
     dfLoc = localPath("wiki_doc_freqs_trim.dat")
     # Read in the document frequencies first: standardisation
     # collapses some terms together, so their freqs must be summed
     # before any weighting.
     weights = defaultdict(int)
     for line in utf8open(dfLoc):
         term, freq = line.split("\t")
         term = standardise(term)
         if term:
             weights[hash(term)] += int(freq)
     # Turn the frequencies into IDF weights: idf = log10(n / df).
     # The key set doesn't change, so rewriting values while
     # iterating items() is safe.
     for term, freq in weights.items():
         idf = log(n / freq, 10)
         # Truncated to an int, matching the defaultdict(int) store.
         weights[term] = int(idf)
     IDFWeightedDocument.weights = weights
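To make the weighting concrete: with n = 1000000 documents, a term found in 1000 of them gets log10(1000000 / 1000) = 3, while a term found in every document gets 0 and carries no discriminative value. A quick check with illustrative numbers (not taken from the actual wiki frequency file):

 from math import log

 n = 1000000
 for df in (1000000, 10000, 1000, 10):
     # idf = log10(n / df): rarer terms get larger weights.
     print(df, log(n / df, 10))
 # 1000000 0.0
 # 10000 2.0
 # 1000 3.0
 # 10 5.0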
Example #3
 def _loadIDFs(self, n):
     dfLoc = localPath('wiki_doc_freqs_trim.dat')
     dfs = collections.defaultdict(int)
     # utf8open is a convenience wrapper around codecs.open.
     lines = utf8open(dfLoc).read().strip().split('\n')
     # Read in the document frequencies first: standardisation
     # collapses some tokens together, so their freqs must be
     # summed before any weighting.
     for line in lines:
         token, freq = line.split('\t')
         token = standardise(token)
         if token:
             dfs[token] += int(freq)
     # Turn the frequencies into IDF weights: idf = log10(n / df).
     # Unlike Example #2, this version keys on the raw token and
     # keeps the full float value.
     idfs = collections.defaultdict(float)
     for token, freq in dfs.items():
         idf = log(n / freq, 10)
         idfs[token] = idf
     return idfs
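Once weights are loaded, a document score is typically a weighted sum over its bag-of-words. The scoring helper below is my own illustration of how the pieces from these examples could fit together, not code from the source project; it assumes the bag and the IDF table are keyed the same way (both on raw tokens, or both on hash() values produced in the same process, since Python randomises string hashing per run):

 import collections

 def idf_score(bow, idfs):
     # Sum each term's IDF weight times its in-document count.
     # Unknown terms fall back to the defaultdict's 0.0.
     return sum(count * idfs[term] for term, count in bow.items())

 idfs = collections.defaultdict(float, {"cat": 3.0, "mat": 2.5})
 bow = {"the": 2, "cat": 1, "mat": 1}
 print(idf_score(bow, idfs))  # 5.5 -- "the" contributes nothing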