def _main():
    """
    Test client: read all strings from standard input, pass them to
    reverse() (which is expected to reorder the array in place), and
    write them to standard output on one line, separated by single
    spaces. Empty input produces an empty line instead of raising
    IndexError.
    """
    a = stdio.readAllStrings()
    reverse(a)
    if not a:
        # Nothing was read: a[-1] below would raise IndexError, so
        # just terminate the (empty) output line.
        stdio.writeln()
        return
    # Every item but the last is followed by a separating space.
    for v in a[:-1]:
        stdio.writef('%s ', v)
    # The last item ends the line without a trailing space.
    stdio.writeln(a[-1])
def _main():
    """
    Test client: read all strings from standard input, pass them to
    count_word_frequencies(), and hand that result to
    write_word_frequencies().
    """
    tokens = stdio.readAllStrings()
    frequencies = count_word_frequencies(tokens)
    write_word_frequencies(frequencies)
def main():
    """
    Test client: read all strings from standard input, pass them to
    sort(), and write them to standard output on one line, each
    followed by a space. The line is always terminated, even when no
    strings were read.
    """
    strings = stdio.readAllStrings()
    sort(strings)
    for item in strings:
        stdio.write(item + ' ')
    stdio.writeln()
#----------------------------------------------------------------------- # frequencycount.py #----------------------------------------------------------------------- import sys import stdio from counter import Counter # Read words from standard input, and write the frequency counts # of the words to standard output. words = stdio.readAllStrings() # Previous doesn't eliminate punctuation chars, but this does: # import re # s = stdio.readAll() # regExp = re.compile(r'\w+') # One or more alphanumeric chars # words = regExp.findall(s) words.sort() # or merge.sort(words) zipf = [] for i in range(len(words)): if (i == 0) or (words[i] != words[i - 1]): entry = Counter(words[i], len(words)) zipf += [entry] zipf[len(zipf) - 1].increment() zipf.sort() # or merge.sort(zipf) zipf.reverse() for entry in zipf:
import stdio
from instream import InStream
from sketch import Sketch

#-----------------------------------------------------------------------

# Accept integers k and d as command-line arguments. Read a document
# list from standard input, compute profiles based on k-gram
# frequencies for all the documents, and write a matrix of similarity
# measures between all pairs of documents. d is the dimension of the
# profiles.

# NOTE(review): sys and stdarray are used below but are not imported in
# the visible part of this file -- confirm they are imported earlier.

k = int(sys.argv[1])
d = int(sys.argv[2])

filenames = stdio.readAllStrings()
count = len(filenames)
sketches = stdarray.create1D(count)

# Build one Sketch per document from the full text of its file.
for index in range(count):
    contents = InStream(filenames[index]).readAll()
    sketches[index] = Sketch(contents, k, d)

# Header row: a leading pad, then each file name cut to four characters
# and right-aligned in an eight-character column.
stdio.write(' ')
for name in filenames:
    stdio.writef('%8.4s', name)
stdio.writeln()

# One matrix row per document: its (truncated) name followed by its
# similarity to every document, itself included.
# NOTE(review): no newline is written after a row in the visible code --
# confirm the row is terminated further on in the file.
for row in range(count):
    stdio.writef('%.4s', filenames[row])
    for col in range(count):
        stdio.writef('%8.2f', sketches[row].similarTo(sketches[col]))
import sys import stdio from bst import OrderedSymbolTable # Accept integers minLength and minCount as command-line arguments. Read # words from standard input until end-of-file. Create an index # indicating where each word appears within standard input. Consider # only words that have at least minLength characters. Then write # the index to standard output. Write only words that occur at # least minCount times. minLength = int(sys.argv[1]) minCount = int(sys.argv[2]) words = stdio.readAllStrings() bst = OrderedSymbolTable() for i in range(len(words)): word = words[i] if len(word) >= minLength: if not word in bst: bst[word] = [] bst[word] += [i] for word in bst: occurrences = bst[word] if len(occurrences) >= minCount: stdio.write(word + ': ') for occurrence in occurrences: