def _main():
    """Read all strings from standard input and write them on one line.

    The strings are passed through ``reverse`` (defined elsewhere;
    presumably it reverses the list in place, since its return value is
    not used) and then written separated by single spaces and followed
    by a newline.

    Empty input produces just a newline instead of raising IndexError
    on ``a[-1]``.
    """
    a = stdio.readAllStrings()
    reverse(a)
    # join() yields '' for an empty list, so no special case is needed,
    # and for non-empty input the output matches "item item ... last".
    stdio.writeln(' '.join(a))
コード例 #2
0
ファイル: word_frequencies.py プロジェクト: Dpinchuk20/CS110
def _main():
    """Read all strings from standard input, count them, and report.

    The counting and the reporting are delegated to
    ``count_word_frequencies`` and ``write_word_frequencies``, both of
    which are defined elsewhere.
    """
    frequencies = count_word_frequencies(stdio.readAllStrings())
    write_word_frequencies(frequencies)
コード例 #3
0
ファイル: merge.py プロジェクト: davidhuizhou/python
def main():
    """Read all strings from standard input, sort them, and write them.

    Every string is written followed by one space (so the line ends
    with a trailing space), and the line is terminated by a newline.
    ``sort`` is defined elsewhere; presumably it sorts the list in
    place, since its return value is not used.
    """
    tokens = stdio.readAllStrings()
    sort(tokens)
    # Build the whole line first; each token keeps its trailing space.
    stdio.write(''.join(token + ' ' for token in tokens))
    stdio.writeln()
コード例 #4
0
def main():
    """Read all strings from standard input, sort them, and write them.

    Every string is written followed by one space (so the line ends
    with a trailing space), and the line is terminated by a newline.
    ``sort`` is defined elsewhere; presumably it sorts the list in
    place, since its return value is not used.
    """
    tokens = stdio.readAllStrings()
    sort(tokens)
    # Build the whole line first; each token keeps its trailing space.
    stdio.write(''.join(token + ' ' for token in tokens))
    stdio.writeln()
コード例 #5
0
#-----------------------------------------------------------------------
# frequencycount.py
#-----------------------------------------------------------------------

import sys
import stdio
from counter import Counter

# Read words from standard input, and write the frequency counts
# of the words to standard output.

words = stdio.readAllStrings()

# The strings are used exactly as read, so punctuation characters are
# not eliminated. This alternative keeps only the \w+ runs:
#   import re
#   s = stdio.readAll()
#   regExp = re.compile(r'\w+') # One or more alphanumeric chars
#   words = regExp.findall(s)

# Sorting puts equal words next to each other, so every distinct word
# forms one contiguous run.
words.sort()  # or merge.sort(words)

# Walk the sorted words: start a new Counter at the beginning of each
# run, and increment the most recently created Counter once per word.
zipf = []
for i, word in enumerate(words):
    if i == 0 or word != words[i - 1]:
        zipf.append(Counter(word, len(words)))
    zipf[-1].increment()

# Sort, then reverse. This is deliberately kept as two steps:
# sort(reverse=True) would leave equal elements in a different order.
# NOTE(review): presumably Counter objects compare by their count --
# confirm against counter.py.
zipf.sort()  # or merge.sort(zipf)
zipf.reverse()
for entry in zipf:
コード例 #6
0
import sys

import stdio
from instream import InStream
from sketch import Sketch

#-----------------------------------------------------------------------

# Accept integers k and d as command-line arguments. Read a document
# list from standard input, compute profiles based on k-gram
# frequencies for all the documents, and write a matrix of similarity
# measures between all pairs of documents. d is the dimension of the
# profiles.

# sys must be imported for sys.argv; it was missing from this script.
k = int(sys.argv[1])
d = int(sys.argv[2])

filenames = stdio.readAllStrings()

# One Sketch per document, in the same order as filenames. Building the
# list directly removes the need for the (un-imported) stdarray module.
sketches = [Sketch(InStream(filename).readAll(), k, d)
            for filename in filenames]

# Header row: four spaces over the row-label column, then each filename
# cut to at most four characters and right-aligned in eight columns.
stdio.write('    ')
for filename in filenames:
    stdio.writef('%8.4s', filename)
stdio.writeln()

# One row per document: its label, then its similarity to every document.
for filename, sketch in zip(filenames, sketches):
    stdio.writef('%.4s', filename)
    for other in sketches:
        stdio.writef('%8.2f', sketch.similarTo(other))
    # End the row; without this the whole matrix lands on a single line.
    stdio.writeln()
コード例 #7
0
ファイル: index.py プロジェクト: davidhuizhou/python
import sys
import stdio
from bst import OrderedSymbolTable

# Accept integers minLength and minCount as command-line arguments. Read
# words from standard input until end-of-file. Create an index
# indicating where each word appears within standard input. Consider
# only words that have at least minLength characters. Then write
# the index to standard output. Write only words that occur at
# least minCount times.

# Thresholds taken from the command line.
minLength = int(sys.argv[1])
minCount = int(sys.argv[2])

words = stdio.readAllStrings()

# Maps each word of at least minLength characters to the list of
# positions (indices into words) at which it occurs.
bst = OrderedSymbolTable()

for i, word in enumerate(words):
    if len(word) < minLength:
        continue  # too short to be indexed
    if word not in bst:
        bst[word] = []
    bst[word] += [i]

for word in bst:
    occurrences = bst[word]
    if len(occurrences) >= minCount:
        stdio.write(word + ': ')
        for occurrence in occurrences:
コード例 #8
0
import sys

import stdio
from instream import InStream
from sketch import Sketch

#-----------------------------------------------------------------------

# Accept integers k and d as command-line arguments. Read a document
# list from standard input, compute profiles based on k-gram
# frequencies for all the documents, and write a matrix of similarity
# measures between all pairs of documents. d is the dimension of the
# profiles.

# sys must be imported for sys.argv; it was missing from this script.
k = int(sys.argv[1])
d = int(sys.argv[2])

filenames = stdio.readAllStrings()

# One Sketch per document, in the same order as filenames. Building the
# list directly removes the need for the (un-imported) stdarray module.
sketches = [Sketch(InStream(filename).readAll(), k, d)
            for filename in filenames]

# Header row: four spaces over the row-label column, then each filename
# cut to at most four characters and right-aligned in eight columns.
stdio.write('    ')
for filename in filenames:
    stdio.writef('%8.4s', filename)
stdio.writeln()

# Write one row per document: a label (at most the first four characters
# of the filename) followed by its similarity to every document,
# each formatted with two decimals in an eight-column field.
# NOTE(review): the visible lines never write a newline after a row --
# confirm whether this loop continues beyond this excerpt.
for i in range(len(filenames)):
    stdio.writef('%.4s', filenames[i])
    for j in range(len(filenames)):
        stdio.writef('%8.2f', sketches[i].similarTo(sketches[j]))