예제 #1
0
def preprocess_file(file_name):
    """Aggregate the zipped input for *file_name* into two summary files.

    Reads the first member of the zip archive at
    ``server.zip_file_path(file_name)`` line by line. Each line is split on
    whitespace; field 0 is treated as a word, field 1 as a second word and
    field 3 as an integer count.

    Consecutive lines sharing field 0 are summed and handed to
    ``append_word_to_file`` together with the count-file handle.
    Consecutive lines sharing both field 0 and field 1 are summed and handed
    to ``append_word_to_short_file`` together with the short-file handle.

    All handles are managed by ``with`` blocks so they are closed even if a
    malformed line raises part-way through.

    Args:
        file_name: Name passed to the ``server`` path helpers to locate the
            zip archive and the two output files.

    Raises:
        IndexError: If the archive has no members or a line has fewer than
            four whitespace-separated fields.
        ValueError: If field 3 of a line is not an integer.
    """
    with open(server.short_file_path(file_name), 'w') as sf, \
            open(server.count_file_path(file_name), 'w') as output_file, \
            zipfile.ZipFile(server.zip_file_path(file_name), 'r') as z:
        # Only the first archive member is processed.
        internal_name = z.namelist()[0]
        with z.open(internal_name) as f:
            word = ''
            word2 = ''
            count = 0
            count_pair = 0

            for line in f:
                tokens = line.split()
                if tokens[0] != word:
                    # First field changed: flush both running totals.
                    # NOTE(review): on the very first line this flushes the
                    # empty initial word with a zero count; presumably the
                    # helpers ignore that entry -- confirm.
                    append_word_to_file(output_file, word, count)
                    append_word_to_short_file(sf, word, word2, count_pair)
                    count = 0
                    word = tokens[0]
                    word2 = tokens[1]
                    count_pair = 0
                elif tokens[1] != word2:
                    # Same first field, new second field: flush pair total.
                    append_word_to_short_file(sf, word, word2, count_pair)
                    word2 = tokens[1]
                    count_pair = 0
                occurrences = int(tokens[3])
                count += occurrences
                count_pair += occurrences

            # Flush the totals accumulated for the final group.
            append_word_to_file(output_file, word, count)
            append_word_to_short_file(sf, word, word2, count_pair)
예제 #2
0
def get_counts_from_file(file_name, word):
    """Return the total count recorded for *word* in a count file.

    Reads ``server.count_file_path(file_name)`` line by line. Each line must
    split into exactly two whitespace-separated fields: a word and an integer
    count. Counts on lines whose word equals *word* are summed.

    The file is opened in a ``with`` block so the handle is released even if
    a malformed line raises.

    Args:
        file_name: Name passed to ``server.count_file_path`` to locate the
            count file.
        word: The word whose counts are summed.

    Returns:
        The summed count as an int; 0 if no line matched.

    Raises:
        ValueError: If a line does not split into exactly two fields, or the
            count field is not an integer.
    """
    count = 0
    with open(server.count_file_path(file_name), 'r') as count_file:
        for line in count_file:
            candidate_word, candidate_count = line.split()
            if candidate_word == word:
                count += int(candidate_count)
            elif is_word_less_than_candidate(word, candidate_word):
                # Stop scanning early; presumably the file is sorted so no
                # later line can match -- confirm against the writer.
                break
    return count