Esempio n. 1
0
 def test_tabs(self):
     """Words separated by tab characters are counted individually."""
     lyrics = ('rah rah ah ah ah\troma roma ma\tga ga oh la la\t'
               'want your bad romance')
     expected = {
         'rah': 2, 'ah': 3, 'roma': 2, 'ma': 1, 'ga': 2, 'oh': 1, 'la': 2,
         'want': 1, 'your': 1, 'bad': 1, 'romance': 1,
     }
     self.assertEqual(count_words(lyrics), expected)
 def test_word_count_passage(self):
     """A multi-line passage is expected to yield a total of 56 words."""
     passage = (
         "The number of orderings of the 52 cards in a deck of cards "
         "is so great that if every one of the almost 7 billion people alive "
         "today dealt one ordering of the cards per second, it would take "
         "2.5 * 10**40 times the age of the universe to order the cards in every "
         "possible way."
     )
     self.assertEqual(word_count.count_words(passage), 56)
Esempio n. 3
0
    def callback(ch, method, properties, body):
        """Handle one queued message: count the words of an uploaded document.

        ``body`` is parsed as JSON and must provide the keys ``s3_key``,
        ``document_slug`` and ``time_uploaded``. The document text is read
        with ``urllib2.urlopen`` from the location returned by
        ``get_s3_path``, counted with ``count_words``, and the JSON-encoded
        result is stored through ``update_file_upload_meta``.

        ``ch``, ``method`` and ``properties`` are accepted but not used here.
        """
        # A 1-tuple keeps %-formatting correct even if body is itself a tuple.
        # The parenthesised single-argument form prints the same text whether
        # print is a statement or a function.
        print(" [x] Received %r" % (body,))
        document_meta = json.loads(body)
        start = datetime.datetime.now()

        print('     Fetching from S3')
        s3_key = document_meta['s3_key']
        s3_path = get_s3_path(s3_key)
        response = urllib2.urlopen(s3_path)
        try:
            text_blob = response.read()
        finally:
            # Release the connection even when the read fails.
            response.close()

        word_count = count_words(text_blob)

        update_file_upload_meta(
            document_slug = document_meta['document_slug'],
            time_uploaded = document_meta['time_uploaded'],
            word_counts = json.dumps(word_count),
        )
        duration = datetime.datetime.now() - start
        print('     Done in {0}!'.format(duration))
Esempio n. 4
0
 def test_alternating_word_separators_not_detected_as_a_word(self):
     """Runs of mixed separators must not show up as words in the result."""
     text = ",\n,one,\n ,two \n 'three'"
     expected = {"one": 1, "two": 1, "three": 1}
     self.assertEqual(count_words(text), expected)
Esempio n. 5
0
 def test_include_numbers(self):
     """Digit-only tokens are expected to be counted like any other word."""
     counts = count_words("testing, 1, 2 testing")
     self.assertEqual(counts, {"testing": 2, "1": 1, "2": 1})
Esempio n. 6
0
 def test_with_apostrophes(self):
     """An apostrophe inside a word is kept; other punctuation is dropped."""
     sentence = "First: don't laugh. Then: don't cry."
     expected = {"first": 1, "don't": 2, "laugh": 1, "then": 1, "cry": 1}
     self.assertEqual(count_words(sentence), expected)
Esempio n. 7
0
 def test_multiple_occurrences_of_a_word(self):
     """A repeated word is tallied once per occurrence."""
     counts = count_words("one fish two fish red fish blue fish")
     expected = {"one": 1, "fish": 4, "two": 1, "red": 1, "blue": 1}
     self.assertEqual(counts, expected)
Esempio n. 8
0
 def test_handles_expanded_lists(self):
     """Items separated by a comma plus newline are counted separately."""
     listing = "one,\ntwo,\nthree"
     expected = {"one": 1, "two": 1, "three": 1}
     self.assertEqual(count_words(listing), expected)
 def test_word_count_one_word(self):
     """A one-word input is expected to give a count of 1."""
     self.assertEqual(word_count.count_words('I'), 1)
Esempio n. 10
0
 def test_non_alphanumeric(self):
     """Commas and underscores are expected to split the text into words."""
     counts = count_words("hey,my_spacebar_is_broken")
     expected = {"hey": 1, "my": 1, "spacebar": 1, "is": 1, "broken": 1}
     self.assertEqual(counts, expected)
Esempio n. 11
0
 def test_multiple_occurrences_of_a_word(self):
     """Every occurrence of a repeated word adds to its count."""
     phrase = 'one fish two fish red fish blue fish'
     expected = {'one': 1, 'fish': 4, 'two': 1, 'red': 1, 'blue': 1}
     self.assertEqual(count_words(phrase), expected)
Esempio n. 12
0
 def test_handles_cramped_lists(self):
     """Comma-separated items without spaces are counted separately."""
     counts = count_words('one,two,three')
     self.assertEqual(counts, {'one': 1, 'two': 1, 'three': 1})
Esempio n. 13
0
File: io.py Progetto: Oldsooh/Python
# with open('standard library/test_files/pi_digits.txt') as file_object:
#     for line in file_object:
#         print(line.rstrip())

from word_count import count_words

# Path handed to count_words below.
file_name = r'standard library/test_files/write_result.txt'

# Disabled snippet that writes two sample lines to the file:
# with open(file_name, 'w') as file_object:
#     file_object.write('Test string\n')
#     file_object.write('The sceond line\n')

words_count = count_words(file_name)

# Report whatever count_words returned, rendered with str().
print('The total words count: %s' % str(words_count))
Esempio n. 14
0
 def test_count_one_of_each_word(self):
     """Distinct words each get a count of 1."""
     expected = {'one': 1, 'of': 1, 'each': 1}
     self.assertEqual(count_words('one of each'), expected)
 def test_multiple_spaces_not_detected_as_a_word(self):
     """Leading and repeated spaces must not produce extra entries."""
     counts = count_words(' multiple   whitespaces')
     expected = {'multiple': 1, 'whitespaces': 1}
     self.assertEqual(counts, expected)
 def test_count_one_word(self):
     """A single word maps to a count of 1."""
     counts = count_words('word')
     self.assertEqual(counts, {'word': 1})
 def test_normalize_case(self):
     """Differently-cased spellings are merged under the lowercase word."""
     counts = count_words('go Go GO Stop stop')
     expected = {'go': 3, 'stop': 2}
     self.assertEqual(counts, expected)
Esempio n. 18
0
 def test_multiple_spaces_not_detected_as_a_word(self):
     """Extra spaces between or before words add nothing to the result."""
     expected = {"multiple": 1, "whitespaces": 1}
     self.assertEqual(count_words(" multiple   whitespaces"), expected)
Esempio n. 19
0
 def test_handles_expanded_lists(self):
     """A list spread over several lines yields one entry per item."""
     counts = count_words('one,\ntwo,\nthree')
     expected = {'one': 1, 'two': 1, 'three': 1}
     self.assertEqual(counts, expected)
 def _calculate_progress(self, dir_filter = script_analytics.DEFAULT_FILTER):
   """Tally translation progress and show it in the results label.

   Walks the entries yielded by ``script_analytics.SA.get_data(dir_filter)``,
   each a ``(i, total, filename, data)`` tuple, and accumulates file,
   character and word counts. Entries whose data is None, files already
   seen, and duplicate groups already seen are skipped. A duplicate group
   counts once toward the "unique" totals and ``group_size`` times toward
   the plain totals, where ``group_size`` is the number of group members
   matching ``dir_filter``.

   An entry counts as translated when its ``lang_trans`` text is non-empty
   or its ``lang_orig`` text has zero characters.

   Does nothing if a scan is already running. If ``self._canceled`` becomes
   true during the scan, the progress bar and elapsed-time label are reset
   and the method returns without publishing results.

   Args:
     dir_filter: object with a ``search`` method; passed to ``get_data`` and
       used to filter the members of each duplicate group.
   """
   if self._running:
     return

   self._running  = True
   self._canceled = False

   def percent(done, whole):
     # An empty denominator is reported as 100% instead of dividing by zero.
     return 100.0 if whole == 0 else float(done) / whole * 100

   try:
     self.ui.lblResults.setText("<center><b>Results</b></center>")

     start_time = time.time()

     self.ui.progressBar.setMaximum(72000)
     self.ui.progressBar.setValue(0)

     total_files = 0
     unique_files = 0
     translated_files = 0
     translated_unique = 0

     total_chars = 0
     unique_chars = 0
     translated_chars = 0
     translated_unique_chars = 0

     translated_words = 0
     translated_unique_words = 0

     groups_seen = set()
     files_seen = set()

     next_update = UPDATE_INTERVAL

     # Stays 0 when get_data() yields nothing, so the setValue() after the
     # loop never hits an unbound name.
     total = 0

     for i, total, filename, data in script_analytics.SA.get_data(dir_filter):
       if self._canceled:
         self.ui.progressBar.setValue(0)
         self.ui.lblTimeElapsed.setText("00:00")
         return

       # Refresh the UI only every UPDATE_INTERVAL entries.
       if i >= next_update:
         self.ui.progressBar.setValue(i)
         self.ui.progressBar.setMaximum(total)
         self.ui.lblTimeElapsed.setText("%02d:%02d" % (divmod(time.time() - start_time, 60)))
         QtGui.QApplication.processEvents()
         next_update = i + UPDATE_INTERVAL

       if data is None:
         continue

       if filename in files_seen:
         continue

       dupe_group = dupe_db.db.group_from_file(filename)

       # Add the whole group to the translated files, but only one
       # to the unique translated. If there is no group, it's size 1.
       group_size = 1

       if dupe_group is not None:
         if dupe_group in groups_seen:
           continue

         groups_seen.add(dupe_group)
         # A real list is required: it is measured with len() and then
         # reused, which an iterator-returning filter() would not allow.
         group_files = [
           name for name in dupe_db.db.files_in_group(dupe_group)
           if dir_filter.search(name)
         ]
         group_size = len(group_files)
         files_seen.update(group_files)

       total_files += group_size
       unique_files += 1

       # How many characters is the untranslated, non-tagged text?
       num_chars = len(data.notags[common.editor_config.lang_orig])

       total_chars  += num_chars * group_size
       unique_chars += num_chars

       if data.notags[common.editor_config.lang_trans] != "" or num_chars == 0:
         translated_files  += group_size
         translated_unique += 1

         translated_chars        += num_chars * group_size
         translated_unique_chars += num_chars

         words = count_words(data.notags[common.editor_config.lang_trans])
         translated_words        += words * group_size
         translated_unique_words += words

     self.ui.progressBar.setValue(total)

     files_percent        = percent(translated_files, total_files)
     unique_files_percent = percent(translated_unique, unique_files)
     chars_percent        = percent(translated_chars, total_chars)
     unique_chars_percent = percent(translated_unique_chars, unique_chars)

     self.ui.lblResults.setText(
       "<center><b>Results</b></center><br/>" +
       ("<b>Files</b>: %d / %d (%0.2f%%)<br/>" % (translated_files, total_files, files_percent)) +
       ("<b>Unique Files</b>: %d / %d (%0.2f%%)<br/>" % (translated_unique, unique_files, unique_files_percent)) +
       "<br/>" +
       ("<b>Japanese Characters</b>: %d / %d (%0.2f%%)<br/>" % (translated_chars, total_chars, chars_percent)) +
       ("<b>Unique Characters</b>: %d / %d (%0.2f%%)<br/>" % (translated_unique_chars, unique_chars, unique_chars_percent)) +
       "<br/>" +
       ("<b>English Words</b>: %d<br/>" % (translated_words)) +
       ("<b>Unique Words</b>: %d<br/>" % (translated_unique_words)) +
       "<br/>" +
       "<b>NOTE</b>: Unique X is lazy for \"X in all unique files.\""
     )
   finally:
     # Always release the guard, including on cancel or on an exception,
     # so a failed scan cannot leave the method permanently disabled.
     self._running  = False
     self._canceled = False
Esempio n. 21
0
 def test_ignore_punctuation(self):
     """Punctuation characters are dropped and never counted as words."""
     text = 'car : carpet as java : javascript!!&@$%^&'
     expected = {'car': 1, 'carpet': 1, 'as': 1, 'java': 1, 'javascript': 1}
     self.assertEqual(count_words(text), expected)
 def test_include_numbers(self):
     """Numbers in the text are counted as words of their own."""
     counts = count_words('testing 1 2 testing')
     expected = {'testing': 2, '1': 1, '2': 1}
     self.assertEqual(counts, expected)
Esempio n. 23
0
 def test_include_numbers(self):
     """Digit tokens appear in the result alongside ordinary words."""
     expected = {'testing': 2, '1': 1, '2': 1}
     self.assertEqual(count_words('testing 1 2 testing'), expected)
Esempio n. 24
0
 def test_multiple_apostrophes_ignored(self):
     """Doubled apostrophes around a word are stripped from it."""
     counts = count_words("''hey''")
     self.assertEqual(counts, {"hey": 1})
Esempio n. 25
0
 def test_normalize_case(self):
     """Counting ignores case; keys are expected in lowercase."""
     mixed_case = 'go Go GO Stop stop'
     self.assertEqual(count_words(mixed_case), {'go': 3, 'stop': 2})
Esempio n. 26
0
 def test_handles_cramped_lists(self):
     """Items joined only by commas are split into separate words."""
     expected = {"one": 1, "two": 1, "three": 1}
     self.assertEqual(count_words("one,two,three"), expected)
Esempio n. 27
0
 def test_with_apostrophes(self):
     """Contractions such as "don't" stay intact as a single word."""
     counts = count_words("First: don't laugh. Then: don't cry.")
     expected = {'first': 1, "don't": 2, 'laugh': 1, 'then': 1, 'cry': 1}
     self.assertEqual(counts, expected)
Esempio n. 28
0
 def test_ignore_punctuation(self):
     """Colons and trailing symbols are not part of any counted word."""
     counts = count_words("car: carpet as java: javascript!!&@$%^&")
     expected = {"car": 1, "carpet": 1, "as": 1, "java": 1, "javascript": 1}
     self.assertEqual(counts, expected)
Esempio n. 29
0
 def test_with_quotations(self):
     """A quoted word counts together with its unquoted form."""
     sentence = "Joe can't tell between 'large' and large."
     expected = {
         'joe': 1, "can't": 1, 'tell': 1, 'between': 1, 'large': 2,
         'and': 1,
     }
     self.assertEqual(count_words(sentence), expected)
Esempio n. 30
0
 def test_normalize_case(self):
     """Upper-, lower- and mixed-case spellings share one lowercase key."""
     counts = count_words("go Go GO Stop stop")
     self.assertEqual(counts, {"go": 3, "stop": 2})
Esempio n. 31
0
 def test_multiple_spaces_not_detected_as_a_word(self):
     """Whitespace runs do not create empty or blank word entries."""
     spaced = ' multiple   whitespaces'
     expected = {'multiple': 1, 'whitespaces': 1}
     self.assertEqual(count_words(spaced), expected)
Esempio n. 32
0
 def test_with_quotations(self):
     """Single quotes around a word are removed before counting."""
     counts = count_words("Joe can't tell between 'large' and large.")
     expected = {
         "joe": 1, "can't": 1, "tell": 1, "between": 1, "large": 2, "and": 1,
     }
     self.assertEqual(counts, expected)
Esempio n. 33
0
 def test_alternating_word_separators_not_detected_as_a_word(self):
     """Separators that alternate between words leave only the real words."""
     counts = count_words(",\n,one,\n ,two \n 'three'")
     expected = {'one': 1, 'two': 1, 'three': 1}
     self.assertEqual(counts, expected)
Esempio n. 34
0
 def test_count_one_word(self):
     """One word in, one entry with count 1 out."""
     expected = {"word": 1}
     self.assertEqual(count_words("word"), expected)
 def test_handles_cramped_lists(self):
     """A comma-only separated list gives one entry per item."""
     counts = count_words('one,two,three')
     expected = {'one': 1, 'two': 1, 'three': 1}
     self.assertEqual(counts, expected)
Esempio n. 36
0
 def test_count_one_of_each_word(self):
     """Three different words each appear once in the result."""
     counts = count_words("one of each")
     self.assertEqual(counts, {"one": 1, "of": 1, "each": 1})
 def test_count_one_of_each_word(self):
     """Each word of a phrase without repeats gets a count of 1."""
     phrase = 'one of each'
     expected = {'one': 1, 'of': 1, 'each': 1}
     self.assertEqual(count_words(phrase), expected)
Esempio n. 38
0
 def test_word_count_no_words(self):
     """An empty string is expected to give a count of 0."""
     self.assertEqual(word_count.count_words(''), 0)
Esempio n. 39
0
 def test_non_alphanumeric(self):
     """Underscores, commas and a trailing period all separate words."""
     counts = count_words('hey,my_spacebar_is_broken.')
     expected = {'hey': 1, 'my': 1, 'spacebar': 1, 'is': 1, 'broken': 1}
     self.assertEqual(counts, expected)
Esempio n. 40
0
 def test_word_count_one_sentence(self):
     """A four-word sentence is expected to give a count of 4."""
     sentence = 'I love my dog.'
     self.assertEqual(word_count.count_words(sentence), 4)
 def test_handles_expanded_lists(self):
     """Newlines following commas do not merge or hide list items."""
     expected = {'one': 1, 'two': 1, 'three': 1}
     self.assertEqual(count_words('one,\ntwo,\nthree'), expected)
Esempio n. 42
0
 def test_count_one_word(self):
     """A lone word is reported with a count of 1."""
     counts = count_words('word')
     expected = {'word': 1}
     self.assertEqual(counts, expected)
from word_count import count_words, count_specific_word

# Text files to process, in order.
filenames = [
    'alice.txt',
    'siddhartha.txt',
    'sherlock.txt',
    'treasure.txt',
]

# Run both counters on every file; the second looks for the word 'grass'.
for filename in filenames:
    count_words(filename)
    count_specific_word(filename, 'grass')