def new_expert_trainer():
    """Follow ``file_uploaded.txt`` and run the MOE update once an id is seen three times.

    The file is recreated empty, then read through ``tail_file``. For each
    line, the last run of digits is used as the key into a
    ``file_dictionary`` counter:

    * key unknown (``get`` returns ``-1``): stored with a count of 1;
    * count would stay below 3: the count is incremented;
    * otherwise: the key is removed and ``wordcount.word_count`` is run on
      ``globals.default_hdfs_path + "MOE" + <key>``.

    Lines that contain no digits are skipped.
    """
    print("inside funciton")
    filename = "file_uploaded.txt"

    # Start from a fresh, empty file so earlier content is not replayed.
    if os.path.isfile(filename):
        os.remove(filename)
    with open(filename, "w+"):
        pass

    # The context manager guarantees the handle is released if the loop
    # ends or an exception escapes.
    with open(filename, "r") as logfile:
        loglines = tail_file(logfile)
        dict_obj = file_dictionary()
        for line in loglines:
            numbers = re.findall(r'\d+', str(line.strip()))
            if not numbers:
                # Nothing to count on this line; indexing [-1] here would
                # raise IndexError and end the whole loop.
                continue
            values = str(numbers[-1])

            temp = dict_obj.get(values)
            print("Reading Value " + values + " in dictionary" + str(temp))
            if temp == -1:
                print("New Value")
                dict_obj.add(values, 1)
                print(dict_obj)
                continue

            print("Already Exist Value")
            print("current value of " + values + " is " + str(temp))
            if temp + 1 < 3:
                dict_obj.add(values, int(temp) + 1)
                print(dict_obj)
            else:
                # Third sighting: reset the counter and run the MOE update.
                dict_obj.remove(values)
                print("3 files are present, running the update")
                wordcount.word_count(
                    globals.spark_session,
                    globals.default_hdfs_path + "MOE" + values)
def threaded(c):
    """Serve one client connection until the peer disconnects.

    The client sends a one-byte command:

    * ``b"1"`` - add a new expert: three files (``MOE``/``TOE``/``SOE`` with a
      shared numeric suffix) are received one at a time, then each is pushed
      to HDFS on its own thread;
    * ``b"2"`` - evaluate an input: one file is received, pushed to HDFS
      synchronously and passed to ``wordcount.word_count``.

    Before each file transfer the chosen port is announced to the client as
    ASCII ``"<port>:"``.

    Args:
        c: connected socket-like object providing ``recv``, ``sendall`` and
            ``close``.
    """
    try:
        while True:
            data = c.recv(1024)
            if not data:
                print('Client Disconnected')
                break

            if data == b"1":
                print("Request for inserting new node")
                generated_name = file_name_generator("MOE")
                print("Generated name : " + generated_name)

                # All three file names share the numeric suffix of the
                # generated MOE name.
                suffix = re.findall(r'\d+', generated_name)[-1]
                list_of_files_to_be_pushed_to_hdfs = [
                    generated_name,
                    "TOE" + suffix,
                    "SOE" + suffix,
                ]

                for file_name in list_of_files_to_be_pushed_to_hdfs:
                    print("Storing the incoming file in " + file_name)
                    file_receiving_port = get_port_numer_for_file_receiving()
                    print("About to send the data via "
                          + str(file_receiving_port))
                    # sendall: a plain send() may transmit only part of the
                    # announcement.
                    c.sendall((str(file_receiving_port) + ":").encode('ascii'))
                    file_reciever(globals.default_storage_path + file_name,
                                  file_receiving_port)

                # Only after every file has been received, push them to HDFS.
                for file_name in list_of_files_to_be_pushed_to_hdfs:
                    start_new_thread(
                        hdfs_uploader.hdfs_pusher,
                        (globals.default_storage_path + file_name, ))

            elif data == b"2":
                print("Request for Evaluation")
                file_to_be_evaluated = file_name_generator("Evaluation")
                file_receiving_port = get_port_numer_for_file_receiving()
                c.sendall((str(file_receiving_port) + ":").encode('ascii'))
                file_reciever(
                    globals.default_storage_path + file_to_be_evaluated,
                    file_receiving_port)
                hdfs_uploader.hdfs_pusher(
                    globals.default_storage_path + file_to_be_evaluated)
                wordcount.word_count(
                    globals.spark_session,
                    globals.default_hdfs_path + file_to_be_evaluated)

        print("Client Exited!!")
    finally:
        # Release the connection even when a handler step raises.
        c.close()
def test_count_all_stars():
    """Check ``word_count`` on two sample sentences and report the outcome.

    When both assertions hold, ``success()`` is called and a congratulation
    message is sent. On the first failing assertion, ``fail()`` is called and
    the assertion error is forwarded through ``send_msg``.
    """
    # (input sentence, expected count, failure-message template)
    cases = (
        (
            "May the Force be with you, Luke!",
            7,
            "Running word_count('May the Force be with you, Luke!')... Expected 7, got {}",
        ),
        (
            "C'est la chenille qui redémarre !",
            6,
            "Running word_count('C'est la chenille qui redémarre !')... Expected 6, got {}",
        ),
    )
    try:
        for sentence, expected, template in cases:
            got = word_count(sentence)
            assert got == expected, template.format(got)
        success()
        send_msg("Bien joué !", "")
    except AssertionError as error:
        fail()
        send_msg("Oops! 🐞", error)
def test_newlines(self):
    """Words separated by newline characters are counted individually."""
    lyrics = (
        'rah rah ah ah ah\n'
        'roma roma ma\n'
        'ga ga oh la la\n'
        'want your bad romance'
    )
    expected = {
        'rah': 2, 'ah': 3, 'roma': 2, 'ma': 1, 'ga': 2, 'oh': 1,
        'la': 2, 'want': 1, 'your': 1, 'bad': 1, 'romance': 1,
    }
    actual = word_count(lyrics)
    self.assertEqual(expected, actual)
def test_non_alphanumeric(self):
    """Commas, underscores and periods act as word separators."""
    expected = {'hey': 1, 'my': 1, 'spacebar': 1, 'is': 1, 'broken': 1}
    actual = word_count('hey,my_spacebar_is_broken.')
    self.assertEqual(expected, actual)
def test_count_multiple_occurences(self):
    """A word that appears several times is tallied once per appearance."""
    phrase = 'one fish two fish red fish blue fish'
    expected = {'one': 1, 'fish': 4, 'two': 1, 'red': 1, 'blue': 1}
    actual = word_count(phrase)
    self.assertEqual(expected, actual)
def test_tabs(self):
    """Words separated by tab characters are counted individually."""
    lyrics = (
        'rah rah ah ah ah\t'
        'roma roma ma\t'
        'ga ga oh la la\t'
        'want your bad romance'
    )
    expected = {
        'rah': 2, 'ah': 3, 'roma': 2, 'ma': 1, 'ga': 2, 'oh': 1,
        'la': 2, 'want': 1, 'your': 1, 'bad': 1, 'romance': 1,
    }
    actual = word_count(lyrics)
    self.assertEqual(expected, actual)
def test_newlines(self):
    """Newline characters separate words just like spaces do."""
    text = (
        'rah rah ah ah ah\n'
        'roma roma ma\n'
        'ga ga oh la la\n'
        'want your bad romance'
    )
    expected = {
        'rah': 2, 'ah': 3, 'roma': 2, 'ma': 1, 'ga': 2, 'oh': 1,
        'la': 2, 'want': 1, 'your': 1, 'bad': 1, 'romance': 1,
    }
    actual = word_count(text)
    self.assertEqual(expected, actual)
def test_preserves_punctuation(self):
    """Only the five words are expected as keys; no punctuation appears in the result."""
    sentence = 'car : carpet as java : javascript!!&@$%^&'
    expected = {'car': 1, 'carpet': 1, 'as': 1, 'java': 1, 'javascript': 1}
    actual = word_count(sentence)
    self.assertEqual(expected, actual)
def test_newlines(self):
    """Text spread over several lines is counted word by word."""
    text = (
        "rah rah ah ah ah\n"
        "roma roma ma\n"
        "ga ga oh la la\n"
        "want your bad romance"
    )
    expected = {
        "rah": 2, "ah": 3, "roma": 2, "ma": 1, "ga": 2, "oh": 1,
        "la": 2, "want": 1, "your": 1, "bad": 1, "romance": 1,
    }
    actual = word_count(text)
    self.assertEqual(expected, actual)
def test_includes_numbers(self):
    """Digit-only tokens are counted as words."""
    expected = {u'1': 1, u'2': 1, u'testing': 2}
    actual = word_count(u'testing 1 2 testing')
    self.assertEqual(expected, actual)
def test_preserves_mixed_case(self):
    """Differently-cased spellings are expected as separate keys."""
    expected = {u'go': 1, u'Go': 1, u'GO': 1}
    actual = word_count(u'Go go GO')
    self.assertEqual(expected, actual)
"store_true", # This action tells argparse to store True when the flag is specified. help="Force a case-sensitive count. By default, case is ignored.") # Optional integer argument indicating the minimum word count threshold for # display. # The new feature here is the type argument. By default, argparse treats # everything as a string. If you want arguments to be other types, you need # to tell argparse which type you require. # This has several benefits: # - argparse will do error handling for you. # - the value will already be the correct type in the arguments object. # - it helps make your code self-documenting. parser.add_argument("-m", "--min-count", default=2, help="The minimum word count threshold for display.") # parse_args first checks for errors, and if there are none, it returns # a Namespace object containing your named arguments. argument names # correspond to the long-form argument names. return parser.parse_args() if __name__ == "__main__": counts = word_count(read_file(args.file), characters_to_ignore=args.punctuation, case_sensitive=args.case - sensitive) print_counts(counts, min_count=args.min_count)
def test_counts_multiple_occurences(self):
    """Repeated words accumulate: 'fish' appears four times in the phrase."""
    phrase = u'one fish two fish red fish blue fish'
    expected = {u'blue': 1, u'fish': 4, u'two': 1, u'red': 1, u'one': 1}
    actual = word_count(phrase)
    self.assertEqual(expected, actual)
def test_unicode(self):
    """Two Cyrillic words split by non-word characters are each counted once."""
    # NOTE(review): the literals here were mojibake (UTF-8 bytes rendered as
    # ISO-8859-5) and have been restored to the intended Cyrillic text. The
    # character between the two words is assumed to be U+1F596 - confirm
    # against the upstream test suite.
    expected = {
        decode_if_needed('до'): 1,
        decode_if_needed('свидания'): 1,
    }
    actual = word_count('до🖖свидания!')
    self.assertEqual(expected, actual)
def test_count_multiple_occurences(self):
    """Each occurrence of a repeated word adds to its tally."""
    expected = {"one": 1, "fish": 4, "two": 1, "red": 1, "blue": 1}
    actual = word_count("one fish two fish red fish blue fish")
    self.assertEqual(expected, actual)
def test_count_one_word(self):
    """A single word is reported with a count of one."""
    expected = {'word': 1}
    actual = word_count('word')
    self.assertEqual(expected, actual)
def test_mixed_case(self):
    """'go', 'Go' and 'GO' are expected as three distinct keys."""
    expected = {'go': 1, 'Go': 1, 'GO': 1}
    actual = word_count('go Go GO')
    self.assertEqual(expected, actual)
def test_count_one_of_each(self):
    """Three distinct words each get a count of one."""
    expected = {'one': 1, 'of': 1, 'each': 1}
    actual = word_count('one of each')
    self.assertEqual(expected, actual)
def test_mixed_case(self):
    """The sorted counts for 'go Go GO Stop stop' must be [2, 3]."""
    counts = word_count('go Go GO Stop stop')
    tallies = sorted(counts.values())
    self.assertEqual([2, 3], tallies)
def test_multiple_spaces(self):
    """'wait', 'for' and 'it' are each counted once."""
    # NOTE(review): the test name suggests runs of spaces in the input, but
    # the literal as seen here contains single spaces - confirm.
    expected = {"wait": 1, "for": 1, "it": 1}
    actual = word_count("wait for it")
    self.assertEqual(expected, actual)
def test_include_numbers(self):
    """Numeric tokens are counted alongside alphabetic words."""
    expected = {"testing": 2, "1": 1, "2": 1}
    actual = word_count("testing 1 2 testing")
    self.assertEqual(expected, actual)
def test_preserves_punctuation(self):
    """The expected result holds only the five words; punctuation is absent from the keys."""
    sentence = "car : carpet as java : javascript!!&@$%^&"
    expected = {"car": 1, "carpet": 1, "as": 1, "java": 1, "javascript": 1}
    actual = word_count(sentence)
    self.assertEqual(expected, actual)
def test_splits_on_newlines(self):
    """Newline characters separate words."""
    lyrics = (
        u'rah rah ah ah ah\n'
        u'roma roma ma\n'
        u'ga ga oh la la\n'
        u'want your bad romance'
    )
    expected = {
        u'ma': 1, u'want': 1, u'oh': 1, u'ah': 3, u'la': 2, u'rah': 2,
        u'romance': 1, u'bad': 1, u'ga': 2, u'roma': 2, u'your': 1,
    }
    actual = word_count(lyrics)
    self.assertEqual(expected, actual)
def test_include_numbers(self):
    """'1' and '2' are counted as words next to 'testing'."""
    expected = {'testing': 2, '1': 1, '2': 1}
    actual = word_count('testing 1 2 testing')
    self.assertEqual(expected, actual)
def test_counts_one_word(self):
    """A lone word maps to a count of one."""
    expected = {u'word': 1}
    actual = word_count(u'word')
    self.assertEqual(expected, actual)
def test_multiple_spaces(self):
    """Each of 'wait', 'for', 'it' is counted exactly once."""
    # NOTE(review): the test name suggests runs of spaces in the input, but
    # the literal as seen here contains single spaces - confirm.
    expected = {'wait': 1, 'for': 1, 'it': 1}
    actual = word_count('wait for it')
    self.assertEqual(expected, actual)
def test_normalize_case(self):
    """All casings of 'go' are expected under the single key 'go'."""
    expected = {'go': 3}
    actual = word_count('go Go GO')
    self.assertEqual(expected, actual)
def test_mixed_case(self):
    """'go Go GO' must produce a single count of 3."""
    tallies = list(word_count('go Go GO').values())
    self.assertEqual([3], tallies)
def test_count_one_word(self):
    """Counting the text 'word' gives {'word': 1}."""
    expected = {'word': 1}
    actual = word_count('word')
    self.assertEqual(expected, actual)
def test_count_one_of_each(self):
    """Every word of 'one of each' appears exactly once."""
    expected = {"one": 1, "of": 1, "each": 1}
    actual = word_count("one of each")
    self.assertEqual(expected, actual)
def test_include_numbers(self):
    """Number tokens appear in the result with their own counts."""
    expected = {'testing': 2, '1': 1, '2': 1}
    actual = word_count('testing 1 2 testing')
    self.assertEqual(expected, actual)
def test_non_alphanumeric(self):
    """A comma, underscores and a period all split the text into words."""
    expected = {"hey": 1, "my": 1, "spacebar": 1, "is": 1, "broken": 1}
    actual = word_count("hey,my_spacebar_is_broken.")
    self.assertEqual(expected, actual)
def test_multiple_spaces(self):
    """The three words of 'wait for it' are each counted once."""
    # NOTE(review): the test name suggests runs of spaces in the input, but
    # the literal as seen here contains single spaces - confirm.
    expected = {'wait': 1, 'for': 1, 'it': 1}
    actual = word_count('wait for it')
    self.assertEqual(expected, actual)
def test_count_one_word(self):
    """One word in, one entry out with a count of one."""
    expected = {"word": 1}
    actual = word_count("word")
    self.assertEqual(expected, actual)
def test_count_one_of_each(self):
    """Distinct words are each tallied once."""
    expected = {'one': 1, 'of': 1, 'each': 1}
    actual = word_count('one of each')
    self.assertEqual(expected, actual)
from wordcount import (
    print_counts,
    read_file,
    word_count,
)

# Load the sample text, count its words with the default options,
# and print the resulting counts.
data = read_file("sample-text.txt")
counts = word_count(data)
print_counts(counts)
def test_mixed_case(self):
    """Sorted counts for 'go Go GO Stop stop' are expected to be [2, 3]."""
    frequencies = word_count('go Go GO Stop stop').values()
    self.assertEqual([2, 3], sorted(frequencies))
def test_normalize_case(self):
    """'go', 'Go' and 'GO' are expected to be merged under the key 'go'."""
    expected = {'go': 3}
    actual = word_count('go Go GO')
    self.assertEqual(expected, actual)
def test_mixed_case(self):
    """The only count produced for 'go Go GO' must be 3."""
    counts = word_count('go Go GO')
    self.assertEqual([3], list(counts.values()))
def test_counts_one_of_each(self):
    """Each word of u'one of each' is counted once."""
    expected = {u'of': 1, u'each': 1, u'one': 1}
    actual = word_count(u'one of each')
    self.assertEqual(expected, actual)
def test_unicode(self):
    """Two Cyrillic words split by non-word characters are each counted once."""
    # NOTE(review): the literals here were mojibake (UTF-8 bytes rendered as
    # ISO-8859-5) and have been restored to the intended Cyrillic text. The
    # character between the two words is assumed to be U+1F596 - confirm
    # against the upstream test suite.
    expected = {
        decode_if_needed('до'): 1,
        decode_if_needed('свидания'): 1,
    }
    actual = word_count('до🖖свидания!')
    self.assertEqual(expected, actual)
# - it helps make your code self-documenting. parser.add_argument( "-m", "--min-count", ## We need to specify the argument type as `int` type=int, default=2, help="The minimum word count threshold for display.") # parse_args first checks for errors, and if there are none, it returns # a Namespace object containing your named arguments. argument names # correspond to the long-form argument names. return parser.parse_args() if __name__ == "__main__": ## Oops, we forgot to call the get_program_args() function. ## Remember, defining a function does not call it args = get_program_args() counts = word_count( read_file(args.file), characters_to_ignore=args.punctuation, ## The argument is called "case-sensitive" which ## is not a valid identifier. Argparse knows this and replaces "-" ## with "_". case_sensitive=args.case_sensitive) print_counts(counts, min_count=args.min_count)