Beispiel #1
0
def new_expert_trainer():
    """Follow the upload log and run the MOE update on every third upload.

    Recreates an empty ``file_uploaded.txt`` and iterates over whatever
    ``tail_file`` yields for it (presumably lines as they are appended --
    confirm against ``tail_file``).  The last run of digits on each line is
    the key tracked in a ``file_dictionary`` counter:

    * unseen key (``get`` returns -1): stored with a count of 1;
    * seen key whose next count stays below 3: the count is incremented;
    * otherwise: the key is removed and ``wordcount.word_count`` is run on
      ``globals.default_hdfs_path + "MOE" + key``.

    Lines that contain no digits are skipped instead of raising
    ``IndexError``, and the log handle is closed when the loop exits or
    fails.
    """
    print("inside funciton")
    filename = "file_uploaded.txt"
    if os.path.isfile(filename):
        os.remove(filename)
    # Create an empty log so it can be opened for reading right away.
    with open(filename, "w+"):
        pass

    with open(filename, "r") as logfile:
        loglines = tail_file(logfile)
        dict_obj = file_dictionary()
        for line in loglines:
            numbers = re.findall(r'\d+', str(line.strip()))
            if not numbers:
                # Nothing to key on; the original indexing would have crashed.
                continue
            values = str(numbers[-1])
            count = dict_obj.get(values)
            print("Reading Value " + values + " in dictionary" + str(count))
            if count == -1:
                print("New Value")
                dict_obj.add(values, 1)
                print(dict_obj)
                continue

            print("Already Exist Value")
            print("current value of " + values + " is " + str(count))
            if count + 1 < 3:
                dict_obj.add(values, int(count) + 1)
                print(dict_obj)
            else:
                # Third sighting: reset the counter and run the MOE update.
                dict_obj.remove(values)
                print("3 files are present, running the update")
                wordcount.word_count(
                    globals.spark_session,
                    globals.default_hdfs_path + "MOE" + values)
Beispiel #2
0
def threaded(c):
    """Serve one client connection until it disconnects.

    Reads up to 1024 bytes at a time from ``c`` and dispatches on the
    payload:

    * ``b"1"`` -- add a new expert: three file names (a generated "MOE" name
      plus "TOE"/"SOE" names sharing its trailing number) are each received
      on a freshly announced port, then pushed to HDFS on separate threads.
    * ``b"2"`` -- evaluate an input: one file is received, pushed to HDFS
      synchronously and handed to ``wordcount.word_count``.

    An empty read ends the loop.  The connection is closed even when
    handling a request raises.

    Args:
        c: connected socket-like object providing ``recv``, ``send`` and
            ``close``.
    """
    try:
        while True:

            # data received from client i.e 1. Add a Expert OR 2.Evaluation on cluster
            data = c.recv(1024)
            if not data:
                print('Client Disconnected')
                break

            # New Expert need to be added
            if data == b"1":
                print("Request for inserting new node")
                # file_name_generator to generate file names
                generated_name = file_name_generator("MOE")
                print("Generated name : " + generated_name)
                # All file names share the numeric suffix of the generated one.
                suffix = re.findall(r'\d+', generated_name)[-1]
                list_of_files_to_be_pushed_to_hdfs = [
                    generated_name,
                    "TOE" + suffix,
                    "SOE" + suffix,
                ]

                for file_name in list_of_files_to_be_pushed_to_hdfs:
                    print("Storing the incoming file in " + file_name)
                    file_receiving_port = get_port_numer_for_file_receiving()
                    print("About to send the data via " +
                          str(file_receiving_port))
                    # Tell the client which port to stream this file to.
                    c.send((str(file_receiving_port) + ":").encode('ascii'))
                    file_reciever(globals.default_storage_path + file_name,
                                  file_receiving_port)

                for file_name in list_of_files_to_be_pushed_to_hdfs:
                    # After the files are received start pushing it to hdfs
                    start_new_thread(
                        hdfs_uploader.hdfs_pusher,
                        (globals.default_storage_path + file_name, ))

            # To evaluate a input
            if data == b"2":
                print("Request for Evaluation")
                file_to_be_evaluated = file_name_generator("Evaluation")
                file_receiving_port = get_port_numer_for_file_receiving()
                c.send((str(file_receiving_port) + ":").encode('ascii'))
                file_reciever(
                    globals.default_storage_path + file_to_be_evaluated,
                    file_receiving_port)
                hdfs_uploader.hdfs_pusher(globals.default_storage_path +
                                          file_to_be_evaluated)
                wordcount.word_count(
                    globals.spark_session,
                    globals.default_hdfs_path + file_to_be_evaluated)
        print("Client Exited!!")
    finally:
        # Release the socket on every exit path, including errors.
        c.close()
def test_count_all_stars():
    """Check word_count on two sample sentences and report the outcome.

    On success, success() is called and a congratulation message is sent;
    on the first failed assertion, fail() is called and the assertion error
    is sent instead.
    """
    # (sentence, expected count, failure message template)
    cases = (
        ("May the Force be with you, Luke!", 7,
         "Running word_count('May the Force be with you, Luke!')... Expected 7, got {}"),
        ("C'est la chenille qui redémarre !", 6,
         "Running word_count('C'est la chenille qui redémarre !')... Expected 6, got {}"),
    )
    try:
        for sentence, expected, template in cases:
            actual = word_count(sentence)
            assert actual == expected, template.format(actual)
        success()

        send_msg("Bien joué !", "")
    except AssertionError as error:
        fail()
        send_msg("Oops! 🐞", error)
 def test_newlines(self):
     """Newlines separate words just as spaces do."""
     lyrics = ('rah rah ah ah ah\nroma roma ma\nga ga oh la la\n'
               'want your bad romance')
     expected = {
         'rah': 2, 'ah': 3, 'roma': 2, 'ma': 1, 'ga': 2, 'oh': 1,
         'la': 2, 'want': 1, 'your': 1, 'bad': 1, 'romance': 1,
     }
     self.assertEqual(expected, word_count(lyrics))
 def test_non_alphanumeric(self):
     """Commas, underscores and periods act as word separators."""
     expected = dict.fromkeys(
         ['hey', 'my', 'spacebar', 'is', 'broken'], 1)
     self.assertEqual(expected, word_count('hey,my_spacebar_is_broken.'))
 def test_count_multiple_occurences(self):
     """A repeated word is tallied once per appearance."""
     sentence = 'one fish two fish red fish blue fish'
     expected = {'one': 1, 'fish': 4, 'two': 1, 'red': 1, 'blue': 1}
     self.assertEqual(expected, word_count(sentence))
Beispiel #7
0
 def test_tabs(self):
     """Tab characters separate words just as spaces do."""
     lyrics = ('rah rah ah ah ah\troma roma ma\tga ga oh la la\t'
               'want your bad romance')
     expected = {
         'rah': 2, 'ah': 3, 'roma': 2, 'ma': 1, 'ga': 2, 'oh': 1,
         'la': 2, 'want': 1, 'your': 1, 'bad': 1, 'romance': 1,
     }
     self.assertEqual(expected, word_count(lyrics))
Beispiel #8
0
 def test_newlines(self):
     """Newlines separate words just as spaces do."""
     lyrics = ('rah rah ah ah ah\nroma roma ma\n'
               'ga ga oh la la\nwant your bad romance')
     expected = {
         'rah': 2, 'ah': 3, 'roma': 2, 'ma': 1, 'ga': 2, 'oh': 1,
         'la': 2, 'want': 1, 'your': 1, 'bad': 1, 'romance': 1,
     }
     self.assertEqual(expected, word_count(lyrics))
 def test_preserves_punctuation(self):
     """Only the words are counted; colons and trailing symbols are not."""
     expected = dict.fromkeys(
         ['car', 'carpet', 'as', 'java', 'javascript'], 1)
     phrase = 'car : carpet as java : javascript!!&@$%^&'
     self.assertEqual(expected, word_count(phrase))
Beispiel #10
0
 def test_newlines(self):
     """Newlines separate words just as spaces do."""
     lyrics = "rah rah ah ah ah\nroma roma ma\n" "ga ga oh la la\nwant your bad romance"
     expected = {
         "rah": 2, "ah": 3, "roma": 2, "ma": 1, "ga": 2, "oh": 1,
         "la": 2, "want": 1, "your": 1, "bad": 1, "romance": 1,
     }
     self.assertEqual(expected, word_count(lyrics))
Beispiel #11
0
 def test_includes_numbers(self):
   """Digit-only tokens are counted as words."""
   expected = {u'1': 1, u'2': 1, u'testing': 2}
   self.assertEqual(expected, word_count(u'testing 1 2 testing'))
Beispiel #12
0
 def test_preserves_mixed_case(self):
   """Differently-cased spellings are kept as separate keys."""
   expected = {u'go': 1, u'Go': 1, u'GO': 1}
   self.assertEqual(expected, word_count(u'Go go GO'))
Beispiel #13
0
        "store_true",  # This action tells argparse to store True when the flag is specified.
        help="Force a case-sensitive count. By default, case is ignored.")

    # Optional integer argument indicating the minimum word count threshold for
    # display.
    # The new feature here is the type argument. By default, argparse treats
    # everything as a string. If you want arguments to be other types, you need
    # to tell argparse which type you require.
    # This has several benefits:
    # - argparse will do error handling for you.
    # - the value will already be the correct type in the arguments object.
    # - it helps make your code self-documenting.
    parser.add_argument("-m",
                        "--min-count",
                        default=2,
                        help="The minimum word count threshold for display.")

    # parse_args first checks for errors, and if there are none, it returns
    # a Namespace object containing your named arguments. argument names
    # correspond to the long-form argument names.
    return parser.parse_args()


if __name__ == "__main__":

    # Parse the command line first: every value used below comes from it.
    # NOTE(review): assumes the parser-building function in this file is
    # named get_program_args -- confirm against its definition.
    args = get_program_args()

    counts = word_count(read_file(args.file),
                        characters_to_ignore=args.punctuation,
                        # argparse exposes dashed long options with
                        # underscores; "args.case - sensitive" was a
                        # subtraction involving an undefined name.
                        case_sensitive=args.case_sensitive)

    print_counts(counts, min_count=args.min_count)
Beispiel #14
0
 def test_counts_multiple_occurences(self):
   """A repeated word is tallied once per appearance."""
   sentence = u'one fish two fish red fish blue fish'
   expected = {
     u'blue': 1,
     u'fish': 4,
     u'two': 1,
     u'red': 1,
     u'one': 1,
   }
   self.assertEqual(expected, word_count(sentence))
Beispiel #15
0
 def test_unicode(self):
     """Non-ASCII words are counted; the symbols between them are not.

     NOTE(review): the literals look like mis-decoded text (mojibake);
     confirm the source encoding before relying on this case.
     """
     first_word = decode_if_needed('аДаО')
     second_word = decode_if_needed('баВаИаДаАаНаИб')
     expected = {first_word: 1, second_word: 1}
     self.assertEqual(expected, word_count('аДаО№Ÿ––баВаИаДаАаНаИб!'))
Beispiel #16
0
 def test_count_multiple_occurences(self):
     """A repeated word is tallied once per appearance."""
     sentence = "one fish two fish red fish blue fish"
     expected = {"one": 1, "fish": 4, "two": 1, "red": 1, "blue": 1}
     self.assertEqual(expected, word_count(sentence))
Beispiel #17
0
 def test_count_one_word(self):
     """A single word yields a one-entry tally."""
     expected = {'word': 1}
     self.assertEqual(expected, word_count('word'))
 def test_mixed_case(self):
     """Differently-cased spellings are kept as separate keys."""
     expected = dict.fromkeys(['go', 'Go', 'GO'], 1)
     self.assertEqual(expected, word_count('go Go GO'))
Beispiel #19
0
 def test_count_one_of_each(self):
     """Each distinct word in the phrase is counted once."""
     expected = {'one': 1, 'of': 1, 'each': 1}
     self.assertEqual(expected, word_count('one of each'))
Beispiel #20
0
 def test_mixed_case(self):
     """Case variants merge: the sorted tallies for the two words are 2, 3."""
     tallies = word_count('go Go GO Stop stop').values()
     self.assertEqual([2, 3], sorted(tallies))
Beispiel #21
0
 def test_multiple_spaces(self):
     """A run of spaces between words produces no extra entries."""
     expected = {"wait": 1, "for": 1, "it": 1}
     actual = word_count("wait for       it")
     self.assertEqual(expected, actual)
Beispiel #22
0
 def test_include_numbers(self):
     """Digit-only tokens are counted as words."""
     expected = {"testing": 2, "1": 1, "2": 1}
     actual = word_count("testing 1 2 testing")
     self.assertEqual(expected, actual)
Beispiel #23
0
 def test_preserves_punctuation(self):
     """Only the words are counted; colons and trailing symbols are not."""
     expected = dict.fromkeys(
         ["car", "carpet", "as", "java", "javascript"], 1)
     phrase = "car : carpet as java : javascript!!&@$%^&"
     self.assertEqual(expected, word_count(phrase))
Beispiel #24
0
 def test_splits_on_newlines(self):
   """Newlines separate words just as spaces do."""
   lyrics = u'rah rah ah ah ah\nroma roma ma\nga ga oh la la\nwant your bad romance'
   expected = {
     u'ma': 1, u'want': 1, u'oh': 1, u'ah': 3, u'la': 2, u'rah': 2,
     u'romance': 1, u'bad': 1, u'ga': 2, u'roma': 2, u'your': 1,
   }
   self.assertEqual(expected, word_count(lyrics))
Beispiel #25
0
 def test_include_numbers(self):
     """Digit-only tokens are counted as words."""
     expected = {'testing': 2, '1': 1, '2': 1}
     actual = word_count('testing 1 2 testing')
     self.assertEqual(expected, actual)
Beispiel #26
0
 def test_counts_one_word(self):
   """A single word yields a one-entry tally."""
   expected = {u'word': 1}
   self.assertEqual(expected, word_count(u'word'))
Beispiel #27
0
 def test_multiple_spaces(self):
     """A run of spaces between words produces no extra entries."""
     expected = dict.fromkeys(['wait', 'for', 'it'], 1)
     self.assertEqual(expected, word_count('wait for       it'))
 def test_normalize_case(self):
     """All case variants of a word are tallied under the lowercase key."""
     expected = {'go': 3}
     self.assertEqual(expected, word_count('go Go GO'))
Beispiel #29
0
 def test_mixed_case(self):
     """Three case variants of one word give a single tally of 3."""
     tallies = list(word_count('go Go GO').values())
     self.assertEqual([3], tallies)
Beispiel #30
0
 def test_count_one_word(self):
     """A single word yields a one-entry tally."""
     expected = {'word': 1}
     actual = word_count('word')
     self.assertEqual(expected, actual)
Beispiel #31
0
 def test_count_one_of_each(self):
     """Each distinct word in the phrase is counted once."""
     expected = dict.fromkeys(["one", "of", "each"], 1)
     self.assertEqual(expected, word_count("one of each"))
Beispiel #32
0
 def test_include_numbers(self):
     """Digit-only tokens are counted as words."""
     expected = {'testing': 2, '1': 1, '2': 1}
     self.assertEqual(expected, word_count('testing 1 2 testing'))
Beispiel #33
0
 def test_non_alphanumeric(self):
     """Commas, underscores and periods act as word separators."""
     expected = dict.fromkeys(
         ["hey", "my", "spacebar", "is", "broken"], 1)
     self.assertEqual(expected, word_count("hey,my_spacebar_is_broken."))
Beispiel #34
0
 def test_multiple_spaces(self):
     """A run of spaces between words produces no extra entries."""
     expected = {'wait': 1, 'for': 1, 'it': 1}
     self.assertEqual(expected, word_count('wait for       it'))
Beispiel #35
0
 def test_count_one_word(self):
     """A single word yields a one-entry tally."""
     expected = {"word": 1}
     actual = word_count("word")
     self.assertEqual(expected, actual)
Beispiel #36
0
 def test_count_one_of_each(self):
     """Each distinct word in the phrase is counted once."""
     expected = dict.fromkeys(['one', 'of', 'each'], 1)
     self.assertEqual(expected, word_count('one of each'))
from wordcount import read_file, word_count, print_counts

# Demo pipeline: read the sample text, tally its words, print the tallies.
data = read_file("sample-text.txt")
counts = word_count(data)
# No extra arguments are passed, so print_counts uses its own defaults.
print_counts(counts)
Beispiel #38
0
 def test_mixed_case(self):
     """Case variants merge: the sorted tallies for the two words are 2, 3."""
     tallies = word_count('go Go GO Stop stop').values()
     self.assertEqual([2, 3], sorted(tallies))
Beispiel #39
0
 def test_normalize_case(self):
     """All case variants of a word are tallied under the lowercase key."""
     expected = {'go': 3}
     actual = word_count('go Go GO')
     self.assertEqual(expected, actual)
Beispiel #40
0
 def test_mixed_case(self):
     """Three case variants of one word give a single tally of 3."""
     tallies = list(word_count('go Go GO').values())
     self.assertEqual([3], tallies)
Beispiel #41
0
 def test_counts_one_of_each(self):
   """Each distinct word in the phrase is counted once."""
   expected = dict.fromkeys([u'of', u'each', u'one'], 1)
   self.assertEqual(expected, word_count(u'one of each'))
Beispiel #42
0
 def test_unicode(self):
     """Non-ASCII words are counted; the symbols between them are not.

     NOTE(review): the literals look like mis-decoded text (mojibake);
     confirm the source encoding before relying on this case.
     """
     first_word = decode_if_needed('аДаО')
     second_word = decode_if_needed('баВаИаДаАаНаИб')
     expected = {first_word: 1, second_word: 1}
     self.assertEqual(expected, word_count('аДаО№Ÿ––баВаИаДаАаНаИб!'))
Beispiel #43
0
    # - it helps make your code self-documenting.
    parser.add_argument(
        "-m",
        "--min-count",
        ## We need to specify the argument type as `int`
        type=int,
        default=2,
        help="The minimum word count threshold for display.")

    # parse_args first checks for errors, and if there are none, it returns
    # a Namespace object containing your named arguments. argument names
    # correspond to the long-form argument names.
    return parser.parse_args()


if __name__ == "__main__":

    # Defining get_program_args() does not run it; call it to obtain the
    # parsed command-line arguments.
    args = get_program_args()

    text = read_file(args.file)
    counts = word_count(
        text,
        characters_to_ignore=args.punctuation,
        # The option is spelled "case-sensitive", which is not a valid
        # identifier, so argparse exposes it with "-" replaced by "_".
        case_sensitive=args.case_sensitive,
    )

    print_counts(counts, min_count=args.min_count)