Exemple #1
0
 def test_numerical_keys_become_strs(self):
     # JSON object keys are always strings, so a dict written with the
     # int key 3 is expected to be read back with the str key '3'.
     reader = JSONValueProtocol()
     encoded = JSONValueProtocol().write(None, {3: 4})
     expected = (None, {'3': 4})
     self.assertEqual(expected, reader.read(encoded))
Exemple #2
0
    def test_bad_keys_and_values(self):
        # Each case below hands the protocol a value that must be
        # rejected at encode time; a fresh protocol is used per case.

        # dict keys must be strings; a tuple key is not acceptable
        tuple_keyed = {(1, 2): 3}
        self.assertCantEncode(JSONValueProtocol(), None, tuple_keyed)

        # only unicode (or utf-8 encoded bytes) text is allowed
        non_utf8_text = '\xe9'
        self.assertCantEncode(JSONValueProtocol(), None, non_utf8_text)

        # JSON has no set type
        empty_set = set()
        self.assertCantEncode(JSONValueProtocol(), None, empty_set)

        # arbitrary objects such as Point have no JSON representation
        point = Point(1, 4)
        self.assertCantEncode(JSONValueProtocol(), None, point)
 def reducer_init(self):
     """Load term -> idf values from every file in DIRECTORY.

     Each line of each file is decoded with JSONValueProtocol and is
     expected to be an object with 'term' and 'idf' keys. The result is
     stored in ``self.idfs`` as a dict mapping term to idf. If a term
     appears more than once, the last occurrence read wins.
     """
     self.idfs = {}
     # One protocol object is enough; build it once instead of per line.
     protocol = JSONValueProtocol()
     for fname in os.listdir(DIRECTORY):
         # Use a context manager so each file handle is closed as soon
         # as it has been read, instead of leaking one per file.
         with open(os.path.join(DIRECTORY, fname)) as idf_file:
             for line in idf_file:
                 # read() returns a (key, value) pair; only the value
                 # is used here.
                 term_idf = protocol.read(line)[1]
                 self.idfs[term_idf['term']] = term_idf['idf']
Exemple #4
0
    def data(self, minimum=1, **kw):
        # Run the MRWordFreqJSON job over TEXT and collect every
        # [key, value] output pair whose value, as an int, is at least
        # `minimum`. Extra keyword arguments are accepted and ignored.
        job = MRWordFreqJSON()
        job.stdin = [
            JSONValueProtocol().write(None, text_line) for text_line in TEXT
        ]

        with job.make_runner() as runner:
            runner.run()
            parsed = (
                job.parse_output_line(raw) for raw in runner.stream_output()
            )
            # Built eagerly so the output is consumed while the runner
            # is still open.
            rows = [
                [key, value]
                for key, value in parsed
                if int(value) >= int(minimum)
            ]

        return {'data': rows}
Exemple #5
0
#s3_input_path = "s3://joeloren//iceval_out//input//datasets//"
# Locations for intermediate data: full s3:// URIs, plus what look like the
# matching prefixes relative to the bucket (TODO confirm how callers use the
# *_relative variants).
tmp_dir_out = "s3://joeloren/interim_out/"
tmp_dir_in = "s3://joeloren/interim_in/"
tmp_dir_in_relative = "interim_in/"
tmp_dir_out_relative = "interim_out/"

from mrjob.protocol import JSONValueProtocol, JSONProtocol
# Module-level protocol instances.
jvp = JSONValueProtocol()
jp = JSONProtocol()

from boto.s3.connection import S3Connection
import sys

# SECURITY: AWS credentials must never be committed to source control.
# With no explicit keys, boto resolves credentials from the
# AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY environment variables or from
# its config files. Any key pair that was previously hard-coded here should
# be treated as compromised and rotated.
c = S3Connection()

# Bucket handles are opened at import time.
bucket = c.get_bucket("joeloren")
datasets_bucket = c.get_bucket('joel_datasets')
Exemple #6
0
 def test_bad_data(self):
     # Text that is not valid JSON must be rejected when decoding.
     garbage = '{@#$@#!^&*$%^'
     self.assertCantDecode(JSONValueProtocol(), garbage)
Exemple #7
0
 def test_tuples_become_lists(self):
     # JSON has only arrays, so a tuple written out is expected to be
     # read back as a list.
     reader = JSONValueProtocol()
     encoded = JSONValueProtocol().write(None, (3, 4))
     expected = (None, [3, 4])
     self.assertEqual(expected, reader.read(encoded))
Exemple #8
0
    def test_uses_json_format(self):
        # The protocol's wire format is expected to be plain JSON text:
        # check both directions against a hand-written encoding.
        value = {
            'foo': {'bar': 3},
            'baz': None,
            'quz': ['a', 1],
        }
        encoded = '{"foo": {"bar": 3}, "baz": null, "quz": ["a", 1]}'

        decoded = JSONValueProtocol().read(encoded)
        self.assertEqual((None, value), decoded)

        written = JSONValueProtocol().write(None, value)
        self.assertEqual(encoded, written)
Exemple #9
0
 def test_round_trip_with_trailing_tab(self):
     # Only the value half of each fixture pair is relevant here; the
     # key passed to the protocol is always None.
     for _key, value in JSON_KEYS_AND_VALUES:
         protocol = JSONValueProtocol()
         self.assertRoundTripWithTrailingTabOK(protocol, None, value)
Exemple #10
0
 def test_round_trip(self):
     # Only the value half of each fixture pair is relevant here; the
     # key passed to the protocol is always None.
     for _key, value in JSON_KEYS_AND_VALUES:
         protocol = JSONValueProtocol()
         self.assertRoundTripOK(protocol, None, value)
Exemple #11
0
    def test_uses_json_format(self):
        # The protocol's wire format is expected to be JSON encoded as
        # bytes: check both directions against a hand-written encoding.
        value = {'foo': 'bar'}
        encoded = b'{"foo": "bar"}'

        decoded = JSONValueProtocol().read(encoded)
        self.assertEqual((None, value), decoded)

        written = JSONValueProtocol().write(None, value)
        self.assertEqual(encoded, written)