def test_numerical_keys_become_strs(self):
    """Numeric dict keys come back as strings after a JSON round trip."""
    encoded = JSONValueProtocol().write(None, {3: 4})
    decoded = JSONValueProtocol().read(encoded)

    # JSON object keys are always strings, so 3 is read back as '3'
    self.assertEqual((None, {'3': 4}), decoded)
def test_bad_keys_and_values(self):
    """Values that have no JSON representation must fail to encode."""
    # dictionaries have to have strings as keys
    tuple_keyed_dict = {(1, 2): 3}
    self.assertCantEncode(JSONValueProtocol(), None, tuple_keyed_dict)

    # only unicodes (or bytes in utf-8) are allowed
    non_utf8_text = '\xe9'
    self.assertCantEncode(JSONValueProtocol(), None, non_utf8_text)

    # sets don't exist in JSON
    empty_set = set()
    self.assertCantEncode(JSONValueProtocol(), None, empty_set)

    # Point class has no representation in JSON
    point = Point(1, 4)
    self.assertCantEncode(JSONValueProtocol(), None, point)
def reducer_init(self):
    """Build the term -> idf lookup table in ``self.idfs``.

    Every entry of DIRECTORY is opened and read line by line. Each line is
    decoded with JSONValueProtocol, and the decoded value's 'term' and
    'idf' fields are stored as a key/value pair in ``self.idfs``.
    """
    self.idfs = {}
    protocol = JSONValueProtocol()  # one decoder reused for every line

    for fname in os.listdir(DIRECTORY):
        # ``with`` closes each file once it has been consumed; the previous
        # version never closed the handles it opened.
        with open(os.path.join(DIRECTORY, fname)) as idf_file:
            for line in idf_file:
                # read() returns a (key, value) pair; only the value is used
                term_idf = protocol.read(line)[1]
                self.idfs[term_idf['term']] = term_idf['idf']
def data(self, minimum=1, **kw):
    """Run MRWordFreqJSON over TEXT and return the filtered output.

    Each element of TEXT is encoded with JSONValueProtocol and fed to the
    job as stdin. Output pairs whose value, converted to int, is at least
    ``minimum`` are kept. Extra keyword arguments are accepted and ignored.

    Returns a dict with a single 'data' key holding a list of
    ``[key, value]`` lists.
    """
    job = MRWordFreqJSON()
    job.stdin = [JSONValueProtocol().write(None, text) for text in TEXT]

    with job.make_runner() as runner:
        runner.run()
        # parse lazily so each line is parsed and filtered in output order
        pairs = (job.parse_output_line(raw)
                 for raw in runner.stream_output())
        rows = [[key, value] for key, value in pairs
                if int(value) >= int(minimum)]

    return {'data': rows}
#s3_input_path = "s3://joeloren//iceval_out//input//datasets//"
import sys

from boto.s3.connection import S3Connection
from mrjob.protocol import JSONValueProtocol, JSONProtocol

# S3 URIs for interim output and input data.
tmp_dir_out = "s3://joeloren/interim_out/"
tmp_dir_in = "s3://joeloren/interim_in/"
# Same directory names without the bucket URI
# (presumably key prefixes inside the bucket -- TODO confirm against callers).
tmp_dir_in_relative = "interim_in/"
tmp_dir_out_relative = "interim_out/"

# Shared protocol instances.
jvp = JSONValueProtocol()
jp = JSONProtocol()

# SECURITY: the AWS access key and secret key used to be hard-coded here.
# Credentials must never live in source control. With no arguments, boto
# looks them up itself (AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY environment
# variables or the boto config file). The previously committed key pair must
# be treated as compromised and revoked.
c = S3Connection()
bucket = c.get_bucket("joeloren")
datasets_bucket = c.get_bucket('joel_datasets')
def test_bad_data(self):
    """Input that is not valid JSON must fail to decode."""
    malformed = '{@#$@#!^&*$%^'
    protocol = JSONValueProtocol()
    self.assertCantDecode(protocol, malformed)
def test_tuples_become_lists(self):
    """Tuples come back as lists after a JSON round trip."""
    encoded = JSONValueProtocol().write(None, (3, 4))
    decoded = JSONValueProtocol().read(encoded)

    # JSON has a single array type, so the tuple is read back as a list
    self.assertEqual((None, [3, 4]), decoded)
def test_uses_json_format(self):
    """The protocol reads and writes plain JSON text."""
    VALUE = {'foo': {'bar': 3}, 'baz': None, 'quz': ['a', 1]}
    ENCODED = '{"foo": {"bar": 3}, "baz": null, "quz": ["a", 1]}'

    # decoding yields a (key, value) pair whose key is None
    decoded = JSONValueProtocol().read(ENCODED)
    self.assertEqual((None, VALUE), decoded)

    # encoding must reproduce the exact JSON text
    written = JSONValueProtocol().write(None, VALUE)
    self.assertEqual(ENCODED, written)
def test_round_trip_with_trailing_tab(self):
    """Check the trailing-tab round trip for every sample value."""
    # only the value half of each pair is used; the key is always None
    for _key, value in JSON_KEYS_AND_VALUES:
        protocol = JSONValueProtocol()
        self.assertRoundTripWithTrailingTabOK(protocol, None, value)
def test_round_trip(self):
    """Check the round trip for every sample value."""
    # only the value half of each pair is used; the key is always None
    for _key, value in JSON_KEYS_AND_VALUES:
        protocol = JSONValueProtocol()
        self.assertRoundTripOK(protocol, None, value)
def test_uses_json_format(self):
    """The protocol reads and writes JSON encoded as bytes."""
    VALUE = {'foo': 'bar'}
    ENCODED = b'{"foo": "bar"}'

    # decoding yields a (key, value) pair whose key is None
    decoded = JSONValueProtocol().read(ENCODED)
    self.assertEqual((None, VALUE), decoded)

    # encoding must reproduce the exact bytes
    written = JSONValueProtocol().write(None, VALUE)
    self.assertEqual(ENCODED, written)