def test_ndarray_object_nesting():
    # Based on issue 53
    # With nested ndarrays
    before = zeros((2, 2), dtype=object)
    for i in ndindex(before.shape):
        before[i] = array([1, 2, 3])
    after = loads(dumps(before))
    assert before.shape == after.shape, \
        'shape of array changed for nested ndarrays:\n{}'.format(dumps(before, indent=2))
    assert before.dtype == after.dtype
    assert array_equal(before[0, 0], after[0, 0])
    # With nested lists
    before = zeros((2, 2), dtype=object)
    for i in ndindex(before.shape):
        before[i] = [1, 2, 3]
    after = loads(dumps(before))
    assert before.shape == after.shape, \
        'shape of array changed for nested ndarrays:\n{}'.format(dumps(before, indent=2))
    assert before.dtype == after.dtype
    assert array_equal(before[0, 0], after[0, 0])
def test_memory_order():
    arrC = array([[1., 2.], [3., 4.]], order='C')
    json = dumps(arrC)
    arr = loads(json)
    assert array_equal(arrC, arr)
    assert arrC.flags['C_CONTIGUOUS'] == arr.flags['C_CONTIGUOUS'] and \
        arrC.flags['F_CONTIGUOUS'] == arr.flags['F_CONTIGUOUS']
    arrF = array([[1., 2.], [3., 4.]], order='F')
    json = dumps(arrF)
    arr = loads(json)
    assert array_equal(arrF, arr)
    assert arrF.flags['C_CONTIGUOUS'] == arr.flags['C_CONTIGUOUS'] and \
        arrF.flags['F_CONTIGUOUS'] == arr.flags['F_CONTIGUOUS']
def test_dump_np_scalars():
    data = [
        int8(-27),
        complex64(exp(1) + 37j),
        (
            {
                'alpha': float64(-exp(10)),
                'str-only': complex64(-1 - 1j),
            },
            uint32(123456789),
            float16(exp(-1)),
            set((
                int64(37),
                uint64(-0),
            )),
        ),
    ]
    replaced = encode_scalars_inplace(deepcopy(data))
    json = dumps(replaced)
    rec = loads(json)
    assert data[0] == rec[0]
    assert data[1] == rec[1]
    assert data[2][0] == rec[2][0]
    assert data[2][1] == rec[2][1]
    assert data[2][2] == rec[2][2]
    assert data[2][3] == rec[2][3]
    assert data[2] == tuple(rec[2])
def test_dump_np_scalars():
    data = [
        int8(-27),
        complex64(exp(1) + 37j),
        (
            {
                'alpha': float64(-exp(10)),
                'str-only': complex64(-1 - 1j),
            },
            uint32(123456789),
            float16(exp(-1)),
            {
                int64(37),
                uint64(-0),
            },
        ),
    ]
    replaced = encode_scalars_inplace(deepcopy(data))
    json = dumps(replaced)
    rec = loads(json)
    print(data)
    print(rec)
    assert data[0] == rec[0]
    assert data[1] == rec[1]
    assert data[2][0] == rec[2][0]
    assert data[2][1] == rec[2][1]
    assert data[2][2] == rec[2][2]
    assert data[2][3] == rec[2][3]
    assert data[2] == tuple(rec[2])
def lc(self):
    ''' Makes learning curve for a player '''
    if self.lcScores is None:
        self.lcModel = LassoLarsCV()
        lastDate = self.dates[-1]
        X = self.XTrains[lastDate]
        y = self.yTrains[lastDate]
        N = len(X)
        chopOff = N - (N % 7)
        X = X.iloc[:chopOff]
        y = y.iloc[:chopOff]
        idxs = np.arange(chopOff)
        cvSplits = [(idxs[:i], idxs[i:]) for i in range(7, chopOff, 7)]
        trainSizes, trainScores, testScores = learning_curve(
            estimator=self.lcModel,
            X=X.as_matrix(),
            y=np.array(y),
            cv=cvSplits,
            train_sizes=[7],
            n_jobs=2,
        )
        trainSizes = [len(t[0]) for t in cvSplits]
        self.lcScores = dumps((trainSizes, trainScores, testScores))
    return self.lcScores
def test_compact_mode_unspecified():
    # Other tests may have raised the deprecation warning, so reset the cache here
    numpy_encode._warned_compact = False
    data = [array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]]), array([pi, exp(1)])]
    with warns(JsonTricksDeprecation):
        gz_json_1 = dumps(data, compression=True)
    # noinspection PyTypeChecker
    with warns(None) as captured:
        gz_json_2 = dumps(data, compression=True)
    assert len(captured) == 0
    assert gz_json_1 == gz_json_2
    json = gzip_decompress(gz_json_1).decode('ascii')
    assert json == '[{"__ndarray__": [[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]], "dtype": "float64", "shape": [2, 4], "Corder": true}, ' \
        '{"__ndarray__": [3.141592653589793, 2.718281828459045], "dtype": "float64", "shape": [2]}]'
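# The deprecation warning exercised above is only raised when 'ndarray_compact' is left
# unspecified together with compression. A minimal sketch of pinning the behaviour down
# explicitly, as the other tests in this collection do (the function name
# example_explicit_compact_mode is hypothetical, added only for illustration):
def example_explicit_compact_mode():
    from json_tricks import dumps
    from numpy import array, pi, exp
    data = [array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]]), array([pi, exp(1)])]
    # Stating compact mode explicitly avoids the JsonTricksDeprecation warning
    # and makes the chosen encoding independent of library defaults.
    return dumps(data, compression=True, properties={'ndarray_compact': True})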
def test_encode_compact_inline_compression():
    data = [array([
        [1.0, 2.0, 3.0, 4.0],
        [5.0, 6.0, 7.0, 8.0],
        [9.0, 10.0, 11.0, 12.0],
        [13.0, 14.0, 15.0, 16.0],
    ])]
    json = dumps(data, compression=False, properties={'ndarray_compact': True})
    assert 'b64.gz:' in json, 'If the overall file is not compressed and there are significant savings, then do inline gzip compression.'
    assert json == '[{"__ndarray__": "b64.gz:H4sIAAAAAAAC/2NgAIEP9gwQ4AChOKC0AJQWgdISUFoGSitAaSUorQKl1aC0BpTWgtI6UFoPShs4AABmfqWAgAAAAA==", "dtype": "float64", "shape": [4, 4], "Corder": true}]'
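# Rough illustration of what the 'b64.gz:' payload in the test above contains: the raw
# array buffer, base64-encoded and gzipped when that saves space (a plain 'b64:' prefix
# means no inline gzip). This helper is hypothetical and not part of json_tricks; in
# normal use loads() performs the decoding.
def decode_compact_payload(payload, dtype, shape, corder=True):
    import base64
    import gzip
    from numpy import frombuffer
    if payload.startswith('b64.gz:'):
        raw = gzip.decompress(base64.b64decode(payload[len('b64.gz:'):]))
    elif payload.startswith('b64:'):
        raw = base64.b64decode(payload[len('b64:'):])
    else:
        raise ValueError('not a compact ndarray payload')
    # Rebuild the ndarray from the buffer, honouring the stored memory order.
    return frombuffer(raw, dtype=dtype).reshape(shape, order='C' if corder else 'F')
# Example use on a parsed entry dict with the keys seen in the tests above:
#     decode_compact_payload(entry['__ndarray__'], entry['dtype'], entry['shape'], entry.get('Corder', True))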
def test_scalars_types():
    # from: https://docs.scipy.org/doc/numpy/user/basics.types.html
    encme = []
    for dtype in DTYPES:
        for val in (dtype(0),) + get_lims(dtype):
            assert isinstance(val, dtype)
            encme.append(val)
    json = dumps(encme, indent=2)
    rec = loads(json)
    assert encme == rec
def test_mixed_cls_arr():
    json = dumps(mixed_data)
    back = dict(loads(json))
    assert mixed_data.keys() == back.keys()
    assert (mixed_data['vec'] == back['vec']).all()
    assert (mixed_data['inst'].vec == back['inst'].vec).all()
    assert (mixed_data['inst'].nr == back['inst'].nr)
    assert (mixed_data['inst'].li == back['inst'].li)
    assert (mixed_data['inst'].inst.s == back['inst'].inst.s)
    assert (mixed_data['inst'].inst.dct == dict(back['inst'].inst.dct))
def test_array_types():
    # from: https://docs.scipy.org/doc/numpy/user/basics.types.html
    # see also `test_scalars_types`
    for dtype in DTYPES:
        vec = [array((dtype(0), dtype(exp(1))) + get_lims(dtype), dtype=dtype)]
        json = dumps(vec)
        assert dtype.__name__ in json
        rec = loads(json)
        assert rec[0].dtype == dtype
        assert array_equal(vec, rec)
def test_primitives():
    txt = dumps(deepcopy(npdata), primitives=True)
    data2 = loads(txt)
    assert isinstance(data2['vector'], list)
    assert isinstance(data2['matrix'], list)
    assert isinstance(data2['matrix'][0], list)
    assert data2['vector'] == npdata['vector'].tolist()
    assert (abs(array(data2['vector']) - npdata['vector'])).sum() < 1e-10
    assert data2['matrix'] == npdata['matrix'].tolist()
    assert (abs(array(data2['matrix']) - npdata['matrix'])).sum() < 1e-10
def test_encode_compact_cutoff():
    data = [array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]]), array([pi, exp(1)])]
    gz_json = dumps(data, compression=True, properties={'ndarray_compact': 5})
    json = gzip_decompress(gz_json).decode('ascii')
    assert json == '[{"__ndarray__": "b64:AAAAAAAA8D8AAAAAAAAAQAAAAAAAAAhAAAAAAAAAEEAAAAAAAAA' \
        'UQAAAAAAAABhAAAAAAAAAHEAAAAAAAAAgQA==", "dtype": "float64", "shape": [2, 4], "Corder": ' \
        'true}, {"__ndarray__": [3.141592653589793, 2.718281828459045], "dtype": "float64", "shape": [2]}]'
def test_empty():
    # issue https://github.com/mverleg/pyjson_tricks/issues/76
    datas = [
        zeros(shape=(1, 0)),
        zeros(shape=(0, 1)),
        zeros(shape=(0, 0)),
    ]
    for data in datas:
        json = dumps(data)
        assert_equal(loads(json), data, 'shape = {} ; json = {}'.format(data.shape, json))
def test_encode_disable_compact():
    data = [array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]]), array([pi, exp(1)])]
    gz_json = dumps(data, compression=True, properties={'ndarray_compact': False})
    json = gzip_decompress(gz_json).decode('ascii')
    assert json == '[{"__ndarray__": [[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]], "dtype": "float64", "shape": [2, 4], "Corder": true}, ' \
        '{"__ndarray__": [3.141592653589793, 2.718281828459045], "dtype": "float64", "shape": [2]}]'
def test_encode_enable_compact():
    data = [array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]]), array([pi, exp(1)])]
    gz_json = dumps(data, compression=True, properties={'ndarray_compact': True})
    json = gzip_decompress(gz_json).decode('ascii')
    assert json == '[{"__ndarray__": "b64:AAAAAAAA8D8AAAAAAAAAQAAAAAAAAAhAAAAAAAAAEEAAAAAAAAA' \
        'UQAAAAAAAABhAAAAAAAAAHEAAAAAAAAAgQA==", "dtype": "float64", "shape": [2, 4], "Corder": ' \
        'true}, {"__ndarray__": "b64:GC1EVPshCUBpVxSLCr8FQA==", "dtype": "float64", "shape": [2]}]'
def write_imp_CDF(Epoch, lat_lon_r, X, Y, Z, Label,
                  olat_olon_or, ObsX, ObsY, ObsZ, ObsFit, ObsName,
                  filename='impOut.json'):
    # former signature (tuple parameters removed, see PEP-3113):
    # def write_imp_json(Epoch, (Latitude, Longitude), X, Y, Z, Label,
    #                    (ObsLat, ObsLon), ObsFit, ObsName,
    #                    filename='impOut.json'):
    """Write imp files

    Write gridded interpolated magnetic perturbations (IMPs) to a JSON file.

    TODO: make ObsName, ObsLat, ObsLon, and ObsFit optional
    TODO: figure out how to store metadata...really, need to figure out an
          imp metadata standard and use it for all inputs and outputs.
    """
    # unpack former tuple arguments (see PEP-3113)
    Latitude, Longitude, Radius = lat_lon_r
    ObsLat, ObsLon, ObsRad = olat_olon_or

    data = {}
    data['Epoch'] = Epoch
    data['Latitude'] = Latitude
    data['Longitude'] = Longitude
    data['Radius'] = Radius
    data['X'] = X
    data['Y'] = Y
    data['Z'] = Z
    data['Label'] = Label
    data['ObsLat'] = ObsLat
    data['ObsLon'] = ObsLon
    data['ObsRad'] = ObsRad
    data['ObsX'] = ObsX
    data['ObsY'] = ObsY
    data['ObsZ'] = ObsZ
    data['ObsFit'] = ObsFit
    data['ObsName'] = ObsName

    with open(filename, 'w') as fh:
        fh.write(json_t.dumps(data))
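# The JSON written by write_imp_CDF above can be read back with json_tricks' loads.
# A minimal sketch, assuming the same json_t alias used above; the function name
# read_imp_json is hypothetical and not part of the original module:
def read_imp_json(filename='impOut.json'):
    """Read gridded interpolated magnetic perturbations (IMPs) from a JSON file
    written by write_imp_CDF and return them as a dictionary."""
    with open(filename, 'r') as fh:
        return json_t.loads(fh.read())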
def load(genre, load_dir, n_beats=16):
    '''
    This is the main driver for now. This function takes a directory and scans it
    for all of its songs. It calls analyze upon each song to fill the song class
    structure with attributes. Upon storing all song data, it will calculate a
    Kernel Density Estimator for the combined scatterplots.

    :param genre: (string) | input genre to store alongside the song
    :param load_dir: (string) | directory of the song folder
    :param n_beats: (int) | number of beats to record for the slice
    :return: None
    '''
    mp3s = []
    name = os.path.basename(os.path.normpath(load_dir))
    db = TinyDB(os.path.join(db_root, genre, ''.join(name + '-' + str(datetime.now())) + '.json'))
    target = os.path.abspath(load_dir)
    for root, subs, files in os.walk(target):
        for f in files:
            ext = os.path.splitext(f)[1]
            if ext in supported_ext:
                strip = os.path.splitext(f)[0]
                mp3s.append((strip, os.path.join(target, f)))
    print('Loaded {} songs'.format(len(mp3s)))
    update = update_info(len(mp3s))
    succ_count = 0
    fail_count = 0
    for m in mp3s:
        try:
            song = analyze_song(m, genre, n_beats, update)
            json = {'{}'.format(succ_count): jt.dumps(song)}
            db.insert(json)
            succ_count += 1
        except (IndexError, TypeError, ValueError) as e:
            verbose_ and update.state('{}!!!'.format(e), end='\n')
            fail_count += 1
    stdout.write('\x1b[2K')
    print('Analyzed {} songs. Failed {} songs.'.format(succ_count - fail_count, fail_count))
    clear_folder(temp_dir)
    return
def main(argv):
    inputfile = ''
    trainfile = 'pre_train_set.json'
    testfile = 'pre_test_set.json'
    filtered_set = []
    train_set = []
    test_set = []
    try:
        opts, args = getopt.getopt(argv, "hi:", ["ifile="])
    except getopt.GetoptError:
        print('build_set.py -i <inputfile.json>')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('oh_vectorize.py -i <inputfile.json>')
            sys.exit()
        elif opt in ("-i", "--ifile"):
            inputfile = arg
    uniq = set()
    vocab = set()
    tknzr = TweetTokenizer()
    with open(inputfile, 'r') as fpt:
        l = fpt.readlines()
        for row, d in enumerate(l):
            try:
                raw = json.loads(str(d, encoding='utf-8'))
            except TypeError:
                raw = json.loads(d)
            if raw['klass'] != 'NONE' and raw['id'] not in uniq:
                filtered_set.append({'text': raw['text'], 'klass': raw['klass']})
                uniq.add(raw['id'])
                # add the tokens of this tweet to the vocabulary
                vocab.update(tknzr.tokenize(raw['text']))
    with open(trainfile, 'w') as outfile:
        json_data = dumps(train_set)
        outfile.write(json_data)
uuid = server_service.create_new_repo(args.datasetName, "description")
logging.info('UUID:\n{}'.format(uuid))

# get node service
node_service = DVIDNodeService(server_address, uuid)

# get dataset size and store in dvid
shape = image_provider.getImageShape(args.ilpFilename, args.labelImagePath)
time_range = image_provider.getTimeRange(args.ilpFilename, args.labelImagePath)
if args.timeRange is not None:
    time_range = (max(time_range[0], args.timeRange[0]), min(time_range[1], args.timeRange[1]))
logging.info('Uploading time range {} to {}'.format(time_range, server_address))
keyvalue_store = "config"
node_service.create_keyvalue(keyvalue_store)
settings = {"shape": shape, "time_range": time_range}
node_service.put(keyvalue_store, "imageInfo", json.dumps(settings))

# upload all frames
for frame in range(time_range[0], time_range[1]):
    logging.info("Uploading frame {}".format(frame))
    label_image = image_provider.getLabelImageForFrame(args.ilpFilename, args.labelImagePath, frame)
    raw_image = image_provider.getImageDataAtTimeFrame(args.rawFilename, args.rawPath, frame)
    raw_name = "raw-{}".format(frame)
    seg_name = "seg-{}".format(frame)
    node_service.create_grayscale8(raw_name)
    node_service.put_gray3D(raw_name, dataToBlock(raw_image, dtype=np.uint8), (0, 0, 0))
    node_service.create_labelblk(seg_name)
    node_service.put_labels3D(seg_name, dataToBlock(label_image, dtype=np.uint64), (0, 0, 0))

# TODO: upload classifier
def __str__(self):
    return dumps(self)
def on_get(self, req, resp):
    query = req.query_string
    resp.status = falcon.HTTP_200
    sl = GetRecommendation().get_recommend(int(query))
    resp.body = np.dumps({'subtest': sl[0], 'recommendation': sl[1:]})
def test_dumps_loads_numpy():
    json = dumps(deepcopy(npdata))
    data2 = loads(json)
    _numpy_equality(data2)
def test_dumps_loads_numpy():
    json = dumps(npdata)
    data2 = loads(json)
    _numpy_equality(data2)
def test_encode_compact_no_inline_compression():
    data = [array([[1.0, 2.0], [3.0, 4.0]])]
    json = dumps(data, compression=False, properties={'ndarray_compact': True})
    assert 'b64.gz:' not in json, 'If the overall file is not compressed, but there are no significant savings, then do not do inline compression.'
    assert json == '[{"__ndarray__": "b64:AAAAAAAA8D8AAAAAAAAAQAAAAAAAAAhAAAAAAAAAEEA=", ' \
        '"dtype": "float64", "shape": [2, 2], "Corder": true}]'
def save_to_json(f, d):
    'Function to save a dictionary with numpy elements (d) to a text file (f) defined by the JSON typing'
    from json_tricks.np import dumps
    with open(f, 'w') as handle:
        handle.write(dumps(d, indent=2))
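# A matching loader is not shown in this snippet; a minimal sketch assuming the same
# json_tricks.np module (the function name load_from_json is hypothetical):
def load_from_json(f):
    'Function to load a dictionary with numpy elements from a JSON text file (f)'
    from json_tricks.np import loads
    with open(f, 'r') as handle:
        return loads(handle.read())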
def test_dtype_object():
    # Based on issue 64
    arr = array(['a', 'b', 'c'], dtype=object)
    json = dumps(arr)
    back = loads(json)
    assert array_equal(back, arr)
def test_compact():
    data = [array(list(2**(x + 0.5) for x in range(-30, +31)))]
    json = dumps(data, compression=True, properties={'ndarray_compact': True})
    back = loads(json)
    assert_equal(data, back)
def to_json(features, filepath=None):
    if filepath is None:
        return json.dumps(features, indent=4)
    else:
        with open(filepath, 'w') as fh:
            json.dump(features, fh, indent=4)
def read_imp_CDF(filename):
    """Read an imp CDF file

    Read gridded interpolated magnetic perturbations (IMPs) from a specially
    formatted CDF file.

    TODO:
    """
    cdf = pycdf.CDF(filename)
    Epoch = cdf['Epoch'][:]
    Latitude = cdf['Latitude'][:]
    Longitude = cdf['Longitude'][:]
    Radius = cdf['Radius'][:]