def json2generator(data, arrayKey=None):
    """Convert a JSON string into a generator over one array's items.

    Streaming with ijson avoids decoding the entire document into memory,
    which matters on very large payloads.  Only an array can be streamed
    (of arbitrary nesting/complexity).

    Args:
        data: the JSON document as a str (unicode input is encoded UTF-8).
        arrayKey: dotted path ("key1.key2") pointing at the array to
            iterate over; ``None`` means the top-level value is the array.

    Returns:
        A generator yielding the decoded items of the target array.
    """
    # All imports are function-local so the (optional) ijson dependency is
    # only required when this helper is actually used.
    import math
    from ijson import common
    from cStringIO import StringIO
    # The yajl2 backend is significantly faster, but some hosts lack the
    # shared library ("Yajl shared object cannot be found"), so fall back
    # through progressively slower backends down to the pure-Python one.
    try:
        import ijson.backends.yajl2_cffi as ijson
    except ImportError:
        try:
            from ijson.backends import yajl2 as ijson
        except ImportError:
            try:
                from ijson.backends import yajl as ijson
            except ImportError:
                from ijson.backends import python as ijson
    try:
        from itertools import imap  # Python 2: lazy map
    except ImportError:
        imap = map  # Python 3: built-in map is already lazy

    try:
        f = StringIO(data)
    except (TypeError, UnicodeEncodeError):
        # unicode input: cStringIO wants byte strings
        f = StringIO(data.encode('utf-8'))

    def _fixJSON(event):
        # ijson decodes every number as Decimal; convert back to native
        # int/float so downstream consumers see plain numeric types.
        if event[1] == 'number':
            value = float(event[2]) if math.modf(event[2])[0] else int(event[2])
            return (event[0], event[1], value)
        return event

    events = imap(_fixJSON, ijson.parse(f))
    return common.items(events, (arrayKey + '.item' if arrayKey else 'item'))
def test_items(self):
    """Items extracted under 'docs.item.meta' keep their decoded types."""
    parsed = common.parse(basic_parse(BytesIO(JSON)))
    meta = list(common.items(parsed, 'docs.item.meta'))
    expected = [
        [[1], {}],
        {'key': 'value'},
        None,
    ]
    self.assertEqual(meta, expected)
def items(file, prefix):
    '''
    Backend-specific wrapper for ijson.common.items.
    '''
    event_stream = parse(file)
    return common.items(event_stream, prefix)
def items(file, prefix):
    '''Backend-specific wrapper around ijson.common.items.'''
    event_stream = basic_parse(file)
    return common.items(event_stream, prefix)
def test_items(self):
    """items() yields each 'docs.item.meta' value, in document order."""
    event_stream = basic_parse(BytesIO(JSON))
    result = list(common.items(common.parse(event_stream), "docs.item.meta"))
    self.assertEqual(result, [[[1], {}], {"key": "value"}, None])
def items(file, prefix, map_type=None, **kwargs):
    '''
    Backend-specific wrapper for ijson.common.items.
    '''
    event_stream = parse(file, **kwargs)
    return common.items(event_stream, prefix, map_type=map_type)
def get_item(self, prefix=''):
    """Return the first item found under *prefix* (raises StopIteration when empty)."""
    item_iter = iter(common.items(self.parse(), prefix))
    return next(item_iter)
def items(file, prefix, do_translate=True):
    '''
    Backend-specific wrapper for ijson.common.items.
    '''
    event_stream = parse(file, do_translate=do_translate)
    return common.items(event_stream, prefix)
def process_file(file_url, org_feature_mappings):
    """Stream GeoJSON features from *file_url* into the Feature table.

    Features are parsed incrementally with ijson to keep memory bounded,
    inserted in batches of 10,000, and linked to organisations via
    *org_feature_mappings*.

    Args:
        file_url: HTTP(S) URL or local filesystem path of a GeoJSON file.
        org_feature_mappings: mapping of feature id -> organisation id;
            matched organisations get their ``feature_id`` updated.

    Returns:
        A human-readable summary string with the total feature count.
    """
    print('Loading', file_url)
    some_engine = create_engine(
        os.getenv('DATABASE_URL', 'postgresql://localhost/digital_land'))
    Session = sessionmaker(bind=some_engine)
    session = Session()
    total = 0
    f = None  # so `finally` can tell whether the source was ever opened
    try:
        if file_url.startswith('http'):
            f = urlopen(file_url)
        else:
            f = open(file_url, 'rb')

        # floaten converts ijson's Decimal numbers into native floats.
        events = map(floaten, ijson.parse(f))
        data = common.items(events, 'features.item')

        records = []
        orgs_to_save = []
        processed = set()

        def _flush():
            # Persist the current batch and report progress.
            session.bulk_insert_mappings(Feature, records)
            session.bulk_save_objects(orgs_to_save)
            session.commit()
            print('Saved', len(records), 'features from', file_url)

        for feature in data:
            props = feature['properties']
            feature_ref = props.get('feature')
            item = 'item:%s' % props.get('item')
            publication = props.get('publication')
            # Fall back to the item reference when no feature id is present.
            feature_id = feature_ref if feature_ref is not None else item
            if session.query(Feature).get(
                    feature_id) is None and feature_id not in processed:
                geo = json.dumps(feature['geometry'])
                # NOTE(review): string-built SQL — json_to_geo_query is
                # interpolated with feature data; consider a parameterized
                # query if the source is not fully trusted.
                geometry = session.execute(json_to_geo_query %
                                           geo).fetchone()[0]
                if feature_id in org_feature_mappings:
                    org = session.query(Organisation).get(
                        org_feature_mappings[feature_id])
                    org.feature_id = feature_id
                    orgs_to_save.append(org)
                records.append(
                    dict(feature=feature_id,
                         data=feature,
                         geometry=geometry,
                         item=item,
                         publication=publication))
                processed.add(feature_id)
                # Flush every 10,000 records to bound memory usage.
                if len(records) % 10000 == 0:
                    _flush()
                    total += len(records)
                    records = []
                    orgs_to_save = []

        # Persist the final partial batch.
        _flush()
        total += len(records)
        print('Finished loading', file_url)
    except Exception as e:
        # Best-effort loader: report the failure and return whatever was
        # committed before the error.
        print(e)
        print('Error loading', file_url)
    finally:
        if f is not None:
            f.close()
        session.close()  # release the DB connection back to the pool
    return 'Loaded total of %d features from %s' % (total, file_url)