def dump_orgs():
    """Post-process the raw organisation records and dump them to JSON_FNAME.

    Records are loaded from RAW_JSON_FNAME. For each record:

    * every non-empty rubric field (TARGETS, ACTIVITIES, PROJECTS, SERVICES)
      is replaced by ``str_to_list`` of its value;
    * a non-empty CODES field is replaced by ``str_to_list`` of its value and
      the individual code fields are filled via ``find_code``; otherwise the
      individual code fields are set to ``None``;
    * two records, identified by their SOURCE URL, get hard-coded code values.
    """
    rubrics = [TARGETS, ACTIVITIES, PROJECTS, SERVICES]
    # Target field -> label handed to find_code, in assignment order.
    code_labels = (
        (OGRN, r'ОГРН'),
        (INN, r'ИНН'),
        (KPP, r'КПП'),
        (OKPO, r'ОКПО'),
        (OKATO, r'ОКАТО'),
    )
    nasledie_url = (
        'http://nko71.ru/katalog-nko/nko-po-uslugam/'
        'sotsialnaya-pomoshch-i-podderzhka/nasledie.html'
    )
    orthodox_women_url = (
        'http://nko71.ru/katalog-nko/nko-po-gruppam-naseleniya/'
        'zhenshchiny-semi-s-detmi/soyuz-'
        'pravoslavnykh-zhenshchin.html'
    )

    records = load_utf_json(RAW_JSON_FNAME)
    for record in records:
        for rubric in rubrics:
            value = record[rubric]
            if item_present(value) if False else value:
                record[rubric] = str_to_list(value)

        unparsed = record[CODES]
        if not unparsed:
            for field, _ in code_labels:
                record[field] = None
        else:
            parsed = str_to_list(unparsed)
            record[CODES] = parsed
            for field, label in code_labels:
                record[field] = find_code(parsed, label)

        # Manual corrections for specific source pages.
        # NOTE(review): assumed to apply whether or not CODES was present -
        # confirm against the intended nesting.
        if record[SOURCE] == nasledie_url:
            record[OGRN] = '1097100001129'
        if record[SOURCE] == orthodox_women_url:
            record[INN] = '7116511663'
            record[KPP] = '711601001'

    dump_utf_json(records, JSON_FNAME)
def dump_raw_orgs():
    """Scrape every URL from URL_JSON_FNAME and dump the results to RAW_JSON_FNAME.

    For each scraped record the zero-based position and the record's ORGNAME
    value are printed, followed by a blank line.
    """
    scraped = []
    for position, url in enumerate(load_utf_json(URL_JSON_FNAME)):
        record = scrape_org(url)
        print(position)
        print(record[ORGNAME])
        print()
        scraped.append(record)
    dump_utf_json(scraped, RAW_JSON_FNAME)
def add_fields(source_json, target_json=None, **fields):
    """Set the given fields on every entry of a JSON file and dump the result.

    Args:
        source_json: File passed to ``load_utf_json`` to obtain the entries.
        target_json: File the updated entries are dumped to; when falsy,
            ``source_json`` is used instead.
        **fields: Field name / value pairs assigned on each entry,
            overwriting any existing value under the same name.
    """
    destination = target_json or source_json
    records = load_utf_json(source_json)
    total = len(records)
    for position, record in enumerate(records, start=1):
        # Carriage return keeps the progress counter on a single line.
        print("\r{} / {}".format(position, total), end='', flush=True)
        for name, value in fields.items():
            record[name] = value
    print("\nDumping...")
    dump_utf_json(records, destination)
def check_codes():
    """Print the source and the code fields of every record in JSON_FNAME.

    Each record is printed as its one-based position, its SOURCE value, then
    one "<field name> <value>" line per code field, then a blank line.
    """
    code_fields = (OGRN, INN, KPP, OKPO, OKATO)
    for position, record in enumerate(load_utf_json(JSON_FNAME), start=1):
        print(position)
        print(record[SOURCE])
        for field in code_fields:
            print(field, record[field])
        print()
def check_orgs(num):
    """Print ``num`` randomly chosen records from JSON_FNAME for inspection.

    Indices are drawn independently with ``random.randrange``, so the same
    record may be shown more than once. For each draw the zero-based draw
    number and the chosen index are printed, then every field listed in
    MAPPER with its value, then a starred separator.

    Args:
        num: Number of random draws to make.
    """
    records = load_utf_json(JSON_FNAME)
    for draw in range(num):
        chosen = random.randrange(0, len(records))
        print(draw, '-', chosen)
        record = records[chosen]
        for field in MAPPER:
            print(field)
            print(record[field])
            print()
        # NOTE(review): separator assumed to be printed once per record,
        # not once per field - confirm.
        print()
        print('************')
        print()
def duplicate_entry(source_json, target_json=None, **fields):
    """Duplicate the first entry whose fields all match and dump the result.

    Entries are scanned in order. The first entry for which every given field
    equals the given value is inserted again directly after itself, the list
    is dumped, and the function returns. If nothing matches, a message is
    printed and nothing is dumped.

    Args:
        source_json: File passed to ``load_utf_json`` to obtain the entries.
        target_json: File the entries are dumped to; when falsy,
            ``source_json`` is used instead.
        **fields: Field name / value pairs an entry must match. With no
            fields given, the first entry matches.
    """
    destination = target_json or source_json
    records = load_utf_json(source_json)
    total = len(records)
    for position, record in enumerate(records):
        following = position + 1
        # Carriage return keeps the progress counter on a single line.
        print("\r{} / {}".format(following, total), end='', flush=True)
        if any(record[name] != wanted for name, wanted in fields.items()):
            continue
        # The same object is inserted again, not a copy.
        records.insert(following, record)
        print("\nFound at {}, inserted at {}!".format(position, following))
        print("Dumping...")
        dump_utf_json(records, destination)
        return
    print("\nNo such entry!")