def test():
    """Exercise a MicroDB save/reload round trip against ``filename``.

    Steps, in order: remove any existing file, erase the store, upsert
    every record from ``test_data`` and save, reload and print the
    records, save in grid form, reload and print again, then upsert a
    record carrying an additional ``'extra-info'`` key and attempt both
    ``save_as_grid()`` (any exception is printed, not raised) and
    ``save()``.
    """
    # Start from a clean slate on disk.
    if os.path.exists(filename):
        os.remove(filename)

    db = MicroDB(filename, testpartition_keys)
    db.erase_all()

    # Re-open the store and fill it with the fixture records.
    db = MicroDB(filename, testpartition_keys)
    for record in test_data:
        db.upsert(record)
    db.save()
    db.pprint_all()

    # Reload, dump what was persisted, then write the grid form.
    reloaded = MicroDB(filename, testpartition_keys)
    for record in reloaded.all():
        print(record)
    reloaded.save_as_grid()

    # Reload once more and dump the records again.
    after_grid = MicroDB(filename, testpartition_keys)
    for record in after_grid.all():
        print(record)

    # Upsert a record that carries one more key than the ones above.
    extended = MicroDB(filename, testpartition_keys)
    extra_record = {
        'job': 'study',
        'name': 'Bob',
        'status': 'undone',
        'extra-info': 'hogehoge',
    }
    extended.upsert(extra_record)
    try:
        extended.save_as_grid()
    except Exception as err:
        # Failure of the grid save is reported, not fatal.
        print(err)
    extended.save()
    # NOTE(review): the original ends in a bare reference to this object,
    # which looks like a truncated statement — confirm what followed.
    extended
def attach_all_geotag(count=100):
    """Update geotags for the top-priority users, then report the gaps.

    Opens the repos and geotag stores (both partitioned by
    ``'username'``), orders usernames with ``sort_by_priotity`` and calls
    ``update`` for the first ``count`` of them. Afterwards every repo
    record for which the geotag store returns ``None`` is printed,
    prefixed with a running 1-based counter.

    Args:
        count: Maximum number of usernames to update in this run.
    """
    repos_db = MicroDB(jsons_dir+'repos.json', partition_keys=['username', ])
    geotag_db = MicroDB(jsons_dir+'geotag.json', partition_keys=['username', ])

    prioritized = sort_by_priotity(repos_db, geotag_db)
    for name in tqdm(prioritized[:count]):
        update(name, geotag_db)

    # Lazily walk the repos so lookups and prints interleave per record.
    untagged = (rec for rec in repos_db.all() if geotag_db.get(rec) is None)
    for position, rec in enumerate(untagged, start=1):
        print(position, rec)
def exact_yet_stared_succeed_repos():
    """Return repo records with a successful gif and zero stargazers.

    Opens the repos and gifs stores (both partitioned by ``'full_name'``)
    and looks up the gif record for every repo record.

    Returns:
        list: Repo records whose gif record has a truthy ``'success'``
        value and whose own ``'stargazers_count'`` equals 0, in the order
        yielded by ``mdb_repos.all()``.
    """
    mdb_repos = MicroDB(jsons_dir + 'repos.json', partition_keys=[
        'full_name',
    ])
    mdb_gifs = MicroDB(jsons_dir + 'gifs.json', partition_keys=[
        'full_name',
    ])

    yet_stared_succeed_repos = []
    for d in mdb_repos.all():
        gifjson = mdb_gifs.get(d)
        if gifjson is None:
            # No gif record for this repo: it cannot count as a success.
            # Without this guard the subscript below raises TypeError.
            # NOTE(review): assumes MicroDB.get returns None on a miss,
            # as its dict-style use elsewhere suggests — confirm.
            continue
        if gifjson['success'] and d['stargazers_count'] == 0:
            yet_stared_succeed_repos.append(d)
    return yet_stared_succeed_repos
def del_wrong_data():
    """Delete gif records whose ``'success'`` value is a string.

    Scans the gifs store (partitioned by ``'full_name'``), printing each
    offending record as it is found, then prints how many were found and
    the full list. Each one is then removed by the key ``gen_key``
    produces for it (the key is printed first), and the store is saved.
    """
    gifs_db = MicroDB(jsons_dir + 'gifs.json', partition_keys=[
        'full_name',
    ])

    # Collect first, delete later, so the store is not mutated mid-scan.
    bad_records = []
    for record in gifs_db.all():
        if not isinstance(record['success'], str):
            continue
        bad_records.append(record)
        print(record)

    print(len(bad_records))
    print(bad_records)

    for record in bad_records:
        record_key = gifs_db.gen_key(record)
        print(record_key)
        del gifs_db[record_key]
    gifs_db.save()
]) print('mdb_repos', len(mdb_repos)) mdb_geotags = MicroDB(jsons_dir + 'geotag.json', partition_keys=[ 'username', ]) print('mdb_geotags', len(mdb_geotags)) mdb_gifs = MicroDB(jsons_dir + 'gifs.json', partition_keys=[ 'full_name', ]) print('mdb_gifs', len(mdb_gifs)) mdb_skills = MicroDB(jsons_dir + 'skills.json', partition_keys=[ 'username', ]) print('mdb_skills', len(mdb_skills)) merged_db = [] for d in mdb_repos.all(): gif_json = mdb_gifs.get({'full_name': d['full_name']}) geotag_json = mdb_geotags.get({'username': d['username']}, mdb_geotags.get( {'username': d['username'].lower()})) d['gif_path'] = gif_json['filepath'] if d['gif_path']: d['gif_path'] = d['gif_path'].replace(htmls_root_dir, f'/thumbnailed-{topic}s/') d['gif_success'] = gif_json['success'] try: d['geotags'] = geotag_json['geotags'] except Exception as e: print(d, geotag_json) raise d['homepage_exist'] = bool(d['homepage'])