def attach_all_geotag(count=100):
    """Update geotags for the top-priority usernames, then list the gaps.

    Opens the ``repos.json`` and ``geotag.json`` stores (both partitioned
    by ``username``), orders usernames with ``sort_by_priotity`` and calls
    ``update`` on the first ``count`` of them. Afterwards every repo record
    that still has no geotag entry is printed with a running counter.

    Args:
        count: Maximum number of usernames to pass to ``update`` in this run.
    """
    repos_db = MicroDB(jsons_dir+'repos.json', partition_keys=['username', ])
    geotag_db = MicroDB(jsons_dir+'geotag.json', partition_keys=['username', ])

    prioritized = sort_by_priotity(repos_db, geotag_db)
    batch = prioritized[:count]
    for username in tqdm(batch):
        update(username, geotag_db)

    # Report what is still missing after this batch of updates.
    missing = 0
    for repo in repos_db.all():
        if geotag_db.get(repo) is not None:
            continue
        missing += 1
        print(missing, repo)
def exact_yet_stared_succeed_repos():
    """Return repo records whose gif succeeded but that have zero stars.

    Opens the ``repos.json`` and ``gifs.json`` stores (both partitioned by
    ``full_name``) and keeps each repo record whose matching gif record has
    a truthy ``'success'`` value and whose ``'stargazers_count'`` equals 0.

    Returns:
        list: The matching repo records, in the order ``mdb_repos.all()``
        yields them.
    """
    mdb_repos = MicroDB(jsons_dir + 'repos.json', partition_keys=['full_name', ])
    mdb_gifs = MicroDB(jsons_dir + 'gifs.json', partition_keys=['full_name', ])

    yet_stared_succeed_repos = []
    for d in mdb_repos.all():
        gifjson = mdb_gifs.get(d)
        if gifjson is None:
            # A repo with no gif record cannot count as a succeeded gif;
            # skip it instead of failing on ``None['success']``.
            continue
        if gifjson['success'] and d['stargazers_count'] == 0:
            yet_stared_succeed_repos.append(d)
    return yet_stared_succeed_repos
print('mdb_repos', len(mdb_repos)) mdb_geotags = MicroDB(jsons_dir + 'geotag.json', partition_keys=[ 'username', ]) print('mdb_geotags', len(mdb_geotags)) mdb_gifs = MicroDB(jsons_dir + 'gifs.json', partition_keys=[ 'full_name', ]) print('mdb_gifs', len(mdb_gifs)) mdb_skills = MicroDB(jsons_dir + 'skills.json', partition_keys=[ 'username', ]) print('mdb_skills', len(mdb_skills)) merged_db = [] for d in mdb_repos.all(): gif_json = mdb_gifs.get({'full_name': d['full_name']}) geotag_json = mdb_geotags.get({'username': d['username']}, mdb_geotags.get( {'username': d['username'].lower()})) d['gif_path'] = gif_json['filepath'] if d['gif_path']: d['gif_path'] = d['gif_path'].replace(htmls_root_dir, f'/thumbnailed-{topic}s/') d['gif_success'] = gif_json['success'] try: d['geotags'] = geotag_json['geotags'] except Exception as e: print(d, geotag_json) raise d['homepage_exist'] = bool(d['homepage']) d['skills'] = mdb_skills.get(d, {}).get('skills', list())