def attach_all_geotag(count=100):
    """Update geotags for the highest-priority users, then report the gaps.

    Opens the repos and geotag stores (both partitioned by ``username``),
    asks ``sort_by_priotity`` for an ordered list of usernames and calls
    ``update`` for the first ``count`` of them, showing a progress bar.
    Afterwards every repo record whose geotag lookup returns ``None`` is
    printed together with a running 1-based counter.

    Args:
        count: How many usernames from the front of the prioritized list
            are updated. Defaults to 100.
    """
    repos_db = MicroDB(jsons_dir+'repos.json', partition_keys=['username'])
    geotag_db = MicroDB(jsons_dir+'geotag.json', partition_keys=['username'])

    prioritized = sort_by_priotity(repos_db, geotag_db)
    for username in tqdm(prioritized[:count]):
        update(username, geotag_db)

    # Lazily walk the repos so each lookup is immediately followed by its
    # report line, exactly as a plain loop with a manual counter would do.
    untagged = (repo for repo in repos_db.all() if geotag_db.get(repo) is None)
    for position, repo in enumerate(untagged, start=1):
        print(position, repo)
# Example #2
0
def exact_yet_stared_succeed_repos():
    """Return the repo records with a successful gif and zero stargazers.

    Opens the repos and gifs stores (both partitioned by ``full_name``) and
    walks every repo record. A record is kept when its gif entry has a
    truthy ``'success'`` value and its ``'stargazers_count'`` equals 0.
    Repos for which the gifs store has no entry are skipped instead of
    crashing on a ``None`` subscript.

    Returns:
        list: The matching repo records, in the order ``mdb_repos.all()``
        yields them.
    """
    mdb_repos = MicroDB(jsons_dir + 'repos.json', partition_keys=['full_name'])
    mdb_gifs = MicroDB(jsons_dir + 'gifs.json', partition_keys=['full_name'])

    yet_stared_succeed_repos = []
    for d in mdb_repos.all():
        gifjson = mdb_gifs.get(d)
        if gifjson is None:
            # No gif record for this repo: it cannot count as "succeeded",
            # and subscripting None would raise TypeError.
            continue
        if gifjson['success'] and d['stargazers_count'] == 0:
            yet_stared_succeed_repos.append(d)
    return yet_stared_succeed_repos
# Report the size of the repos store, then open each auxiliary store and
# report its size right after it is opened.
print('mdb_repos', len(mdb_repos))

# Geotag records, partitioned by username.
mdb_geotags = MicroDB(jsons_dir + 'geotag.json', partition_keys=['username'])
print('mdb_geotags', len(mdb_geotags))

# Gif records, partitioned by the repo's full_name.
mdb_gifs = MicroDB(jsons_dir + 'gifs.json', partition_keys=['full_name'])
print('mdb_gifs', len(mdb_gifs))

# Skill records, partitioned by username.
mdb_skills = MicroDB(jsons_dir + 'skills.json', partition_keys=['username'])
print('mdb_skills', len(mdb_skills))

# Accumulator for the merged records.
merged_db = list()
# Enrich every repo record in place with gif, geotag, homepage and skills
# fields taken from the other stores.
for d in mdb_repos.all():
    # Gif record for this repo, looked up by full_name.
    # NOTE(review): if get() returns None for a repo without a gif record,
    # the subscripts below raise TypeError — confirm every repo has one.
    gif_json = mdb_gifs.get({'full_name': d['full_name']})
    # Geotag record looked up by the username as stored; the second argument
    # is a lookup with the lower-cased username and is evaluated on every
    # iteration. NOTE(review): presumably the second argument is the default
    # returned when the first lookup misses — confirm against MicroDB.get.
    geotag_json = mdb_geotags.get({'username': d['username']},
                                  mdb_geotags.get(
                                      {'username': d['username'].lower()}))
    d['gif_path'] = gif_json['filepath']
    if d['gif_path']:
        # Replace every occurrence of htmls_root_dir in the path with the
        # '/thumbnailed-<topic>s/' string.
        d['gif_path'] = d['gif_path'].replace(htmls_root_dir,
                                              f'/thumbnailed-{topic}s/')
    d['gif_success'] = gif_json['success']
    try:
        d['geotags'] = geotag_json['geotags']
    except Exception as e:
        # Print the repo record and the geotag lookup result for debugging,
        # then re-raise the original exception unchanged.
        print(d, geotag_json)
        raise
    # True when the record's 'homepage' value is truthy.
    d['homepage_exist'] = bool(d['homepage'])
    # Skills lookup with {} passed as the second argument to get(); the
    # 'skills' key then falls back to an empty list when absent
    # (assuming the returned record is a dict — TODO confirm).
    d['skills'] = mdb_skills.get(d, {}).get('skills', list())