def write_posts_to_db(tracks_df, _p_):
    """Upsert every row of ``tracks_df`` into the ``auxsauce`` tracks collection.

    Each row becomes (or updates) the document whose ``_id`` is
    ``"<user_permalink>/<track_permalink>"``: the row is added to the
    document's ``updates`` set (``$addToSet``), stored as ``current``, and
    ``dt_last_update`` is set to the current epoch second.

    Args:
        tracks_df: object exposing ``to_dict(orient='records')`` (presumably a
            pandas DataFrame); every record must carry ``user_permalink`` and
            ``track_permalink``.
        _p_: parameter dict; only ``_p_['tracks_or_reposts']`` is read. Only
            the value ``'tracks'`` is implemented -- any other value prints a
            notice and terminates the interpreter.
    """
    import sys

    tracks_list = tracks_df.to_dict(orient='records')
    mode = _p_['tracks_or_reposts']
    if mode != 'tracks':
        print('havent gotten here yet')
        # sys.exit() rather than the site-provided exit(), which is meant for
        # interactive sessions and is not guaranteed to exist.
        sys.exit()

    if not tracks_list:
        # nothing to write; avoid touching the database at all
        return

    # Acquire the collection once instead of once per row.
    coll = login_mongo('auxsauce')[mode]
    for track in tracks_list:
        t_p = '{0}/{1}'.format(track['user_permalink'], track['track_permalink'])
        coll.update_one(
            {'_id': t_p},
            {
                '$addToSet': {'updates': track},
                '$set': {'dt_last_update': int(time()), 'current': track},
            },
            upsert=True)
Exemple #2
0
def add_to_drop_list_and_remove(perma):
    """Record a permalink in ``dropped_profiles`` and remove it from ``all_profiles``.

    Upserts ``{'permalink': perma}`` into ``auxsauce.dropped_profiles`` and
    then deletes at most one ``auxsauce.all_profiles`` document carrying the
    same permalink.

    Args:
        perma: permalink value identifying the profile to drop.
    """
    # One database handle serves both collections (login_mongo was previously
    # called once per statement).
    db = login_mongo("auxsauce")

    # add to drop list; the upsert creates the entry only if it is missing
    db['dropped_profiles'].update_one(
        {'permalink': perma},
        {'$set': {'permalink': perma}},
        upsert=True)

    # remove from all profiles
    db['all_profiles'].delete_one({'permalink': perma})
Exemple #3
0
def delete_column_from_collection(db_name, coll_name, drop_col_list):
    """Interactively confirm, then ``$unset`` one field on matching documents.

    Blocks on ``input`` until the operator answers ``yes`` (case-insensitive).
    Afterwards every document matched by
    ``{drop_col_list: {'$not': {'$type': 0}}}`` is fetched and the field is
    unset on each one by ``_id``, printing progress along the way.

    Args:
        db_name: database name handed to ``login_mongo``.
        coll_name: name of the collection to modify.
        drop_col_list: despite its name, this is used as a single field name
            (it is a dict key in both the query and the ``$unset``), so it
            must be hashable -- in practice a string.
    """
    yn = '0'
    while yn.lower() != 'yes':
        yn = input(
            'would you like to delete the column(s) "{0}" in collection "{1}/{2}"?: '
            .format(str(drop_col_list), db_name, coll_name))

    coll = login_mongo(db_name)[coll_name]

    # NOTE(review): 0 is not one of the documented BSON type codes -- confirm
    # that this filter selects the intended documents.
    coll_list = list(coll.find({drop_col_list: {'$not': {'$type': 0}}}))
    total = len(coll_list)
    print(total)

    for ii, _profile_ in enumerate(coll_list):
        # show progress together with the value that is about to be removed
        print('{0}/{1}'.format(ii, total), _profile_[drop_col_list])
        coll.update_one({'_id': _profile_['_id']},
                        {'$unset': {drop_col_list: ''}})
Exemple #4
0
def check_coll():
    """Debug helper: print one hard-coded track document and its updates.

    Looks up ``_id == '100percentpurerecords/luca-morris-mozzy-rekorder'`` in
    ``auxsauce.tracks`` and prints the raw result followed by a DataFrame
    built from the document's ``updates`` field. Prints a notice and returns
    when the document is not found.
    """
    from pandas import DataFrame

    t_coll = login_mongo('auxsauce')['tracks']
    t_list = list(
        t_coll.find(
            {'_id': '100percentpurerecords/luca-morris-mozzy-rekorder'}))

    if not t_list:
        # t_list[0] below would raise IndexError on an empty result
        print('search results are empty')
        return

    t_df = DataFrame(t_list[0]['updates'])
    print('\n', t_list, '\n', t_df)
Exemple #5
0
def show_db_stats():
    """Print the document count and ``collstats`` size of every ``auxsauce`` collection.

    For each collection the name is printed first, then (for
    ``all_profiles`` only) the shape of a DataFrame built from the ``_id``
    values of documents whose ``dt_updated`` is greater than 1551956071, and
    finally the document count and the ``size`` reported by ``collstats``
    with a scale of 1024 * 1024.
    """
    from pandas import DataFrame

    # One handle for the whole run instead of a fresh login per collection.
    db = login_mongo('auxsauce')

    # NOTE(review): per the PyMongo changelog, Database.collection_names() and
    # Collection.count() are deprecated since 3.7 and removed in 4.0
    # (replacements: list_collection_names() / count_documents({})). Kept
    # as-is because the installed driver version is not visible from here.
    all_colls = db.collection_names()

    for coll_name in all_colls:
        print('\n\n', coll_name)
        coll = db[coll_name]
        coll_ct = coll.count()
        coll_mb = db.command({
            'collstats': coll_name,
            'scale': 1024 * 1024
        })['size']

        # check the last update time
        if coll_name == 'all_profiles':
            coll_list = list(
                coll.find({'dt_updated': {
                    '$gt': 1551956071
                }}, {'_id': 1}))
            # Collect the ids directly so an empty result yields an empty
            # frame instead of a KeyError on a missing '_id' column.
            recent_ids = [doc['_id'] for doc in coll_list]
            print(DataFrame(recent_ids).shape)

        print(coll_ct, coll_mb)
Exemple #6
0
def delete_collection(db, collection):
    """Drop ``collection`` from database ``db`` after interactive confirmation.

    The prompt is repeated until the reply, lower-cased, equals ``yes``.

    Args:
        db: database name handed to ``login_mongo``.
        collection: name of the collection to drop.
    """
    prompt = 'would you like to delete the collection "{0}/{1}"?: '.format(
        db, collection)

    # keep asking until the operator explicitly confirms
    while True:
        answer = input(prompt)
        if answer.lower() == 'yes':
            break

    login_mongo(db)[collection].drop()
Exemple #7
0
def create_constant_column_in_collection(db, collection, col_name_f, constant):
    """Set field ``col_name_f`` to ``constant`` on every document of a collection.

    The write is done server-side with a single ``update_many`` ``$set``.
    This replaces a read-all / drop / re-insert round trip through a pandas
    DataFrame, which could lose the collection if the re-insert failed,
    raised on an empty collection after already dropping it, and filled
    fields missing from some documents with NaN.

    Args:
        db: database name handed to ``login_mongo``.
        collection: name of the collection to modify.
        col_name_f: name of the field to create or overwrite.
        constant: value stored in that field for every document.
    """
    coll = login_mongo(db)[collection]
    # an empty filter matches every document in the collection
    coll.update_many({}, {'$set': {col_name_f: constant}})
Exemple #8
0
def check_coll_arch():
    """Debug helper: dump ``auxsauce.active_tracks_archive`` and exit.

    Fetches every document of the collection, prints a notice when there are
    none, prints the full list, and then terminates the interpreter. The
    function therefore never returns to its caller.
    """
    import sys

    t_coll = login_mongo('auxsauce')['active_tracks_archive']
    t_list = list(t_coll.find({}))

    if not t_list:
        print('search results are empty')
    print((t_list))

    # The statements that used to follow this exit were unreachable and have
    # been removed. sys.exit() replaces the site-provided exit(), which is
    # intended for interactive sessions only.
    sys.exit()
def pull_active_profiles(_p_):
    """Load the active-profile collection as a DataFrame sorted by last access.

    Reads every document of ``auxsauce.active_profiles_<tracks_or_reposts>``,
    removes rows whose ``permalink`` is on a hard-coded exclusion list, and
    returns the remainder sorted ascending by ``dt_last_accessed``.

    Args:
        _p_: parameter dict; only ``_p_['tracks_or_reposts']`` is read.

    Returns:
        A pandas DataFrame. When the collection holds no documents the empty
        frame is returned as-is.
    """
    from pandas import DataFrame

    # permalinks that are always excluded from the result
    drop_list = ['choraltracks', 'family-stations-inc', 'radio_rural', 'feiyr', 'rinsefm', 'thismorningshow', 'kcrw', 'officialsxsw', 'meditations']

    coll_name = 'active_profiles_{0}'.format(_p_['tracks_or_reposts'])
    docs = list(login_mongo('auxsauce')[coll_name].find({}))

    _ap_df_ = DataFrame(docs)
    if _ap_df_.empty:
        # An empty frame has neither a 'permalink' nor a 'dt_last_accessed'
        # column, so filtering or sorting it would raise KeyError.
        return _ap_df_

    _ap_df_ = _ap_df_[~_ap_df_['permalink'].isin(drop_list)]

    # sort the dataframe
    return _ap_df_.sort_values('dt_last_accessed')
Exemple #10
0
def filter_coll(db, collection):
    """Delete low-activity documents from a collection and report the sizes.

    Removes every document whose ``current.past_year_track_ct`` equals 0 and
    whose ``current.past_30d_repost_ct`` is at most 5, then prints the
    document count before and after the deletion.

    NOTE(review): the trailing ``if _reposts_ != []`` section reads
    ``_reposts_``, ``_aux_db_`` and ``_prof_``, none of which are defined in
    this function. Unless they exist as module-level globals it raises
    NameError after the deletion has already happened -- it looks like it
    belongs to a different function; confirm.

    Args:
        db: database name handed to ``login_mongo``.
        collection: name of the collection to filter.
    """
    # NOTE(review): DataFrame is imported but not used in this function.
    from pandas import DataFrame
    # despite the name, this is a collection handle rather than a cursor
    coll_cursor = login_mongo(db)[collection]

    # filter out conditions
    before_size = coll_cursor.count()
    #coll_cursor.delete_many({'$lt': {'past_year_track_ct': 0, 'past_30d_repost_ct': 5}})
    # both conditions must hold for a document to be deleted
    coll_cursor.delete_many({
        '$and': [{
            'current.past_year_track_ct': {
                '$eq': 0
            }
        }, {
            'current.past_30d_repost_ct': {
                '$lte': 5
            }
        }]
    })
    after_size = coll_cursor.count()
    print('before size: {0}     after size: {1}'.format(
        before_size, after_size))
    # Upserts the reposts list for one profile into the 'reposts' collection;
    # see the NOTE(review) in the docstring about the undefined names here.
    if _reposts_ != []:
        rep_coll = _aux_db_['reposts']
        rep_coll.update_one(filter={'_id': _prof_['_id']},
                            update={
                                '$set': {
                                    'dt_last_update': int(time()),
                                    'reposts': _reposts_
                                }
                            },
                            upsert=True)


# Top-level processing loop (fragment).
# NOTE(review): this section reads several names that are not defined in the
# visible code -- profiles_df, min_access_ct, itemgetter, params, t_r and
# DataFrame -- and the loop body appears to be cut off after the final comment.
# Confirm against the complete script.
while True:
    # load collection
    t = time()
    aux_db = login_mongo('auxsauce')
    pc_coll = aux_db['profiles_current']
    pu_coll = aux_db['profiles_updates']

    # disabled debug block (the condition is always False): would print the
    # first reposts entry of one hard-coded profile and exit
    if 1 == 0:
        reposts_db_list = list(aux_db['reposts'].find({'_id': 'therealstyme'},
                                                      {
                                                          '_id': 0,
                                                          'reposts': 1
                                                      }))
        rdbl = reposts_db_list[0]['reposts'][0]
        print(DataFrame(rdbl))
        print(len(rdbl))
        print(type(rdbl))
        exit()
    # keep only the profiles whose access_ct does not exceed min_access_ct
    min_access_profiles_df = profiles_df[profiles_df['access_ct'] <= min_access_ct]
    print(min_access_profiles_df.shape, min_access_ct)
    # sorted by permalink here, but the order is randomised again just below
    least_recent_list = sorted(min_access_profiles_df.to_dict('records'), key = itemgetter('permalink'))
    
    # shuffle list
    from random import shuffle
    shuffle(least_recent_list)

    # process at most the first 1000 profiles of the shuffled list
    for ct, profile in enumerate(least_recent_list[: 1000]):
        print('\n\n{0}/{1}'.format(ct, len(least_recent_list)), profile['permalink'])
        t = time()
        params['loop_start'] = time()

        # get the df
        t = time()
        # re-read this profile's document to obtain its current access_ct
        ap_tr_coll = login_mongo('auxsauce')['active_profiles_{0}'.format(t_r)].find({'permalink': profile['permalink']})
        ap_tr_list = list(ap_tr_coll)
        # an empty lookup result is treated as fatal: print context and exit
        try: prof_access_ct = ap_tr_list[0]['access_ct']
        except IndexError: print('index error'); print(ap_tr_list, profile); exit()
        print('get cursors:       {0:.4f} s'.format(time() - t))

        # stop the whole script once no profile is left at min_access_ct;
        # skip this profile when its stored access_ct differs from min_access_ct
        profiles_remaining_ct = login_mongo('auxsauce')['active_profiles_{0}'.format(params['tracks_or_reposts'])].find({'access_ct': min_access_ct}).count()
        if profiles_remaining_ct == 0: exit()
        if min_access_ct != prof_access_ct: print('SKIPPING:   {0}\n\n'.format(profile['permalink'])); continue

        # write the access dt to the active profile db coll (currently i have it saved so that profs are in a list)
        t = time()
        write_profile_access_dt(profile, params)
        print('write profile access:       {0:.4f} s'.format(time() - t))

        # scrape the most recent songs from a profile, for now limit to a month
Exemple #13
0
def migrate_db(db_name_from, db_name_to, port_no_from=27018, port_no_to=27017):
    """List the collection names of the source database.

    NOTE(review): only this first statement is visible. ``db_name_to`` and
    both port arguments are not used by it, so the remainder of the body
    appears to be missing -- confirm against the full source.
    """
    all_colls = login_mongo(db_name_from).collection_names()
Exemple #14
0
def column_to_columns(column_name_from, coll_name, db_name='auxsauce'):
    """Open collection ``coll_name`` of database ``db_name``.

    NOTE(review): the visible body stops after obtaining the collection
    handle. ``column_name_from`` and the ``DataFrame`` import are unused
    here, so the rest of the function appears to be missing -- confirm
    against the full source.
    """
    from pandas import DataFrame

    # load collection
    coll = login_mongo(db_name)[coll_name]
Exemple #15
0
def migrate_column(column_name_from,
                   column_name_to,
                   coll_name_from,
                   coll_name_to,
                   db_name_from='auxsauce',
                   db_name_to='auxsauce'):
    """Copy one field from a source collection into a target collection.

    Source documents that contain ``column_name_from`` and whose ``_id``
    matches the regex ``^a`` are read in batches of 100, and each field value
    is upserted into the target document with the same ``_id``. The source
    documents are not modified.

    Batches are paged by ``_id`` (sorted ascending, each batch starting after
    the last ``_id`` already copied). Re-running the unchanged query every
    pass would return the same first 100 documents forever, because nothing
    is removed from the source.

    NOTE(review): ``column_name_to`` is accepted but not used -- the value is
    written to the target under the *source* field name. The ``^a`` filter on
    ``_id`` is hard-coded. Confirm whether both are intended.

    Args:
        column_name_from: field to copy from the source documents.
        column_name_to: currently unused (see note above).
        coll_name_from: source collection name.
        coll_name_to: target collection name.
        db_name_from: source database name handed to ``login_mongo``.
        db_name_to: target database name handed to ``login_mongo``.
    """
    from_coll = login_mongo(db_name_from)[coll_name_from]
    to_coll = login_mongo(db_name_to)[coll_name_to]
    print('\n\nfrom: ', from_coll, '\nto: ', to_coll)
    from_coll_ct = int(from_coll.count())

    projection = {'_id': 1, column_name_from: 1}
    last_id = None  # highest _id copied so far; None before the first batch

    while True:
        id_filter = {'$regex': '^a'}
        if last_id is not None:
            # resume strictly after the previous batch
            id_filter['$gt'] = last_id

        db_from_list = list(
            from_coll.find(
                {
                    column_name_from: {'$exists': True},
                    '_id': id_filter,
                },
                projection).sort('_id', 1).limit(100))

        for ct, profile in enumerate(db_from_list):
            print(profile['_id'], '  {0}/{1}'.format(ct, len(db_from_list)))
            profile_id = profile.pop('_id')

            # after the pop, `profile` holds only the migrated field
            to_coll.update_one({'_id': profile_id}, {'$set': profile},
                               upsert=True)
            last_id = profile_id

        to_coll_ct = int(to_coll.count())
        print(to_coll_ct, '/', from_coll_ct, '\n\n')

        if not db_from_list:
            # an empty batch means every matching document has been copied
            break
Exemple #16
0
def rename_collection(orig_coll_name, new_coll_name, db_name='auxsauce'):
    """Rename collection ``orig_coll_name`` to ``new_coll_name`` in ``db_name``.

    Args:
        orig_coll_name: current name of the collection.
        new_coll_name: name the collection should have afterwards.
        db_name: database name handed to ``login_mongo``.
    """
    login_mongo(db_name)[orig_coll_name].rename(new_coll_name)
def delete_profile_by_perma(_profile_):
    """Delete a profile from both ``profiles_current`` and ``profiles_updates``.

    The document is matched by ``_profile_['_id']`` in each collection of the
    ``auxsauce`` database; a confirmation line is printed afterwards.

    Args:
        _profile_: mapping that carries the profile's ``_id``.
    """
    # same order as before: current first, then updates
    for coll_name in ('profiles_current', 'profiles_updates'):
        target = login_mongo("auxsauce")[coll_name]
        target.delete_one({'_id': _profile_['_id']})

    print('deleted profile: {0}'.format(_profile_['_id']))
def write_profile_access_dt(_prof_, _p_):
    """Bump a profile's access counter and store it in the active-profiles collection.

    Mutates ``_prof_`` in place (``dt_last_accessed`` is set to the current
    epoch second, ``access_ct`` to ``prof_access_ct + 1``) and then ``$set``s
    ``access_ct`` on the ``auxsauce.active_profiles_<tracks_or_reposts>``
    document with the same ``permalink``.

    NOTE(review): ``prof_access_ct`` is neither a parameter nor a local; it
    must exist as a module-level name when this runs, otherwise NameError is
    raised. Also, ``dt_last_accessed`` is only set on the in-memory dict and
    is not part of the database write -- confirm whether that is intended.

    Args:
        _prof_: profile dict; must carry ``permalink``.
        _p_: parameter dict; only ``_p_['tracks_or_reposts']`` is read.
    """
    coll_name = 'active_profiles_{0}'.format(_p_['tracks_or_reposts'])

    _prof_['dt_last_accessed'] = int(time())
    _prof_['access_ct'] = prof_access_ct + 1

    target_coll = login_mongo('auxsauce')[coll_name]
    target_coll.update_one(
        {'permalink': _prof_['permalink']},
        {'$set': {'access_ct': _prof_['access_ct']}})