def user_frequency_tables(db_source, sample=True, tags_too=False):
    """Build popularity tables from each user's ``most_used`` genre/tag.

    For ``user_genres`` (and ``user_tags`` when ``tags_too`` is true) the
    ``user`` and ``most_used`` columns are read, the ``most_used`` values are
    passed to ``add_ranks`` and the result is inserted into
    ``genre_popularity`` (respectively ``tag_popularity``).

    Args:
        db_source: Passed unchanged to ``deriv_db.connect_databases``.
        sample: When true, only rows whose ``user`` appears in the ``id``
            column of the ``sample`` table are counted.
        tags_too: When true, ``tag_popularity`` is built as well.

    Returns:
        False when a required table is missing; otherwise None.
    """
    _connsourc, connderiv = deriv_db.connect_databases(db_source)
    cursderiv = connderiv.cursor()

    to_do = [('user_genres', 'genre_popularity')]
    if tags_too:
        to_do.append(('user_tags', 'tag_popularity'))

    # Only the tables that are actually read below are required; demanding
    # user_tags for a genre-only run would reject a perfectly usable database.
    required = [usertab for usertab, _poptab in to_do]
    found = check_tables(cursderiv, required)
    if not all(found):
        for name, present in zip(required, found):
            if not present:
                print('Could not find {} table.'.format(name))
        print('Before calling this function, call user_gt_tables with '
              'path of source database to create necessary tables.')
        return False

    # Keep the id set under its own name: rebinding ``sample`` to the set made
    # an empty sample table look like "sampling disabled", which silently
    # counted every user instead of none.
    sample_ids = None
    if sample:
        cursderiv.execute('SELECT id FROM sample')
        sample_ids = {row[0] for row in cursderiv.fetchall()}

    for usertab, poptab in to_do:
        cursderiv.execute('SELECT user,most_used FROM {}'.format(usertab))
        rows = cursderiv.fetchall()
        if sample_ids is None:
            strings = [row[1] for row in rows]
        else:
            strings = [row[1] for row in rows if row[0] in sample_ids]

        add_data.create_table(cursderiv, poptab)
        sql = ('INSERT INTO {} (string,frequency,rank) '
               'VALUES(?,?,?)'.format(poptab))
        cursderiv.executemany(sql, add_ranks(strings, 1))
        connderiv.commit()
def user_gt_tables(db_source):
    """Create and fill the ``user_genres`` and ``user_tags`` tables.

    Both connections come from ``deriv_db.connect_databases(db_source)``.
    The ``genres`` and ``tags`` tables must already be visible through the
    second connection; if either is missing a hint is printed and False is
    returned. Otherwise, for each of the two target tables, the rows produced
    by ``deriv_user_data`` are inserted and committed.

    Returns:
        True on success, False when a prerequisite table is missing.
    """
    connsourc, connderiv = deriv_db.connect_databases(db_source)
    curssourc = connsourc.cursor()
    cursderiv = connderiv.cursor()

    needed = ['genres', 'tags']
    found = check_tables(cursderiv, needed)
    if not (found[0] and found[1]):
        for position, present in enumerate(found):
            if not present:
                print('Could not find {} table.'.format(needed[position]))
        print('Before calling this function, call gt_tables with '
              'path of source database to create necessary tables.')
        return False

    # Distinct result rows (1-tuples) of tracks.user_id.
    curssourc.execute('SELECT user_id FROM tracks')
    users = set(curssourc.fetchall())

    jobs = [
        ('genre', 'user_genres', 'genres'),
        ('tag_list', 'user_tags', 'tags'),
    ]
    for colsourc, tabderiv, ranktable in jobs:
        print('Now working with: ' + ranktable)
        add_data.create_table(cursderiv, tabderiv)
        derived = deriv_user_data(curssourc, cursderiv, users, colsourc,
                                  ranktable)
        add_data.insert_deriv_data(cursderiv, tabderiv, derived)
        connderiv.commit()

    return True
def create_uploaders_table(cursderiv):
    """Create ``uploaders`` and fill it with the distinct ``tracks.user_id`` rows.

    Both the read and the insert go through *cursderiv*; nothing is committed
    here.
    """
    add_data.create_table(cursderiv, 'uploaders')
    cursderiv.execute('SELECT user_id FROM tracks')
    # A set of the fetched row tuples collapses repeated user ids.
    distinct_rows = set(cursderiv.fetchall())
    cursderiv.executemany('INSERT INTO uploaders VALUES(?)', distinct_rows)
def user_frequency_tables(db_source, sample=True, tags_too=False):
    """Build popularity tables from each user's ``most_used`` genre/tag.

    For ``user_genres`` (and ``user_tags`` when ``tags_too`` is true) the
    ``user`` and ``most_used`` columns are read, the ``most_used`` values are
    passed to ``add_ranks`` and the result is inserted into
    ``genre_popularity`` (respectively ``tag_popularity``).

    Args:
        db_source: Passed unchanged to ``deriv_db.connect_databases``.
        sample: When true, only rows whose ``user`` appears in the ``id``
            column of the ``sample`` table are counted.
        tags_too: When true, ``tag_popularity`` is built as well.

    Returns:
        False when a required table is missing; otherwise None.
    """
    _connsourc, connderiv = deriv_db.connect_databases(db_source)
    cursderiv = connderiv.cursor()

    to_do = [('user_genres', 'genre_popularity')]
    if tags_too:
        to_do.append(('user_tags', 'tag_popularity'))

    # Only the tables that are actually read below are required; demanding
    # user_tags for a genre-only run would reject a perfectly usable database.
    required = [usertab for usertab, _poptab in to_do]
    found = check_tables(cursderiv, required)
    if not all(found):
        for name, present in zip(required, found):
            if not present:
                print('Could not find {} table.'.format(name))
        print('Before calling this function, call user_gt_tables with '
              'path of source database to create necessary tables.')
        return False

    # Keep the id set under its own name: rebinding ``sample`` to the set made
    # an empty sample table look like "sampling disabled", which silently
    # counted every user instead of none.
    sample_ids = None
    if sample:
        cursderiv.execute('SELECT id FROM sample')
        sample_ids = {row[0] for row in cursderiv.fetchall()}

    for usertab, poptab in to_do:
        cursderiv.execute('SELECT user,most_used FROM {}'.format(usertab))
        rows = cursderiv.fetchall()
        if sample_ids is None:
            strings = [row[1] for row in rows]
        else:
            strings = [row[1] for row in rows if row[0] in sample_ids]

        add_data.create_table(cursderiv, poptab)
        sql = ('INSERT INTO {} (string,frequency,rank) '
               'VALUES(?,?,?)'.format(poptab))
        cursderiv.executemany(sql, add_ranks(strings, 1))
        connderiv.commit()
def create_uploaders_table(cursderiv):
    """Create ``uploaders`` and fill it with the distinct ``tracks.user_id`` rows.

    Both the read and the insert go through *cursderiv*; nothing is committed
    here.
    """
    add_data.create_table(cursderiv, 'uploaders')
    cursderiv.execute('SELECT user_id FROM tracks')
    # A set of the fetched row tuples collapses repeated user ids.
    distinct_rows = set(cursderiv.fetchall())
    cursderiv.executemany('INSERT INTO uploaders VALUES(?)', distinct_rows)
def copy_and_process_tracks_table(curssourc, cursderiv):
    """Copy ``tracks`` from *curssourc* to *cursderiv*, transforming each row.

    Every row selected from the source ``tracks`` table is passed through
    ``process_track_datum`` and inserted into a freshly created ``tracks``
    table using a 40-placeholder INSERT. Nothing is committed here.
    """
    add_data.create_table(cursderiv, 'tracks')
    placeholders = ','.join(['?'] * 40)
    insert_sql = 'INSERT INTO tracks VALUES({})'.format(placeholders)
    source_rows = curssourc.execute('SELECT * FROM tracks')
    # Generator keeps the copy streaming instead of materialising all rows.
    processed_rows = (process_track_datum(row) for row in source_rows)
    cursderiv.executemany(insert_sql, processed_rows)
def create_sample_uploaders_table(cursderiv):
    """Create ``sample_uploaders`` from ids present in both source tables.

    The ``id`` rows of ``sample`` and of ``uploaders`` are intersected and
    the common rows are inserted into ``sample_uploaders``. Nothing is
    committed here.
    """
    add_data.create_table(cursderiv, 'sample_uploaders')

    cursderiv.execute('SELECT id FROM sample')
    sampled = set(cursderiv.fetchall())

    cursderiv.execute('SELECT id FROM uploaders')
    uploading = set(cursderiv.fetchall())

    cursderiv.executemany('INSERT INTO sample_uploaders VALUES(?)',
                          sampled.intersection(uploading))
def create_sample_uploaders_table(cursderiv):
    """Create ``sample_uploaders`` from ids present in both source tables.

    The ``id`` rows of ``sample`` and of ``uploaders`` are intersected and
    the common rows are inserted into ``sample_uploaders``. Nothing is
    committed here.
    """
    add_data.create_table(cursderiv, 'sample_uploaders')

    cursderiv.execute('SELECT id FROM sample')
    sampled = set(cursderiv.fetchall())

    cursderiv.execute('SELECT id FROM uploaders')
    uploading = set(cursderiv.fetchall())

    cursderiv.executemany('INSERT INTO sample_uploaders VALUES(?)',
                          sampled.intersection(uploading))
def user_gt_tables(db_source, sample_only=False, tags_too=False):
    """Create and fill ``user_genres`` (and optionally ``user_tags``) in batches.

    Requires the ``sample``, ``tracks``, ``uploaders``, ``genres`` and
    ``tags`` tables to be visible through the second connection returned by
    ``deriv_db.connect_databases``. Users are processed in slices of the
    module-level ``user_batch`` size, committing after every slice.

    Args:
        db_source: Passed unchanged to ``deriv_db.connect_databases``.
        sample_only: When true, only ids present in both ``sample`` and
            ``uploaders`` are processed; otherwise every ``uploaders`` row.
        tags_too: When true, ``user_tags`` is built after ``user_genres``.

    Returns:
        True on success, False when a required table is missing.
    """
    connsourc, connderiv = deriv_db.connect_databases(db_source)
    curssourc = connsourc.cursor()
    cursderiv = connderiv.cursor()

    needed = ['sample', 'tracks', 'uploaders', 'genres', 'tags']
    if False in check_tables(cursderiv, needed):
        return False

    if sample_only:
        cursderiv.execute('SELECT id FROM sample')
        sampled = cursderiv.fetchall()
        cursderiv.execute('SELECT id FROM uploaders')
        users = list(set(sampled).intersection(set(cursderiv.fetchall())))
    else:
        cursderiv.execute('SELECT id FROM uploaders')
        users = cursderiv.fetchall()

    total = len(users)
    print('{} users to process'.format(total))

    jobs = [('genre', 'user_genres', 'genres')]
    if tags_too:
        jobs.append(('tag_list', 'user_tags', 'tags'))

    for colsourc, tabderiv, ranktable in jobs:
        print('Now working with: ' + ranktable)
        add_data.create_table(cursderiv, tabderiv)
        print('Fresh {} table created.'.format(colsourc))
        print('Getting track data.')

        # Map each user_id to the concatenated split strings of its tracks.
        tracks = {}
        query = 'SELECT user_id,{} FROM tracks'.format(colsourc)
        for row in cursderiv.execute(query):
            pieces = split_gt_string(row[1])
            if not pieces[0]:
                continue
            if row[0] in tracks:
                tracks[row[0]].extend(pieces)
            else:
                tracks[row[0]] = pieces
        print('Data loaded in memory.')

        done = 0
        while done < total:
            # Full batch while enough users remain, else whatever is left.
            to_collect = min(user_batch, total - done)
            this_batch = users[done:done + to_collect]
            print('Starting on a batch of {} users.'.format(to_collect))
            derived = deriv_user_data(tracks, cursderiv, this_batch,
                                      colsourc, ranktable)
            add_data.insert_deriv_data(cursderiv, tabderiv, derived)
            connderiv.commit()
            done += to_collect
            print('{} done. {} remain.'.format(done, total - done))

    return True
def create_gt_table(curssourc, cursderiv, colsourc, tabderiv):
    """Create *tabderiv* and fill it with ranked strings.

    Entries come from ``all_genres(curssourc)`` when *tabderiv* is
    ``'genres'`` and from ``all_tags(curssourc)`` otherwise. The first field
    of each entry, when truthy, is expanded with ``strings_from_string``; the
    collected strings are ranked by ``add_ranks`` using ``genre_threshold``
    or ``tag_threshold`` and inserted as (string, frequency, rank) rows.
    """
    add_data.create_table(cursderiv, tabderiv)

    is_genres = tabderiv == 'genres'
    if is_genres:
        entries = all_genres(curssourc)
    else:
        entries = all_tags(curssourc)

    collected = []
    for entry in entries:
        raw = entry[0]
        if raw:
            collected.extend(strings_from_string(raw, colsourc))

    insert_sql = ('INSERT INTO {} (string,frequency,rank) '
                  'VALUES(?,?,?)'.format(tabderiv))
    if is_genres:
        thresh = genre_threshold
    else:
        thresh = tag_threshold
    cursderiv.executemany(insert_sql, add_ranks(collected, thresh))
def user_gt_tables(db_source, sample_only=False, tags_too=False):
    """Create and fill ``user_genres`` (and optionally ``user_tags``) in batches.

    Requires the ``sample``, ``tracks``, ``uploaders``, ``genres`` and
    ``tags`` tables to be visible through the second connection returned by
    ``deriv_db.connect_databases``. Users are processed in slices of the
    module-level ``user_batch`` size, committing after every slice.

    Args:
        db_source: Passed unchanged to ``deriv_db.connect_databases``.
        sample_only: When true, only ids present in both ``sample`` and
            ``uploaders`` are processed; otherwise every ``uploaders`` row.
        tags_too: When true, ``user_tags`` is built after ``user_genres``.

    Returns:
        True on success, False when a required table is missing.
    """
    connsourc, connderiv = deriv_db.connect_databases(db_source)
    curssourc = connsourc.cursor()
    cursderiv = connderiv.cursor()

    needed = ['sample', 'tracks', 'uploaders', 'genres', 'tags']
    if False in check_tables(cursderiv, needed):
        return False

    if sample_only:
        cursderiv.execute('SELECT id FROM sample')
        sampled = cursderiv.fetchall()
        cursderiv.execute('SELECT id FROM uploaders')
        users = list(set(sampled).intersection(set(cursderiv.fetchall())))
    else:
        cursderiv.execute('SELECT id FROM uploaders')
        users = cursderiv.fetchall()

    total = len(users)
    print('{} users to process'.format(total))

    jobs = [('genre', 'user_genres', 'genres')]
    if tags_too:
        jobs.append(('tag_list', 'user_tags', 'tags'))

    for colsourc, tabderiv, ranktable in jobs:
        print('Now working with: ' + ranktable)
        add_data.create_table(cursderiv, tabderiv)
        print('Fresh {} table created.'.format(colsourc))
        print('Getting track data.')

        # Map each user_id to the concatenated split strings of its tracks.
        tracks = {}
        query = 'SELECT user_id,{} FROM tracks'.format(colsourc)
        for row in cursderiv.execute(query):
            pieces = split_gt_string(row[1])
            if not pieces[0]:
                continue
            if row[0] in tracks:
                tracks[row[0]].extend(pieces)
            else:
                tracks[row[0]] = pieces
        print('Data loaded in memory.')

        done = 0
        while done < total:
            # Full batch while enough users remain, else whatever is left.
            to_collect = min(user_batch, total - done)
            this_batch = users[done:done + to_collect]
            print('Starting on a batch of {} users.'.format(to_collect))
            derived = deriv_user_data(tracks, cursderiv, this_batch,
                                      colsourc, ranktable)
            add_data.insert_deriv_data(cursderiv, tabderiv, derived)
            connderiv.commit()
            done += to_collect
            print('{} done. {} remain.'.format(done, total - done))

    return True
def create_gt_table(cursderiv, colsourc, tabderiv, users):
    """Create *tabderiv* and fill it with ranked strings.

    Entries come from ``all_genres(cursderiv)`` when *tabderiv* is
    ``'genres'`` and from ``all_tags(cursderiv)`` otherwise. When *users* is
    truthy, entries whose first field is not in *users* are skipped. The
    second field of each remaining entry, when truthy, is split with
    ``split_gt_string``; the collected strings are ranked by ``add_ranks``
    using ``genre_threshold`` or ``tag_threshold`` and inserted as
    (string, frequency, rank) rows. *colsourc* is not used.
    """
    add_data.create_table(cursderiv, tabderiv)

    is_genres = tabderiv == 'genres'
    if is_genres:
        entries = all_genres(cursderiv)
    else:
        entries = all_tags(cursderiv)

    collected = []
    for entry in entries:
        if users and entry[0] not in users:
            continue
        if entry[1]:
            collected.extend(split_gt_string(entry[1]))

    insert_sql = ('INSERT INTO {} (string,frequency,rank) '
                  'VALUES(?,?,?)'.format(tabderiv))
    if is_genres:
        thresh = genre_threshold
    else:
        thresh = tag_threshold
    cursderiv.executemany(insert_sql, add_ranks(collected, thresh))
def create_gt_table(cursderiv, colsourc, tabderiv, users):
    """Create *tabderiv* and fill it with ranked strings.

    Entries come from ``all_genres(cursderiv)`` when *tabderiv* is
    ``'genres'`` and from ``all_tags(cursderiv)`` otherwise. When *users* is
    truthy, entries whose first field is not in *users* are skipped. The
    second field of each remaining entry, when truthy, is split with
    ``split_gt_string``; the collected strings are ranked by ``add_ranks``
    using ``genre_threshold`` or ``tag_threshold`` and inserted as
    (string, frequency, rank) rows. *colsourc* is not used.
    """
    add_data.create_table(cursderiv, tabderiv)

    is_genres = tabderiv == 'genres'
    if is_genres:
        entries = all_genres(cursderiv)
    else:
        entries = all_tags(cursderiv)

    collected = []
    for entry in entries:
        if users and entry[0] not in users:
            continue
        if entry[1]:
            collected.extend(split_gt_string(entry[1]))

    insert_sql = ('INSERT INTO {} (string,frequency,rank) '
                  'VALUES(?,?,?)'.format(tabderiv))
    if is_genres:
        thresh = genre_threshold
    else:
        thresh = tag_threshold
    cursderiv.executemany(insert_sql, add_ranks(collected, thresh))
def go_for_it(to_process_filepath, db_path, to_backup=True):
    """Fetch tracks for every listed user, resuming from a progress file.

    User ids are read one per line from *to_process_filepath*. Ids already
    recorded in the progress file (path from ``get_processed_filepath``) are
    skipped, so an interrupted run can be restarted. After each user the id
    is appended to the progress file and the database is committed.

    Args:
        to_process_filepath: Text file with one integer user id per line.
        db_path: Path handed to ``sqlite3.connect`` and to ``backup``.
        to_backup: When true, ``backup(db_path)`` is called before every
            10000th user (including the first) and once more at the end.

    Returns:
        True once every remaining user has been handled.
    """
    conn = sqlite3.connect(db_path)
    curs = conn.cursor()
    if not grs.check_tables(curs, ['tracks'])[0]:
        print('Creating tracks table.')
        ad.create_table(curs, 'tracks')

    # Blank lines (e.g. a trailing newline) are skipped instead of crashing
    # int(); int() itself tolerates the surrounding whitespace.
    with open(to_process_filepath, 'r') as to_process:
        users = [int(line) for line in to_process if line.strip()]

    processed_filepath = get_processed_filepath(db_path)
    already_done = set()
    if os.path.exists(processed_filepath):
        with open(processed_filepath, 'r') as done_file:
            already_done = {int(line) for line in done_file if line.strip()}

    # Set-based filtering keeps the original order, is linear rather than
    # quadratic, and does not raise when the progress file mentions an id
    # that is absent from the to-do list.
    users = [user for user in users if user not in already_done]

    print('There are {} users to munch through. Here we go!'.format(len(users)))

    # Opening in append mode also creates the progress file when missing.
    processed = open(processed_filepath, 'a')
    try:
        for n, user in enumerate(users):
            if to_backup and n % 10000 == 0:
                print('Backing up...')
                # Close around the backup so the progress file is fully
                # written and not held open while backup() runs.
                processed.close()
                backup(db_path)
                processed = open(processed_filepath, 'a')
            get_tracks(curs, user)
            processed.write('{}\n'.format(user))
            processed.flush()  # Otherwise, restart becomes unreliable
            conn.commit()
            # Parentheses matter: ``n + 1 % 100`` parsed as ``n + (1 % 100)``,
            # so the hundreds were never reported.
            if n < 10 or (n + 1) % 100 == 0:
                print('{} done.'.format(n + 1))
            if n == 10:
                print('Only reporting hundreds from now on.')
    finally:
        # Always release the handle, also on errors and when to_backup is
        # false; closing an already closed file is a no-op.
        processed.close()

    if to_backup:
        print('Backing up...')
        backup(db_path)
    print('And we\'re done.')
    return True
Esempio n. 15
0
def create_new_tracks_table(curs):
    """Create the ``tracks`` table through ``add_data.create_table``."""
    table_name = 'tracks'
    add_data.create_table(curs, table_name)
Esempio n. 16
0
def export_data_table(cursor, table_data, table_name):
    """Log the export, create *table_name* and insert *table_data* into it.

    Table creation and insertion are delegated to ``ad.create_table`` and
    ``ad.insert_tuple_data_set_into_DB``.
    """
    announcement = 'Creating ' + table_name + ' table in DB.'
    logging.info(announcement)
    # creates table if not existing already
    ad.create_table(cursor, table_name)
    ad.insert_tuple_data_set_into_DB(cursor, table_name, table_data)
Esempio n. 17
0
def create_tables(curs):
    """Create each table listed below, in order, via ``ad.create_table``."""
    table_names = (
        'users',
        'tracks',
        'comments',
        'x_follows_y',
        'ids_tried',
        'track_ids_tried',
        'comment_ids_tried',
        'sample',
    )
    for name in table_names:
        ad.create_table(curs, name)
def corpus_table(cursderiv):
    """Create the ``comments_corp`` table through ``add_data.create_table``."""
    table_name = 'comments_corp'
    add_data.create_table(cursderiv, table_name)
Esempio n. 19
0
def corpus_table(cursderiv):
    """Create the ``comments_corp`` table through ``add_data.create_table``."""
    table_name = 'comments_corp'
    add_data.create_table(cursderiv, table_name)
def copy_and_process_tracks_table(curssourc, cursderiv):
    """Copy ``tracks`` from *curssourc* to *cursderiv*, transforming each row.

    Every row selected from the source ``tracks`` table is passed through
    ``process_track_datum`` and inserted into a freshly created ``tracks``
    table using a 40-placeholder INSERT. Nothing is committed here.
    """
    add_data.create_table(cursderiv, 'tracks')
    placeholders = ','.join(['?'] * 40)
    insert_sql = 'INSERT INTO tracks VALUES({})'.format(placeholders)
    source_rows = curssourc.execute('SELECT * FROM tracks')
    # Generator keeps the copy streaming instead of materialising all rows.
    processed_rows = (process_track_datum(row) for row in source_rows)
    cursderiv.executemany(insert_sql, processed_rows)
def create_new_tracks_table(curs):
    """Create the ``tracks`` table through ``add_data.create_table``."""
    table_name = 'tracks'
    add_data.create_table(curs, table_name)
Esempio n. 22
0
def create_tables(curs):
    """Create each table listed below, in order, via ``ad.create_table``."""
    table_names = (
        'users',
        'tracks',
        'comments',
        'x_follows_y',
        'ids_tried',
        'track_ids_tried',
        'comment_ids_tried',
        'sample',
    )
    for name in table_names:
        ad.create_table(curs, name)
Esempio n. 23
0
 def create_table(self, table_name):
     """Create *table_name* via ``ad.create_table`` using ``self.curs``."""
     ad.create_table(self.curs, table_name)
Esempio n. 24
0
def faves_table(cursderiv):
    """Create the ``x_faves_work_of_y`` table through ``add_data.create_table``."""
    table_name = 'x_faves_work_of_y'
    add_data.create_table(cursderiv, table_name)
def copy_sample_table(curssourc, cursderiv):
    """Copy the ``id`` column of ``sample`` from *curssourc* to *cursderiv*.

    A ``sample`` table is created on the destination first; rows are then
    streamed straight from the source query into the INSERT. Nothing is
    committed here.
    """
    add_data.create_table(cursderiv, 'sample')
    source_ids = curssourc.execute('SELECT id FROM sample')
    cursderiv.executemany('INSERT INTO sample VALUES(?)', source_ids)
Esempio n. 26
0
 def create_table(self, table_name):
     """Create *table_name* via ``ad.create_table`` using ``self.curs``."""
     ad.create_table(self.curs, table_name)
def create_tables(curs):
    """Create ``users``, ``tracks`` and ``comments`` via ``ad.create_table``."""
    table_names = ('users', 'tracks', 'comments')
    for name in table_names:
        ad.create_table(curs, name)
def faves_table(cursderiv):
    """Create the ``x_faves_work_of_y`` table through ``add_data.create_table``."""
    table_name = 'x_faves_work_of_y'
    add_data.create_table(cursderiv, table_name)
def create_tables(curs):
    """Create ``users``, ``tracks`` and ``comments`` via ``ad.create_table``."""
    table_names = ('users', 'tracks', 'comments')
    for name in table_names:
        ad.create_table(curs, name)
def copy_sample_table(curssourc, cursderiv):
    """Copy the ``id`` column of ``sample`` from *curssourc* to *cursderiv*.

    A ``sample`` table is created on the destination first; rows are then
    streamed straight from the source query into the INSERT. Nothing is
    committed here.
    """
    add_data.create_table(cursderiv, 'sample')
    source_ids = curssourc.execute('SELECT id FROM sample')
    cursderiv.executemany('INSERT INTO sample VALUES(?)', source_ids)