def get_it_by_ay(author, year, delta=2):
    """
    Get arxiv_id (i) and title (t) of all papers from author (a)
    whose year (y) lies in [year - delta .. year], both ends inclusive.

    Returns a list of (arxiv_id, title) tuples with both fields passed
    through to_ascii(); the list is empty when no row matches.
    """
    # NOTE(review): a second definition of get_it_by_ay follows later in
    # this file and is the one that takes effect at import time.
    con = lite.connect(meta_db)
    try:
        # `with con` only commits / rolls back; it does not close the
        # connection, hence the explicit close() in the finally clause.
        with con:
            cur = con.cursor()
            # Bound parameters instead of string interpolation: an author
            # name containing a quote (e.g. O'Brien) no longer breaks the
            # statement or allows SQL injection.  The year bounds are bound
            # as decimal strings, matching the quoted literals the query
            # used before, so comparison against the year column behaves
            # the same as it did.
            cur.execute(
                "SELECT arxiv_id, title FROM ayit_lookup "
                "WHERE author = ? AND ? <= year AND year <= ?",
                (author, '%d' % (year - delta), '%d' % year))
            rows = cur.fetchall()
    finally:
        con.close()
    return [(to_ascii(x_id), to_ascii(x_title)) for x_id, x_title in rows]
def get_it_by_ay(author, year, delta=2):
    """
    Get arxiv_id (i) and title (t) of all papers from author (a)
    whose year (y) lies in [year - delta .. year], both ends inclusive.

    Returns a list of (arxiv_id, title) tuples with both fields passed
    through to_ascii(); the list is empty when no row matches.
    """
    # NOTE(review): an earlier definition with the same name precedes this
    # one in the file; this later definition shadows it.
    con = lite.connect(meta_db)
    try:
        # `with con` only commits / rolls back; it does not close the
        # connection, hence the explicit close() in the finally clause.
        with con:
            cur = con.cursor()
            # Bound parameters instead of string interpolation: an author
            # name containing a quote (e.g. O'Brien) no longer breaks the
            # statement or allows SQL injection.  The year bounds are bound
            # as decimal strings, matching the quoted literals the query
            # used before, so comparison against the year column behaves
            # the same as it did.
            cur.execute(
                "SELECT arxiv_id, title FROM ayit_lookup "
                "WHERE author = ? AND ? <= year AND year <= ?",
                (author, '%d' % (year - delta), '%d' % year))
            rows = cur.fetchall()
    finally:
        con.close()
    return [(to_ascii(x_id), to_ascii(x_title)) for x_id, x_title in rows]
def cleanup_rec(string):
    """
    Convert `string` with to_ascii() and strip every newline and
    pipe ('|') character from the result.
    """
    # The compiled pattern is cached as an attribute on the function
    # itself, so it is built only on the first call.
    try:
        pattern = cleanup_rec.rx
    except AttributeError:
        pattern = cleanup_rec.rx = re.compile(r'[\n|]')
    return pattern.sub('', to_ascii(string))
def get_author_count_dict(limit=1000):
    """
    Count number of papers by author.

    Reads at most `limit` rows from the ayit_lookup table and tallies how
    often each author occurs among those rows, so the counts cover only
    the rows returned, not necessarily the whole table.

    Returns a defaultdict(int) mapping to_ascii(author) -> row count.
    """
    author_count = defaultdict(int)
    con = lite.connect(meta_db)
    try:
        # `with con` only commits / rolls back; it does not close the
        # connection, hence the explicit close() in the finally clause.
        with con:
            cur = con.cursor()
            # LIMIT is passed as a bound integer rather than being
            # formatted into the SQL text.
            cur.execute("SELECT author FROM ayit_lookup LIMIT ?",
                        (int(limit),))
            for (author,) in cur.fetchall():
                author_count[to_ascii(author)] += 1
    finally:
        con.close()
    return author_count