def most_referenced(sample, amount):
    """Write the references that cite the most-referenced authors to CSV.

    Reads ``data/02-refined/<sample>-rows.json``, tallies per-author
    ``'articles'`` and ``'referenced'`` counts into ``authors``, and then
    writes one CSV row (comma-joined reference authors, year, original
    reference text) to ``data/03-stats/<sample>-top-references.csv`` for
    every reference author who is among the ``amount`` authors with the
    highest ``'referenced'`` count.

    A reference listing several top authors is written once per such
    author.

    Args:
        sample: Name substituted into the input and output file paths.
        amount: Number of top-ranked authors whose references are kept.

    NOTE(review): ``authors`` is not defined inside this function; it is
    presumably a module-level dict that keeps accumulating across calls --
    confirm against the rest of the file.
    """
    # Plain 'r' instead of the deprecated 'rU': JSON parsing does not depend
    # on universal-newline translation.
    with open('data/02-refined/%s-rows.json' % sample, 'r') as f:
        rows = json.load(f)

    for row in rows:
        article = row['article']
        for author in article['authors']:
            stats = authors.setdefault(
                author, {'articles': 0, 'referenced': 0})
            stats['articles'] += 1
        for reference in article['referencelist']:
            for author in reference['authors']:
                stats = authors.setdefault(
                    author, {'articles': 0, 'referenced': 0})
                stats['referenced'] += 1
                # NOTE(review): the source's indentation was lost, so the
                # intended nesting of this debug print is a guess -- confirm.
                pprint(author)

    # The ranking does not change while writing, so compute it once instead
    # of re-sorting for every author of every reference.
    ranked = sorted(
        authors.items(),
        key=lambda item: item[1]['referenced'],
        reverse=True)
    top_authors = set(name for name, _stats in ranked[:amount])

    with open('data/03-stats/%s-top-references.csv' % sample, 'wb') as f:
        writer = CSVUnicodeWriter(f)
        for row in rows:
            for reference in row['article']['referencelist']:
                for author in reference['authors']:
                    if author in top_authors:
                        writer.writerow([
                            ','.join(reference['authors']),
                            reference['year'],
                            reference['original'],
                        ])
def most_referenced(sample, amount):
    """Write the references that cite the most-referenced authors to CSV.

    Reads ``data/02-refined/<sample>-rows.json``, tallies per-author
    ``'articles'`` and ``'referenced'`` counts into ``authors``, and then
    writes one CSV row (comma-joined reference authors, year, original
    reference text) to ``data/03-stats/<sample>-top-references.csv`` for
    every reference author who is among the ``amount`` authors with the
    highest ``'referenced'`` count.

    A reference listing several top authors is written once per such
    author.

    Args:
        sample: Name substituted into the input and output file paths.
        amount: Number of top-ranked authors whose references are kept.

    NOTE(review): ``authors`` is not defined inside this function; it is
    presumably a module-level dict that keeps accumulating across calls --
    confirm against the rest of the file.
    """
    # Plain 'r' instead of the deprecated 'rU': JSON parsing does not depend
    # on universal-newline translation.
    with open('data/02-refined/%s-rows.json' % sample, 'r') as f:
        rows = json.load(f)

    for row in rows:
        article = row['article']
        for author in article['authors']:
            stats = authors.setdefault(
                author, {'articles': 0, 'referenced': 0})
            stats['articles'] += 1
        for reference in article['referencelist']:
            for author in reference['authors']:
                stats = authors.setdefault(
                    author, {'articles': 0, 'referenced': 0})
                stats['referenced'] += 1
                # NOTE(review): the source's indentation was lost, so the
                # intended nesting of this debug print is a guess -- confirm.
                pprint(author)

    # The ranking does not change while writing, so compute it once instead
    # of re-sorting for every author of every reference.
    ranked = sorted(
        authors.items(),
        key=lambda item: item[1]['referenced'],
        reverse=True)
    top_authors = set(name for name, _stats in ranked[:amount])

    with open('data/03-stats/%s-top-references.csv' % sample, 'wb') as f:
        writer = CSVUnicodeWriter(f)
        for row in rows:
            for reference in row['article']['referencelist']:
                for author in reference['authors']:
                    if author in top_authors:
                        writer.writerow([
                            ','.join(reference['authors']),
                            reference['year'],
                            reference['original'],
                        ])
def references_by_authors_db(sample, authors):
    """Write the stored references of the given authors to a CSV file.

    For each ``(author, stats)`` pair in ``authors`` the author is printed
    and ``db.references.find`` is called with ``'articleset'`` set to
    ``sample`` and ``'authors'`` set to the author. Every returned reference
    becomes one row (comma-joined authors, year, original reference text)
    in ``data/03-stats/<sample>-top-references.csv``.

    Args:
        sample: Article-set name; used in the query and the output path.
        authors: Iterable of two-item ``(author, stats)`` pairs. Only the
            author part is used.

    NOTE(review): ``db`` is not defined inside this function; presumably a
    module-level database handle -- confirm against the rest of the file.
    """
    # `with` closes the file even if a query or a write raises; the original
    # open()/close() pair leaked the handle in that case.
    with open('data/03-stats/%s-top-references.csv' % sample, 'wb') as f:
        writer = CSVUnicodeWriter(f)
        for author, _stats in authors:
            # Single-argument print(...) prints the same text as the
            # Python 2 print statement and is also valid Python 3.
            print(author)
            query = {'articleset': sample, 'authors': author}
            for reference in db.references.find(query):
                writer.writerow([
                    ','.join(reference['authors']),
                    reference['year'],
                    reference['original'],
                ])
def references_by_authors_db(sample, authors):
    """Write the stored references of the given authors to a CSV file.

    For each ``(author, stats)`` pair in ``authors`` the author is printed
    and ``db.references.find`` is called with ``'articleset'`` set to
    ``sample`` and ``'authors'`` set to the author. Every returned reference
    becomes one row (comma-joined authors, year, original reference text)
    in ``data/03-stats/<sample>-top-references.csv``.

    Args:
        sample: Article-set name; used in the query and the output path.
        authors: Iterable of two-item ``(author, stats)`` pairs. Only the
            author part is used.

    NOTE(review): ``db`` is not defined inside this function; presumably a
    module-level database handle -- confirm against the rest of the file.
    """
    # `with` closes the file even if a query or a write raises; the original
    # open()/close() pair leaked the handle in that case.
    with open('data/03-stats/%s-top-references.csv' % sample, 'wb') as f:
        writer = CSVUnicodeWriter(f)
        for author, _stats in authors:
            # Single-argument print(...) prints the same text as the
            # Python 2 print statement and is also valid Python 3.
            print(author)
            query = {'articleset': sample, 'authors': author}
            for reference in db.references.find(query):
                writer.writerow([
                    ','.join(reference['authors']),
                    reference['year'],
                    reference['original'],
                ])