def most_referenced(sample, amount):
    """Count author statistics for *sample* and export top authors' references.

    Reads ``data/02-refined/<sample>-rows.json`` and updates the module-level
    ``authors`` mapping, which holds two counters per author: ``'articles'``
    (times the author appears in an article's author list) and
    ``'referenced'`` (times the author appears in a reference's author list).
    It then writes ``data/03-stats/<sample>-top-references.csv`` with one row
    (joined authors, year, original text) for each reference author entry that
    belongs to the ``amount`` most-referenced authors.

    Args:
        sample: Name of the article set; interpolated into both file paths.
        amount: Number of most-referenced authors to select.

    Note:
        ``authors`` is not reset here, so counters already present in it are
        added to. A reference is written once per matching author entry, so a
        reference listing several top authors yields several identical rows.
    """
    def bump(author, field):
        # Create the counters the first time an author is seen, then count.
        stats = authors.setdefault(author, {'articles': 0, 'referenced': 0})
        stats[field] += 1

    with open('data/02-refined/%s-rows.json' % sample, 'r') as f:
        rows = json.load(f)

    for row in rows:
        article = row['article']
        for author in article['authors']:
            bump(author, 'articles')
        for reference in article['referencelist']:
            # Every author of the reference is credited, not only the last.
            for author in reference['authors']:
                bump(author, 'referenced')

    # Rank once: ``authors`` no longer changes while the CSV is written.
    ranked = sorted(
        authors.items(),
        key=lambda item: item[1]['referenced'],
        reverse=True)
    top_authors = set(name for name, _ in ranked[:amount])

    with open('data/03-stats/%s-top-references.csv' % sample, 'wb') as f:
        writer = CSVUnicodeWriter(f)
        for row in rows:
            for reference in row['article']['referencelist']:
                for author in reference['authors']:
                    if author in top_authors:
                        writer.writerow([
                            ','.join(reference['authors']),
                            reference['year'], reference['original']
                        ])
def references_by_authors_db(sample, authors):
    """Export the database references of the given authors to a CSV file.

    For each author, queries ``db.references`` for documents whose
    ``'articleset'`` equals *sample* and whose ``'authors'`` matches the
    author, and writes one row per result (joined authors, year, original
    text) to ``data/03-stats/<sample>-top-references.csv``. Each author is
    printed before its query runs.

    Args:
        sample: Name of the article set; used in the query and the file path.
        authors: Iterable of ``(author, stats)`` pairs; only the author is
            used.
    """
    # The context manager closes the file even if a query or write fails.
    with open('data/03-stats/%s-top-references.csv' % sample, 'wb') as f:
        writer = CSVUnicodeWriter(f)

        for author, _stats in authors:
            print(author)
            matches = db.references.find({
                'articleset': sample,
                'authors': author
            })
            for reference in matches:
                writer.writerow([
                    ','.join(reference['authors']), reference['year'],
                    reference['original']
                ])