def test_get_basic_stats_data(self):
    """Test getting basic stats data."""
    from models import get_basic_stats_data
    data = get_basic_stats_data(testset)
    # The most important thing here is to test that it is a list
    # of MetricsModel instances
    self.assertIsInstance(data, list)
    # Classes are compared by name, so the model class itself does not
    # have to be imported here. Collecting the offending names (instead
    # of asserting on a bare boolean) makes a failure report say which
    # unexpected types were returned.
    unexpected = [x.__class__.__name__ for x in data
                  if x.__class__.__name__ != 'MetricsModel']
    self.assertEqual(unexpected, [])
def test_get_basic_stats_data(self):
    """Test getting basic stats data."""
    from models import get_basic_stats_data
    data = get_basic_stats_data(testset)
    # The most important thing here is to test that it is a list
    # of MetricsModel instances
    self.assertIsInstance(data, list)
    # Classes are compared by name, so the model class itself does not
    # have to be imported here. Collecting the offending names (instead
    # of asserting on a bare boolean) makes a failure report say which
    # unexpected types were returned.
    unexpected = [x.__class__.__name__ for x in data
                  if x.__class__.__name__ != 'MetricsModel']
    self.assertEqual(unexpected, [])
def _usage_stats(papers, attribute, nentries):
    """Summarise one usage series ('reads' or 'downloads') over papers.

    Only records whose ``attribute`` series is non-empty and has exactly
    ``nentries`` entries are taken into account.

    Returns a ``(total, average, median, recent)`` tuple, where ``recent``
    is the sum of the last entry of every qualifying series. All four
    values are zero when no record qualifies.
    """
    series = [getattr(p, attribute) for p in papers]
    complete = [s for s in series if s and len(s) == nentries]
    # Fall back to [0] so that the numpy reductions return 0 rather than
    # nan (plus a RuntimeWarning) when there is nothing to aggregate.
    totals = [sum(s) for s in complete] or [0]
    recent = sum([s[-1] for s in complete])
    return np.sum(totals), np.mean(totals), np.median(totals), recent


def get_basic_stats(identifiers):
    """Calculate the basic statistics for a set of publications.

    The records are retrieved with ``get_basic_stats_data(identifiers)``.
    Two dictionaries with identical keys are built: one covering all
    records and one covering only the records flagged as refereed.

    Keys: 'number of papers', 'normalized paper count', and for both
    'reads' and 'downloads': 'total number of ...',
    'average number of ...', 'median number of ...' and
    'recent number of ...'.

    The 'normalized number of reads/downloads' statistics are not
    computed; they were already disabled (commented out) in the previous
    implementation.

    Returns a tuple ``(bs, bsr, data)``: the statistics for all
    publications, the statistics for refereed publications, and the
    retrieved records (which get used later on if the usage histograms
    are required).
    """
    # Get the data to calculate the basic stats
    data = get_basic_stats_data(identifiers)
    refereed = [p for p in data if p.refereed]
    # basic stats for all publications
    bs = {}
    # basic stats for refereed publications
    bsr = {}
    # First get the number of (refereed) papers. Note that the overall
    # count is the number of identifiers supplied, not the number of
    # records that were found for them.
    bs['number of papers'] = len(identifiers)
    bsr['number of papers'] = len(refereed)
    # Next get the (refereed) normalized paper count: every paper
    # contributes 1/(number of authors)
    bs['normalized paper count'] = np.sum(
        np.array([1.0 / float(p.author_num) for p in data]))
    bsr['normalized paper count'] = np.sum(
        np.array([1.0 / float(p.author_num) for p in refereed]))
    # A usage series only counts when its length equals the number of
    # calendar years from 1996 through the current year, inclusive
    # (presumably one entry per year -- confirm against the data model).
    nentries = datetime.now().year - 1996 + 1
    # Reads first, then downloads, each for all and for refereed papers
    for usage in ('reads', 'downloads'):
        for stats, papers in ((bs, data), (bsr, refereed)):
            total, average, median, recent = _usage_stats(
                papers, usage, nentries)
            stats['total number of ' + usage] = total
            stats['average number of ' + usage] = average
            stats['median number of ' + usage] = median
            stats['recent number of ' + usage] = recent
    # Return both results and the data (which will get used later on
    # if the usage histograms are required)
    return bs, bsr, data
def _usage_stats(papers, attribute, nentries):
    """Summarise one usage series ('reads' or 'downloads') over papers.

    Only records whose ``attribute`` series is non-empty and has exactly
    ``nentries`` entries are taken into account.

    Returns a ``(total, average, median, recent)`` tuple, where ``recent``
    is the sum of the last entry of every qualifying series. All four
    values are zero when no record qualifies.
    """
    series = [getattr(p, attribute) for p in papers]
    complete = [s for s in series if s and len(s) == nentries]
    # Fall back to [0] so that the numpy reductions return 0 rather than
    # nan (plus a RuntimeWarning) when there is nothing to aggregate.
    totals = [sum(s) for s in complete] or [0]
    recent = sum([s[-1] for s in complete])
    return np.sum(totals), np.mean(totals), np.median(totals), recent


def get_basic_stats(identifiers):
    """Calculate the basic statistics for a set of publications.

    The records are retrieved with ``get_basic_stats_data(identifiers)``.
    Two dictionaries with identical keys are built: one covering all
    records and one covering only the records flagged as refereed.

    Keys: 'number of papers', 'normalized paper count', and for both
    'reads' and 'downloads': 'total number of ...',
    'average number of ...', 'median number of ...' and
    'recent number of ...'.

    The 'normalized number of reads/downloads' statistics are not
    computed; they were already disabled (commented out) in the previous
    implementation.

    Returns a tuple ``(bs, bsr, data)``: the statistics for all
    publications, the statistics for refereed publications, and the
    retrieved records (which get used later on if the usage histograms
    are required).
    """
    # Get the data to calculate the basic stats
    data = get_basic_stats_data(identifiers)
    refereed = [p for p in data if p.refereed]
    # basic stats for all publications
    bs = {}
    # basic stats for refereed publications
    bsr = {}
    # First get the number of (refereed) papers. Note that the overall
    # count is the number of identifiers supplied, not the number of
    # records that were found for them.
    bs['number of papers'] = len(identifiers)
    bsr['number of papers'] = len(refereed)
    # Next get the (refereed) normalized paper count: every paper
    # contributes 1/(number of authors)
    bs['normalized paper count'] = np.sum(
        np.array([1.0 / float(p.author_num) for p in data]))
    bsr['normalized paper count'] = np.sum(
        np.array([1.0 / float(p.author_num) for p in refereed]))
    # A usage series only counts when its length equals the number of
    # calendar years from 1996 through the current year, inclusive
    # (presumably one entry per year -- confirm against the data model).
    nentries = datetime.now().year - 1996 + 1
    # Reads first, then downloads, each for all and for refereed papers
    for usage in ('reads', 'downloads'):
        for stats, papers in ((bs, data), (bsr, refereed)):
            total, average, median, recent = _usage_stats(
                papers, usage, nentries)
            stats['total number of ' + usage] = total
            stats['average number of ' + usage] = average
            stats['median number of ' + usage] = median
            stats['recent number of ' + usage] = recent
    # Return both results and the data (which will get used later on
    # if the usage histograms are required)
    return bs, bsr, data