def generate_data(cls):
    """Get statistics for a list of values and associated weights.

    Computes, for both the full data set (``cls.data``) and the refereed
    subset (``cls.refereed_data``): number of entries, normalized
    (weight-dot-product) value, mean, median and total.  Results are stored
    as attributes on ``cls``; ``cls.post_process()`` records them.
    """
    cls.pre_process()
    # List comprehensions instead of map(lambda ...): identical on Python 2,
    # and still sized/iterable objects (len(), sum()) on Python 3 where a
    # map object would make len(values) raise TypeError.
    values = [a[0] for a in cls.data]
    weights = [a[1] for a in cls.data]
    refereed_values = [a[0] for a in cls.refereed_data]
    refereed_weights = [a[1] for a in cls.refereed_data]
    # get number of entries
    cls.number_of_entries = len(values)
    # get number of refereed entries
    cls.number_of_refereed_entries = len(refereed_values)
    # get normalized value
    cls.normalized_value = vector_product(values, weights)
    # get refereed normalized value
    cls.refereed_normalized_value = vector_product(refereed_values, refereed_weights)
    # get mean value of values
    cls.mean_value = mean(values)
    # get mean value of refereed values
    cls.refereed_mean_value = mean(refereed_values)
    # get median value of values
    cls.median_value = median(values)
    # get median value of refereed values
    cls.refereed_median_value = median(refereed_values)
    # get total of values
    cls.total_value = sum(values)
    # get total of refereed values
    cls.refereed_total_value = sum(refereed_values)
    # record results
    cls.post_process()
def generate_data(cls):
    """Get statistics for a list of values and associated weights.

    Same quantities as the unrounded variant (number of entries, normalized
    value, mean, median, total — for the full set and the refereed subset),
    but with the derived statistics rounded to one decimal place.
    Results are stored on ``cls``; ``cls.post_process()`` records them.
    """
    def _round1(v):
        # Preserve the file's exact rounding/formatting behavior:
        # round to 1 decimal, render with '%.1f', re-parse as float.
        return float('%.1f' % round(v, 1))
    cls.pre_process()
    # List comprehensions instead of map(lambda ...): identical on Python 2,
    # and len() keeps working on Python 3 (a map object has no len()).
    values = [a[0] for a in cls.data]
    weights = [a[1] for a in cls.data]
    refereed_values = [a[0] for a in cls.refereed_data]
    refereed_weights = [a[1] for a in cls.refereed_data]
    # get number of entries
    cls.number_of_entries = len(values)
    # get number of refereed entries
    cls.number_of_refereed_entries = len(refereed_values)
    # get normalized value
    cls.normalized_value = _round1(vector_product(values, weights))
    # get refereed normalized value
    cls.refereed_normalized_value = _round1(vector_product(refereed_values, refereed_weights))
    # get mean value of values
    cls.mean_value = _round1(mean(values))
    # get mean value of refereed values
    cls.refereed_mean_value = _round1(mean(refereed_values))
    # get median value of values
    cls.median_value = _round1(median(values))
    # get median value of refereed values
    cls.refereed_median_value = _round1(median(refereed_values))
    # get total of values
    cls.total_value = sum(values)
    # get total of refereed values
    cls.refereed_total_value = sum(refereed_values)
    # record results
    cls.post_process()
def generate_data(cls):
    """Calculate citation indicators over ``cls.citations``.

    Computes the Hirsch (h), g, e, m, i10 and i100 indices, the Tori and
    riq indicators, Read10, and the number of self-citations, and stores
    them as attributes on ``cls``; ``cls.post_process()`` records them.
    """
    cls.pre_process()
    # citations in descending order; sorted() leaves cls.citations itself
    # untouched (the original in-place sort reordered shared state)
    citations = sorted(cls.citations, reverse=True)
    # first calculate the Hirsch and g indices
    rank = 1
    N = 0
    h = 0
    g = 0
    for cite in citations:
        N += cite
        if rank * rank <= N:
            g = rank
        # floor division: contributes exactly 1 while cite >= rank.
        # Plain '/' would add fractional amounts on Python 3 and corrupt h.
        h += min(1, cite // rank)
        rank += 1
    # the e-index; sqrt raises when the argument goes negative
    try:
        e = sqrt(sum(citations[:h]) - h * h)
    except Exception:
        e = 'NA'
    # Get the number of self-citations
    try:
        number_of_self_citations = sum(d['number_of_self_citations'] for d in cls.metrics_data)
    except Exception:
        number_of_self_citations = 0
    # get the Tori index (refereed-normalized citations weighted by 1/Nauthors)
    rn_citations = [d['rn_citations'] for d in cls.metrics_data]
    auth_nums = [1.0 / float(d['author_num']) for d in cls.metrics_data]
    tori = vector_product(rn_citations, auth_nums)
    # Read10: author-normalized reads from the most recent complete year
    # (a[7][-2]); falls back to 0 if the reads histogram is missing/short
    try:
        read10_reads = [a[7][-2] for a in cls.reads10data]
        read10_auths = [1.0 / float(a[4]) for a in cls.reads10data]
        read10 = vector_product(read10_reads, read10_auths)
    except Exception:
        read10 = 0
    try:
        riq = int(1000.0 * sqrt(float(tori)) / float(cls.time_span))
    except Exception:
        riq = 'NA'
    cls.h_index = h
    cls.g_index = g
    # NOTE: rounds to 2 decimals, then formats to 1 — kept as in the original
    cls.m_index = float('%.1f' % round(float(h) / float(cls.time_span), 2))
    # list comprehensions so len() works on Python 3 (filter() is lazy there)
    cls.i10_index = len([c for c in citations if c >= 10])
    cls.i100_index = len([c for c in citations if c >= 100])
    # the 'NA' fallbacks must not be fed to round() (that raised TypeError
    # in the original); pass them through unchanged instead
    cls.e_index = e if e == 'NA' else float('%.1f' % round(e, 1))
    cls.tori = float('%.1f' % round(tori, 1))
    cls.riq = riq if riq == 'NA' else float('%.1f' % round(riq, 1))
    cls.read10 = int(round(read10))
    cls.number_of_self_citations = number_of_self_citations
    cls.post_process()
def generate_data(cls):
    """Get time series of citation indicators.

    For every year from the earliest publication year (derived from the
    bibcode prefix) through the current year, computes h, g, i10, tori,
    m, roq, i100 and read10, and stores them in ``cls.series`` keyed by
    the year as a string, as a colon-separated value string.
    """
    today = datetime.today()
    bibcodes = [a[0] for a in cls.attributes]
    # the first 4 characters of a bibcode are its publication year
    years = [int(b[:4]) for b in bibcodes]
    minYear = min(years)
    maxYear = today.year
    cls.series = {}
    cls.pre_process()
    for year in range(minYear, maxYear + 1):
        # Read10: reads histograms start in 1996, so earlier years get 0
        if year < 1996:
            read10 = 0
        else:
            threshold = year - 10
            year_index = year - 1996
            if year == maxYear:
                year_index -= 1
            # papers with a reads histogram, published in the last 10 years
            reads10data = [a for a in cls.attributes
                           if len(a[7]) > 0 and int(a[0][:4]) > threshold]
            try:
                read10_reads = [a[7][year_index] for a in reads10data]
                read10_auths = [1.0 / float(a[4]) for a in reads10data]
                read10 = vector_product(read10_reads, read10_auths)
            except Exception:
                read10 = 0
        # Tori accumulated up to and including this year
        tori = sum(value for d in cls.metrics_data
                   for (yr, value) in d['rn_citations_hist'].items()
                   if int(yr) <= year)
        new_list = get_subset(cls.attributes, year)
        new_list = sort_list_of_lists(new_list, 2)
        citations = [a[2] for a in new_list]
        # first calculate the Hirsch and g indices
        rank = 1
        N = 0
        h = 0
        g = 0
        for cite in citations:
            N += cite
            if rank * rank <= N:
                g = rank
            # floor division: contributes 1 only while cite >= rank
            # (plain '/' would add fractions on Python 3 and corrupt h)
            h += min(1, cite // rank)
            rank += 1
        TimeSpan = year - minYear + 1
        # list comprehensions so len() works on Python 3 (filter() is lazy there)
        i10 = len([c for c in citations if c >= 10])
        i100 = len([c for c in citations if c >= 100])
        m = float(h) / float(TimeSpan)
        roq = int(1000.0 * math.sqrt(float(tori)) / float(TimeSpan))
        # NOTE(review): round(read10)*0.1 scales AFTER rounding — possibly
        # intended as round(read10*0.1); kept as-is, confirm with the author
        indices = "%s:%s:%s:%s:%s:%s:%s:%s" % (
            h, g, i10, tori, m, roq, i100, int(round(read10) * 0.1))
        cls.series[str(year)] = indices
    cls.post_process()