def _report_msc_counts_(path, selected_fields):
    """Print a report on the MSC code distribution among selected records.

    Only records for which has_record_fields(record, selected_fields) is true
    are passed through the counting filter. The resulting code names are
    upper-cased, a histogram of the per-code counts is drawn and shown with
    matplotlib, and _report_counts_ is then called for all codes and for each
    subset selected by the msc_processing patterns.

    path -- path of the records file handed (opened) to count_msc_occurences.
    selected_fields -- forwarded unchanged to has_record_fields.
    """
    # Use a context manager so the input file is closed even if counting
    # raises; the original left the handle open.
    with open(path) as records_file:
        raw_counts = count_msc_occurences(
            records_file,
            lambda record: has_record_fields(record, selected_fields))
        # Build the upper-cased dict while the file is still open, in case
        # the counting result is produced lazily.
        # NOTE(review): codes differing only by case collapse onto one key and
        # the last count wins (counts are not summed) -- confirm this is intended.
        msc_counts = dict((n.upper(), c) for n, c in raw_counts.iteritems())

    msc_counts_lp = filter_counts(msc_counts, msc_processing.MSC_LEAF_PATTERN)
    msc_counts_olp = filter_counts(msc_counts, msc_processing.MSC_ORDINARY_LEAF_PATTERN)
    msc_counts_slp = filter_counts(msc_counts, msc_processing.MSC_SPECIAL_LEAF_PATTERN)

    msc_counts_sl = filter_counts(msc_counts, msc_processing.MSC_SECOND_LEVEL)
    msc_counts_osl = filter_counts(msc_counts, msc_processing.MSC_ORDINARY_SECOND_LEVEL)
    msc_counts_ssl = filter_counts(msc_counts, msc_processing.MSC_SPECIAL_SECOND_LEVEL)

    #############################################

    # A draw_occur_hist(msc_counts, zoom_out=100, ...) call used to be here
    # (disabled in the original); plt.hist is used instead.

    # One bin per 100 distinct codes, but never fewer than one bin: with
    # under 100 codes the original integer division produced 0 bins.
    bin_count = max(1, len(msc_counts) // 100)
    # `normed=False` was dropped: it only restated the default and the keyword
    # no longer exists in newer matplotlib releases.
    plt.hist(msc_counts.values(), bin_count, log=True, alpha=0.75)
    # NOTE(review): these labels appear to come from the disabled
    # draw_occur_hist call; confirm they still describe the histogram axes.
    plt.xlabel("Ranga kategorii")
    plt.ylabel("Liczba wystapien w rekordach")
    plt.show()

    _report_counts_(msc_counts, "--All categories:--")
    _report_counts_(msc_counts_lp, "--MSC_LEAF_PATTERN:--")
    _report_counts_(msc_counts_olp, "--MSC_ORDINARY_LEAF_PATTERN:--")
    _report_counts_(msc_counts_slp, "--MSC_SPECIAL_LEAF_PATTERN:--")
    _report_counts_(msc_counts_sl, "--MSC_SECOND_LEVEL:--")
    _report_counts_(msc_counts_osl, "--MSC_ORDINARY_SECOND_LEVEL:--")
    _report_counts_(msc_counts_ssl, "--MSC_SPECIAL_SECOND_LEVEL:--")
예제 #2
0
def _report_msc_counts_(path, selected_fields):
    """Print a report on the MSC code distribution among selected records.

    Steps, in order:
      1. Count MSC occurrences in the file at `path`, considering only
         records accepted by has_record_fields(record, selected_fields).
      2. Upper-case the code names.
      3. Filter the counts by each msc_processing pattern.
      4. Draw and show a histogram of the per-code counts.
      5. Call _report_counts_ for the full set and for every filtered subset.

    path -- path of the records file to open and count.
    selected_fields -- passed through to has_record_fields.
    """

    def _accepts(record):
        # Record filter handed to count_msc_occurences.
        return has_record_fields(record, selected_fields)

    # The original never closed the file; `with` guarantees it is released.
    with open(path) as source:
        counted = count_msc_occurences(source, _accepts)
        # Converted inside the `with` so nothing reads the file after close.
        # NOTE(review): names that differ only by case overwrite each other
        # here rather than having their counts added -- verify that is wanted.
        msc_counts = dict(
            (name.upper(), count) for name, count in counted.iteritems())

    # Title -> pattern pairs, in the order the report sections are printed.
    sections = [
        ("--MSC_LEAF_PATTERN:--", msc_processing.MSC_LEAF_PATTERN),
        ("--MSC_ORDINARY_LEAF_PATTERN:--",
         msc_processing.MSC_ORDINARY_LEAF_PATTERN),
        ("--MSC_SPECIAL_LEAF_PATTERN:--",
         msc_processing.MSC_SPECIAL_LEAF_PATTERN),
        ("--MSC_SECOND_LEVEL:--", msc_processing.MSC_SECOND_LEVEL),
        ("--MSC_ORDINARY_SECOND_LEVEL:--",
         msc_processing.MSC_ORDINARY_SECOND_LEVEL),
        ("--MSC_SPECIAL_SECOND_LEVEL:--",
         msc_processing.MSC_SPECIAL_SECOND_LEVEL),
    ]
    # Filtering happens before plotting, as in the original statement order.
    filtered = [(title, filter_counts(msc_counts, pattern))
                for title, pattern in sections]

    #############################################

    # Roughly one bin per 100 codes; clamp to 1 because a bin count of 0
    # (fewer than 100 codes) is not a usable histogram. `//` keeps the value
    # an integer regardless of division semantics.
    bin_count = max(1, len(msc_counts) // 100)
    # The unused (n, bins, patches) result is no longer bound, and the
    # redundant `normed=False` (default value, removed from newer matplotlib)
    # is omitted.
    plt.hist(msc_counts.values(), bin_count, log=True, alpha=0.75)
    # NOTE(review): labels look inherited from the disabled draw_occur_hist
    # variant of this plot; confirm they match what plt.hist displays.
    plt.xlabel("Ranga kategorii")
    plt.ylabel("Liczba wystapien w rekordach")
    plt.show()

    _report_counts_(msc_counts, "--All categories:--")
    for title, subset in filtered:
        _report_counts_(subset, title)
예제 #3
0
        avg, std_dev = calc_avg_dev(val['degrees'])
        print "List of degrees: "+str(val['degrees'])
        print "Average degree: "+str(avg)
        print "Standard deviation of a degree: "+str(std_dev)
        
        avg, std_dev = calc_avg_dev(val['counts'])
        print "List of counts: "+str(val['counts'])
        print "Average count: "+str(avg)
        print "Standard deviation of a count: "+str(std_dev)
        

#def print_counts(counts_lowest, counts_higher, curr_labels, start_printable):
#    '''
#    Print the dictionaries in a nested way.
#    '''
#    if counts_higher:
#        for curr_l in curr_labels:
#            print start_printable+"[print_counts] key: "+curr_l+": "+str(counts_higher[0][curr_l]['count']+", children: "+str(len(counts_higher[0][curr_l]['elements'])))
#            print_counts(counts_lowest, counts_higher[1:], counts_higher[0][curr_l]['elements'], start_printable+'\t')
#    else:
#        for curr_l in curr_labels:
#            print start_printable+"[print_counts] key: "+curr_l+": "+str(counts_lowest[curr_l])

if __name__ == '__main__':
    fname = sys.argv[1]
    print "fname:", fname
    counts_lowest, counts_higher = count_label_statistics(count_msc_occurences(open(fname, 'r')), label_mappings = [lambda x: x[:3], lambda x: x[:2]])
    #print counts_lowest, counts_higher
    data4avg_stats = {}
    print_counts(counts_lowest, counts_higher, list(counts_higher[0].iterkeys()), '', data4avg_stats)
    print_avg_stats(data4avg_stats)