Esempio n. 1
0
 def get_stats(self):
     """Load a saved statistics snapshot and print overall tag totals.

     Reads the 'stats17028' database from the 'stats/' store, then prints
     the summed usage count over all tags followed by the number of
     distinct tags.

     NOTE(review): this description covers only the tag-total section;
     confirm against the rest of the method before relying on it.
     """
     import pprint
     # Pretty-printer instance; not used by the tag-total section below.
     printer = pprint.PrettyPrinter(indent=4)
     db = DatabasePlainFiles('stats/')
     # Hard-coded snapshot name.
     # NOTE(review): presumably the last checkpoint written by the
     # folder-reading pass -- confirm.
     stats = db.loadDbase('stats17028')
     
     # Tag cloud (disabled): the string literal below is inert. The code in it
     # would append each tag used more than 15 times once per 15 uses and save
     # the resulting list as JSON under 'tag_cloud'.
     """
     tag_cloud = []
     for tag in stats['tags']:
         if stats['tags'][tag] > 15: #5 is okay
             for i in range(int(stats['tags'][tag] / 15)):
                 tag_cloud.append(tag)
         
     import json
     db.saveDbaseRaw('tag_cloud', json.dumps(tag_cloud))
     """
     
     # Overall tag totals: stats['tags'] maps each tag to its usage count.
     tag_usage = 0  # sum of the usage counts of all tags
     tag_count = 0  # number of distinct tags
     for tag in stats['tags']:
         tag_usage = tag_usage + stats['tags'][tag]
         tag_count = tag_count + 1
     
     print tag_usage
     print tag_count
     
     
     #format statistics
     """
Esempio n. 2
0
 def read_data_folder(self, resume_from=14061):
     """Aggregate per-field value counts over the pickled packages in 'data/'.

     Every file in the data folder except "package_list" is unpickled and
     its field values are passed one at a time to ``self.add_to_stats``.
     After each package the running statistics are saved as 'stats<N>' in
     the 'stats/' store, where N is the file's position in the directory
     listing, so an interrupted run can be resumed.

     Args:
         resume_from: Listing index to resume at. The checkpoint
             'stats<resume_from>' is loaded and files with a smaller index
             are skipped. Pass None to start from empty statistics and
             process every file. Defaults to 14061, the value that used to
             be hard-coded here.

     Raises:
         KeyError: If a package lacks one of the mandatory fields
             (everything outside 'extras').
     """
     import pickle

     data_folder = 'data/'
     # NOTE: resuming by index relies on os.listdir returning the files in
     # the same order as in the run that wrote the checkpoint.
     file_list = os.listdir(data_folder)

     # Fields are grouped by where they live in a package; the order matches
     # the order in which they have always been fed to add_to_stats.
     package_fields = ('maintainer', 'isopen', 'author', 'version', 'type')
     resource_fields = ('mimetype', 'format', 'resource_type')
     license_fields = ('license', 'license_title')
     extra_fields = (
         'geographic_coverage',
         'geographical_granularity',
         'temporal_coverage-from',
         'temporal_coverage-to',
         'temporal_granularity',
         'series',
         'precision',
         'national_statistic',
         'date_released',
         'categories',
     )
     # Categories present in a fresh statistics database.
     stat_categories = (
         'maintainer', 'isopen', 'author', 'version', 'license_id', 'type',
         'mimetype', 'format', 'resource_type', 'tags', 'groups', 'license',
         'license_title', 'geographic_coverage', 'geographical_granularity',
         'temporal_coverage-from', 'temporal_coverage-to',
         'temporal_granularity', 'national_statistic', 'precision', 'series',
         'date_released', 'categories',
     )

     db = DatabasePlainFiles('stats/')
     if resume_from is None:
         stats = dict((category, {}) for category in stat_categories)
     else:
         stats = db.loadDbase('stats' + str(resume_from))

     for num, file_name in enumerate(file_list):
         print(num)
         if file_name == "package_list":
             continue
         # NOTE(review): checkpoint 'stats<N>' is written after file N has
         # been counted, so `<` re-counts file N on resume if the checkpoint
         # was produced by this loop. Confirm before changing to `<=`.
         if resume_from is not None and num < resume_from:
             continue

         # Binary mode is what pickle expects; `with` closes the file even
         # when unpickling fails.
         # NOTE: pickle.load can execute arbitrary code -- only run this on
         # data files from a trusted source.
         with open(os.path.join(data_folder, file_name), 'rb') as handle:
             package = pickle.load(handle)

         for field in package_fields:
             self.add_to_stats(package[field], field, stats)

         for resource in package['resources']:
             for field in resource_fields:
                 self.add_to_stats(resource[field], field, stats)

         for tag in package['tags']:
             self.add_to_stats(tag, 'tags', stats)

         for group in package['groups']:
             self.add_to_stats(group, 'groups', stats)

         for field in license_fields:
             self.add_to_stats(package[field], field, stats)

         # Extras are optional and handled best-effort, one field at a time,
         # so a missing or uncountable field no longer hides the fields that
         # follow it. Exception (not BaseException) lets Ctrl-C and
         # SystemExit stop the run.
         for field in extra_fields:
             try:
                 self.add_to_stats(package['extras'][field], field, stats)
             except Exception:
                 continue

         # Checkpoint after every package so the run can be resumed.
         db.saveDbase('stats' + str(num), stats)

     print('script executed!')