class DataAnalysis(object):
    """
    Data Analysis collects data about a dataset and exports it so that it can
    be used by visualization software to better understand access patterns
    """

    def __init__(self, config=None):
        """
        Set up the logger and the managers used by the analysis

        :param config: configuration handed unchanged to every manager; a new
                       empty dict is created when it is omitted
        """
        self.logger = logging.getLogger(__name__)
        # Create the dict per instance: a dict() default in the signature
        # would be one object shared by every instance built without a config
        self.config = {} if config is None else config
        self.datasets = DatasetManager(self.config)
        self.sites = SiteManager(self.config)
        self.storage = StorageManager(self.config)
        self.popularity = PopularityManager(self.config)

    def start(self, dataset_name='/PAHighPt/HIRun2013-PromptReco-v1/RECO'):
        """
        Begin Data Analysis

        Initiates the data for the dataset, exports it and logs how long the
        two steps took

        :param dataset_name: name of the dataset to analyse
        """
        started = datetime.datetime.utcnow()
        self.initiate_data(dataset_name)
        self.export_data(dataset_name)
        elapsed = datetime.datetime.utcnow() - started
        self.logger.info('Data Analysis took %s', str(elapsed))

    def initiate_data(self, dataset_name):
        """
        Initiate data about dataset(s)

        Upserts an entry keyed on the dataset name into the 'dataset_data'
        collection, then asks the dataset manager to insert its phedex and
        dbs data and the popularity manager to insert the dataset

        :param dataset_name: name of the dataset to initiate
        """
        coll = 'dataset_data'
        query = {'name': dataset_name}
        data = {'$set': {'name': dataset_name}}
        # upsert=True is passed so the entry is created if the query finds none
        self.storage.update_data(coll=coll, query=query, data=data, upsert=True)
        self.datasets.insert_phedex_data(dataset_name)
        self.datasets.insert_dbs_data(dataset_name)
        self.popularity.insert_dataset(dataset_name)

    def export_data(self, dataset_name):
        """
        Get data from DB and export to file for usage in visualization

        Each exported row is (name, date, popularity), where popularity is
        n_accesses * n_cpus * n_users of the entry

        :param dataset_name: name of the dataset whose popularity is exported
        """
        # get data from DB
        coll = 'dataset_popularity'
        pipeline = [{'$match': {'name': dataset_name}}]
        # NOTE(review): assumes get_data returns an iterable of dict-like
        # entries with the keys read below -- confirm against StorageManager
        db_data = self.storage.get_data(coll=coll, pipeline=pipeline)
        headers = ('dataset_name', 'date', 'popularity')
        # A tuple literal is required here: tuple() accepts a single iterable,
        # so tuple(a, b, c) raises TypeError
        data = [
            (
                entry['name'],
                entry['date'],
                entry['n_accesses']*entry['n_cpus']*entry['n_users'],
            )
            for entry in db_data
        ]
        export_csv(headers=headers, data=data, file_name='single_dataset')
def __init__(self, config=None):
    """
    Set up the logger and the storage, site, dataset and popularity managers

    :param config: configuration handed unchanged to every manager; a new
                   empty dict is created when it is omitted
    """
    self.logger = logging.getLogger(__name__)
    # Create the dict per call: a dict() default in the signature would be
    # one object shared by every instance built without a config
    self.config = {} if config is None else config
    self.storage = StorageManager(self.config)
    self.sites = SiteManager(self.config)
    self.datasets = DatasetManager(self.config)
    self.popularity = PopularityManager(self.config)
class Initiate(object):
    """
    Initiate Database
    """

    def __init__(self, config=None):
        """
        Set up the logger and the site, dataset and popularity managers

        :param config: configuration handed unchanged to every manager; a new
                       empty dict is created when it is omitted
        """
        self.logger = logging.getLogger(__name__)
        # Create the dict per instance: a dict() default in the signature
        # would be one object shared by every instance built without a config
        self.config = {} if config is None else config
        self.sites = SiteManager(self.config)
        self.datasets = DatasetManager(self.config)
        self.popularity = PopularityManager(self.config)

    def start(self):
        """
        Begin Initiating Database

        Calls initiate_db() on the site, dataset and popularity managers, in
        that order, and logs how long the three calls took
        """
        started = datetime.datetime.utcnow()
        self.sites.initiate_db()
        self.datasets.initiate_db()
        self.popularity.initiate_db()
        elapsed = datetime.datetime.utcnow() - started
        self.logger.info('Initiate took %s', str(elapsed))
class UpdateDB(object):
    """
    Update DB with new dataset and site data
    """

    def __init__(self, config=None):
        """
        Set up the logger and the storage, site, dataset and popularity
        managers

        :param config: configuration handed unchanged to every manager; a new
                       empty dict is created when it is omitted
        """
        self.logger = logging.getLogger(__name__)
        # Create the dict per instance: a dict() default in the signature
        # would be one object shared by every instance built without a config
        self.config = {} if config is None else config
        self.storage = StorageManager(self.config)
        self.sites = SiteManager(self.config)
        self.datasets = DatasetManager(self.config)
        self.popularity = PopularityManager(self.config)

    def start(self):
        """
        Begin Database Update

        Calls update_db() on the site, dataset and popularity managers, in
        that order, and logs how long the three calls took
        """
        started = datetime.datetime.utcnow()
        self.sites.update_db()
        self.datasets.update_db()
        self.popularity.update_db()
        elapsed = datetime.datetime.utcnow() - started
        self.logger.info("Update DB took %s", str(elapsed))