Ejemplo n.º 1
0
class DataAnalysis(object):
    """
    Data Analysis collects data about a dataset and exports it so that
    visualization software can be used to better understand access patterns
    """
    # Dataset analysed when start() is called without an explicit name
    DEFAULT_DATASET = '/PAHighPt/HIRun2013-PromptReco-v1/RECO'

    def __init__(self, config=None):
        """
        Set up the logger and the managers used by the analysis

        config -- configuration handed to every manager; a new empty dict
                  is created when it is omitted
        """
        self.logger = logging.getLogger(__name__)
        # A None default avoids one dict object being shared (and mutated)
        # across every instance created without an explicit config
        self.config = dict() if config is None else config
        self.datasets = DatasetManager(self.config)
        self.sites = SiteManager(self.config)
        self.storage = StorageManager(self.config)
        self.popularity = PopularityManager(self.config)

    def start(self, dataset_name=None):
        """
        Begin Data Analysis

        dataset_name -- name of the dataset to analyse; DEFAULT_DATASET is
                        used when omitted

        Initiates the data for the dataset, exports it and logs how long
        both steps took
        """
        if dataset_name is None:
            dataset_name = self.DEFAULT_DATASET
        t1 = datetime.datetime.utcnow()
        self.initiate_data(dataset_name)
        self.export_data(dataset_name)
        t2 = datetime.datetime.utcnow()
        td = t2 - t1
        self.logger.info('Data Analysis took %s', str(td))

    def initiate_data(self, dataset_name):
        """
        Initiate data about dataset(s)

        Upserts an entry keyed on the dataset name into the 'dataset_data'
        collection, then asks the dataset and popularity managers to insert
        their data for that dataset
        """
        coll = 'dataset_data'
        query = {'name': dataset_name}
        data = {'$set': {'name': dataset_name}}
        # upsert=True: the entry is created if the query matches nothing
        self.storage.update_data(coll=coll, query=query, data=data, upsert=True)
        self.datasets.insert_phedex_data(dataset_name)
        self.datasets.insert_dbs_data(dataset_name)
        self.popularity.insert_dataset(dataset_name)

    def export_data(self, dataset_name):
        """
        Get data from DB and export to file for usage in visualization

        Every entry of the 'dataset_popularity' collection matching the
        dataset name becomes one (name, date, popularity) row, where
        popularity is n_accesses * n_cpus * n_users
        """
        # get data from DB
        coll = 'dataset_popularity'
        pipeline = [{'$match': {'name': dataset_name}}]
        db_data = self.storage.get_data(coll=coll, pipeline=pipeline)
        headers = ('dataset_name', 'date', 'popularity')
        # Rows are tuple literals: tuple() accepts a single iterable, so
        # calling it with three positional arguments raises TypeError
        data = [
            (
                data_entry['name'],
                data_entry['date'],
                data_entry['n_accesses']*data_entry['n_cpus']*data_entry['n_users'],
            )
            for data_entry in db_data
        ]
        export_csv(headers=headers, data=data, file_name='single_dataset')
Ejemplo n.º 2
0
 def __init__(self, config=None):
     """
     Set up the logger and the storage, site, dataset and popularity
     managers, all built from the same configuration

     config -- configuration handed to every manager; a new empty dict
               is created when it is omitted
     """
     self.logger = logging.getLogger(__name__)
     # A None default avoids one dict object being shared (and mutated)
     # across every instance created without an explicit config
     self.config = dict() if config is None else config
     self.storage = StorageManager(self.config)
     self.sites = SiteManager(self.config)
     self.datasets = DatasetManager(self.config)
     self.popularity = PopularityManager(self.config)
Ejemplo n.º 3
0
class Initiate(object):
    """
    Initiate Database
    """
    def __init__(self, config=None):
        """
        Set up the logger and the site, dataset and popularity managers

        config -- configuration handed to every manager; a new empty dict
                  is created when it is omitted
        """
        self.logger = logging.getLogger(__name__)
        # A None default avoids one dict object being shared (and mutated)
        # across every instance created without an explicit config
        self.config = dict() if config is None else config
        self.sites = SiteManager(self.config)
        self.datasets = DatasetManager(self.config)
        self.popularity = PopularityManager(self.config)

    def start(self):
        """
        Begin Initiating Database

        Calls initiate_db() on the site, dataset and popularity managers,
        in that order, and logs how long the whole run took
        """
        t1 = datetime.datetime.utcnow()
        self.sites.initiate_db()
        self.datasets.initiate_db()
        self.popularity.initiate_db()
        t2 = datetime.datetime.utcnow()
        td = t2 - t1
        self.logger.info('Initiate took %s', str(td))
Ejemplo n.º 4
0
class UpdateDB(object):
    """
    Update DB with new dataset and site data
    """

    def __init__(self, config=None):
        """
        Set up the logger and the storage, site, dataset and popularity
        managers

        config -- configuration handed to every manager; a new empty dict
                  is created when it is omitted
        """
        self.logger = logging.getLogger(__name__)
        # A None default avoids one dict object being shared (and mutated)
        # across every instance created without an explicit config
        self.config = dict() if config is None else config
        self.storage = StorageManager(self.config)
        self.sites = SiteManager(self.config)
        self.datasets = DatasetManager(self.config)
        self.popularity = PopularityManager(self.config)

    def start(self):
        """
        Begin Database Update

        Calls update_db() on the site, dataset and popularity managers,
        in that order, and logs how long the whole run took
        """
        t1 = datetime.datetime.utcnow()
        self.sites.update_db()
        self.datasets.update_db()
        self.popularity.update_db()
        t2 = datetime.datetime.utcnow()
        td = t2 - t1
        self.logger.info("Update DB took %s", str(td))