def reduceResults(self):
    """
    Reduce the results from the DAG to something useful
    """

    # Collect each site's post-processing output into one mapping.
    siteData = {}
    for site in self.get_sites():
        logging.info("Processing site %s" % site)
        with open("postprocess.%s.json" % site) as f:
            siteData[site] = json.load(f)

    # "outputtype" is a dotted path like "pkg.mod.Class": everything up to
    # the last dot is the module, the final component is the class name.
    dotted = get_option("outputtype").split(".")
    outputmodule = ".".join(dotted[:-1])
    outputclass = dotted[-1]

    try:
        logging.debug("Trying to import module %s and class %s" % (outputmodule, outputclass))
        mod = __import__(outputmodule, fromlist=[outputclass])
        outputProcessor = getattr(mod, outputclass)(siteData)
    except ImportError as e:
        logging.error("Failed to load module %s and class %s" % (outputmodule, outputclass))
        raise e

    # Hand the collected data to the configured output processor.
    outputProcessor.startProcessing()
 def __init__(self, configFiles):
     
     # First, read in the configuration
     set_config_file(configFiles)
     self.config_location = os.path.abspath(configFiles)
     
     loglevel = get_option("loglevel", default="warning", section="logging")
     logdirectory = get_option("logdirectory", default="log", section="logging")
     self._setLogging(loglevel, logdirectory)
     
     raw_testsize = humanfriendly.parse_size(get_option("testsize"))
     set_option("raw_testsize", raw_testsize)
 def startProcessing(self):
     """
     This function will create plots using python's `matplotlib <http://matplotlib.org/index.html>`_.  Currently, it will make:
     
     1. A `violin plot <https://en.wikipedia.org/wiki/Violin_plot>`_ of the distribution of download times for each site given in :ref:`sitesData <sitesData-label>`.
     
     A violin plot example:
     
     .. image:: images/matploblib-violinplot.png
         :width: 300pt
     
     """
     logging.debug("Starting processing with matplotlib...")
         
     # Make a violin plot
     downloadTimes = {}
     for site in self.sitesData:
         siteTimes = self.sitesData[site]
         
         downloadTimes[site] = []
         
         for time in siteTimes:
             downloadTimes[site].append(float(time['duration']))
         
         testsize = get_option("raw_testsize")
         downloadTimes[site] = (float(testsize*8) / (1024*1024)) / numpy.array(downloadTimes[site])
         
         
     plt.violinplot(downloadTimes.values())
     plt.xticks(range(1, len(downloadTimes.keys())+1), downloadTimes.keys())
     plt.ylabel("Mb per second")
     plt.xlabel("Site")
     plt.title("Violin Plot of StashCache Transfer Speeds per Site")
     plt.savefig("violinplot.png")
     plt.clf()
 def get_sites(self):
     # First, get the sites from the configuration
     sites = get_option("sites")
     logging.debug("Got sites:\"%s\" from config file" % sites)
     if sites is None or sites is "":
         logging.error("No sites defined, therefore no tests created.")
         return None
     
     split_sites = re.split("[,\s]+", sites)
     return split_sites
 def runTests(self):
     """
     Run the tests prescribed in the configuration
     """
     sites = self.get_sites()
     templates_dir = os.path.join(sys.prefix, "etc/stashcache-tester/templates")
     
     # Parse the size of the test in bytes
     raw_testsize = humanfriendly.parse_size(get_option("testsize"))
     
     md5sum = self.createTestFile(raw_testsize, get_option("stashdir"))
     
     
     # Create the site specific tests
     env = Environment(loader=FileSystemLoader(templates_dir))
     
     
     
     env.globals = {
         "config_location": self.config_location,
         "stash_test_location": os.path.abspath(sys.argv[0]),
         "pythonpath": ":".join(sys.path),
         "testurl": get_option("testurl"),
         "localpath": get_option("stashdir"),
         "testsize": raw_testsize,
         "humantestsize": humanfriendly.format_size(raw_testsize)
     }
     
     test_dirs = []
     testingdir = get_option("testingdir")
     for site in sites:
         tmp_site = Site(site)
         test_dir = tmp_site.createTest(testingdir, env)
         test_dirs.append(test_dir)
     
     
     # Create the DAG from the template
     
     dag_template = env.get_template("dag.tmpl")
     test_dag = os.path.join(testingdir, "submit.dag")
     with open(test_dag, 'w') as f:
         f.write(dag_template.render(sites=sites, md5sum=md5sum))
         
     
     reduce_template = env.get_template("test_reduce.tmpl")
     reduce_submit = os.path.join(testingdir, "reduce.submit")
     with open(reduce_submit, 'w') as f:
         f.write(reduce_template.render())
         
     shutil.copyfile(os.path.join(templates_dir, "site_post.py"), os.path.join(get_option("testingdir"), "site_post.py"))
     os.chmod(os.path.join(get_option("testingdir"), "site_post.py"), 0755)
     
     # Start the DAG
     (stdout, stderr) = RunExternal("cd %s; condor_submit_dag submit.dag" % testingdir)
     logging.debug("output from condor_submit_dag: %s" % stdout)
     if stderr is not None or stderr is not "":
         logging.error("Error from condor_submit_dag: %s" % stderr)
 def _summarize_data(self, sitesData):
     summarized = []
     
     # Average download time per site.
     for site in sitesData:
         cur = {}
         cur['name'] = site
         siteTimes = sitesData[site]
         total_runtime = 0
         failures = 0
         caches = {}
         for run in siteTimes:
             # Initialize the cache structure
             cache = run['cache']
             if cache not in caches:
                 caches[cache] = {}
                 caches[cache]['runs'] = 0
                 caches[cache]['totalRuntime'] = 0
                 caches[cache]['failures'] = 0
                 
             if run['success'] is True:
                 total_runtime += float(run['duration'])
                 caches[cache]['totalRuntime'] += float(run['duration'])
                 caches[cache]['runs'] += 1
             else:
                 caches[cache]['failures'] += 1
                 failures += 1
         
         
         
         testsize = get_option("raw_testsize")
         if total_runtime == 0:
             cur['average'] = 0
             for cache in caches.keys():
                 caches[cache]['average'] = 0
         else:
             cur['average'] = (float(testsize*8) / (1024*1024)) / (total_runtime / len(siteTimes))
             
             for cache in caches.keys():
                 caches[cache]['average'] = (float(testsize*8) / (1024*1024)) / (caches[cache]['totalRuntime'] / caches[cache]['runs'])
         
         cur['caches'] = caches
         cur['failures'] = failures
         
         summarized.append(cur)
         
     
     # Should we do violin plot?
     
     #summarized = sitesData 
     return summarized
 def _get_option(self, option, default = None):
     return get_option(option, section="github", default=default)