def reduceResults(self):
    """
    Reduce the per-site results from the DAG into a final report.

    Reads ``postprocess.<site>.json`` for every configured site, collects
    the parsed data into a dict keyed by site name, then dynamically loads
    the output-processor class named by the ``outputtype`` config option
    (a dotted ``module.path.ClassName`` string) and hands it the data.

    :raises ImportError: if the configured output module cannot be imported.
    """
    siteData = {}
    # Read in the per-site JSON produced by the post-processing step.
    for site in self.get_sites():
        logging.info("Processing site %s" % site)
        with open("postprocess.%s.json" % site) as f:
            siteData[site] = json.load(f)

    # "outputtype" is "pkg.mod.ClassName": everything before the last dot
    # is the module path, the final component is the class to instantiate.
    outputmodule = ".".join(get_option("outputtype").split(".")[:-1])
    outputclass = get_option("outputtype").split(".")[-1]
    try:
        logging.debug("Trying to import module %s and class %s" % (outputmodule, outputclass))
        mod = __import__(outputmodule, fromlist=[outputclass])
        outputProcessor = getattr(mod, outputclass)
        outputProcessor = outputProcessor(siteData)
    except ImportError:
        logging.error("Failed to load module %s and class %s" % (outputmodule, outputclass))
        # Bare raise preserves the original traceback ("raise e" would
        # reset it in Python 2, hiding where the import actually failed).
        raise

    outputProcessor.startProcessing()
def __init__(self, configFiles):
    """
    Initialize the tester from the given configuration file(s).

    Loads the configuration, sets up logging, and caches the test size
    in raw bytes under the ``raw_testsize`` option for later use.
    """
    # Configuration must be loaded first: every get_option() below needs it.
    set_config_file(configFiles)
    self.config_location = os.path.abspath(configFiles)

    # Bring up logging as early as possible so subsequent steps are recorded.
    level = get_option("loglevel", default="warning", section="logging")
    directory = get_option("logdirectory", default="log", section="logging")
    self._setLogging(level, directory)

    # Convert the human-readable size (e.g. "1 GB") to bytes exactly once.
    size_in_bytes = humanfriendly.parse_size(get_option("testsize"))
    set_option("raw_testsize", size_in_bytes)
def startProcessing(self):
    """
    This function will create plots using python's
    `matplotlib <http://matplotlib.org/index.html>`_.

    Currently, it will make:

    1. A `violin plot <https://en.wikipedia.org/wiki/Violin_plot>`_ of the
       distribution of download times for each site given in
       :ref:`sitesData <sitesData-label>`.

    A violin plot example:

    .. image:: images/matploblib-violinplot.png
       :width: 300pt
    """
    logging.debug("Starting processing with matplotlib...")

    # The test size (and hence the bits-to-megabits factor) is the same for
    # every site and every run: fetch and compute it once, outside the loop.
    testsize = get_option("raw_testsize")
    test_megabits = float(testsize * 8) / (1024 * 1024)

    # Convert each site's per-run durations (seconds) into transfer speeds
    # (megabits per second) via a vectorized numpy division.
    downloadTimes = {}
    for site in self.sitesData:
        durations = [float(run['duration']) for run in self.sitesData[site]]
        downloadTimes[site] = test_megabits / numpy.array(durations)

    # Make a violin plot: one violin per site, labeled on the x axis.
    plt.violinplot(downloadTimes.values())
    plt.xticks(range(1, len(downloadTimes.keys()) + 1), downloadTimes.keys())
    plt.ylabel("Mb per second")
    plt.xlabel("Site")
    plt.title("Violin Plot of StashCache Transfer Speeds per Site")
    plt.savefig("violinplot.png")
    plt.clf()
def get_sites(self):
    """
    Return the list of site names from the ``sites`` configuration option.

    The option is a single string of site names separated by commas and/or
    whitespace.

    :returns: list of site-name strings, or ``None`` when no sites are
              configured (an error is logged in that case).
    """
    # First, get the sites from the configuration
    sites = get_option("sites")
    logging.debug("Got sites:\"%s\" from config file" % sites)
    # BUG FIX: the original used `sites is ""`, an identity comparison with
    # a string literal whose result is implementation-dependent.  Truthiness
    # covers both None and the empty string as intended.
    if not sites:
        logging.error("No sites defined, therefore no tests created.")
        return None

    # Split on any run of commas and/or whitespace (raw string so \s is a
    # regex escape, not a Python string escape).
    return re.split(r"[,\s]+", sites)
def runTests(self):
    """
    Run the tests prescribed in the configuration.

    Creates the test file, renders per-site test directories plus the DAG
    and reduce submit files from Jinja templates, installs the post-script,
    and submits the DAG to HTCondor.
    """
    sites = self.get_sites()

    templates_dir = os.path.join(sys.prefix, "etc/stashcache-tester/templates")

    # Parse the size of the test in bytes
    raw_testsize = humanfriendly.parse_size(get_option("testsize"))

    md5sum = self.createTestFile(raw_testsize, get_option("stashdir"))

    # Create the site specific tests; env.globals are visible in every
    # template rendered below.
    env = Environment(loader=FileSystemLoader(templates_dir))
    env.globals = {
        "config_location": self.config_location,
        "stash_test_location": os.path.abspath(sys.argv[0]),
        "pythonpath": ":".join(sys.path),
        "testurl": get_option("testurl"),
        "localpath": get_option("stashdir"),
        "testsize": raw_testsize,
        "humantestsize": humanfriendly.format_size(raw_testsize)
    }

    test_dirs = []
    testingdir = get_option("testingdir")
    for site in sites:
        tmp_site = Site(site)
        test_dir = tmp_site.createTest(testingdir, env)
        test_dirs.append(test_dir)

    # Create the DAG from the template
    dag_template = env.get_template("dag.tmpl")
    test_dag = os.path.join(testingdir, "submit.dag")
    with open(test_dag, 'w') as f:
        f.write(dag_template.render(sites=sites, md5sum=md5sum))

    reduce_template = env.get_template("test_reduce.tmpl")
    reduce_submit = os.path.join(testingdir, "reduce.submit")
    with open(reduce_submit, 'w') as f:
        f.write(reduce_template.render())

    # Install the post-script next to the DAG and make it executable.
    # 0o755 is the modern octal literal (same value as the old 0755 and
    # valid on both Python 2.6+ and Python 3).
    site_post = os.path.join(testingdir, "site_post.py")
    shutil.copyfile(os.path.join(templates_dir, "site_post.py"), site_post)
    os.chmod(site_post, 0o755)

    # Start the DAG
    (stdout, stderr) = RunExternal("cd %s; condor_submit_dag submit.dag" % testingdir)
    logging.debug("output from condor_submit_dag: %s" % stdout)
    # BUG FIX: the original condition `stderr is not None or stderr is not ""`
    # was always true (wrong boolean operator plus identity comparison with a
    # literal), so an error was logged on every run.  Log only when
    # condor_submit_dag actually produced stderr output.
    if stderr:
        logging.error("Error from condor_submit_dag: %s" % stderr)
def _summarize_data(self, sitesData):
    """
    Summarize per-site run data into average transfer speeds.

    :param dict sitesData: mapping of site name -> list of run records,
        where each record has ``cache``, ``success`` and ``duration`` keys.
    :returns: list of dicts, one per site, with keys ``name``, ``average``
        (Mb/s over all runs, 0 when no run succeeded), ``failures``, and
        ``caches`` (per-cache runs/totalRuntime/failures/average).
    """
    summarized = []

    # Test size is identical for every site/run: compute the megabit
    # conversion factor once instead of per site.
    testsize = get_option("raw_testsize")
    test_megabits = float(testsize * 8) / (1024 * 1024)

    # Average download time per site.
    for site in sitesData:
        cur = {}
        cur['name'] = site
        siteTimes = sitesData[site]
        total_runtime = 0
        failures = 0
        caches = {}
        for run in siteTimes:
            # Initialize the per-cache accumulator on first sight.
            cache = run['cache']
            if cache not in caches:
                caches[cache] = {'runs': 0, 'totalRuntime': 0, 'failures': 0}

            # Truthiness rather than `is True`: robust if the flag arrives
            # as 1/0 instead of a bool (JSON booleans load as True/False,
            # but other producers may differ).
            if run['success']:
                total_runtime += float(run['duration'])
                caches[cache]['totalRuntime'] += float(run['duration'])
                caches[cache]['runs'] += 1
            else:
                caches[cache]['failures'] += 1
                failures += 1

        if total_runtime == 0:
            # No successful run anywhere: report zero speed rather than
            # dividing by zero.
            cur['average'] = 0
            for cache in caches.keys():
                caches[cache]['average'] = 0
        else:
            # Site average divides by ALL runs (including failures),
            # per-cache averages divide by successful runs only --
            # preserved from the original implementation.
            cur['average'] = test_megabits / (total_runtime / len(siteTimes))
            for cache in caches.keys():
                caches[cache]['average'] = test_megabits / (caches[cache]['totalRuntime'] / caches[cache]['runs'])

        cur['caches'] = caches
        cur['failures'] = failures
        summarized.append(cur)

    return summarized
def _get_option(self, option, default=None):
    """
    Look up *option* from the "github" section of the configuration,
    falling back to *default* when it is not set.
    """
    return get_option(option, section="github", default=default)