def compute_capacity(stream: VCFStream, population: Population, repeats: int, ratio: float, predictor_factory: PredictorFactory):
    """Print a per-polymorphism prediction accuracy over ``stream``.

    For each polymorphism, a predictor is trained on one part of the
    individuals and scored on the other; with ``repeats > 1`` this is done
    for several random splits and both the mean and the standard deviation
    of the accuracies are printed (tab-separated, 5 decimals). With a
    single repeat, train and target both cover every individual and only
    the mean is printed.

    :param stream: VCF stream providing ``individuals`` and polymorphisms.
    :param population: Provides the label of each individual.
    :param repeats: Number of random train/target splits per polymorphism.
    :param ratio: Fraction of individuals that goes into the training set.
    :param predictor_factory: Builds a predictor from genotypes + labels.
    """
    individuals = stream.individuals
    n_individuals = len(individuals)
    cut = int(n_individuals * ratio)
    labels = population.get_labels(individuals)

    # Precompute the train/target index sets once: `split` is costly, so we
    # draw `repeats` splits up front and reuse the same ones for every
    # polymorphism instead of re-splitting per polymorphism and repeat.
    if repeats > 1:
        splits = [
            (set(train_part), set(target_part))
            for train_part, target_part in (
                split(range(n_individuals), cut) for _ in range(repeats)
            )
        ]
    else:
        # Single repeat: train and evaluate on the full set of individuals.
        splits = [(set(range(n_individuals)), set(range(n_individuals)))]

    for polymorphism in stream:
        accuracies: List[float] = []
        for repeat_no in range(repeats):
            train_ids, target_ids = splits[repeat_no]
            genotype_iter = (process_variant(field)
                             for field in polymorphism.data_fields)
            rows = list(zip(individuals, genotype_iter, labels))
            # An index may land in both sets (notably the repeats == 1 case).
            train_rows = [row for idx, row in enumerate(rows)
                          if idx in train_ids]
            target_rows = [row for idx, row in enumerate(rows)
                           if idx in target_ids]

            _unused_individuals, train_genotypes, train_labels = \
                zip(*train_rows)
            predictor = predictor_factory.build(train_genotypes, train_labels)

            # Accumulate the probability mass the predictor assigns to the
            # true label of each target individual.
            hits = 0.0
            for _individual, genotype, label in target_rows:
                scores = predictor.predict(genotype)
                if label in scores:
                    hits += scores[label]
            accuracies.append(hits / len(target_rows))

        avg = mean(accuracies)
        if repeats == 1:
            print(f'{polymorphism.get_identifier()}\t{avg:.5f}')
        else:
            spread = stdev(accuracies)
            print(f'{polymorphism.get_identifier()}\t{avg:.5f}\t{spread:.5f}')
def main():
    """Print describe()-style summary statistics for every numeric column
    of the CSV file given as the single command-line argument.

    For each non-string/non-object column a dict of count, mean, std,
    min, max, total and the 25/50/75 % quantiles is built and handed to
    ``describe``. Exits with status 1 when the argument is missing.
    """
    # Exactly one argument (the CSV path) is required besides the program
    # name. Exit non-zero so callers/shell scripts can detect the error
    # (the original called sys.exit() which exits with status 0).
    if len(sys.argv) != 2:
        print('\033[91m' + '✘ Error: ' + '\033[0m' +
              'CSV file is missing, please add his path as argument')
        sys.exit(1)

    df = utils.dataframe(sys.argv[1])

    dico_numerals = {}
    for label in list(df.columns.values):
        # Skip textual columns; only numeric columns are summarised.
        if df[label].dtypes == str or df[label].dtypes == object:
            continue

        stats = {
            'count': 0,
            'mean': 0,
            'std': 0,
            'min': float('inf'),
            '25%': 0,
            '50%': 0,
            '75%': 0,
            'max': float('-inf'),
            'total': 0
        }

        # Single pass over the column (instead of one df.iterrows() scan of
        # the whole frame per column). NaN comparisons are always False, so
        # min/max are untouched by missing values; count/total explicitly
        # skip NaN.
        for value in df[label]:
            if value > stats['max']:
                stats['max'] = value
            if value < stats['min']:
                stats['min'] = value
            if not np.isnan(value):
                stats['count'] += 1
                stats['total'] += value

        # Derived statistics only make sense when at least one non-NaN
        # value was seen.
        if stats['count'] > 0:
            stats['mean'] = stats['total'] / stats['count']
            clean = df[label].dropna()
            stats['25%'] = utils.calc_quantile(clean, 0.25)
            stats['50%'] = utils.calc_quantile(clean, 0.5)
            stats['75%'] = utils.calc_quantile(clean, 0.75)
            stats['std'] = utils.stdev(clean)

        dico_numerals[label] = stats

    describe(dico_numerals)
def table_soak_vs_ndr(table, input_data):
    """Generate the table(s) with algorithm: table_soak_vs_ndr
    specified in the specification file.

    Compares SOAK test throughput (compare side) against NDR/PDR/MRR
    results (reference side) and writes a csv + pretty txt table.

    :param table: Table to generate.
    :param input_data: Data to process.
    :type table: pandas.Series
    :type input_data: InputData
    """

    logging.info(" Generating the table {0} ...".format(table.get(
        "title", "")))

    # Transform the data
    logging.info(" Creating the data set for the {0} '{1}'.".format(
        table.get("type", ""), table.get("title", "")))
    data = input_data.filter_data(table, continue_on_error=True)

    # Prepare the header of the table
    try:
        header = [
            "Test case",
            "{0} Throughput [Mpps]".format(table["reference"]["title"]),
            "{0} Stdev [Mpps]".format(table["reference"]["title"]),
            "{0} Throughput [Mpps]".format(table["compare"]["title"]),
            "{0} Stdev [Mpps]".format(table["compare"]["title"]),
            "Delta [%]", "Stdev of delta [%]"
        ]
        header_str = ",".join(header) + "\n"
    except (AttributeError, KeyError) as err:
        logging.error(
            "The model is invalid, missing parameter: {0}".format(err))
        return

    # Create a list of available SOAK test results:
    # NOTE: .iteritems() implies this runs under Python 2.
    tbl_dict = dict()
    for job, builds in table["compare"]["data"].items():
        for build in builds:
            for tst_name, tst_data in data[job][str(build)].iteritems():
                if tst_data["type"] == "SOAK":
                    # SOAK and NDR variants of a test share a name modulo
                    # the "-soak"/"-ndrpdr" suffix; strip it to join them.
                    tst_name_mod = tst_name.replace("-soak", "")
                    if tbl_dict.get(tst_name_mod, None) is None:
                        groups = re.search(REGEX_NIC, tst_data["parent"])
                        nic = groups.group(0) if groups else ""
                        name = "{0}-{1}".format(
                            nic, "-".join(tst_data["name"].split("-")[:-1]))
                        tbl_dict[tst_name_mod] = {
                            "name": name,
                            "ref-data": list(),
                            "cmp-data": list()
                        }
                    try:
                        tbl_dict[tst_name_mod]["cmp-data"].append(
                            tst_data["throughput"]["LOWER"])
                    # No result available for this build; skip silently.
                    except (KeyError, TypeError):
                        pass
    tests_lst = tbl_dict.keys()

    # Add corresponding NDR test results:
    for job, builds in table["reference"]["data"].items():
        for build in builds:
            for tst_name, tst_data in data[job][str(build)].iteritems():
                tst_name_mod = tst_name.replace("-ndrpdr", "").\
                    replace("-mrr", "")
                # Only keep reference results for tests that also have a
                # SOAK counterpart collected above.
                if tst_name_mod in tests_lst:
                    try:
                        if tst_data["type"] in ("NDRPDR", "MRR", "BMRR"):
                            if table["include-tests"] == "MRR":
                                result = tst_data["result"]["receive-rate"].avg
                            elif table["include-tests"] == "PDR":
                                result = tst_data["throughput"]["PDR"]["LOWER"]
                            elif table["include-tests"] == "NDR":
                                result = tst_data["throughput"]["NDR"]["LOWER"]
                            else:
                                result = None
                            if result is not None:
                                tbl_dict[tst_name_mod]["ref-data"].append(
                                    result)
                    except (KeyError, TypeError):
                        continue

    tbl_lst = list()
    for tst_name in tbl_dict.keys():
        item = [tbl_dict[tst_name]["name"], ]
        data_r = tbl_dict[tst_name]["ref-data"]
        if data_r:
            data_r_mean = mean(data_r)
            item.append(round(data_r_mean / 1000000, 2))
            data_r_stdev = stdev(data_r)
            item.append(round(data_r_stdev / 1000000, 2))
        else:
            data_r_mean = None
            data_r_stdev = None
            item.extend([None, None])
        data_c = tbl_dict[tst_name]["cmp-data"]
        if data_c:
            data_c_mean = mean(data_c)
            item.append(round(data_c_mean / 1000000, 2))
            data_c_stdev = stdev(data_c)
            item.append(round(data_c_stdev / 1000000, 2))
        else:
            data_c_mean = None
            data_c_stdev = None
            item.extend([None, None])
        if data_r_mean and data_c_mean:
            delta, d_stdev = relative_change_stdev(data_r_mean, data_c_mean,
                                                   data_r_stdev, data_c_stdev)
            item.append(round(delta, 2))
            item.append(round(d_stdev, 2))
        # NOTE(review): unlike the sibling table_* functions, rows with
        # missing ref/cmp data are still appended here, so they are shorter
        # than the header and their sort key below is not the delta column
        # — confirm this is intended.
        tbl_lst.append(item)

    # Sort the table according to the relative change
    tbl_lst.sort(key=lambda rel: rel[-1], reverse=True)

    # Generate csv tables:
    csv_file = "{0}.csv".format(table["output-file"])
    with open(csv_file, "w") as file_handler:
        file_handler.write(header_str)
        for test in tbl_lst:
            file_handler.write(",".join([str(item) for item in test]) + "\n")

    convert_csv_to_pretty_txt(csv_file, "{0}.txt".format(table["output-file"]))
def table_nics_comparison(table, input_data):
    """Generate the table(s) with algorithm: table_nics_comparison
    specified in the specification file.

    Compares the same tests run on two different NICs (reference vs
    compare, selected via tags) and writes a csv + pretty txt table.

    :param table: Table to generate.
    :param input_data: Data to process.
    :type table: pandas.Series
    :type input_data: InputData
    """

    logging.info(" Generating the table {0} ...".format(table.get(
        "title", "")))

    # Transform the data
    logging.info(" Creating the data set for the {0} '{1}'.".format(
        table.get("type", ""), table.get("title", "")))
    data = input_data.filter_data(table, continue_on_error=True)

    # Prepare the header of the tables
    try:
        header = ["Test case", ]
        if table["include-tests"] == "MRR":
            hdr_param = "Receive Rate"
        else:
            hdr_param = "Throughput"
        header.extend([
            "{0} {1} [Mpps]".format(table["reference"]["title"], hdr_param),
            "{0} Stdev [Mpps]".format(table["reference"]["title"]),
            "{0} {1} [Mpps]".format(table["compare"]["title"], hdr_param),
            "{0} Stdev [Mpps]".format(table["compare"]["title"]),
            "Delta [%]"
        ])
        header_str = ",".join(header) + "\n"
    except (AttributeError, KeyError) as err:
        logging.error(
            "The model is invalid, missing parameter: {0}".format(err))
        return

    # Prepare data to the table:
    # NOTE: .iteritems() implies this runs under Python 2.
    tbl_dict = dict()
    for job, builds in table["data"].items():
        for build in builds:
            for tst_name, tst_data in data[job][str(build)].iteritems():
                # Normalise the test name: drop rate-suffixes, collapse the
                # thread/core naming variants, then strip the NIC part so
                # runs on different NICs map to the same key.
                tst_name_mod = tst_name.replace("-ndrpdrdisc", "").\
                    replace("-ndrpdr", "").replace("-pdrdisc", "").\
                    replace("-ndrdisc", "").replace("-pdr", "").\
                    replace("-ndr", "").\
                    replace("1t1c", "1c").replace("2t1c", "1c").\
                    replace("2t2c", "2c").replace("4t2c", "2c").\
                    replace("4t4c", "4c").replace("8t4c", "4c")
                tst_name_mod = re.sub(REGEX_NIC, "", tst_name_mod)
                if tbl_dict.get(tst_name_mod, None) is None:
                    name = "-".join(tst_data["name"].split("-")[:-1])
                    tbl_dict[tst_name_mod] = {
                        "name": name,
                        "ref-data": list(),
                        "cmp-data": list()
                    }
                try:
                    if table["include-tests"] == "MRR":
                        result = tst_data["result"]["receive-rate"].avg
                    elif table["include-tests"] == "PDR":
                        result = tst_data["throughput"]["PDR"]["LOWER"]
                    elif table["include-tests"] == "NDR":
                        result = tst_data["throughput"]["NDR"]["LOWER"]
                    else:
                        result = None
                    # The NIC tag decides whether this run is the reference
                    # or the compare side of the row.
                    if result:
                        if table["reference"]["nic"] in tst_data["tags"]:
                            tbl_dict[tst_name_mod]["ref-data"].append(result)
                        elif table["compare"]["nic"] in tst_data["tags"]:
                            tbl_dict[tst_name_mod]["cmp-data"].append(result)
                except (TypeError, KeyError) as err:
                    logging.debug("No data for {0}".format(tst_name))
                    logging.debug(repr(err))
                    # No data in output.xml for this test

    tbl_lst = list()
    for tst_name in tbl_dict.keys():
        item = [tbl_dict[tst_name]["name"], ]
        data_t = tbl_dict[tst_name]["ref-data"]
        if data_t:
            # Convert pps to Mpps for presentation.
            item.append(round(mean(data_t) / 1000000, 2))
            item.append(round(stdev(data_t) / 1000000, 2))
        else:
            item.extend([None, None])
        data_t = tbl_dict[tst_name]["cmp-data"]
        if data_t:
            item.append(round(mean(data_t) / 1000000, 2))
            item.append(round(stdev(data_t) / 1000000, 2))
        else:
            item.extend([None, None])
        # item[-4] is the reference mean, item[-2] the compare mean.
        if item[-4] is not None and item[-2] is not None and item[-4] != 0:
            item.append(int(relative_change(float(item[-4]), float(item[-2]))))
        # Only complete rows (i.e. with a computed delta) are published.
        if len(item) == len(header):
            tbl_lst.append(item)

    # Sort the table according to the relative change
    tbl_lst.sort(key=lambda rel: rel[-1], reverse=True)

    # Generate csv tables:
    csv_file = "{0}.csv".format(table["output-file"])
    with open(csv_file, "w") as file_handler:
        file_handler.write(header_str)
        for test in tbl_lst:
            file_handler.write(",".join([str(item) for item in test]) + "\n")

    convert_csv_to_pretty_txt(csv_file, "{0}.txt".format(table["output-file"]))
def table_performance_comparison(table, input_data):
    """Generate the table(s) with algorithm: table_performance_comparison
    specified in the specification file.

    Builds a reference-vs-compare throughput table (optionally prefixed
    with columns for older "history" runs) and writes a csv + pretty txt
    table.

    :param table: Table to generate.
    :param input_data: Data to process.
    :type table: pandas.Series
    :type input_data: InputData
    """

    logging.info(" Generating the table {0} ...".format(table.get(
        "title", "")))

    # Transform the data
    logging.info(" Creating the data set for the {0} '{1}'.".format(
        table.get("type", ""), table.get("title", "")))
    data = input_data.filter_data(table, continue_on_error=True)

    # Prepare the header of the tables
    try:
        header = ["Test case", ]
        if table["include-tests"] == "MRR":
            hdr_param = "Receive Rate"
        else:
            hdr_param = "Throughput"
        history = table.get("history", None)
        if history:
            for item in history:
                header.extend(
                    ["{0} {1} [Mpps]".format(item["title"], hdr_param),
                     "{0} Stdev [Mpps]".format(item["title"])])
        header.extend(
            ["{0} {1} [Mpps]".format(table["reference"]["title"], hdr_param),
             "{0} Stdev [Mpps]".format(table["reference"]["title"]),
             "{0} {1} [Mpps]".format(table["compare"]["title"], hdr_param),
             "{0} Stdev [Mpps]".format(table["compare"]["title"]),
             "Delta [%]"])
        header_str = ",".join(header) + "\n"
    except (AttributeError, KeyError) as err:
        logging.error(
            "The model is invalid, missing parameter: {0}".format(err))
        return

    # Prepare data to the table:
    # NOTE: .iteritems() implies this runs under Python 2.
    tbl_dict = dict()
    for job, builds in table["reference"]["data"].items():
        for build in builds:
            for tst_name, tst_data in data[job][str(build)].iteritems():
                # Normalise the test name (strip rate suffixes, collapse
                # thread/core variants) so different runs join on one key.
                tst_name_mod = tst_name.replace("-ndrpdrdisc", "").\
                    replace("-ndrpdr", "").replace("-pdrdisc", "").\
                    replace("-ndrdisc", "").replace("-pdr", "").\
                    replace("-ndr", "").\
                    replace("1t1c", "1c").replace("2t1c", "1c").\
                    replace("2t2c", "2c").replace("4t2c", "2c").\
                    replace("4t4c", "4c").replace("8t4c", "4c")
                if "across topologies" in table["title"].lower():
                    tst_name_mod = tst_name_mod.replace("2n1l-", "")
                if tbl_dict.get(tst_name_mod, None) is None:
                    groups = re.search(REGEX_NIC, tst_data["parent"])
                    nic = groups.group(0) if groups else ""
                    name = "{0}-{1}".format(
                        nic, "-".join(tst_data["name"].split("-")[:-1]))
                    if "across testbeds" in table["title"].lower() or \
                            "across topologies" in table["title"].lower():
                        name = name.\
                            replace("1t1c", "1c").replace("2t1c", "1c").\
                            replace("2t2c", "2c").replace("4t2c", "2c").\
                            replace("4t4c", "4c").replace("8t4c", "4c")
                    tbl_dict[tst_name_mod] = {
                        "name": name,
                        "ref-data": list(),
                        "cmp-data": list()
                    }
                try:
                    # TODO: Re-work when NDRPDRDISC tests are not used
                    if table["include-tests"] == "MRR":
                        tbl_dict[tst_name_mod]["ref-data"]. \
                            append(tst_data["result"]["receive-rate"].avg)
                    elif table["include-tests"] == "PDR":
                        if tst_data["type"] == "PDR":
                            tbl_dict[tst_name_mod]["ref-data"]. \
                                append(tst_data["throughput"]["value"])
                        elif tst_data["type"] == "NDRPDR":
                            tbl_dict[tst_name_mod]["ref-data"].append(
                                tst_data["throughput"]["PDR"]["LOWER"])
                    elif table["include-tests"] == "NDR":
                        if tst_data["type"] == "NDR":
                            tbl_dict[tst_name_mod]["ref-data"]. \
                                append(tst_data["throughput"]["value"])
                        elif tst_data["type"] == "NDRPDR":
                            tbl_dict[tst_name_mod]["ref-data"].append(
                                tst_data["throughput"]["NDR"]["LOWER"])
                    else:
                        continue
                except TypeError:
                    pass  # No data in output.xml for this test

    for job, builds in table["compare"]["data"].items():
        for build in builds:
            for tst_name, tst_data in data[job][str(build)].iteritems():
                # Same normalisation as the reference pass above.
                tst_name_mod = tst_name.replace("-ndrpdrdisc", ""). \
                    replace("-ndrpdr", "").replace("-pdrdisc", ""). \
                    replace("-ndrdisc", "").replace("-pdr", ""). \
                    replace("-ndr", "").\
                    replace("1t1c", "1c").replace("2t1c", "1c").\
                    replace("2t2c", "2c").replace("4t2c", "2c").\
                    replace("4t4c", "4c").replace("8t4c", "4c")
                if "across topologies" in table["title"].lower():
                    tst_name_mod = tst_name_mod.replace("2n1l-", "")
                try:
                    # TODO: Re-work when NDRPDRDISC tests are not used
                    if table["include-tests"] == "MRR":
                        tbl_dict[tst_name_mod]["cmp-data"]. \
                            append(tst_data["result"]["receive-rate"].avg)
                    elif table["include-tests"] == "PDR":
                        if tst_data["type"] == "PDR":
                            tbl_dict[tst_name_mod]["cmp-data"]. \
                                append(tst_data["throughput"]["value"])
                        elif tst_data["type"] == "NDRPDR":
                            tbl_dict[tst_name_mod]["cmp-data"].append(
                                tst_data["throughput"]["PDR"]["LOWER"])
                    elif table["include-tests"] == "NDR":
                        if tst_data["type"] == "NDR":
                            tbl_dict[tst_name_mod]["cmp-data"]. \
                                append(tst_data["throughput"]["value"])
                        elif tst_data["type"] == "NDRPDR":
                            tbl_dict[tst_name_mod]["cmp-data"].append(
                                tst_data["throughput"]["NDR"]["LOWER"])
                    else:
                        continue
                # KeyError: test exists only on the compare side — ignore.
                except KeyError:
                    pass
                # TypeError: broken data — drop the whole row.
                except TypeError:
                    tbl_dict.pop(tst_name_mod, None)
    if history:
        for item in history:
            for job, builds in item["data"].items():
                for build in builds:
                    for tst_name, tst_data in data[job][str(
                            build)].iteritems():
                        tst_name_mod = tst_name.replace("-ndrpdrdisc", ""). \
                            replace("-ndrpdr", "").replace("-pdrdisc", ""). \
                            replace("-ndrdisc", "").replace("-pdr", ""). \
                            replace("-ndr", "").\
                            replace("1t1c", "1c").replace("2t1c", "1c").\
                            replace("2t2c", "2c").replace("4t2c", "2c").\
                            replace("4t4c", "4c").replace("8t4c", "4c")
                        if "across topologies" in table["title"].lower():
                            tst_name_mod = tst_name_mod.replace("2n1l-", "")
                        # Only tests already present from the reference /
                        # compare passes get history columns.
                        if tbl_dict.get(tst_name_mod, None) is None:
                            continue
                        if tbl_dict[tst_name_mod].get("history", None) is None:
                            tbl_dict[tst_name_mod]["history"] = OrderedDict()
                        if tbl_dict[tst_name_mod]["history"].get(
                                item["title"], None) is None:
                            tbl_dict[tst_name_mod]["history"][item["title"]] = \
                                list()
                        try:
                            # TODO: Re-work when NDRPDRDISC tests are not used
                            if table["include-tests"] == "MRR":
                                tbl_dict[tst_name_mod]["history"][
                                    item["title"]].append(
                                    tst_data["result"]["receive-rate"].avg)
                            elif table["include-tests"] == "PDR":
                                if tst_data["type"] == "PDR":
                                    tbl_dict[tst_name_mod]["history"][
                                        item["title"]].\
                                        append(tst_data["throughput"]["value"])
                                elif tst_data["type"] == "NDRPDR":
                                    tbl_dict[tst_name_mod]["history"][
                                        item["title"]].append(
                                        tst_data["throughput"]["PDR"]["LOWER"])
                            elif table["include-tests"] == "NDR":
                                if tst_data["type"] == "NDR":
                                    tbl_dict[tst_name_mod]["history"][
                                        item["title"]].\
                                        append(tst_data["throughput"]["value"])
                                elif tst_data["type"] == "NDRPDR":
                                    tbl_dict[tst_name_mod]["history"][
                                        item["title"]].append(
                                        tst_data["throughput"]["NDR"]["LOWER"])
                            else:
                                continue
                        except (TypeError, KeyError):
                            pass

    tbl_lst = list()
    for tst_name in tbl_dict.keys():
        item = [tbl_dict[tst_name]["name"], ]
        if history:
            if tbl_dict[tst_name].get("history", None) is not None:
                for hist_data in tbl_dict[tst_name]["history"].values():
                    if hist_data:
                        item.append(round(mean(hist_data) / 1000000, 2))
                        item.append(round(stdev(hist_data) / 1000000, 2))
                    else:
                        item.extend([None, None])
            else:
                item.extend([None, None])
        data_t = tbl_dict[tst_name]["ref-data"]
        if data_t:
            # Convert pps to Mpps for presentation.
            item.append(round(mean(data_t) / 1000000, 2))
            item.append(round(stdev(data_t) / 1000000, 2))
        else:
            item.extend([None, None])
        data_t = tbl_dict[tst_name]["cmp-data"]
        if data_t:
            item.append(round(mean(data_t) / 1000000, 2))
            item.append(round(stdev(data_t) / 1000000, 2))
        else:
            item.extend([None, None])
        # item[-4] is the reference mean, item[-2] the compare mean.
        if item[-4] is not None and item[-2] is not None and item[-4] != 0:
            item.append(int(relative_change(float(item[-4]), float(item[-2]))))
        # Only complete rows (all history + data + delta columns) are kept.
        if len(item) == len(header):
            tbl_lst.append(item)

    # Sort the table according to the relative change
    tbl_lst.sort(key=lambda rel: rel[-1], reverse=True)

    # Generate csv tables:
    csv_file = "{0}.csv".format(table["output-file"])
    with open(csv_file, "w") as file_handler:
        file_handler.write(header_str)
        for test in tbl_lst:
            file_handler.write(",".join([str(item) for item in test]) + "\n")

    convert_csv_to_pretty_txt(csv_file, "{0}.txt".format(table["output-file"]))
def table_performance_comparison_mrr(table, input_data):
    """Generate the table(s) with algorithm: table_performance_comparison_mrr
    specified in the specification file.

    Compares MRR receive-rate results between a reference and a compare
    data set and writes per-core-count csv and txt tables.

    :param table: Table to generate.
    :param input_data: Data to process.
    :type table: pandas.Series
    :type input_data: InputData
    """

    logging.info(" Generating the table {0} ...".format(table.get(
        "title", "")))

    # Transform the data
    logging.info(" Creating the data set for the {0} '{1}'.".format(
        table.get("type", ""), table.get("title", "")))
    data = input_data.filter_data(table, continue_on_error=True)

    # Prepare the header of the tables
    try:
        header = ["Test case",
                  "{0} Throughput [Mpps]".format(table["reference"]["title"]),
                  "{0} stdev [Mpps]".format(table["reference"]["title"]),
                  "{0} Throughput [Mpps]".format(table["compare"]["title"]),
                  "{0} stdev [Mpps]".format(table["compare"]["title"]),
                  "Change [%]"]
        header_str = ",".join(header) + "\n"
    except (AttributeError, KeyError) as err:
        logging.error(
            "The model is invalid, missing parameter: {0}".format(err))
        return

    # Prepare data to the table:
    # NOTE: .iteritems() implies this runs under Python 2.
    tbl_dict = dict()
    for job, builds in table["reference"]["data"].items():
        for build in builds:
            for tst_name, tst_data in data[job][str(build)].iteritems():
                if tbl_dict.get(tst_name, None) is None:
                    name = "{0}-{1}".format(tst_data["parent"].split("-")[0],
                                            "-".join(tst_data["name"].
                                                     split("-")[1:]))
                    tbl_dict[tst_name] = {"name": name,
                                          "ref-data": list(),
                                          "cmp-data": list()}
                try:
                    tbl_dict[tst_name]["ref-data"].\
                        append(tst_data["result"]["receive-rate"].avg)
                except TypeError:
                    pass  # No data in output.xml for this test

    for job, builds in table["compare"]["data"].items():
        for build in builds:
            for tst_name, tst_data in data[job][str(build)].iteritems():
                try:
                    tbl_dict[tst_name]["cmp-data"].\
                        append(tst_data["result"]["receive-rate"].avg)
                # KeyError: test exists only on the compare side — ignore.
                except KeyError:
                    pass
                # TypeError: broken data — drop the whole row.
                except TypeError:
                    tbl_dict.pop(tst_name, None)

    tbl_lst = list()
    for tst_name in tbl_dict.keys():
        item = [tbl_dict[tst_name]["name"], ]
        data_t = tbl_dict[tst_name]["ref-data"]
        if data_t:
            # Convert pps to Mpps for presentation.
            item.append(round(mean(data_t) / 1000000, 2))
            item.append(round(stdev(data_t) / 1000000, 2))
        else:
            item.extend([None, None])
        data_t = tbl_dict[tst_name]["cmp-data"]
        if data_t:
            item.append(round(mean(data_t) / 1000000, 2))
            item.append(round(stdev(data_t) / 1000000, 2))
        else:
            item.extend([None, None])
        # item[1] is the reference mean, item[3] the compare mean.
        if item[1] is not None and item[3] is not None and item[1] != 0:
            item.append(int(relative_change(float(item[1]), float(item[3]))))
        # Only complete rows (name + 4 data columns + change) are kept.
        if len(item) == 6:
            tbl_lst.append(item)

    # Sort the table according to the relative change
    tbl_lst.sort(key=lambda rel: rel[-1], reverse=True)

    # Generate tables:
    # All tests in csv:
    tbl_names = ["{0}-1t1c-full{1}".format(table["output-file"],
                                           table["output-file-ext"]),
                 "{0}-2t2c-full{1}".format(table["output-file"],
                                           table["output-file-ext"]),
                 "{0}-4t4c-full{1}".format(table["output-file"],
                                           table["output-file-ext"])
                 ]
    for file_name in tbl_names:
        logging.info(" Writing file: '{0}'".format(file_name))
        with open(file_name, "w") as file_handler:
            file_handler.write(header_str)
            for test in tbl_lst:
                if file_name.split("-")[-2] in test[0]:  # cores
                    test[0] = "-".join(test[0].split("-")[:-1])
                    file_handler.write(",".join([str(item) for item in test]) +
                                       "\n")

    # All tests in txt:
    tbl_names_txt = ["{0}-1t1c-full.txt".format(table["output-file"]),
                     "{0}-2t2c-full.txt".format(table["output-file"]),
                     "{0}-4t4c-full.txt".format(table["output-file"])
                     ]

    for i, txt_name in enumerate(tbl_names_txt):
        logging.info(" Writing file: '{0}'".format(txt_name))
        convert_csv_to_pretty_txt(tbl_names[i], txt_name)
# NOTE(review): this function has the same name as the earlier
# table_performance_comparison in this file — whichever is defined later
# shadows the other at import time. Confirm which one is meant to be live.
def table_performance_comparison(table, input_data):
    """Generate the table(s) with algorithm: table_performance_comparison
    specified in the specification file.

    Compares throughput "value" results between a reference and a compare
    data set (optionally with history columns) and writes per-rate,
    per-core csv/txt tables plus top/bottom excerpts.

    :param table: Table to generate.
    :param input_data: Data to process.
    :type table: pandas.Series
    :type input_data: InputData
    """

    logging.info(" Generating the table {0} ...".format(table.get(
        "title", "")))

    # Transform the data
    logging.info(" Creating the data set for the {0} '{1}'.".format(
        table.get("type", ""), table.get("title", "")))
    data = input_data.filter_data(table, continue_on_error=True)

    # Prepare the header of the tables
    try:
        header = ["Test case", ]
        history = table.get("history", None)
        if history:
            for item in history:
                header.extend(
                    ["{0} Throughput [Mpps]".format(item["title"]),
                     "{0} Stdev [Mpps]".format(item["title"])])
        header.extend(
            ["{0} Throughput [Mpps]".format(table["reference"]["title"]),
             "{0} Stdev [Mpps]".format(table["reference"]["title"]),
             "{0} Throughput [Mpps]".format(table["compare"]["title"]),
             "{0} Stdev [Mpps]".format(table["compare"]["title"]),
             "Change [%]"])
        header_str = ",".join(header) + "\n"
    except (AttributeError, KeyError) as err:
        logging.error(
            "The model is invalid, missing parameter: {0}".format(err))
        return

    # Prepare data to the table:
    # NOTE: .iteritems() implies this runs under Python 2.
    tbl_dict = dict()
    for job, builds in table["reference"]["data"].items():
        for build in builds:
            for tst_name, tst_data in data[job][str(build)].iteritems():
                if tbl_dict.get(tst_name, None) is None:
                    name = "{0}-{1}".format(tst_data["parent"].split("-")[0],
                                            "-".join(tst_data["name"].
                                                     split("-")[1:]))
                    tbl_dict[tst_name] = {"name": name,
                                          "ref-data": list(),
                                          "cmp-data": list()}
                try:
                    tbl_dict[tst_name]["ref-data"].\
                        append(tst_data["throughput"]["value"])
                except TypeError:
                    pass  # No data in output.xml for this test

    for job, builds in table["compare"]["data"].items():
        for build in builds:
            for tst_name, tst_data in data[job][str(build)].iteritems():
                try:
                    tbl_dict[tst_name]["cmp-data"].\
                        append(tst_data["throughput"]["value"])
                # KeyError: test exists only on the compare side — ignore.
                except KeyError:
                    pass
                # TypeError: broken data — drop the whole row.
                except TypeError:
                    tbl_dict.pop(tst_name, None)
    if history:
        for item in history:
            for job, builds in item["data"].items():
                for build in builds:
                    for tst_name, tst_data in data[job][str(
                            build)].iteritems():
                        # History columns only for tests already collected.
                        if tbl_dict.get(tst_name, None) is None:
                            continue
                        if tbl_dict[tst_name].get("history", None) is None:
                            tbl_dict[tst_name]["history"] = OrderedDict()
                        if tbl_dict[tst_name]["history"].get(item["title"],
                                                             None) is None:
                            tbl_dict[tst_name]["history"][item["title"]] = \
                                list()
                        try:
                            tbl_dict[tst_name]["history"][item["title"]].\
                                append(tst_data["throughput"]["value"])
                        except (TypeError, KeyError):
                            pass

    tbl_lst = list()
    for tst_name in tbl_dict.keys():
        item = [tbl_dict[tst_name]["name"], ]
        if history:
            if tbl_dict[tst_name].get("history", None) is not None:
                for hist_data in tbl_dict[tst_name]["history"].values():
                    if hist_data:
                        item.append(round(mean(hist_data) / 1000000, 2))
                        item.append(round(stdev(hist_data) / 1000000, 2))
                    else:
                        item.extend([None, None])
            else:
                item.extend([None, None])
        data_t = tbl_dict[tst_name]["ref-data"]
        if data_t:
            # Convert pps to Mpps for presentation.
            item.append(round(mean(data_t) / 1000000, 2))
            item.append(round(stdev(data_t) / 1000000, 2))
        else:
            item.extend([None, None])
        data_t = tbl_dict[tst_name]["cmp-data"]
        if data_t:
            item.append(round(mean(data_t) / 1000000, 2))
            item.append(round(stdev(data_t) / 1000000, 2))
        else:
            item.extend([None, None])
        # item[-4] is the reference mean, item[-2] the compare mean.
        if item[-4] is not None and item[-2] is not None and item[-4] != 0:
            item.append(int(relative_change(float(item[-4]), float(item[-2]))))
        # Only complete rows (all history + data + change columns) are kept.
        if len(item) == len(header):
            tbl_lst.append(item)

    # Sort the table according to the relative change
    tbl_lst.sort(key=lambda rel: rel[-1], reverse=True)

    # Generate tables:
    # All tests in csv:
    tbl_names = ["{0}-ndr-1t1c-full{1}".format(table["output-file"],
                                               table["output-file-ext"]),
                 "{0}-ndr-2t2c-full{1}".format(table["output-file"],
                                               table["output-file-ext"]),
                 "{0}-ndr-4t4c-full{1}".format(table["output-file"],
                                               table["output-file-ext"]),
                 "{0}-pdr-1t1c-full{1}".format(table["output-file"],
                                               table["output-file-ext"]),
                 "{0}-pdr-2t2c-full{1}".format(table["output-file"],
                                               table["output-file-ext"]),
                 "{0}-pdr-4t4c-full{1}".format(table["output-file"],
                                               table["output-file-ext"])
                 ]
    for file_name in tbl_names:
        logging.info(" Writing file: '{0}'".format(file_name))
        with open(file_name, "w") as file_handler:
            file_handler.write(header_str)
            for test in tbl_lst:
                # A row goes into the file whose name matches both its
                # rate class (ndr/pdr) and its core count.
                if (file_name.split("-")[-3] in test[0] and  # NDR vs PDR
                        file_name.split("-")[-2] in test[0]):  # cores
                    test[0] = "-".join(test[0].split("-")[:-1])
                    file_handler.write(",".join([str(item) for item in test]) +
                                       "\n")

    # All tests in txt:
    tbl_names_txt = ["{0}-ndr-1t1c-full.txt".format(table["output-file"]),
                     "{0}-ndr-2t2c-full.txt".format(table["output-file"]),
                     "{0}-ndr-4t4c-full.txt".format(table["output-file"]),
                     "{0}-pdr-1t1c-full.txt".format(table["output-file"]),
                     "{0}-pdr-2t2c-full.txt".format(table["output-file"]),
                     "{0}-pdr-4t4c-full.txt".format(table["output-file"])
                     ]

    for i, txt_name in enumerate(tbl_names_txt):
        logging.info(" Writing file: '{0}'".format(txt_name))
        convert_csv_to_pretty_txt(tbl_names[i], txt_name)

    # Selected tests in csv:
    # Excerpt the first / last nr-of-tests-shown rows of the already
    # written 1t1c files (rows are sorted by relative change above).
    input_file = "{0}-ndr-1t1c-full{1}".format(table["output-file"],
                                               table["output-file-ext"])
    with open(input_file, "r") as in_file:
        lines = list()
        for line in in_file:
            lines.append(line)

    output_file = "{0}-ndr-1t1c-top{1}".format(table["output-file"],
                                               table["output-file-ext"])
    logging.info(" Writing file: '{0}'".format(output_file))
    with open(output_file, "w") as out_file:
        out_file.write(header_str)
        for i, line in enumerate(lines[1:]):
            if i == table["nr-of-tests-shown"]:
                break
            out_file.write(line)

    output_file = "{0}-ndr-1t1c-bottom{1}".format(table["output-file"],
                                                  table["output-file-ext"])
    logging.info(" Writing file: '{0}'".format(output_file))
    with open(output_file, "w") as out_file:
        out_file.write(header_str)
        # Iterate from the last line back towards (but excluding) the header.
        for i, line in enumerate(lines[-1:0:-1]):
            if i == table["nr-of-tests-shown"]:
                break
            out_file.write(line)

    input_file = "{0}-pdr-1t1c-full{1}".format(table["output-file"],
                                               table["output-file-ext"])
    with open(input_file, "r") as in_file:
        lines = list()
        for line in in_file:
            lines.append(line)

    output_file = "{0}-pdr-1t1c-top{1}".format(table["output-file"],
                                               table["output-file-ext"])
    logging.info(" Writing file: '{0}'".format(output_file))
    with open(output_file, "w") as out_file:
        out_file.write(header_str)
        for i, line in enumerate(lines[1:]):
            if i == table["nr-of-tests-shown"]:
                break
            out_file.write(line)

    output_file = "{0}-pdr-1t1c-bottom{1}".format(table["output-file"],
                                                  table["output-file-ext"])
    logging.info(" Writing file: '{0}'".format(output_file))
    with open(output_file, "w") as out_file:
        out_file.write(header_str)
        for i, line in enumerate(lines[-1:0:-1]):
            if i == table["nr-of-tests-shown"]:
                break
            out_file.write(line)
# Append zero to the remaining group if len(group_rev_cnt_list) < no_of_groups: group_rev_cnt_list = assign_zero(class_group_list, group_rev_cnt_list) for row in group_rev_cnt_list: # group_rev_cnt_list is a 2D-list group_rev_no.append(row[0]) group_rev_count.append(row[1]) # Output for this part if is_null: stat = "Not a single group made any revisions this week. Please consider encouraging students to contribute more actively.</p>" else: avg = mean(group_rev_count) sd = stdev(group_rev_count) stat = "In this class, the average number of revisions per group is " + str( avg ) + ". Following is a brief analysis of weekly performance of the class.</p>" print( str(sum(group_rev_count)) + " " + str(avg) + " " + str(sd)) # This part is to get the Best 3 and Worst 3 groups in a class by comparing their revision counts # No need to fetch data from db here, @group_rev_cnt_list contains all group numbers with there revision counts. group_rev_cnt_list.sort( key=lambda x: x[1]) #order by 2nd column @revision_count AS num = 3 len_group_rev_cnt_list = len(group_rev_cnt_list) if len_group_rev_cnt_list < num: num = len_group_rev_cnt_list
xs = list() for code in votes: xs.append(votes[code][dim]) part.append(votes[code]['particip']) plt.scatter(xs, part) plt.show() ### Participation-related analysis print "Histogram of participation diff" diff = list() for code in jt: diff.append(tables[1][code]['particip'] - tables[0][code]['particip']) m = utils.mean(diff) st = utils.stdev(diff) print "Mean= %f , and stdev= %f " % (m, st) #plt.hist(diff, bins=range(-30, 30)) #plt.show() pbound = 98 geq = lambda v: v['particip'] >= pbound print "Tables in 7O with >%d pct particip: %d" % (pbound, len(utils.filter_by(tables[0], geq))) print "Tables in 14A with >%d pct particip: %d" % (pbound, len(utils.filter_by(tables[1], geq))) #delta = round(m + 3*st, 2) delta = 10 print "Tables in which there is more than %.2f pct participation difference:" % delta codes = utils.compare_by( tables,