def generate_graph(self, args, parser):
    """
    Render a graph of the dataset named in the CLI arguments.

    Invalid arguments or an unparseable dataset are reported via
    ``parser.error``.
    """
    try:
        graph_renderer = self.get_graph_renderer(args)
        matrix = MatrixDataset.from_csv(args.dataset)
    except ValueError as err:  # pragma: no cover
        parser.error(err)
    graph_renderer.render(matrix, args.outfile)
def run_algorithm(self, args, parser):
    """
    Run the algorithms requested on the command line against the given
    dataset, and print the results as YAML.

    Warnings about ignored algorithm parameters go to stderr.
    """
    param_dict = dict(args.alg_params or [])
    algorithms = []
    for alg_cls in args.alg_classes:
        accepted, ignored = self.get_algorithm_params(alg_cls, param_dict)
        algorithms.append(self.get_algorithm_object(alg_cls, accepted))
        if ignored:
            warning = self.get_ignored_parameters_message(alg_cls, ignored)
            print("WARNING: {}".format(warning), file=sys.stderr)

    sup_data = None
    if args.supervised:
        sup_data = SupervisedData.from_csv(args.dataset)
        dataset = sup_data.data
    else:
        # Catch error early if accuracy requested in output but dataset is
        # not supervised
        if OutputFields.ACCURACY in args.output_fields:
            parser.error("cannot calculate accuracy without --supervised")
        dataset = MatrixDataset.from_csv(args.dataset)

    display = {}
    for algorithm in algorithms:
        # Restrict results to the sources/variables the user asked for
        filtered = algorithm.run(dataset).filter(
            sources=args.sources, variables=args.variables
        )
        name = self.ALG_LABEL_MAPPING.inverse[algorithm.__class__]
        display[name] = self.get_output_obj(
            filtered, output_fields=args.output_fields, sup_data=sup_data
        )
    print(yaml.dump(display, indent=2, default_flow_style=False))
def test_from_csv_empty_rows(self, tmpdir):
    """Rows that are empty or whitespace-only should be fully masked."""
    csv_file = tmpdir.join("data.csv")
    rows = ["1,2,", ",,", " ,\t,", "3,4,5"]
    csv_file.write("\n".join(rows))
    dataset = MatrixDataset.from_csv(csv_file.open())
    expected = ma.masked_values(
        [[1, 2, 0],
         [0, 0, 0],
         [0, 0, 0],
         [3, 4, 5]],
        0
    )
    assert np.array_equal(dataset.sv.mask, expected.mask)
    assert (dataset.sv == expected).all()
def test_results(self, csv_dataset, csv_fileobj, capsys):
    """CLI output should match running the algorithm directly."""
    self.run("run", "-a", "average_log", "-f", csv_dataset)
    parsed = yaml.safe_load(capsys.readouterr().out)
    assert "average_log" in parsed
    shown = parsed["average_log"]
    assert isinstance(shown, dict)
    expected = AverageLog().run(MatrixDataset.from_csv(csv_fileobj))
    assert shown["trust"] == expected.trust
    assert shown["belief"] == expected.belief
    assert shown["iterations"] == expected.iterations
def test_from_csv_single_row_or_column(self, tmpdir):
    """
    Datasets with a single source (one CSV row) or a single variable (one
    CSV column) should load correctly, including their empty cells.
    """
    filepath1 = tmpdir.join("data1.csv")
    filepath1.write("1,,3,2,6")
    data1 = MatrixDataset.from_csv(filepath1.open())
    exp_sv1 = ma.masked_values([[1, 0, 3, 2, 6]], 0)
    assert data1.num_sources == 1
    # The matrix has 5 columns (one of them empty), so there are 5
    # variables; only the 4 non-empty cells give rise to claims
    assert data1.num_variables == 5
    assert data1.num_claims == 4
    assert np.array_equal(data1.sv.mask, exp_sv1.mask)
    assert (data1.sv == exp_sv1).all()

    filepath2 = tmpdir.join("data2.csv")
    filepath2.write("1\n\n3\n2\n6")
    data2 = MatrixDataset.from_csv(filepath2.open())
    exp_sv2 = exp_sv1.T
    # Transposed case: 5 rows (one of them empty) means 5 sources
    assert data2.num_sources == 5
    assert data2.num_variables == 1
    assert data2.num_claims == 4
    assert np.array_equal(data2.sv.mask, exp_sv2.mask)
    assert (data2.sv == exp_sv2).all()
def test_belief_stats(self, csv_dataset, csv_fileobj, capsys):
    """'-o belief_stats' should print only the per-variable mean/stddev."""
    self.run("run", "-a", "sums", "-f", csv_dataset, "-o", "belief_stats")
    shown = yaml.safe_load(capsys.readouterr().out)["sums"]
    assert set(shown.keys()) == {"belief_stats"}
    stats = (
        Sums().run(MatrixDataset.from_csv(csv_fileobj)).get_belief_stats()
    )
    expected = {}
    for var, (mean, stddev) in stats.items():
        expected[var] = {"mean": mean, "stddev": stddev}
    assert shown["belief_stats"] == expected
def test_from_csv(self, tmpdir):
    """Loading a CSV should tolerate stray whitespace and trailing newlines."""
    csv_file = tmpdir.join("data.csv")
    rows = [
        "1,,3, 2,6 ",  # extra whitespace should not matter
        ", 9,0,2,5",
        "3,9, ,,1",
        "1,9 , 5.7,3,4",
        "5,1,3,1,1",
        "\n"  # new lines at the end of file should not matter
    ]
    csv_file.write("\n".join(rows))
    dataset = MatrixDataset.from_csv(csv_file.open())
    expected = ma.masked_values(
        [[1, 999, 3, 2, 6],
         [999, 9, 0, 2, 5],
         [3, 9, 999, 999, 1],
         [1, 9, 5.7, 3, 4],
         [5, 1, 3, 1, 1]],
        999
    )
    assert dataset.num_sources == 5
    assert dataset.num_variables == 5
    assert dataset.num_claims == 15
    assert np.array_equal(dataset.sv.mask, expected.mask)
    assert (dataset.sv == expected).all()
def test_custom_output(self, csv_fileobj, csv_dataset, capsys):
    """Only the fields requested with '-o' should appear in the output."""
    def run_with_fields(*fields):
        # Run the CLI with the given output fields and return the parsed
        # results for the 'sums' algorithm
        self.run("run", "-a", "sums", "-f", csv_dataset, "-o", *fields)
        return yaml.safe_load(capsys.readouterr().out)["sums"]

    assert set(run_with_fields("time").keys()) == {"time"}
    assert set(run_with_fields("time", "iterations").keys()) == {
        "time", "iterations"
    }
    results = run_with_fields("trust", "trust_stats")
    assert set(results.keys()) == {"trust", "trust_stats"}
    exp_mean, exp_stddev = (
        Sums().run(MatrixDataset.from_csv(csv_fileobj)).get_trust_stats()
    )
    assert results["trust_stats"] == {
        "mean": exp_mean,
        "stddev": exp_stddev
    }
def test_matrix_renderer(self):
    """Check node labelling for 0-indexed and 1-indexed renderers."""
    csv_buf = StringIO()
    csv_buf.write(",5,7\n,,\n1,2,3")
    csv_buf.seek(0)
    dataset = MatrixDataset.from_csv(csv_buf)
    zero_idx = MatrixDatasetGraphRenderer()
    one_idx = MatrixDatasetGraphRenderer(zero_indexed=False)
    # Labels are assigned during rendering, so render before querying them
    zero_idx.render(dataset, BytesIO())
    one_idx.render(dataset, BytesIO())
    assert zero_idx.get_source_label(0) == "s0"
    assert one_idx.get_source_label(0) == "s1"
    assert zero_idx.get_var_label(0) == "v1"
    assert one_idx.get_var_label(0) == "v2"
    # Note that source 1 (in 0-index terms) makes no claims: ID 1 should
    # therefore be source 2 (in 0-index terms)
    assert zero_idx.get_source_label(1) == "s2"
    assert one_idx.get_source_label(1) == "s3"
    assert zero_idx.get_claim_label(0, 1) == "v1=7"
    assert one_idx.get_claim_label(0, 1) == "v2=7"
def test_get_output_obj(self, csv_fileobj):
    """
    With no explicit output fields, every field except accuracy should be
    present; accuracy is only available when supervised data is given.
    """
    dataset = MatrixDataset.from_csv(csv_fileobj)
    alg = Sums(iterator=FixedIterator(5))
    results = alg.run(dataset)
    out1 = BaseClient().get_output_obj(results)
    exp_keys = {
        f.value for f in OutputFields if f != OutputFields.ACCURACY
    }
    assert set(out1.keys()) == exp_keys

    sup_data = SupervisedData.from_csv(csv_fileobj)
    sup_results = alg.run(sup_data.data)
    out2 = BaseClient().get_output_obj(sup_results, sup_data=sup_data)
    assert set(out2.keys()) == {f.value for f in OutputFields}
    assert out2["trust"] == sup_results.trust
    assert out2["belief"] == sup_results.belief

    out3 = BaseClient().get_output_obj(
        results, output_fields=[OutputFields.TRUST]
    )
    assert set(out3.keys()) == {"trust"}
def run(self):
    """
    Run an algorithm on a user-supplied dataset. Required HTTP parameters:

    * 'algorithm'
    * 'matrix'

    Optional parameters:

    * 'parameters'
    * 'previous_results'

    Responses are JSON objects of the form ``{"ok": True, "data": ...}``
    or ``{"ok": False, "error": ...}``
    """
    alg_labels = request.args.getlist("algorithm")
    matrix_csv = request.args.get("matrix")
    if not alg_labels or not matrix_csv:
        err_msg = "'algorithm' and 'matrix' parameters are required"
        return jsonify(ok=False, error=err_msg), 400
    # Underscores in the 'matrix' parameter encode row separators, since
    # literal newlines are awkward to pass in a query string. They must be
    # decoded to newlines so from_csv sees the individual rows: stripping
    # them (replace with "") collapsed every dataset to a single CSV row.
    matrix_csv = matrix_csv.replace("_", "\n")
    params_str = request.args.get("parameters")
    try:
        all_params = self.get_param_dict(params_str)
        dataset = MatrixDataset.from_csv(StringIO(matrix_csv))
    except ValueError as ex:
        return jsonify(ok=False, error=str(ex)), 400

    messages = []
    all_output = {}
    for alg_label in alg_labels:
        try:
            alg_cls = self.algorithm_cls(alg_label)
            params, ignored = self.get_algorithm_params(
                alg_cls, all_params)
            alg = self.get_algorithm_object(alg_cls, params)
        except ValueError as ex:
            return jsonify(ok=False, error=str(ex)), 400
        # Show a message for each of the ignored parameters
        if ignored:
            msg = self.get_ignored_parameters_message(alg_cls, ignored)
            messages.append(msg)
        try:
            results = alg.run(dataset)
        except ConvergenceError as ex:
            # Convergence failure is a server-side computation problem
            return jsonify(ok=False, error=str(ex)), 500
        except EmptyDatasetError as ex:
            return jsonify(ok=False, error=str(ex)), 400
        output = self.get_output_obj(results)

        # Construct a graph and/or animation
        output["imagery"] = {}
        cs = ResultsGradientColourScheme(results)
        renderer = self.get_graph_renderer(colours=cs)
        json_buffer = StringIO()
        renderer.render(dataset, json_buffer)
        output["imagery"]["graph"] = json_buffer.getvalue()
        # Note: can only produce animation for iterative algorithms
        if isinstance(alg, BaseIterativeAlgorithm):
            animator = JsonAnimator(renderer=self.get_graph_renderer())
            json_buffer = StringIO()
            # Note: empty data and convergence error would already have
            # been caught above, so no need to check here
            animator.animate(json_buffer, alg, dataset,
                             show_progress=False)
            output["imagery"]["animation"] = json_buffer.getvalue()

        # Include diff between previous results if available
        prev_results = request.args.get("previous_results")
        if prev_results is not None:
            try:
                obj = self.get_results_object(prev_results)
            except ValueError as ex:
                err_msg = "'previous_results' is invalid: {}".format(ex)
                return jsonify(ok=False, error=err_msg), 400
            # Previous results have been converted to JSON, which may have
            # changed numeric keys to strings: to ensure results can be
            # compared, convert the current results to and from JSON
            current_results = self.get_results_object(json.dumps(output))
            diff = ResultDiff(obj, current_results)
            output["diff"] = {
                "trust": diff.trust,
                "belief": diff.belief,
                "time_taken": diff.time_taken,
                "iterations": diff.iterations
            }
        all_output[alg_label] = output
    return jsonify({"ok": True, "data": all_output, "messages": messages})
def data(self):
    """Load and return the dataset stored in ``data.csv``."""
    with open(self.get_filepath("data.csv")) as csv_file:
        return MatrixDataset.from_csv(csv_file)
def test_invalid_csv_shape(self, tmpdir):
    """Rows with differing numbers of entries should raise ValueError."""
    csv_file = tmpdir.join("data.csv")
    csv_file.write("\n".join(["1,2,", "1,2"]))
    with pytest.raises(ValueError) as excinfo:
        MatrixDataset.from_csv(csv_file.open())
    assert "Expected 3 entries in row 2, got 2" in str(excinfo.value)