def do_analyze(args):
    """Run the 'analyze' subcommand.

    Loads a clustering from *args*, runs the analysis over it, and writes
    the combined clustering namespace + analysis stats as a single JSON
    line to ``args.output``.
    """
    clustering_namespace, clustering = load_clustering(args)
    # Start from the loaded namespace, then fold in the analysis stats.
    record = dict(clustering_namespace)
    record.update(perform_analysis(args, clustering))
    write_json_line(args.output, record)
def do_simulation(args):
    """Run the 'simulate' subcommand.

    Optionally seeds the RNG for reproducibility, runs the simulation,
    writes a JSON header line (serialized args merged with simulation
    stats) to ``args.output``, then writes one ``"<id> <seq>"`` line per
    simulated record.
    """
    if args.seed is not None:
        # Seed only when explicitly requested so default runs stay random.
        random.seed(args.seed)
    data, stats = perform_simulation(args)
    record = utils.serialize_args(args)
    record.update(stats)
    out = args.output
    write_json_line(out, record)
    # Each data item is an (identifier, sequence) pair.
    for ident, seq in data:
        out.write("%s %s\n" % (ident, seq))
def do_cluster(args):
    """Run the 'cluster' subcommand.

    Loads a simulation, clusters it, then writes a JSON header line
    (simulation namespace, serialized args, and clustering stats merged
    in that order) followed by one JSON line per resulting cluster.
    """
    sim_namespace, simulation = load_simulation(args)
    results, stats = perform_clustering(args, simulation)
    # Merge order matters: later updates win on duplicate keys.
    record = dict(sim_namespace)
    record.update(utils.serialize_args(args))
    record.update(stats)
    write_json_line(args.output, record)
    for entry in results:
        write_json_line(args.output, entry)
def do_mapper(args):
    """Run the 'mapper' subcommand.

    Builds two simulated clustering grids that differ only in their error
    rate (``h0_err`` vs ``h1_err``), times their comparison under the
    requested metrics, and writes one JSON line per comparison result
    with the timing info and serialized args folded in.
    """
    # Parameters shared by both hypothesis grids.
    shared = {
        "n": args.sim_size,
        "nclusters": args.nclusters,
        "split_join": args.split_join,
        "join_negatives": bool(args.join_negatives),
        "population_size": args.population_size,
        "with_warnings": args.sampling_warnings,
    }
    h0 = Grid.with_sim_clusters(p_err=args.h0_err, **shared)
    h1 = Grid.with_sim_clusters(p_err=args.h1_err, **shared)
    with PMTimer() as timer:
        results = h0.compare(h1, args.metrics)
    for result in results:
        result.update(timer.to_dict())
        result.update(utils.serialize_args(args))
        write_json_line(args.output, result)