예제 #1
0
def print_tables(vi):
    """Print one weighted-valuation table for every entry of ``vi.ensemble``.

    For each ensemble key the valuations' weights are rescaled *in place* so
    that they sum to 1, every valuation is evaluated against ``vi.data``, and
    a table with one row per valuation method plus a separated "Total" row
    (the weighted sum of the values) is printed to stdout.

    The function is written so that it behaves the same on Python 2 and
    Python 3: ``print`` is always called with a single parenthesised argument
    and no lazy ``map`` object is relied upon for side effects.

    Raises:
        ZeroDivisionError: if the weights of an ensemble entry sum to zero
            (this includes an entry without any valuation).
    """
    print("\n")
    for en in vi.ensemble:
        valuations = vi.ensemble[en].valuation

        t = PrettyTable(["Method", "Weight", "Value"])
        t.float_format = ".2"
        t.align["Method"] = "l"

        # Rescale the weights in place so they add up to 1.
        weight_sum = sum(k.weight for k in valuations)
        weight_mult = 1.0 / weight_sum
        for v in valuations:
            v.weight *= weight_mult

        # hack: run valuations to get inferred values for notes.  This must be
        # an explicit loop: a bare map() is lazy on Python 3 and would never
        # call value().
        for v in valuations:
            v.value(vi.data)

        # One row per valuation: label with notes, weight, value.
        vals = [
            [v.method + " (%s)" % v.notes(vi.data), v.weight, v.value(vi.data)]
            for v in valuations
        ]
        total = sum(row[1] * row[2] for row in vals)
        for row in vals:
            t.add_row(row)
        t.add_row(["Total", 1.0, total])

        # PrettyTable voodoo to get a line above the "Total" row: compute the
        # column widths with the total row present, then replace that row by
        # a row of dashes of exactly those widths followed by the total again.
        options = t._get_options({})
        t._compute_widths(t._format_rows(t._rows, options), options)
        t.del_row(-1)
        # add_row needs a real sequence, not a map object.
        t.add_row(["-" * width for width in t._widths])
        t.add_row(["Total", 1.0, total])

        # Print table
        print("Valuation: %s" % en)
        print(t)
        print("\n")
예제 #2
0
def main():
    """Load price CSVs, then optimise (or just evaluate) the strategy parameters.

    Command line: a positional glob pattern selecting the CSV files, one
    optional flag per strategy parameter (``--period``, ``--bb-low``,
    ``--bb-high``, ``--lo-zone``, ``--hi-zone``, ``--lo-sigma``,
    ``--hi-sigma``, ``--protect-loss``), ``--method`` selecting the optimiser
    and ``--finish`` forwarded to ``optimize.brute``.

    A parameter given on the command line is held fixed; every other
    parameter is left free and searched within ``bounds_dict``.  ``run`` is
    always called as ``run(free_values, *fixed)`` where ``fixed`` holds the
    eight parameters in declaration order with ``None`` for the free ones.

    Side effects: rebinds the module globals ``DF`` (all CSV data stitched
    together), ``PTABLE`` (the progress table) and ``OPTIONS`` (its rendering
    options), prints to stdout and may open matplotlib windows.
    """

    global DF, PTABLE, OPTIONS

    def parse_bool(text):
        """Convert a command-line token to bool.

        ``type=bool`` would turn every non-empty string, including "False",
        into True, so the accepted spellings are matched explicitly.
        """
        token = text.strip().lower()
        if token in ("1", "true", "t", "yes", "y", "on"):
            return True
        if token in ("0", "false", "f", "no", "n", "off", ""):
            return False
        raise argparse.ArgumentTypeError(f"expected a boolean, got {text!r}")

    # Strategy parameters in the order `run` receives them.
    param_names = (
        "period",
        "bb_low",
        "bb_high",
        "lo_zone",
        "hi_zone",
        "lo_sigma",
        "hi_sigma",
        "protect_loss",
    )

    parser = argparse.ArgumentParser()

    parser.add_argument("glob")
    parser.add_argument("--period", type=int)
    parser.add_argument("--bb-low", type=float)
    parser.add_argument("--bb-high", type=float)
    parser.add_argument("--lo-zone", type=float)
    parser.add_argument("--hi-zone", type=float)
    parser.add_argument("--lo-sigma", type=float)
    parser.add_argument("--hi-sigma", type=float)
    parser.add_argument("--protect-loss", type=parse_bool)
    parser.add_argument("--method", default="dual_annealing")
    parser.add_argument("--finish", default=None)

    args = parser.parse_args()

    DF = pd.DataFrame(columns=["time", "mark", "ask", "bid"])

    for csvfile in glob.glob(args.glob):

        csvdf = pd.read_csv(csvfile, index_col=0)

        csvdf["time"] = csvdf.apply(timefunc, axis=1)
        for column in ("mark", "ask", "bid"):
            csvdf[column] = pd.to_numeric(csvdf[column])

        if len(DF) > 0:
            # Shift this file so that its first row coincides, in time and in
            # mark price, with the last row accumulated so far.
            last_row = DF.iloc[-1]
            first_row = csvdf.iloc[0]

            dt = first_row["time"] - last_row["time"]
            scale = first_row["mark"] - last_row["mark"]

            csvdf["time"] = csvdf["time"] - dt
            for column in ("mark", "ask", "bid"):
                csvdf[column] = csvdf[column] - scale

        # DataFrame.append was removed in pandas 2.0; concat is the
        # equivalent call and also works on older versions.
        DF = pd.concat([DF, csvdf], ignore_index=True)

    # Search interval of every parameter when it is left free.
    bounds_dict = {
        "period": (12, 48 * 3600 / 5),
        "bb_low": (0.25, 4),
        "bb_high": (0.25, 4),
        "lo_zone": (-0.1, 0.5),
        "hi_zone": (0.5, 1.1),
        "lo_sigma": (0, 4),
        "hi_sigma": (0, 4),
        "protect_loss": (0, 1),
    }

    # Per-parameter step, passed to shgo's local minimiser as "eps".
    abs_dict = {
        "period": 1,
        "bb_low": 0.1,
        "bb_high": 0.1,
        "lo_zone": 0.01,
        "hi_zone": 0.01,
        "lo_sigma": 0.1,
        "hi_sigma": 0.1,
        "protect_loss": 1,
    }

    PTABLE = PrettyTable([
        "Iteration",
        "Time",
        "Period",
        "BB Low",
        "BB High",
        "Low Zone",
        "High Zone",
        "Low Sigma",
        "High Sigma",
        "Protect",
        "Return",
    ])
    PTABLE.float_format = ".4"

    # A (low, high) sample per column, used only to size the table columns.
    width_samples = [
        (0, 100),
        (
            f"{datetime(2021, 1, 1, 0, 0, 0):%X}",
            f"{datetime(2021, 1, 1, 23, 59, 59):%X}",
        ),
        [int(v) for v in bounds_dict["period"]],
    ]
    for name in param_names[1:-1]:
        width_samples.append([float(v) for v in bounds_dict[name]])
    width_samples.append((False, True))
    width_samples.append((-99.0, 99.0))

    # Add every low/high combination as a row, then let PrettyTable compute
    # the column widths and keep the horizontal rule / print the header.
    for picks in product([0, 1], repeat=len(width_samples)):
        PTABLE.add_row(
            [pair[pick] for pair, pick in zip(width_samples, picks)])
    OPTIONS = PTABLE._get_options({})
    frows = PTABLE._format_rows(PTABLE._get_rows(OPTIONS), OPTIONS)
    PTABLE._compute_widths(frows, OPTIONS)
    PTABLE._hrule = PTABLE._stringify_hrule(OPTIONS)
    print(PTABLE._stringify_header(OPTIONS))

    # Split the parameters into fixed values and free ones (bounds + step).
    fixed = []
    bounds = []
    abs_diff = []
    for name in param_names:
        value = getattr(args, name)
        fixed.append(value)
        if value is None:
            bounds.append(bounds_dict[name])
            abs_diff.append(abs_dict[name])

    res = None
    if args.method == "brute":

        x0, fval, grid, Jout = optimize.brute(
            func=run,
            args=tuple(fixed),
            ranges=bounds,
            full_output=True,
            finish=args.finish,
        )

        if grid.ndim == 1:
            # One free parameter: plain curve.
            plt.plot(grid, -np.log(Jout))
            plt.title(args.glob)
            plt.show()

        elif grid.ndim == 3:
            # Two free parameters: surface plot.
            fig = plt.figure(figsize=(10, 6))
            ax1 = fig.add_subplot(111, projection="3d")

            mycmap = plt.get_cmap("gist_earth")
            surf1 = ax1.plot_surface(grid[0, :],
                                     grid[1, :],
                                     -np.log(Jout),
                                     cmap=mycmap)
            fig.colorbar(surf1, ax=ax1, shrink=0.5, aspect=5)

            plt.title(args.glob)
            plt.show()

    elif args.method == "basinhopping":
        # NOTE(review): x0 contains None for every free parameter and the
        # extra args tuple has 7 entries although there are 8 parameters;
        # confirm against the signature of `run`.
        res = optimize.basinhopping(
            func=run,
            x0=tuple(fixed),
            minimizer_kwargs={"args": tuple(7 * [None])},
        )

    elif args.method == "shgo-sobol":

        # Equality constraints forcing the integer-valued parameters to
        # whole numbers.  x only contains the *free* parameters, so a free
        # period is always x[0] and a free protect_loss is always the last
        # element (a fixed index of 7 is only valid when nothing is fixed).
        constraints = []
        if args.period is None:
            constraints.append({
                "type": "eq",
                "fun": lambda x: np.array([x[0] - int(x[0])])
            })

        if args.protect_loss is None:
            constraints.append({
                "type": "eq",
                "fun": lambda x: np.array([x[-1] - int(x[-1])])
            })

        res = optimize.shgo(
            func=run,
            args=tuple(fixed),
            bounds=bounds,
            constraints=constraints,
            options={"disp": True},
            sampling_method="sobol",
            minimizer_kwargs={"options": {
                "eps": np.array(abs_diff)
            }},
        )

        tbl = PrettyTable([
            "Period",
            "BB Low",
            "BB High",
            "Low Zone",
            "High Zone",
            "Low Sigma",
            "High Sigma",
            "Protect",
            "Return",
        ])
        tbl.float_format = ".4"

        # One row per local minimum: merge the free values found by shgo
        # with the fixed ones, then append the re-evaluated return.
        for minim in res.xl:
            free_values = iter(minim)
            row = [
                next(free_values) if val is None else val for val in fixed
            ]
            score = run(minim, *fixed)
            row.append(-np.log(score))
            tbl.add_row(row)

        print(PTABLE._hrule)
        print()

        print(tbl)

    elif args.method == "hyperopt":

        space = [
            hp.quniform("period", 12, 48 * 3600 / 5, 1),
            hp.uniform("bb_low", 0.25, 4),
            hp.uniform("bb_high", 0.25, 4),
            hp.uniform("lo_zone", -0.1, 0.5),
            hp.uniform("hi_zone", 0.5, 1.1),
            hp.uniform("lo_sigma", 0, 4),
            hp.uniform("hi_sigma", 0, 4),
            hp.quniform("protect_loss", 0, 1, 1),
        ]

        res = fmin(run, space, algo=tpe.suggest, max_evals=200)

        print(run(space_eval(space, res)))

    elif len(bounds) == 0:
        # Everything is fixed: a single evaluation, nothing to optimise.
        run([], *fixed)

    elif len(bounds) == 1:

        low, high = bounds[0]
        x0 = [(low + high) / 2]
        constraints = ()
        options = {"disp": True}
        if args.period is None:
            # The single free parameter is the period: keep it integral.
            constraints = [{
                "type": "eq",
                "fun": lambda x: np.array([x[0] - int(x[0])])
            }]
            options["finite_diff_rel_step"] = (1 / x0[0], )

        res = optimize.minimize(
            fun=run,
            x0=x0,
            method="trust-constr",
            args=tuple(fixed),
            bounds=Bounds(low, high),
            constraints=constraints,
            options=options,
        )

    else:
        # Any other method name is looked up in scipy.optimize (the default
        # is dual_annealing) and called with dual_annealing-style arguments.
        res = getattr(optimize, args.method)(
            func=run,
            args=tuple(fixed),
            bounds=bounds,
            maxiter=1000000,
            local_search_options={
                "options": {
                    "disp": True
                }
            },
        )

    if res is not None:
        print(res)

    print(f"Glob = {args.glob}")
    # Ratio of the last mark price to the first one over the stitched data.
    print(f"Default = {DF.iloc[-1]['mark'] / DF.iloc[0]['mark']}")
예제 #3
0
def run_benchmark(config):
    """Run the tiledb-vcf benchmark suites described by a YAML config file.

    Args:
        config: path of the YAML file.  The keys read from it are
            ``base_command``, ``iterations``, ``ingestion_files`` and
            ``suites``; every suite provides ``array_uri``, ``group_uri`` and
            a list of ``tests``, each with a ``name``, ``args`` and an
            optional ``check_array_size`` flag.

    Every test command of every suite is executed ``iterations`` times and
    timed.  For suites containing a "store" test a summary table (ingestion
    and export timings, array size) and a per-file size table are printed
    and logged as CSV.  Commands that could not be started are collected and
    logged at the end.  The suite directory (``group_uri``) is deleted before
    each iteration and after the suite has finished.

    Returns:
        None.  A YAML parsing error is printed and ends the run early.
    """
    mebibyte = 1024 * 1024

    def remove_dir(path):
        """Recursively delete ``path`` if it is an existing directory."""
        if path is not None and os.path.isdir(path):
            shutil.rmtree(path)

    def to_csv(header, rows):
        """Render a header and rows as comma-separated, newline-ended text."""
        lines = [",".join(header)]
        lines.extend(",".join(map(str, row)) for row in rows)
        return "\n".join(lines) + "\n"

    benchmarking_start = time.time()

    # Only the parsing needs the file handle and can raise YAMLError.
    with open(config, 'r') as stream:
        try:
            # safe_load: plain yaml.load without a Loader can construct
            # arbitrary objects and is rejected by PyYAML >= 6.
            cfg = yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            print(exc)
            return

    base_cmd = cfg['base_command']
    iterations = cfg['iterations']
    ingestion_files = cfg['ingestion_files']
    suites = cfg['suites']

    results = []
    attribute_results = {}
    suite_names = []
    errors = {}

    # Total size (MB) of the files being ingested
    ingestion_size = sum(
        os.path.getsize(ingestion_file) / mebibyte
        for ingestion_file in ingestion_files)

    # Loop through each test suite
    for suite_index, (suite_name, test_set) in enumerate(suites.items()):
        suite_names.append(suite_name)
        test_results = {}

        # Read once per suite so the clean-up after the iterations never
        # depends on the loop body having run.
        array_uri = test_set['array_uri']
        group_uri = test_set['group_uri']

        # Run each suite the given number of iterations
        iteration_count = 0
        for i in range(iterations):
            iteration_count += 1

            # Start every iteration from an empty suite directory.
            remove_dir(group_uri)
            pathlib.Path(group_uri).mkdir(parents=True, exist_ok=True)

            # Run each test in the suite
            for test in test_set['tests']:

                # Flush caches
                flush_caches()

                test_name = test["name"]
                logger.info("Starting test %s - %s iteration %d", suite_name,
                            test_name, i)

                # Base command, the test's own arguments, then the array uri
                cmd = [base_cmd] + test['args']
                cmd.extend(["-a", array_uri])

                # If store or register add ingestion files
                if test_name in ("store", "register"):
                    cmd.append("-f")
                    cmd.extend(ingestion_files)

                if test_name == "export":
                    # The export directory is only created here; it is not
                    # passed to the command.
                    export_path = os.path.join(group_uri, "export")
                    if not os.path.isdir(export_path):
                        os.mkdir(export_path)

                logger.info("Running: %s", list2cmdline(cmd))

                # Time and run test command.  NOTE: the exit status is not
                # inspected, so a command that exits non-zero is still timed.
                t0 = time.time()
                ret = None  # stays None when the command cannot be started
                try:
                    ret = call(cmd)
                    t1 = time.time()
                except Exception:
                    suite_errors = errors.setdefault(suite_name, {})
                    suite_errors.setdefault(test_name, []).append({
                        "iteration": i,
                        "ret_code": ret
                    })
                    logger.error(traceback.format_exc())
                    continue

                array_size = 0
                tiledb_file_sizes = None
                if test.get('check_array_size'):
                    array_size = get_folder_size(array_uri)
                    tiledb_file_sizes = get_tiledb_file_sizes(array_uri)

                # Save results
                samples = test_results.setdefault(test_name, {
                    "time": [],
                    "size": [],
                    "file_sizes": {}
                })
                samples["time"].append(t1 - t0)
                samples["size"].append(array_size)
                if tiledb_file_sizes is not None:
                    for file_name, size in tiledb_file_sizes.items():
                        samples["file_sizes"].setdefault(file_name,
                                                         []).append(size)

        # If there was a store test we should save results for printing
        # table at the end
        if 'store' in test_results:
            store = test_results["store"]
            ingestion_time_avg = numpy.average(store["time"])
            ingestion_time_std = numpy.std(store["time"])
            size_avg = numpy.average(store["size"]) / mebibyte
            export_time_avg = 'N/A'
            export_time_std = 'N/A'

            if 'export' in test_results:
                export_times = test_results["export"]["time"]
                export_time_avg = numpy.average(export_times)
                export_time_std = numpy.std(export_times)

            results.append([
                suite_name, iteration_count, ingestion_time_avg,
                ingestion_time_std, size_avg, ingestion_size,
                export_time_avg, export_time_std
            ])

            # One column per file name, one slot per suite (None when the
            # suite produced no size for that file).
            for file_name, file_sizes in store["file_sizes"].items():
                if file_name not in attribute_results:
                    attribute_results[file_name] = [None] * len(suites)
                attribute_results[file_name][suite_index] = (
                    numpy.average(file_sizes) / mebibyte)

        # Remove directory to save space again
        remove_dir(group_uri)

    header = [
        'Test', 'Iterations', 'Ingestion Time (seconds)',
        'Ingestion Time (seconds) STDDEV', 'Array Size (MB)',
        'Ingestion Size (MB)', 'Export Time (seconds)',
        'Export Time STDDEV (seconds)'
    ]
    t = PrettyTable(header)
    for result in results:
        t.add_row(result)

    logger.info(to_csv(header, results))

    print("")
    print(t)

    # Second table: average size (MB) of every array file, per suite.
    t = PrettyTable()
    t.add_column("Test", suite_names)
    for file_name, sizes in attribute_results.items():
        t.add_column(file_name, sizes)

    print("")
    print(t)

    logger.info(to_csv(t.field_names, t._get_rows(t._get_options({}))))

    logger.info("Total time taken to run benchmark was: %s",
                date.compress(time.time() - benchmarking_start))

    if errors:
        logger.error("Errors detected in run, dumping details:")
        logger.error(errors)