Example #1
0
def objective(p):
    """Cost function for the 2-parameter (r, t) optimization.

    Rounds both parameters to two decimal places so that nearby
    candidates share a cache entry, memoises results in ``cost_cache``,
    and penalises degenerate solutions that produce a single knee.

    Args:
        p: sequence with at least two floats, interpreted as (r, t).

    Returns:
        float: the accuracy cost (``inf`` for single-knee solutions).
    """
    # Round input parameters to 2 decimals (cache key granularity).
    r = round(p[0] * 100.0) / 100.0
    t = round(p[1] * 100.0) / 100.0

    # EAFP cache lookup: one dict access instead of `in` + `[]`.
    try:
        return cost_cache[(r, t)]
    except KeyError:
        pass

    _, _, knees = compute_knee_points(r, t)

    # Check the performance on the reduced space for this r.
    # NOTE(review): assumes points_cache[r] was populated by
    # compute_knee_points — confirm against caller.
    points_reduced, _ = points_cache[r]

    if len(knees) == 1:
        # Penalize solutions with a single knee.
        cost = float('inf')
    elif args.a is Accuracy.knee:
        _, _, _, _, cost = accuracy_knee(points_reduced, knees)
    else:
        _, _, _, _, cost = accuracy_trace(points_reduced, knees)

    cost_cache[(r, t)] = cost
    return cost
Example #2
0
def objective(p):
    """Cost function for the 3-parameter (dx, dy, dz) optimization.

    Rounds the parameters to two decimal places so that nearby
    candidates share a cache entry, memoises results in ``cost_cache``,
    and penalises degenerate solutions that produce a single knee.

    Args:
        p: sequence with at least three floats, interpreted as
            (dx, dy, dz).

    Returns:
        float: the accuracy cost (``inf`` for single-knee solutions).
    """
    # Round input parameters to 2 decimals (cache key granularity).
    dx = round(p[0] * 100.0) / 100.0
    dy = round(p[1] * 100.0) / 100.0
    dz = round(p[2] * 100.0) / 100.0

    # EAFP cache lookup: one dict access instead of `in` + `[]`.
    try:
        return cost_cache[(dx, dy, dz)]
    except KeyError:
        pass

    knees = compute_knee_points(dx, dy, dz)

    if len(knees) == 1:
        # Penalize solutions with a single knee.
        cost = float('inf')
    elif args.a is Accuracy.knee:
        _, _, _, _, cost = accuracy_knee(points, knees)
    else:
        _, _, _, _, cost = accuracy_trace(points, knees)

    cost_cache[(dx, dy, dz)] = cost
    return cost
Example #3
0
def main(args):
    """Run every knee-detection method on the input trace and report errors.

    Reads the CSV trace from ``args.i``, reduces it with RDP, runs each
    detection method, clusters/filters the candidate knees, and logs MSE
    and cost metrics for every method.

    Args:
        args: parsed CLI namespace; uses ``args.i`` (input CSV path),
            ``args.r`` (RDP threshold), ``args.c`` (clustering method),
            ``args.t`` and ``args.m`` (clustering parameters).
    """
    # Derive the expected-knees file name from the input file:
    # <dir>/<basename>_expected.csv
    dirname = os.path.dirname(args.i)
    filename = os.path.splitext(os.path.basename(args.i))[0]
    # BUG FIX: the f-string previously had no placeholder, so `filename`
    # was computed but never used and a literal file name was opened.
    expected_file = os.path.join(os.path.normpath(dirname),
                                 f'{filename}_expected.csv')

    # Ground-truth knees are optional; fall back to an empty list.
    if os.path.exists(expected_file):
        with open(expected_file, 'r') as f:
            reader = csv.reader(f, quoting=csv.QUOTE_NONNUMERIC)
            expected = list(reader)
    else:
        expected = []

    points = np.genfromtxt(args.i, delimiter=',')
    points_reduced, points_removed = rdp.rdp(points, args.r)

    space_saving = round((1.0 - (len(points_reduced) / len(points))) * 100.0,
                         2)
    logger.info('Number of data points after RDP: %s(%s %%)',
                len(points_reduced), space_saving)

    # `names` has two extra entries ('Tyler', 'RDP') beyond `methods`;
    # those two candidate sets are appended manually below.
    names = [
        'kneedle', 'kneedle(Rec)', 'l-method', 'dfdt', 'menger', 'curvature',
        'Tyler (RDP)', 'Tyler', 'RDP'
    ]
    methods = [
        kneedle.auto_knees, kneedle.multi_knee, lmethod.multi_knee,
        dfdt.multi_knee, menger.multi_knee, curvature.multi_knee, ps.knees
    ]
    knees = []
    knees_raw = []

    # Elbow methods: detect on the reduced trace, then map the indexes
    # back to the original (raw) trace.
    for m, n in zip(methods, names):
        tmp = m(points_reduced)
        knees.append(tmp)
        raw_indexes = rdp.mapping(tmp, points_reduced, points_removed)
        knees_raw.append(raw_indexes)

    # Tyler: runs directly on the full trace, no mapping needed.
    candidates = ps.knees(points)
    knees.append(candidates)
    knees_raw.append(candidates)

    # RDP baseline: every reduced point (except index 0) is a candidate.
    candidates = np.arange(1, len(points_reduced))
    knees.append(candidates)
    raw_indexes = rdp.mapping(candidates, points_reduced, points_removed)
    knees_raw.append(raw_indexes)

    cmethod = {
        Clustering.single: clustering.single_linkage,
        Clustering.complete: clustering.complete_linkage,
        Clustering.average: clustering.average_linkage
    }

    # Cluster and select points; Tyler's candidates skip the filtering
    # pipeline and get a flat ranking of 1.0.
    filtered_knees_raw = []
    rankings = []
    for k, n in zip(knees, names):
        # Remove the 0 index from the knees (never a valid knee).
        k = k[k != 0]
        if n == 'Tyler':
            filtered_knees_raw.append(k)
            ranks = np.full(len(k), 1.0)
            rankings.append(ranks)
        else:
            t_k = pp.filter_worst_knees(points_reduced, k)
            filtered_knees = pp.filter_clustring(points_reduced, t_k,
                                                 cmethod[args.c], args.t,
                                                 args.m)
            rankings.append(
                ranking.slope_ranking(points_reduced, filtered_knees))
            raw_indexes = rdp.mapping(filtered_knees, points_reduced,
                                      points_removed)
            filtered_knees_raw.append(raw_indexes)

    logger.info('Model          MSE(knees)   MSE(exp)   Cost(tr)   Cost(kn)')
    logger.info('----------------------------------------------------------')
    for k, n in zip(filtered_knees_raw, names):
        # MSE needs ground truth; report NaN when none is available.
        if len(expected) > 0:
            error_mse = evaluation.mse(points, k, expected,
                                       evaluation.Strategy.knees)
            error_mse_exp = evaluation.mse(points, k, expected,
                                           evaluation.Strategy.expected)
        else:
            error_mse = math.nan
            error_mse_exp = math.nan
        _, _, _, _, cost_trace = evaluation.accuracy_trace(points, k)
        _, _, _, _, cost_knee = evaluation.accuracy_knee(points, k)
        logger.info(
            f'{n:<13}| {error_mse:10.2E} {error_mse_exp:10.2E} {cost_trace:10.2E} {cost_knee:10.2E}'
        )

    plot_knees_ranking(points, filtered_knees_raw, names, rankings, expected)
Example #4
0
def main(args):
    """Sweep RDP and clustering thresholds; report metric correlations.

    For every (r, t) combination, detects knees on the RDP-reduced trace,
    evaluates RMSPE and cost metrics, and logs the pairwise Pearson
    correlations between the four metrics.

    Args:
        args: parsed CLI namespace; uses ``args.i`` (input CSV path).
    """
    # Derive the expected-knees file name from the input file:
    # <dir>/<basename>_expected.csv
    dirname = os.path.dirname(args.i)
    filename = os.path.splitext(os.path.basename(args.i))[0]
    # BUG FIX: the f-string previously had no placeholder, so `filename`
    # was computed but never used and a literal file name was opened.
    expected_file = os.path.join(os.path.normpath(dirname),
                                 f'{filename}_expected.csv')

    # Ground-truth knees are optional; fall back to an empty array.
    if os.path.exists(expected_file):
        with open(expected_file, 'r') as f:
            reader = csv.reader(f, quoting=csv.QUOTE_NONNUMERIC)
            expected = list(reader)
    else:
        expected = []
    expected = np.array(expected)
    points = np.genfromtxt(args.i, delimiter=',')

    # Parameter grid: RDP reduction thresholds x clustering thresholds.
    rs = [0.75, 0.80, 0.85, 0.90, 0.95]
    ts = [0.01, 0.02, 0.03, 0.04, 0.05]

    evaluations = []

    for r in rs:
        ## Knee detection code ##
        points_reduced, points_removed = rdp.rdp(points, r)
        knees = np.arange(1, len(points_reduced))
        t_k = pp.filter_worst_knees(points_reduced, knees)
        t_k = pp.filter_corner_knees(points_reduced, t_k)
        for t in ts:
            ## Clustering ##
            filtered_knees = pp.filter_clustring(points_reduced, t_k,
                                                 clustering.average_linkage, t,
                                                 ClusterRanking.left)
            final_knees = pp.add_points_even(points, points_reduced,
                                             filtered_knees, points_removed)

            ## Evaluation ##
            error_rmspe = evaluation.rmspe(points, final_knees, expected,
                                           evaluation.Strategy.knees)
            error_rmspe_exp = evaluation.rmspe(points, final_knees, expected,
                                               evaluation.Strategy.expected)

            _, _, _, _, cost_trace = evaluation.accuracy_trace(
                points, final_knees)
            _, _, _, _, cost_knee = evaluation.accuracy_knee(
                points, final_knees)

            evaluations.append(
                [error_rmspe, error_rmspe_exp, cost_trace, cost_knee])

    ## Compute the Correlation ##
    # Rows of `evaluations` are observations; corrcoef expects variables
    # in rows, hence the transpose.
    evaluations = np.array(evaluations)
    rho = np.corrcoef(evaluations.T)
    rmspe_rmspe_exp = rho[0, 1]
    rmspe_cost_trace = rho[0, 2]
    rmspe_cost_knee = rho[0, 3]

    rmspe_exp_cost_trace = rho[1, 2]
    rmspe_exp_cost_knee = rho[1, 3]

    cost_trace_cost_knee = rho[2, 3]

    logger.info(
        f'{rmspe_rmspe_exp}, {rmspe_cost_trace}, {rmspe_cost_knee}, {rmspe_exp_cost_trace}, {rmspe_exp_cost_knee}, {cost_trace_cost_knee}'
    )