Esempio n. 1
0
def test_comb_improve_mapping(lines, answers, working_cols, mapping, accuracy,
                              letters_to_change):
    """Try every 0/1 reassignment of some mapping keys to beat ``accuracy``.

    Each candidate is a copy of ``mapping`` in which the keys listed in
    ``letters_to_change`` are overwritten with ``0.0`` or ``1.0`` according
    to the bits of a counter.  Candidates are scored with
    ``fd.evaluate_mapping_accuracy`` and the first one that is strictly
    better than ``accuracy`` is returned.

    Args:
        lines: Forwarded unchanged to ``fd.evaluate_mapping_accuracy``.
        answers: Forwarded unchanged to ``fd.evaluate_mapping_accuracy``.
        working_cols: Forwarded unchanged to ``fd.evaluate_mapping_accuracy``.
        mapping: Dict-like object with a ``copy()`` method; never mutated.
        accuracy: Score that a candidate has to exceed.
        letters_to_change: Sequence of keys of ``mapping`` to reassign.

    Returns:
        ``(new_accuracy, new_mapping)`` for the first improving candidate,
        or ``(None, None)`` when no candidate improves on ``accuracy``.
    """
    if not letters_to_change:
        # Nothing to reassign, so there is no candidate to evaluate.
        return None, None

    bits = len(letters_to_change)
    format_pattern = '{0:0' + str(bits) + 'b}'
    # There are 2 ** bits distinct assignments; iterating only bits + 1
    # times would silently skip most of them (e.g. '11' for two letters).
    for i in range(2 ** bits):
        bin_string = format_pattern.format(i)
        mapping_copy = mapping.copy()
        for letter, bit in zip(letters_to_change, bin_string):
            mapping_copy[letter] = float(bit)
        new_acc = fd.evaluate_mapping_accuracy(lines, answers, working_cols,
                                               mapping_copy)
        if new_acc > accuracy:
            return new_acc, mapping_copy
    return None, None
Esempio n. 2
0
def main():
    """Compare the minimal-distance column choice with random column choices.

    For every K in ``range(exp_start, exp_end, exp_step)`` this collects the
    accuracies returned by ``run_experiments``, evaluates the mapping built
    on the K columns that come first when sorting by the values from
    ``su.calculate_total_var_dist``, and saves a histogram of the random
    accuracies with the test accuracy and a 'Random guess' baseline marked
    to ``Experiments/Experiment5/k{K}.png``.

    Does nothing when ``fd.prepare_rectangle_data`` yields no data.
    """
    lines_rectangle, correct_responces, lengths_list = fd.prepare_rectangle_data(
        sys.argv)
    if lines_rectangle is None:
        return

    line_len = lengths_list[1]

    # minimal distance approach: the ranking does not depend on K, so it is
    # computed once instead of once per loop iteration.
    total_variation_dist = su.calculate_total_var_dist(lines_rectangle,
                                                       line_len)
    sorted_cols_indices = np.argsort(total_variation_dist)

    # The baseline is also independent of K.  A separate array is used so
    # that ``correct_responces`` keeps its original type for every call
    # below (rebinding it inside the loop changed the type after the first
    # iteration).
    # NOTE(review): the mask indexing assumes boolean answers - confirm.
    responses = np.array(correct_responces)
    p = len(responses[responses]) / float(len(responses))
    random_acc = p**2 + (1.0 - p)**2

    cur_figure = 1
    for vec_size in range(exp_start, exp_end, exp_step):
        accuracy_list = run_experiments(vec_size, experiment_number, line_len,
                                        lines_rectangle, correct_responces)

        test_cols = sorted_cols_indices[:vec_size]
        test_mapping = fd.generate_mapping(lines_rectangle, correct_responces,
                                           test_cols)
        test_accuracy = fd.evaluate_mapping_accuracy(
            lines_rectangle, correct_responces, test_cols, test_mapping)

        percentile = int(find_percentile(accuracy_list, test_accuracy))

        # plotting the histogram
        fig = plt.figure(cur_figure, figsize=(10, 6))
        cur_figure += 1
        axis = plt.gca()
        axis.set_xlim([-0.1, 1.1])
        fig.suptitle(
            'C = {3}, K = {0}, Experiments: {4}\nTest accuracy: {1}, Percentile: {2}'
            .format(vec_size, test_accuracy, percentile, line_len,
                    experiment_number))

        plt.hist(accuracy_list, bins=bins_num)
        test_line = plt.axvline(test_accuracy, c='r', label='Test accuracy')
        rnd_line = plt.axvline(random_acc, c='g', label='Random guess')
        plt.xlabel('Accuracy')
        plt.ylabel('Appearances')
        plt.legend(handles=[test_line, rnd_line])
        plt.ioff()
        plt.savefig("Experiments/Experiment5/k{0}.png".format(vec_size))
        # Release the figure once it is on disk so that figures do not pile
        # up in memory over a long sweep.
        plt.close(fig)
Esempio n. 3
0
def run_experiments(vec_size, experiment_num, line_length, lines, answers):
    """Measure mapping accuracy over repeated random column selections.

    Each run picks columns with ``select_random_spaced_cols``, builds a
    mapping with ``fd.generate_mapping`` and scores it with
    ``fd.evaluate_mapping_accuracy``.  Start/finish timestamps and a
    progress line every 1000 runs are printed to stdout.

    Args:
        vec_size: Passed to ``select_random_spaced_cols``; shown as ``K``
            in the log lines.
        experiment_num: Number of runs to perform.
        line_length: Passed to ``select_random_spaced_cols``.
        lines: Forwarded to the ``fd`` helpers.
        answers: Forwarded to the ``fd`` helpers.

    Returns:
        List with one accuracy value per run, in run order.
    """
    # print(...) with a single argument produces the same output under the
    # Python 2 print statement and the Python 3 print function.
    print("Experiment K={0} started: {1}".format(vec_size,
                                                 datetime.datetime.now()))
    accuracy_list = []
    for cur_exp in range(experiment_num):
        if cur_exp != 0 and cur_exp % 1000 == 0:
            print("Completed: ({0}, {1})".format(vec_size, cur_exp))
        cols = select_random_spaced_cols(vec_size, line_length)
        mapping = fd.generate_mapping(lines, answers, cols)
        accuracy = fd.evaluate_mapping_accuracy(lines, answers, cols, mapping)
        accuracy_list.append(accuracy)

    print("Experiment K={0} finished: {1}".format(vec_size,
                                                  datetime.datetime.now()))
    return accuracy_list
Esempio n. 4
0
def test_comb_improve_cols(lines, answers, working_cols, mapping, accuracy,
                           change_comb, unused_cols):
    """Try swapping selected working columns for unused ones.

    The positions listed in ``change_comb`` are overwritten, in a copy of
    ``working_cols``, with entries of ``unused_cols`` chosen by an index
    combination.  Combinations start at ``[0, 1, ..., len(change_comb) - 1]``
    and are advanced with ``su.get_next_combination`` until it returns
    ``None``.  The first candidate scoring strictly above ``accuracy`` wins.

    Args:
        lines: Forwarded to ``fd.evaluate_mapping_accuracy``.
        answers: Forwarded to ``fd.evaluate_mapping_accuracy``.
        working_cols: Current column selection; sliced before modification.
        mapping: Forwarded to ``fd.evaluate_mapping_accuracy``.
        accuracy: Score that a candidate has to exceed.
        change_comb: Positions inside ``working_cols`` to replace.
        unused_cols: Pool of replacement columns.

    Returns:
        ``(new_accuracy, new_working_cols)`` for the first improving
        candidate, or ``(None, None)`` when the combinations run out.
    """
    unused_len = len(unused_cols)
    change_len = len(change_comb)
    # A real list, so the index vector has the same type on Python 2 and 3
    # (a Python 3 ``range`` object is immutable).
    unused_comb = list(range(change_len))
    while unused_comb is not None:
        # NOTE(review): slicing copies a list but only creates a view of a
        # NumPy array - confirm that callers pass a list here.
        working_cols_copy = working_cols[:]
        for position, unused_index in zip(change_comb, unused_comb):
            working_cols_copy[position] = unused_cols[unused_index]
        new_acc = fd.evaluate_mapping_accuracy(lines, answers,
                                               working_cols_copy, mapping)
        if new_acc > accuracy:
            return new_acc, working_cols_copy
        unused_comb = su.get_next_combination(unused_comb, change_len,
                                              unused_len)
    return None, None
Esempio n. 5
0
def _report_improvement_progress(step_counter):
    """Print a progress line every ``improvement_print_freq`` improvements."""
    if step_counter % improvement_print_freq == 0:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Times improved: {0}/{1}, {2}'.format(
            step_counter, max_improvement_steps, datetime.datetime.now()))


def improve_solution(lines, answers, working_cols, mapping, line_len):
    """Alternately refine the mapping and the column selection.

    Every loop iteration first calls ``try_improve_mapping`` and then
    ``try_improve_columns``.  A successful attempt replaces the current
    mapping or columns, records the new accuracy and resets the failure
    counter; a failed attempt increments it.  The loop ends when, at the
    start of an iteration, at least two attempts in a row have failed or
    ``max_improvement_steps`` improvements have been made.

    Args:
        lines: Forwarded to the evaluation and improvement helpers.
        answers: Forwarded to the evaluation and improvement helpers.
        working_cols: Initial column selection.
        mapping: Initial mapping.
        line_len: Forwarded to ``try_improve_columns``.

    Returns:
        List of accuracies: the initial accuracy followed by one entry per
        successful improvement.  The improved mapping and columns are only
        rebound locally and are not returned.
    """
    accuracy = fd.evaluate_mapping_accuracy(lines, answers, working_cols,
                                            mapping)
    accuracy_list = [accuracy]
    not_improved_counter = 0
    step_counter = 0
    while not_improved_counter < 2 and step_counter < max_improvement_steps:
        improved, new_acc, new_mapping = try_improve_mapping(
            lines, answers, working_cols, mapping, accuracy)
        if improved:
            not_improved_counter = 0
            accuracy = new_acc
            mapping = new_mapping
            accuracy_list.append(accuracy)
            step_counter += 1
            _report_improvement_progress(step_counter)
        else:
            not_improved_counter += 1

        # The column attempt always runs, using the mapping chosen above.
        improved, new_acc, new_cols = try_improve_columns(
            lines, answers, working_cols, mapping, line_len, accuracy)
        if improved:
            not_improved_counter = 0
            accuracy = new_acc
            working_cols = new_cols
            accuracy_list.append(accuracy)
            step_counter += 1
            _report_improvement_progress(step_counter)
        else:
            not_improved_counter += 1

    return accuracy_list
Esempio n. 6
0
def main():
    """Compare the minimal-distance column choice with random column choices.

    For every K in ``range(exp_start, exp_end, exp_step)`` this collects the
    accuracies returned by ``run_experiments``, evaluates the mapping built
    on the K columns that come first when sorting by the values from
    ``su.calculate_total_var_dist``, and saves a histogram of the random
    accuracies with the test accuracy marked to
    ``Experiments/Experiment1/k{K}.png``.

    Does nothing when ``fd.prepare_rectangle_data`` yields no data.
    """
    lines_rectangle, correct_responces, lengths_list = fd.prepare_rectangle_data(
        sys.argv)
    if lines_rectangle is None:
        return

    line_len = lengths_list[1]

    # minimal distance approach: the ranking does not depend on K, so it is
    # computed once instead of once per loop iteration.
    total_variation_dist = su.calculate_total_var_dist(lines_rectangle,
                                                       line_len)
    sorted_cols_indices = np.argsort(total_variation_dist)

    cur_figure = 1
    for vec_size in range(exp_start, exp_end, exp_step):
        accuracy_list = run_experiments(vec_size, experiment_number, line_len,
                                        lines_rectangle, correct_responces)

        test_cols = sorted_cols_indices[:vec_size]
        test_mapping = fd.generate_mapping(lines_rectangle, correct_responces,
                                           test_cols)
        test_accuracy = fd.evaluate_mapping_accuracy(
            lines_rectangle, correct_responces, test_cols, test_mapping)

        percentile = int(find_percentile(accuracy_list, test_accuracy))

        # plotting the histogram
        fig = plt.figure(cur_figure, figsize=(6, 6))
        cur_figure += 1
        fig.suptitle(
            'C = {3}, K = {0}, Experiments: {4}\nTest accuracy: {1}, Percentile: {2}'
            .format(vec_size, test_accuracy, percentile, line_len,
                    experiment_number))

        plt.hist(accuracy_list, bins=bins_num)
        plt.axvline(test_accuracy, c='r', label='Test accuracy')
        plt.xlabel('Accuracy')
        plt.ylabel('Appearances')
        plt.ioff()
        plt.savefig("Experiments/Experiment1/k{0}.png".format(vec_size))
        # Release the figure once it is on disk so that figures do not pile
        # up in memory over a long sweep.
        plt.close(fig)
Esempio n. 7
0
def main():
    """Plot accuracy against the rank of the first column in a sliding window.

    Columns are ordered by ``np.argsort`` of the values returned by
    ``su.calculate_total_var_dist``.  For every K in
    ``range(exp_start, exp_end, exp_step)`` a window of K consecutive ranked
    columns is slid over that order; a mapping is generated and scored for
    each window position.  One scatter plot per K, with a fitted linear
    regression line, is saved to ``Experiments/Experiment2/k{K}.png``.  All
    plots share the same axis limits, derived from the global accuracy range
    and the longest series.

    Does nothing when ``fd.prepare_rectangle_data`` yields no data.
    """
    lines_rectangle, correct_responces, lengths_list = fd.prepare_rectangle_data(
        sys.argv)
    if lines_rectangle is None:
        return

    line_len = lengths_list[1]
    total_variation_dist = su.calculate_total_var_dist(lines_rectangle,
                                                       line_len)
    sorted_cols_indices = np.argsort(total_variation_dist)

    all_lists = {}
    min_acc = 1.0
    max_acc = 0.0
    max_size = 0
    for vec_size in range(exp_start, exp_end, exp_step):
        accuracy_list = []
        for i in range(vec_size, line_len + 1):
            working_cols = sorted_cols_indices[i - vec_size:i]
            test_mapping = fd.generate_mapping(lines_rectangle,
                                               correct_responces, working_cols)
            test_accuracy = fd.evaluate_mapping_accuracy(
                lines_rectangle, correct_responces, working_cols, test_mapping)
            accuracy_list.append(test_accuracy)

            # Track the global range so that every figure shares one y axis.
            min_acc = min(min_acc, test_accuracy)
            max_acc = max(max_acc, test_accuracy)

        max_size = max(max_size, len(accuracy_list))
        all_lists[vec_size] = accuracy_list

    # Pad the shared axis limits so that points do not sit on the border.
    delta = (max_acc - min_acc) * 0.1
    min_acc -= delta
    max_acc += delta
    max_size += 5

    cur_figure = 1
    for vec_size in range(exp_start, exp_end, exp_step):
        accuracy_list = all_lists[vec_size]
        # plotting
        fig = plt.figure(cur_figure, figsize=(10, 6))
        cur_figure += 1
        axis = plt.gca()
        axis.set_ylim([min_acc, max_acc])
        axis.set_xlim([-5, max_size])
        fig.suptitle('C = {1}, K = {0}'.format(vec_size, line_len))
        x = np.arange(1, len(accuracy_list) + 1)
        y = np.array(accuracy_list)
        plt.plot(x, y, 'bo')

        # plotting linear regression; a series without points (K larger than
        # the line length) cannot be fitted, so the line is skipped then.
        if accuracy_list:
            x_pred = x.reshape(-1, 1)
            model = LinearRegression(n_jobs=8)
            model.fit(x_pred, y)
            y_pred = model.predict(x_pred)
            line = plt.plot(x, y_pred)
            plt.setp(line, 'color', 'r', 'linewidth', 2.0)

        plt.ylabel('Accuracy')
        plt.xlabel('First column rank')
        plt.savefig("Experiments/Experiment2/k{0}.png".format(vec_size))
        # Release the figure once it is on disk so that figures do not pile
        # up in memory over a long sweep.
        plt.close(fig)