def apply_moves(ranks, foundations, data_dict, eligible_moves):
    """Apply candidate layer moves in place and return how many were applied.

    Args:
        ranks: ordered list of category keys; mutated in place.
        foundations: list where ``foundations[k]`` is the foundation vector
            under ``ranks[k]``; the entries disturbed by a move are recomputed
            in place with ``add_vectors``.
        data_dict: maps each category key to its data vector.
        eligible_moves: maps ``(index, distance)`` pairs to a sortable value.
            A pair asks for the layer at ``ranks[index]`` to be moved
            ``distance`` positions towards the front.

    Returns:
        The number of moves that were actually applied.
    """
    layer_count = len(ranks)
    # Moves are visited in ascending order of their associated value.
    ordered_moves = [
        move
        for move, _ in sorted(eligible_moves.items(), key=lambda item: item[1])
    ]
    applied = 0
    protected = set()  # positions already claimed by an earlier move

    for move in ordered_moves:
        source, distance = move[0], move[1]

        if source < distance:
            # The move would run past the front of the list, so it wraps
            # around (modulo the layer count).  It is only taken when nothing
            # else has been applied yet, and it ends processing.
            if applied:
                continue
            layer = ranks.pop(source)
            target = (source - distance) % layer_count
            ranks.insert(target, layer)
            for i in range(source + 1, target + 1):
                foundations[i] = add_vectors(foundations[i - 1],
                                             data_dict[ranks[i - 1]])
            applied += 1
            break

        target = source - distance
        # Skip the move when any position in target..source is protected.
        if protected.isdisjoint(range(target, source + 1)):
            layer = ranks.pop(source)
            ranks.insert(target, layer)
            protected.update(
                range(max(0, target), min(layer_count - 1, source + 1)))
            for i in range(target + 1, source + 1):
                foundations[i] = add_vectors(foundations[i - 1],
                                             data_dict[ranks[i - 1]])
            applied += 1

    return applied
def find_best_position(i, data_dict, ranks, weights, foundation):
    """Return the index at which layer ``ranks[i]`` gives the lowest total cost.

    The other layers keep their relative order.  For each of them the
    ``layer_score`` is computed twice: on a foundation that excludes the moving
    layer and on one that includes it.  The moving layer's own score is
    computed for every possible slot.  The total cost of every slot is then
    derived incrementally, starting from slot 0.

    Args:
        i: index into ``ranks`` of the layer to reposition.
        data_dict: maps each category key to its data vector.
        ranks: current ordering of category keys.
        weights: weight mapping forwarded to ``layer_score``.
        foundation: foundation vector under the whole stack.

    Returns:
        The slot index (0-based, among ``len(ranks)`` slots) with the lowest
        cost; ties keep the earliest slot.
    """
    moving_layer = data_dict[ranks[i]]
    base_without = foundation
    base_with = add_vectors(foundation, moving_layer)

    other_under = []   # cost of each other layer when the moving layer is above it
    other_over = []    # cost of each other layer when the moving layer is beneath it
    own_cost = []      # cost of the moving layer in each slot

    for j, category in enumerate(ranks):
        if j == i:
            continue
        other_layer = data_dict[category]
        other_under.append(layer_score(other_layer, base_without, weights))
        other_over.append(layer_score(other_layer, base_with, weights))
        own_cost.append(layer_score(moving_layer, base_without, weights))
        base_without = add_vectors(base_without, other_layer)
        base_with = add_vectors(base_with, other_layer)
    # Final slot: the moving layer sits on top of every other layer.
    own_cost.append(layer_score(moving_layer, base_without, weights))

    # Slot 0: every other layer is stacked on top of the moving layer.
    running_cost = sum(other_over) + own_cost[0]
    best_slot, lowest_cost = 0, running_cost

    for slot in range(1, len(ranks)):
        previous = slot - 1
        # One more layer drops below the moving layer ...
        running_cost += other_under[previous] - other_over[previous]
        # ... and the moving layer itself shifts up by one slot.
        running_cost += own_cost[slot] - own_cost[previous]
        if running_cost < lowest_cost:
            best_slot, lowest_cost = slot, running_cost

    return best_slot
def layer_score(layer, foundation, weights):
    """Return the score of ``layer`` when stacked on ``foundation``.

    Three reference lines are evaluated: the bottom of the layer
    (``foundation``), its middle (``foundation`` plus half the layer) and its
    top (``foundation`` plus the layer).  For each enabled metric weight
    (``'fda'``, ``'sda'``, ``'fdr'``) the matching line function
    (``wiggle_line``, ``bump_line``, ``break_line``) is called on every line
    whose line weight is non-zero, the results are combined using the line
    weights, and the combination is passed through ``add_weight`` and
    multiplied into the score.  Those helpers are defined elsewhere; what each
    one measures is not visible from this function.

    Args:
        layer: data vector of the layer.
        foundation: vector the layer is stacked on.
        weights: mapping with the metric weights ``'fda'``, ``'sda'``,
            ``'fdr'`` and the line weights ``'bottom_line'``,
            ``'middle_line'``, ``'top_line'``; it is also forwarded unchanged
            to the line functions.

    Returns:
        The product of the weighted metric values, or 1 when every metric
        weight is falsy.
    """
    reference_lines = (
        ('bottom_line', foundation),
        ('middle_line', add_vectors(foundation, [x / 2 for x in layer])),
        ('top_line', add_vectors(foundation, layer)),
    )
    metrics = (
        ('fda', wiggle_line),
        ('sda', bump_line),
        ('fdr', break_line),
    )

    result = 1
    for metric_key, measure in metrics:
        if not weights[metric_key]:
            continue  # metric disabled: leaves the product untouched
        combined = 0
        for line_key, line in reference_lines:
            if weights[line_key]:
                combined += measure(layer, line, weights) * weights[line_key]
        result *= add_weight(combined, weights[metric_key])
    return result
def score(data_dict, ranks, weights, foundation):
    """Return the total score of stacking the layers in ``ranks`` order.

    Each layer is scored with ``layer_score`` against the running foundation,
    which then grows by that layer's data vector.

    Args:
        data_dict: maps each category key to its data vector.
        ranks: category keys in stacking order, bottom first.
        weights: weight mapping forwarded to ``layer_score``.
        foundation: vector under the first layer.  A falsy value (``None`` or
            an empty list) is replaced by a zero vector as long as the first
            layer's data.

    Returns:
        The sum of the per-layer scores; 0 when ``ranks`` is empty.
    """
    if not ranks:
        # Nothing to stack.  Returning early also avoids indexing ranks[0]
        # below when no foundation was supplied.
        return 0
    if not foundation:
        foundation = [0] * len(data_dict[ranks[0]])
    chart_score = 0
    for rank in ranks:
        layer = data_dict[rank]
        chart_score += layer_score(layer, foundation, weights)
        foundation = add_vectors(foundation, layer)
    return chart_score
def run():
    """Read a CSV of layer series, compute a stacking order and write it out.

    Command-line arguments:
        sys.argv[1]: path of the input CSV.  The first row holds the category
            names; every later row holds one numeric sample per category.
        sys.argv[2]: number of leading columns to ignore in every row.
        sys.argv[3]: path of the file the computed ranks are written to.

    The score of the resulting order is printed to standard output.

    Raises:
        ZeroDivisionError: if the three line weights in ``settings`` sum to 0.
    """
    ignore_cols = int(sys.argv[2])
    data_dict = {}
    categories = []
    # newline='' is what the csv module expects for files handed to a reader.
    with open(sys.argv[1], newline='') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        for line_count, row in enumerate(csv_reader):
            row_data = row[ignore_cols:]
            if line_count == 0:
                # Header row: column names become the category keys.
                categories = row_data
            elif line_count == 1:
                # First data row creates the per-category series.
                for i, cell in enumerate(row_data):
                    data_dict[categories[i]] = [float(cell)]
            else:
                for i, cell in enumerate(row_data):
                    data_dict[categories[i]].append(float(cell))

    # Element-wise sum of all series.
    silhouette = None
    for value in data_dict.values():
        if not silhouette:
            silhouette = value
        else:
            silhouette = add_vectors(silhouette, value)

    weight_exponent = settings.significance
    total_sum = sum(
        sum(x ** weight_exponent for x in layer)
        for layer in data_dict.values()
    )
    weights = {
        'min_improvement': settings.min_improvement,
        'fda': settings.flatness,
        'sda': settings.straightness,
        'fdr': settings.continuity,
        'bottom_line': settings.bottom_line,
        'middle_line': settings.middle_line,
        'top_line': settings.top_line,
        'weight_exponent': weight_exponent,
        'total_sum': total_sum,
        'silhouette': silhouette,
    }
    # Normalise the three line weights so that they sum to 1.
    line_weight_sum = (weights['bottom_line'] + weights['middle_line']
                       + weights['top_line'])
    weights['bottom_line'] /= line_weight_sum
    weights['middle_line'] /= line_weight_sum
    weights['top_line'] /= line_weight_sum

    ranks = calculate_ranks(data_dict, weights)
    foundation = [0] * len(data_dict[ranks[0]])
    # Context manager guarantees the output is flushed and closed.
    with open(sys.argv[3], 'w') as destination_file:
        write_ranks(ranks, destination_file)
    print('Score: %f' % score(data_dict, ranks, weights, foundation))
def two_opt(data_dict, weights, ranks, foundation):
    """Improve ``ranks`` in place by repeatedly swapping adjacent layers.

    Every pass walks the stack from bottom to top and swaps a layer with the
    one directly beneath it whenever that lowers the combined ``layer_score``
    of the two.  Passes are repeated for as long as the total ``score`` of the
    stack keeps decreasing.

    Args:
        data_dict: maps each category key to its data vector.
        weights: weight mapping forwarded to ``layer_score`` and ``score``.
        ranks: category keys in stacking order; reordered in place.
        foundation: vector under the first layer.

    Returns:
        The same ``ranks`` list, after reordering.
    """
    # Keep the caller's baseline for scoring.  The loop below needs its own
    # running vector; reusing ``foundation`` for it would make every later
    # score() call measure the stack on top of the sum of all layers.
    base_foundation = foundation

    foundations = []  # foundations[k] is the vector underneath ranks[k]
    running = foundation
    for rank in ranks:
        foundations.append(running)
        running = add_vectors(running, data_dict[rank])

    swaps = 0
    # Infinity guarantees a first pass whatever the sign of the score.
    best_score = float('inf')
    total_score = score(data_dict, ranks, weights, base_foundation)
    while total_score < best_score:
        best_score = total_score
        for i in range(1, len(ranks)):
            lower = data_dict[ranks[i - 1]]
            upper = data_dict[ranks[i]]
            current_score = (
                layer_score(upper, foundations[i], weights)
                + layer_score(lower, foundations[i - 1], weights))
            # Foundation the lower layer would get if the pair were swapped.
            swap_foundation = add_vectors(foundations[i - 1], upper)
            swap_score = (
                layer_score(lower, swap_foundation, weights)
                + layer_score(upper, foundations[i - 1], weights))
            if swap_score < current_score:
                # Move layer i down by one; apply_moves also refreshes
                # foundations[i].
                apply_moves(ranks, foundations, data_dict,
                            {(i, 1): current_score - swap_score})
                swaps += 1
                print("Swappity swap # %d" % swaps)
        total_score = score(data_dict, ranks, weights, base_foundation)
    return ranks
def best_first(data_dict, weights, foundation):
    """Build a stacking order greedily, cheapest next layer first.

    Starting from ``foundation``, the remaining category with the lowest
    ``layer_score`` on the current foundation is appended, and the foundation
    grows by that category's data vector.  Ties go to the category that comes
    first in ``data_dict``.

    Args:
        data_dict: maps each category key to its data vector.
        weights: weight mapping forwarded to ``layer_score``.
        foundation: vector under the first layer.

    Returns:
        List of all category keys in the chosen stacking order.
    """
    remaining_categories = list(data_dict)
    ranks = []
    while remaining_categories:
        # min() keeps the first of several equal candidates.  It also avoids
        # using the truthiness of a category key to detect "nothing chosen
        # yet", which misfires for falsy keys such as '' or 0.
        best_category = min(
            remaining_categories,
            key=lambda category: layer_score(
                data_dict[category], foundation, weights),
        )
        ranks.append(best_category)
        remaining_categories.remove(best_category)
        foundation = add_vectors(foundation, data_dict[best_category])
        # Report the number of layers placed so far, including this one.
        print('Added %d layers' % len(ranks))
    return ranks
def run():
    """Read a CSV of layer series, compute a stacking order and write it out.

    Command-line arguments:
        sys.argv[1]: path of the input CSV.  The first line holds the category
            names; every later line holds one numeric sample per category.
            Reading stops at the first empty line or at end of file.
        sys.argv[2]: number of leading columns to skip in every row.
        sys.argv[3]: path of the file the computed ranks are written to.

    The score of the resulting order is printed to standard output.

    Raises:
        ZeroDivisionError: if the three line weights in ``settings`` sum to 0.
    """
    num_start_cols = int(sys.argv[2])
    with open(sys.argv[1]) as csv_file:
        # Each line is parsed on its own; an exhausted file yields an empty
        # row, which ends the loop below.
        row = list(csv.reader([csv_file.readline()]))[0]
        categories = row[num_start_cols:]
        data_dict = {category: [] for category in categories}
        while row:
            row = list(csv.reader([csv_file.readline()]))[0]
            for i, cell in enumerate(row[num_start_cols:]):
                data_dict[categories[i]].append(float(cell))

    # Element-wise sum of all series.
    silhouette = None
    for value in data_dict.values():
        if not silhouette:
            silhouette = value
        else:
            silhouette = add_vectors(silhouette, value)

    weight_exponent = settings.significance
    total_sum = sum(
        sum(x ** weight_exponent for x in layer)
        for layer in data_dict.values()
    )
    weights = {
        'min_improvement': settings.min_improvement,
        'fda': settings.flatness,
        'sda': settings.straightness,
        'fdr': settings.continuity,
        'bottom_line': settings.bottom_line,
        'middle_line': settings.middle_line,
        'top_line': settings.top_line,
        'weight_exponent': weight_exponent,
        'total_sum': total_sum,
        'silhouette': silhouette,
    }
    # Normalise the three line weights so that they sum to 1.
    line_weight_sum = (weights['bottom_line'] + weights['middle_line']
                       + weights['top_line'])
    weights['bottom_line'] /= line_weight_sum
    weights['middle_line'] /= line_weight_sum
    weights['top_line'] /= line_weight_sum

    silhouette_max = max(silhouette)
    ranks, foundation = calculate_ranks(data_dict, weights)
    # Shift the foundation so that its lowest point sits at 5% of the
    # silhouette's maximum.
    foundation_min = min(foundation)
    foundation = [
        x - foundation_min + silhouette_max * 0.05 for x in foundation
    ]
    # Context manager guarantees the output is flushed and closed.
    with open(sys.argv[3], 'w') as destination_file:
        write_ranks(ranks, destination_file)
    print('Score: %f' % score(data_dict, ranks, weights, foundation))