Example #1
0
def lsi():
    """Run the LSI pipeline, taking the dimension count from ``sys.argv``.

    With no extra command line argument the dimension count ``k`` is 100;
    with exactly one extra argument it is that argument parsed by ``int``.
    Any other argument count is reported through ``shared.error`` with code
    "11".

    The pipeline then runs four steps in order, each reporting its own error
    code through ``shared.error`` when it raises:

    * ``shared.build_texts("lsi")``                       -> code "0"
    * ``get_lsi(texts, k)``                                -> code "4"
    * ``shared.write_to_file(..., LSIFOLDER, "lsi.csv")``  -> code "8"
    * ``shared.insert_to_db("lsi", k, "Finished")``        -> code "10"

    Returns:
        1 when every step succeeds, -1 as soon as any step fails.
    """
    if len(sys.argv) == 1:
        k = 100  # Default dimensions is 100
    elif len(sys.argv) == 2:
        # NOTE(review): a non-integer argument raises ValueError here and is
        # not routed through shared.error -- confirm whether that is intended.
        k = int(sys.argv[1])
    else:
        shared.error("11", ["lsi", ""])
        return -1

    # Each step catches Exception rather than using a bare ``except`` so that
    # KeyboardInterrupt and SystemExit propagate instead of being reported as
    # a pipeline error.
    try:
        texts, documents = shared.build_texts("lsi")
    except Exception:
        shared.error("0", ["lsi", k])
        return -1
    try:
        ck = get_lsi(texts, k)
    except Exception:
        shared.error("4", ["lsi", k])
        return -1
    try:
        shared.write_to_file(ck, documents, LSIFOLDER, "lsi.csv")
    except Exception:
        shared.error("8", ["lsi", k])
        return -1
    try:
        shared.insert_to_db("lsi", k, "Finished")
    except Exception:
        shared.error("10", ["lsi", k])
        return -1
    return 1
Example #2
0
    distances = []
    min_distance = float('inf')
    for idx, point1 in enumerate(points):
        for point2 in points[idx + 1:]:
            if point1 != point2:
                distance = Point.distance(point1, point2)
                min_distance = distance if distance < min_distance else min_distance
                distance_tuple = (distance, point1, point2)
                distances.append(distance_tuple)
    return format_point_list(distances, min_distance)


def format_point_list(points, min_distance):
    """Render the minimum distance and every pair found at that distance.

    ``points`` holds ``(distance, point1, point2)`` tuples. Only the entries
    whose distance equals ``min_distance`` are kept; they are ordered by the
    first point's ``y`` and, for equal ``y``, by its ``x``.

    Returns a string whose first line is ``str(min_distance)`` and whose
    following lines are ``"x1 y1 x2 y2"``, one per kept pair.
    """
    # One sort on a composite key gives the same order as sorting by x and
    # then stably re-sorting by y.
    tied_pairs = sorted(
        (entry for entry in points if entry[0] == min_distance),
        key=lambda entry: (entry[1].y, entry[1].x))

    lines = [str(min_distance)]
    for entry in tied_pairs:
        first, second = entry[1], entry[2]
        lines.append("{0} {1} {2} {3}".format(first.x, first.y,
                                              second.x, second.y))
    return "\n".join(lines)


# Script entry point: nothing below runs when the file is imported as a module
if __name__ == "__main__":
    # The points file path is the first command line argument. Exit with a
    # readable message instead of an IndexError traceback when it is missing.
    if len(argv) < 2:
        raise SystemExit("ERROR: No input file supplied via command line "
                         "args.\nUSAGE: pass the path of the points file as "
                         "the first argument.")
    POINT_LIST = point_list_from_file(argv[1])
    result = closest_pair_brute(POINT_LIST)
    # The single-argument call form prints the same text whether ``print`` is
    # the statement or the function.
    print(result)
    write_to_file('output_bruteforce.txt', result)
Example #3
0
        p for p in y_sorted_points if left_bound <= p.x <= right_bound
    ]

    return closest_cross_pair(middle_set, closest_pair)


def closest_cross_pair(points, closest_pair):
    """Return the closest pair among ``points`` if it beats ``closest_pair``.

    ``closest_pair`` is a ``(distance, point1, point2)`` tuple; its distance
    is the threshold a candidate pair has to undercut. The inner scan stops
    at the first later point whose ``y`` exceeds the current point's ``y`` by
    more than the best distance so far, which assumes ``points`` is ordered
    by ascending ``y``.

    Returns the best ``(distance, point1, point2)`` tuple seen, which is the
    incoming ``closest_pair`` when nothing closer is found.
    """
    best = closest_pair
    for position, lower in enumerate(points):
        later_points = points[position + 1:]
        for upper in later_points:
            vertical_gap = upper.y - lower.y
            if vertical_gap > best[0]:
                # No point further along can be within the best distance.
                break
            separation = Point.distance(lower, upper)
            if separation < best[0]:
                best = (separation, lower, upper)
    return best


def enhanced_closest_pair_dnc_main(points):
    """Sort ``points`` by x and by y, then run ``enhanced_closest_pair_dnc``.

    Both orderings are new lists; ``points`` itself is left untouched.
    Returns whatever ``enhanced_closest_pair_dnc`` returns for the two lists.
    """
    by_x = sorted(points, key=lambda p: p.x)
    by_y = sorted(points, key=lambda p: p.y)
    return enhanced_closest_pair_dnc(by_x, by_y)


# Script entry point: nothing below runs when the file is imported as a module
if __name__ == "__main__":
    # The points file path is the first command line argument. Exit with a
    # readable message instead of an IndexError traceback when it is missing.
    if len(argv) < 2:
        raise SystemExit("ERROR: No input file supplied via command line "
                         "args.\nUSAGE: pass the path of the points file as "
                         "the first argument.")
    POINT_LIST = point_list_from_file(argv[1])
    result = enhanced_closest_pair_dnc_main(POINT_LIST)
    # The single-argument call form prints the same text whether ``print`` is
    # the statement or the function.
    print(format_output(result))
    write_to_file('output_enhanceddnc.txt', format_output(result))
Example #4
0
            elif a_shift == D[i][j]:
                B[i][j] = Shift.a_shift
            elif b_shift == D[i][j]:
                B[i][j] = Shift.b_shift

    trace = backtrace(sequence_a[1:], sequence_b[1:], B)

    final_cost = D[len(sequence_a) - 1][len(sequence_b) - 1]
    aligned_sequence_a = trace[0]
    aligned_sequence_b = trace[1]

    return Solution(aligned_sequence_a, aligned_sequence_b, final_cost)


# Script entry point: nothing below runs when the file is imported as a module
if __name__ == "__main__":
    # Choose the cost and input files. Two explicit command line arguments
    # win, then the default files in the working directory; otherwise exit.
    if len(sys.argv) == 3:
        cost_path, input_path = sys.argv[1], sys.argv[2]
    elif path.isfile("imp2cost.txt") and path.isfile("imp2input.txt"):
        cost_path, input_path = "imp2cost.txt", "imp2input.txt"
    else:
        sys.exit("ERROR: No input supplied via files or command line args.\n"
                 "USAGE: $ python sequence_align.py {COST_FILE} {INPUT_FILE}")

    # Load the cost matrix first, then the sequences to align.
    COST_MATRIX = CostMatrix.from_file(cost_path)
    SEQUENCES = load_sequences_from_file(input_path)

    # Align every entry, stringify each solution, write one per line.
    SOLUTIONS = [str(align_sequences(*sequence)) for sequence in SEQUENCES]
    write_to_file('imp2output.txt', '\n'.join(SOLUTIONS))
Example #5
0
    # Run recursive algorithm on left and right halves
    closest_pair = min(closest_pair_dnc(left_set), closest_pair_dnc(right_set))

    # Get middle set (technically a list)
    left_bound = median - closest_pair[0]
    right_bound = median + closest_pair[0]
    middle_set = [p for p in points if left_bound <= p.x <= right_bound]

    return closest_cross_pair(middle_set, closest_pair)


def closest_cross_pair(points, closest_pair):
    """Return the closest pair among ``points`` if it beats ``closest_pair``.

    ``points`` is sorted in place by ascending ``y`` before scanning, so the
    caller's list is reordered. ``closest_pair`` is a
    ``(distance, point1, point2)`` tuple whose distance is the threshold a
    candidate pair has to undercut.

    Returns the best ``(distance, point1, point2)`` tuple seen, which is the
    incoming ``closest_pair`` when nothing closer is found.
    """
    points.sort(key=lambda candidate: candidate.y)

    best = closest_pair
    count = len(points)
    for lower_idx in range(count):
        lower = points[lower_idx]
        for upper_idx in range(lower_idx + 1, count):
            upper = points[upper_idx]
            if upper.y - lower.y > best[0]:
                # Sorted by y: every later point is at least this far away.
                break
            separation = Point.distance(lower, upper)
            if separation < best[0]:
                best = (separation, lower, upper)
    return best


# Script entry point: nothing below runs when the file is imported as a module
if __name__ == "__main__":
    # The points file path is the first command line argument. Exit with a
    # readable message instead of an IndexError traceback when it is missing.
    if len(argv) < 2:
        raise SystemExit("ERROR: No input file supplied via command line "
                         "args.\nUSAGE: pass the path of the points file as "
                         "the first argument.")
    POINT_LIST = point_list_from_file(argv[1])
    result = closest_pair_dnc(POINT_LIST)
    # The single-argument call form prints the same text whether ``print`` is
    # the statement or the function.
    print(format_output(result))
    write_to_file('output_divideandconquer.txt', format_output(result))
Example #6
0
 def test_write_to_file(self):
     """Check that shared.write_to_file leaves a non-empty testing/tmp.csv.

     The file is removed afterwards even when the write or the assertion
     fails, so a failing run does not leave a stale file behind.
     """
     target = 'testing/tmp.csv'
     try:
         shared.write_to_file([[1, 3], [3, 2]], ['doc0', 'doc1'], 'testing/',
                              'tmp.csv')
         self.assertTrue(os.path.getsize(target) > 0)
     finally:
         # Remove the file only if it exists, so a failed write is reported
         # as itself rather than as an error from os.remove.
         if os.path.exists(target):
             os.remove(target)