Ejemplo n.º 1
0
def test_check_number_less_saved_paths(tmpdir):
    """Expect the second value returned by ``run_save`` to be 1 when ``max_paths=1``."""
    out_file = tmpdir.join("output.txt")
    comparer = GenomCompare(same=5, diff=-5, gp=-2, max_paths=1, max_seq_len=100)
    result = comparer.run_save('MARS', 'SMART', out_file)
    # Only the second element (the saved-path count) matters for this check.
    n_saved = result[1] if len(result) == 2 else tuple(result)[1:][0]
    _, n_saved = result
    assert n_saved == 1
Ejemplo n.º 2
0
def test_init_graph_nodes():
    """Expect the freshly initialised graph to hold only border nodes, in a fixed order.

    The expected order is every ``(i, 0)`` for ``i`` in 0..4 followed by
    every ``(0, j)`` for ``j`` in 1..5.
    """
    comparer = GenomCompare(same=5, diff=-5, gp=-2, max_paths=100, max_seq_len=100)
    _, graph = comparer._initialize_structures('MARS', 'SMART')

    along_first_axis = [(i, 0) for i in range(5)]
    along_second_axis = [(0, j) for j in range(1, 6)]
    assert list(graph.nodes()) == along_first_axis + along_second_axis
Ejemplo n.º 3
0
def test_high_penalty_gap(tmpdir):
    """Expect an ungapped SAM/SUM alignment with score 5 when the gap penalty is -10."""
    out_file = tmpdir.join("output.txt")
    comparer = GenomCompare(same=5, diff=-5, gp=-10, max_paths=100, max_seq_len=100)
    comparer.run_save('SAM', 'SUM', out_file)

    # Read back whatever run_save wrote and compare it verbatim.
    with open(out_file) as handle:
        written = handle.read()
    assert written == 'SCORE = 5\n\nSAM\nSUM\n'
Ejemplo n.º 4
0
def test_check_saving_less_paths(tmpdir):
    """Expect exactly one alignment block in the output file when ``max_paths=1``."""
    out_file = tmpdir.join("output.txt")
    comparer = GenomCompare(same=5, diff=-5, gp=-2, max_paths=1, max_seq_len=100)
    comparer.run_save('MARS', 'SMART', out_file)

    # The whole file content is compared, so any extra path would fail the test.
    with open(out_file) as handle:
        written = handle.read()
    assert written == 'SCORE = 9\n\n-MAR-S\nSMART-\n'
Ejemplo n.º 5
0
def test_too_long_seq2():
    """Expect ``run`` to raise ``InvalidSeqLengthError`` for a 5-letter second sequence.

    ``max_seq_len`` is 4 here; the first sequence has 4 letters and the
    second has 5.
    """
    comparer = GenomCompare(same=5, diff=-5, gp=-2, max_paths=100, max_seq_len=4)
    first, second = 'MARS', 'SMART'
    with pytest.raises(InvalidSeqLengthError):
        comparer.run(first, second)


########### MORE COMPLICATED EXAMPLES ################
Ejemplo n.º 6
0
def test_simple_value_after_order_switch():
    """Expect the bottom-right matrix value to be the same for either argument order."""
    comparer = GenomCompare(same=5, diff=-5, gp=-2, max_paths=100, max_seq_len=100)
    first, second = 'MARS', 'SMART'

    # Read each corner value right after its own run, before the next call.
    forward_corner = comparer.run(first, second)[0][-1, -1]
    swapped_corner = comparer.run(second, first)[0][-1, -1]

    assert forward_corner == swapped_corner
Ejemplo n.º 7
0
def test_simple_matrix_values():
    """Expect ``run`` to return the full 5x6 matrix below for MARS vs SMART."""
    comparer = GenomCompare(same=5, diff=-5, gp=-2, max_paths=100, max_seq_len=100)
    scores, _ = comparer.run('MARS', 'SMART')

    expected = np.array([
        [0, -2, -4, -6, -8, -10],
        [-2, -4, 3, 1, -1, -3],
        [-4, -6, 1, 8, 6, 4],
        [-6, -8, -1, 6, 13, 11],
        [-8, -1, -3, 4, 11, 9],
    ])
    assert np.array_equal(scores, expected)
Ejemplo n.º 8
0
def test_init_matrix_values():
    """Expect the initial matrix to have stepped border values and zeros elsewhere.

    With ``gp=-2`` the expected first row and first column decrease by 2
    per cell starting from 0.
    """
    comparer = GenomCompare(same=5, diff=-5, gp=-2, max_paths=100, max_seq_len=100)
    initial, _ = comparer._initialize_structures('MARS', 'SMART')

    expected = np.array([
        [0, -2, -4, -6, -8, -10],
        [-2, 0, 0, 0, 0, 0],
        [-4, 0, 0, 0, 0, 0],
        [-6, 0, 0, 0, 0, 0],
        [-8, 0, 0, 0, 0, 0],
    ])
    assert np.array_equal(initial, expected)
Ejemplo n.º 9
0
def test_simple_graph_edges():
    """Expect ``run`` to return a graph with exactly these edges, in this order.

    Each edge is a ``(source, target)`` pair of matrix coordinates. The
    comparison is on a list, so the order of edges matters.
    """
    comparer = GenomCompare(same=5, diff=-5, gp=-2, max_paths=100, max_seq_len=100)
    _, graph = comparer.run('MARS', 'SMART')

    expected_edges = [
        # Border cells along the first axis.
        ((1, 0), (0, 0)), ((2, 0), (1, 0)), ((3, 0), (2, 0)), ((4, 0), (3, 0)),
        # Border cells along the second axis.
        ((0, 1), (0, 0)), ((0, 2), (0, 1)), ((0, 3), (0, 2)), ((0, 4), (0, 3)),
        ((0, 5), (0, 4)),
        # Sources with first index 1.
        ((1, 1), (0, 1)), ((1, 1), (1, 0)), ((1, 2), (0, 1)), ((1, 3), (1, 2)),
        ((1, 4), (1, 3)), ((1, 5), (1, 4)),
        # Sources with first index 2.
        ((2, 1), (1, 1)), ((2, 1), (2, 0)), ((2, 2), (1, 2)), ((2, 3), (1, 2)),
        ((2, 4), (2, 3)), ((2, 5), (2, 4)),
        # Sources with first index 3.
        ((3, 1), (2, 1)), ((3, 1), (3, 0)), ((3, 2), (2, 2)), ((3, 3), (2, 3)),
        ((3, 4), (2, 3)), ((3, 5), (3, 4)),
        # Sources with first index 4.
        ((4, 1), (3, 0)), ((4, 2), (3, 2)), ((4, 2), (4, 1)), ((4, 3), (3, 3)),
        ((4, 4), (3, 4)), ((4, 5), (3, 5)), ((4, 5), (4, 4)),
    ]
    assert list(graph.edges()) == expected_edges
Ejemplo n.º 10
0
def test_simple_graph_nodes():
    """Expect the graph returned by ``run`` to contain every cell of a 5x6 grid."""
    comparer = GenomCompare(same=5, diff=-5, gp=-2, max_paths=100, max_seq_len=100)
    _, graph = comparer.run('MARS', 'SMART')

    # Order is irrelevant here, so both sides are compared as sets.
    every_cell = {(i, j) for i in range(5) for j in range(6)}
    assert set(graph.nodes()) == every_cell
Ejemplo n.º 11
0
def test_simple_final_score():
    """Expect the bottom-right matrix value for MARS vs SMART to be 9."""
    comparer = GenomCompare(same=5, diff=-5, gp=-2, max_paths=100, max_seq_len=100)
    scores, _ = comparer.run('MARS', 'SMART')
    corner_value = scores[-1, -1]
    assert corner_value == 9
Ejemplo n.º 12
0
def test_init_matrix_size():
    """Expect the initial matrix shape to be (5, 6) for a 4- and a 5-letter sequence."""
    comparer = GenomCompare(same=5, diff=-5, gp=-2, max_paths=100, max_seq_len=100)
    initial, _ = comparer._initialize_structures('MARS', 'SMART')
    expected_shape = (5, 6)
    assert initial.shape == expected_shape
Ejemplo n.º 13
0
    parser.add_argument('-c', dest='config_path', required=True, help='path to config file')
    parser.add_argument('-o', dest='output_path', required=True, help='path to output file')

    args = parser.parse_args()
    return args


if __name__ == '__main__':
    # Script entry point: load the config, read both sequences, run the
    # comparison and write the result to the file given with -o.
    args = parse_args()
    parser = ConfigParser()

    # A malformed or invalid config is logged and ends the run with status 1.
    try:
        config = parser.load_config(args.config_path)
    except (json.JSONDecodeError, ConfigError) as e:
        logging.error(e)
        sys.exit(1)

    comparer = GenomCompare(
        same=config['SAME'],
        diff=config['DIFF'],
        gp=config['GP'],
        max_paths=config['MAX_NUMBER_PATHS'],
        max_seq_len=config['MAX_SEQ_LENGTH'],
    )

    seq1 = load_fasta(args.seq1_path)
    seq2 = load_fasta(args.seq2_path)

    try:
        # Write to the user-supplied output path; the required -o argument
        # was previously parsed but ignored in favour of a fixed 'tmp.txt'.
        score, saved_paths = comparer.run_save(seq1, seq2, args.output_path)
    except InvalidSeqLengthError as e:
        logging.error(e)
        sys.exit(1)

    print(f'Score: {score}\nPaths written: {saved_paths}')