Ejemplo n.º 1
0
def test_score():
    """Run the pipeline up to the scoring step on generated data.

    An item/feature database is generated, the ABX task is built, the
    distances are computed with ``dtw_cosine_distance`` on 3 CPUs and the
    result is scored. The test passes when no step raises. Every file is
    written inside ``test_items``, which is deleted afterwards.
    """
    try:
        if not os.path.exists('test_items'):
            os.makedirs('test_items')
        item_file = 'test_items/data.item'
        feature_file = 'test_items/data.features'
        distance_file = 'test_items/data.distance'
        scorefilename = 'test_items/data.score'
        taskfilename = 'test_items/data.abx'
        items.generate_db_and_feat(3, 3, 1, item_file, 2, 3, feature_file)
        task = ABXpy.task.Task(item_file, 'c0', 'c1', 'c2')
        # Name the task file explicitly so that the next steps do not depend
        # on the default output name chosen by generate_triplets().
        task.generate_triplets(taskfilename)
        distances.compute_distances(feature_file,
                                    '/features/',
                                    taskfilename,
                                    distance_file,
                                    dtw_cosine_distance,
                                    normalized=True,
                                    n_cpu=3)
        score.score(taskfilename, distance_file, scorefilename)
    finally:
        # Best-effort cleanup: only removal errors are ignored, instead of
        # swallowing every exception with a bare ``except``.
        shutil.rmtree('test_items', ignore_errors=True)
Ejemplo n.º 2
0
def test_frozen_analyze():
    """Compare the results of a previously "frozen" run with a new one.

    The item and feature files come from ``frozen_file``; the task,
    distance, score and analysis files are regenerated under
    ``test_items``. The new score and csv files must match the frozen ones,
    asserting that the code did not change in behaviour.
    """
    # Defined before the ``try`` so that the cleanup can always refer to them.
    distance_file = 'test_items/data.distance'
    scorefilename = 'test_items/data.score'
    taskfilename = 'test_items/data.abx'
    analyzefilename = 'test_items/data.csv'
    try:
        if not os.path.exists('test_items'):
            os.makedirs('test_items')
        item_file = frozen_file('item')
        feature_file = frozen_file('features')

        task = ABXpy.task.Task(item_file, 'c0', 'c1', 'c2')
        task.generate_triplets(taskfilename)
        distances.compute_distances(feature_file, '/features/', taskfilename,
                                    distance_file, dtw_cosine_distance)
        score.score(taskfilename, distance_file, scorefilename)
        analyze.analyze(taskfilename, scorefilename, analyzefilename)

        # NOTE: the comparisons of the task and distance files are disabled.
        # assert items.h5cmp(taskfilename, frozen_file('abx'))
        # assert items.h5cmp(distance_file, frozen_file('distance'))
        assert items.h5cmp(scorefilename, frozen_file('score'))
        assert items.cmp(analyzefilename, frozen_file('csv'))

    finally:
        # Remove each output on its own: a file that was never created must
        # not prevent the remaining ones from being deleted.
        for path in (taskfilename, distance_file,
                     scorefilename, analyzefilename):
            try:
                os.remove(path)
            except OSError:
                pass
Ejemplo n.º 3
0
def test_multiple_across():
    """Check the stats and triplets of a task with two 'across' columns.

    The task is built on 'c0' across ['c1', 'c2'] from generated test items;
    the triplets of block '0' read back from 'data.abx' must be equivalent
    to the expected table.
    """
    items.generate_testitems(2, 3, name='data.item')
    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            task = ABXpy.task.Task('data.item', 'c0', ['c1', 'c2'])

        stats = task.stats
        assert stats['nb_blocks'] == 8
        assert stats['nb_triplets'] == 8
        assert stats['nb_by_levels'] == 1

        task.generate_triplets()

        triplets = np.array([[0, 1, 6], [1, 0, 7], [2, 3, 4], [3, 2, 5],
                             [4, 5, 2], [5, 4, 3], [6, 7, 0], [7, 6, 1]])
        # The context manager closes the HDF5 file even when the assertion
        # fails, so no handle is left open when the file is removed.
        with h5py.File('data.abx', 'r') as f:
            triplets_block = get_triplets(f, '0')
            assert tables_equivalent(triplets, triplets_block)
    finally:
        # Remove each file on its own: a missing 'data.abx' must not prevent
        # 'data.item' from being deleted.
        for path in ('data.abx', 'data.item'):
            try:
                os.remove(path)
            except OSError:
                pass
Ejemplo n.º 4
0
def test_multiple_across():
    """Verify a task using several 'across' columns at once.

    Builds the task on 'c0' across ['c1', 'c2'], checks its stats, then
    generates the triplets and compares block '0' of 'data.abx' with the
    expected triplet table.
    """
    items.generate_testitems(2, 3, name='data.item')
    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            task = ABXpy.task.Task('data.item', 'c0', ['c1', 'c2'])

        stats = task.stats
        assert stats['nb_blocks'] == 8
        assert stats['nb_triplets'] == 8
        assert stats['nb_by_levels'] == 1

        task.generate_triplets()

        triplets = np.array([[0, 1, 6], [1, 0, 7], [2, 3, 4], [3, 2, 5],
                             [4, 5, 2], [5, 4, 3], [6, 7, 0], [7, 6, 1]])
        # Open the task file in a ``with`` block so it is always closed
        # before the cleanup below tries to delete it.
        with h5py.File('data.abx', 'r') as f:
            triplets_block = get_triplets(f, '0')
            assert tables_equivalent(triplets, triplets_block)
    finally:
        # Independent removals: one failing os.remove() no longer skips the
        # other file.
        for path in ('data.abx', 'data.item'):
            try:
                os.remove(path)
            except OSError:
                pass
Ejemplo n.º 5
0
def test_threshold_analyze():
    """Check that the triplet threshold shows up in the analysis output.

    The triplets are generated with ``threshold=2``; after the distance,
    score and analysis steps, every value in the last column of the
    tab-separated csv (header row skipped) must equal the threshold.
    """
    try:
        if not os.path.exists('test_items'):
            os.makedirs('test_items')
        item_file = 'test_items/data.item'
        feature_file = 'test_items/data.features'
        distance_file = 'test_items/data.distance'
        scorefilename = 'test_items/data.score'
        taskfilename = 'test_items/data.abx'
        analyzefilename = 'test_items/data.csv'
        threshold = 2

        items.generate_db_and_feat(3, 3, 1, item_file, 2, 3, feature_file)
        task = ABXpy.task.Task(item_file, 'c0', 'c1', 'c2')
        task.generate_triplets(taskfilename, threshold=threshold)
        distances.compute_distances(
            feature_file, '/features/', taskfilename,
            distance_file, dtw_cosine_distance,
            normalized=True, n_cpu=1)
        score.score(taskfilename, distance_file, scorefilename)
        analyze.analyze(taskfilename, scorefilename, analyzefilename)
        number_triplets = np.loadtxt(analyzefilename, dtype=int,
                                     delimiter='\t', skiprows=1, usecols=[-1])
        assert np.all(number_triplets == threshold)
    finally:
        # Best-effort cleanup: only removal errors are ignored, instead of
        # swallowing every exception with a bare ``except``.
        shutil.rmtree('test_items', ignore_errors=True)
Ejemplo n.º 6
0
def test_basic():
    """Check the stats, triplets and pairs of a basic on/across/by task.

    The task is built on 'c0' across 'c1' by 'c2'. Blocks '0' and '1' of the
    generated 'data.abx' must both hold the expected triplets, and the first
    column of their pairs must contain exactly the expected values.
    """
    items.generate_testitems(2, 3, name='data.item')
    try:
        task = ABXpy.task.Task('data.item', 'c0', 'c1', 'c2')
        stats = task.stats
        assert stats['nb_blocks'] == 8, "incorrect stats: number of blocks"
        assert stats['nb_triplets'] == 8
        assert stats['nb_by_levels'] == 2
        task.generate_triplets()
        triplets = np.array([[0, 1, 2], [1, 0, 3], [2, 3, 0], [3, 2, 1]])
        pairs = [2, 6, 7, 3, 8, 12, 13, 9]
        # The manual slicing of f['triplets'] was dropped: its results were
        # overwritten by get_triplets() before ever being used.
        # The ``with`` block closes the HDF5 file even if an assertion fails.
        with h5py.File('data.abx', 'r') as f:
            triplets_block0 = get_triplets(f, '0')
            triplets_block1 = get_triplets(f, '1')
            assert tables_equivalent(triplets, triplets_block0), error_triplets
            assert tables_equivalent(triplets, triplets_block1), error_triplets
            pairs_block0 = get_pairs(f, '0')
            pairs_block1 = get_pairs(f, '1')
            assert (set(pairs) == set(pairs_block0[:, 0])), error_pairs
            assert (set(pairs) == set(pairs_block1[:, 0])), error_pairs
    finally:
        # Remove each file on its own: a missing 'data.abx' must not prevent
        # 'data.item' from being deleted.
        for path in ('data.abx', 'data.item'):
            try:
                os.remove(path)
            except OSError:
                pass
Ejemplo n.º 7
0
def fullrun():
    """Run the complete example pipeline inside ``example_items``.

    Outputs left by a previous run are deleted first. Items and features
    are then generated, and the task, distance, score and analysis files are
    produced in that order.
    """
    workdir = 'example_items'
    if not os.path.exists(workdir):
        os.makedirs(workdir)

    stem = workdir + '/data'
    item_file = stem + '.item'
    feature_file = stem + '.features'
    distance_file = stem + '.distance'
    scorefilename = stem + '.score'
    taskfilename = stem + '.abx'
    analyzefilename = stem + '.csv'

    # start from a clean slate: drop whatever an earlier run left behind
    leftovers = (item_file, feature_file, distance_file,
                 scorefilename, taskfilename, analyzefilename)
    for stale in leftovers:
        try:
            os.remove(stale)
        except OSError:
            pass

    # 1. database and features
    items.generate_db_and_feat(3, 3, 5, item_file, 2, 2, feature_file)

    # 2. task specification and triplets
    task = ABXpy.task.Task(item_file, 'c0', across='c1', by='c2')
    task.generate_triplets(taskfilename)

    # 3. distances between the items of each pair
    distances.compute_distances(feature_file,
                                '/features/',
                                taskfilename,
                                distance_file,
                                dtw_cosine_distance,
                                normalized=True,
                                n_cpu=1)

    # 4. scores, then the csv analysis
    score.score(taskfilename, distance_file, scorefilename)
    analyze.analyze(taskfilename, scorefilename, analyzefilename)
Ejemplo n.º 8
0
def test_filter():
    """Check a task restricted by a filter on the 'c3' column.

    With the filter ``[attr == 0 for attr in c3]`` the task built on 'c0'
    across 'c1' by 'c2' must report the expected stats, and blocks '0' and
    '1' of 'data.abx' must hold the expected triplets and pairs.
    """
    items.generate_testitems(2, 4, name='data.item')
    try:
        task = ABXpy.task.Task('data.item', 'c0', 'c1', 'c2',
                               filters=["[attr == 0 for attr in c3]"])
        stats = task.stats
        assert stats['nb_blocks'] == 8, "incorrect stats: number of blocks"
        assert stats['nb_triplets'] == 8
        assert stats['nb_by_levels'] == 2
        task.generate_triplets(output='data.abx')
        triplets = np.array([[0, 1, 2], [1, 0, 3], [2, 3, 0], [3, 2, 1]])
        pairs = [2, 6, 7, 3, 8, 12, 13, 9]
        # The ``with`` block closes the HDF5 file even if an assertion fails.
        with h5py.File('data.abx', 'r') as f:
            triplets_block0 = get_triplets(f, '0')
            triplets_block1 = get_triplets(f, '1')
            assert tables_equivalent(triplets, triplets_block0), error_triplets
            assert tables_equivalent(triplets, triplets_block1), error_triplets
            pairs_block0 = get_pairs(f, '0')
            pairs_block1 = get_pairs(f, '1')
            assert (set(pairs) == set(pairs_block0[:, 0])), error_pairs
            assert (set(pairs) == set(pairs_block1[:, 0])), error_pairs
    finally:
        # Remove each file on its own: a missing 'data.abx' must not prevent
        # 'data.item' from being deleted.
        for path in ('data.abx', 'data.item'):
            try:
                os.remove(path)
            except OSError:
                pass
Ejemplo n.º 9
0
def test_analyze():
    """Smoke-test the pipeline from generated data to the csv analysis.

    All files live in ``test_items``; the directory is removed at the end,
    whether or not a step raised.
    """
    workdir = 'test_items'
    stem = workdir + '/data'
    item_file = stem + '.item'
    feature_file = stem + '.features'
    distance_file = stem + '.distance'
    scorefilename = stem + '.score'
    taskfilename = stem + '.abx'
    analyzefilename = stem + '.csv'

    try:
        if not os.path.exists(workdir):
            os.makedirs(workdir)

        items.generate_db_and_feat(3, 3, 1, item_file, 2, 3, feature_file)

        abx_task = ABXpy.task.Task(item_file, 'c0', 'c1', 'c2')
        abx_task.generate_triplets(taskfilename)

        distances.compute_distances(
            feature_file, '/features/', taskfilename,
            distance_file, dtw_cosine_distance,
            normalized=True, n_cpu=1)

        score.score(taskfilename, distance_file, scorefilename)
        analyze.analyze(taskfilename, scorefilename, analyzefilename)
    finally:
        # best-effort removal of everything the test wrote
        shutil.rmtree(workdir, ignore_errors=True)
Ejemplo n.º 10
0
def test_score():
    """Exercise task generation, distance computation and scoring.

    The data is generated in ``test_items``; distances are computed with
    ``dtw_cosine_distance`` on 3 CPUs. The test succeeds when every step
    runs without raising, and the directory is removed at the end.
    """
    try:
        if not os.path.exists('test_items'):
            os.makedirs('test_items')
        item_file = 'test_items/data.item'
        feature_file = 'test_items/data.features'
        distance_file = 'test_items/data.distance'
        scorefilename = 'test_items/data.score'
        taskfilename = 'test_items/data.abx'
        items.generate_db_and_feat(3, 3, 1, item_file, 2, 3, feature_file)
        task = ABXpy.task.Task(item_file, 'c0', 'c1', 'c2')
        # Pass the task file explicitly: the steps below read
        # ``taskfilename`` and should not rely on a default output name.
        task.generate_triplets(taskfilename)
        distances.compute_distances(
            feature_file, '/features/', taskfilename,
            distance_file, dtw_cosine_distance,
            normalized=True, n_cpu=3)
        score.score(taskfilename, distance_file, scorefilename)
    finally:
        # Ignore removal errors only; the former bare ``except`` also hid
        # unrelated exceptions.
        shutil.rmtree('test_items', ignore_errors=True)
Ejemplo n.º 11
0
def test_basic():
    """Validate a plain task on 'c0' across 'c1' by 'c2'.

    Checks the task stats, then the triplets and the first column of the
    pairs stored for blocks '0' and '1' in the generated 'data.abx'.
    """
    items.generate_testitems(2, 3, name='data.item')
    try:
        task = ABXpy.task.Task('data.item', 'c0', 'c1', 'c2')
        stats = task.stats
        assert stats['nb_blocks'] == 8, "incorrect stats: number of blocks"
        assert stats['nb_triplets'] == 8
        assert stats['nb_by_levels'] == 2
        task.generate_triplets()
        triplets = np.array([[0, 1, 2], [1, 0, 3], [2, 3, 0], [3, 2, 1]])
        pairs = [2, 6, 7, 3, 8, 12, 13, 9]
        # Dead code removed: the blocks sliced by hand from f['triplets']
        # were replaced by the get_triplets() results before being read.
        # Using ``with`` guarantees the HDF5 file is closed before cleanup.
        with h5py.File('data.abx', 'r') as f:
            triplets_block0 = get_triplets(f, '0')
            triplets_block1 = get_triplets(f, '1')
            assert tables_equivalent(triplets, triplets_block0), error_triplets
            assert tables_equivalent(triplets, triplets_block1), error_triplets
            pairs_block0 = get_pairs(f, '0')
            pairs_block1 = get_pairs(f, '1')
            assert (set(pairs) == set(pairs_block0[:, 0])), error_pairs
            assert (set(pairs) == set(pairs_block1[:, 0])), error_pairs
    finally:
        # Independent removals: one failing os.remove() no longer skips the
        # other file.
        for path in ('data.abx', 'data.item'):
            try:
                os.remove(path)
            except OSError:
                pass
Ejemplo n.º 12
0
def test_threshold_analyze():
    """Run the pipeline with a triplet threshold and inspect the csv.

    Every value read from the last column of the tab-separated analysis
    file (first row skipped) has to be equal to the threshold.
    """
    workdir = 'test_items'
    stem = workdir + '/data'
    item_file = stem + '.item'
    feature_file = stem + '.features'
    distance_file = stem + '.distance'
    scorefilename = stem + '.score'
    taskfilename = stem + '.abx'
    analyzefilename = stem + '.csv'
    threshold = 2

    try:
        if not os.path.exists(workdir):
            os.makedirs(workdir)

        items.generate_db_and_feat(3, 3, 1, item_file, 2, 3, feature_file)

        abx_task = ABXpy.task.Task(item_file, 'c0', 'c1', 'c2')
        abx_task.generate_triplets(taskfilename, threshold=threshold)

        distances.compute_distances(
            feature_file, '/features/', taskfilename,
            distance_file, dtw_cosine_distance,
            normalized=True, n_cpu=1)

        score.score(taskfilename, distance_file, scorefilename)
        analyze.analyze(taskfilename, scorefilename, analyzefilename)

        # last column of the analysis file, header row excluded
        counts = np.loadtxt(analyzefilename, dtype=int, delimiter='\t',
                            skiprows=1, usecols=[-1])
        assert np.all(counts == threshold)
    finally:
        # best-effort removal of everything the test wrote
        shutil.rmtree(workdir, ignore_errors=True)
Ejemplo n.º 13
0
def test_frozen_analyze():
    """Regression test against the results of a "frozen" reference run.

    Item and feature files are the frozen ones; everything else is
    recomputed in ``test_items`` and the resulting csv is compared with the
    frozen csv, asserting that the behaviour of the code did not change.
    """
    workdir = 'test_items'
    stem = workdir + '/data'
    distance_file = stem + '.distance'
    scorefilename = stem + '.score'
    taskfilename = stem + '.abx'
    analyzefilename = stem + '.csv'

    try:
        if not os.path.exists(workdir):
            os.makedirs(workdir)
        item_file = frozen_file('item')
        feature_file = frozen_file('features')

        abx_task = ABXpy.task.Task(item_file, 'c0', 'c1', 'c2')
        abx_task.generate_triplets(taskfilename)

        distances.compute_distances(
            feature_file, '/features/', taskfilename,
            distance_file, dtw_cosine_distance,
            normalized=True, n_cpu=1)

        score.score(taskfilename, distance_file, scorefilename)
        analyze.analyze(taskfilename, scorefilename, analyzefilename)

        # Only the csv is compared; the HDF5 comparisons are disabled:
        # assert items.h5cmp(taskfilename, frozen_file('abx'))
        # assert items.h5cmp(distance_file, frozen_file('distance'))
        # assert items.h5cmp(scorefilename, frozen_file('score'))
        assert items.csv_cmp(analyzefilename, frozen_file('csv'))
    finally:
        # best-effort removal of everything the test wrote
        shutil.rmtree(workdir, ignore_errors=True)
Ejemplo n.º 14
0
def test_filter():
    """Validate a task built with the filter ``[attr == 0 for attr in c3]``.

    Checks the task stats, then the triplets and the first column of the
    pairs stored for blocks '0' and '1' in 'data.abx'.
    """
    items.generate_testitems(2, 4, name='data.item')
    try:
        task = ABXpy.task.Task('data.item',
                               'c0',
                               'c1',
                               'c2',
                               filters=["[attr == 0 for attr in c3]"])
        stats = task.stats
        assert stats['nb_blocks'] == 8, "incorrect stats: number of blocks"
        assert stats['nb_triplets'] == 8
        assert stats['nb_by_levels'] == 2
        task.generate_triplets(output='data.abx')
        triplets = np.array([[0, 1, 2], [1, 0, 3], [2, 3, 0], [3, 2, 1]])
        pairs = [2, 6, 7, 3, 8, 12, 13, 9]
        # Using ``with`` guarantees the HDF5 file is closed before cleanup.
        with h5py.File('data.abx', 'r') as f:
            triplets_block0 = get_triplets(f, '0')
            triplets_block1 = get_triplets(f, '1')
            assert tables_equivalent(triplets, triplets_block0), error_triplets
            assert tables_equivalent(triplets, triplets_block1), error_triplets
            pairs_block0 = get_pairs(f, '0')
            pairs_block1 = get_pairs(f, '1')
            assert (set(pairs) == set(pairs_block0[:, 0])), error_pairs
            assert (set(pairs) == set(pairs_block1[:, 0])), error_pairs
    finally:
        # Independent removals: one failing os.remove() no longer skips the
        # other file.
        for path in ('data.abx', 'data.item'):
            try:
                os.remove(path)
            except OSError:
                pass
Ejemplo n.º 15
0
def test_analyze():
    """Run the pipeline from generated data to the csv analysis.

    The test passes when no step raises. Every file it may have written
    under ``test_items`` is removed afterwards.
    """
    # Defined before the ``try`` so that the cleanup can always refer to them.
    item_file = 'test_items/data.item'
    feature_file = 'test_items/data.features'
    distance_file = 'test_items/data.distance'
    scorefilename = 'test_items/data.score'
    taskfilename = 'test_items/data.abx'
    analyzefilename = 'test_items/data.csv'
    try:
        if not os.path.exists('test_items'):
            os.makedirs('test_items')

        items.generate_db_and_feat(3, 3, 1, item_file, 2, 3, feature_file)
        task = ABXpy.task.Task(item_file, 'c0', 'c1', 'c2')
        task.generate_triplets(taskfilename)
        distances.compute_distances(feature_file, '/features/', taskfilename,
                                    distance_file, dtw_cosine_distance)
        score.score(taskfilename, distance_file, scorefilename)
        analyze.analyze(taskfilename, scorefilename, analyzefilename)
    finally:
        # Remove each file on its own: when an early step fails, the files
        # that were created are still deleted even though later ones are
        # missing.
        for path in (item_file, feature_file, taskfilename,
                     distance_file, scorefilename, analyzefilename):
            try:
                os.remove(path)
            except OSError:
                pass
Ejemplo n.º 16
0
def test_analyze(itemfile,
                 featurefile,
                 args,
                 taskfile=None,
                 distance=None,
                 distancefile=None,
                 scorefile=None,
                 analyzefile=None,
                 filename=None):
    """Run task generation, distances, scoring and analysis in sequence.

    Args:
        itemfile: item file given to ``ABXpy.task.Task``.
        featurefile: feature file given to the task and to
            ``distances.compute_distances``.
        args: container from which 'on', 'across', 'by', 'filters' and
            'reg' are looked up through ``get_arg``.
        taskfile: task output file; defaults to ``filename + '.abx'``.
        distance: distance function; defaults to ``dtw_cosine_distance``.
        distancefile: distance output; defaults to ``filename + '.distance'``.
        scorefile: score output; defaults to ``filename + '.score'``.
        analyzefile: analysis output; defaults to ``filename + '.csv'``.
        filename: common stem of the default output names; when omitted it
            is built from the names of the item and feature files and the
            'on', 'across' and 'by' values.

    Raises:
        AssertionError: if the 'on' argument is not found.
    """
    on = get_arg('on', args)
    # A space was missing between the two halves of the message.
    assert on, ("The 'on' argument was not found, this argument is mandatory "
                "for the task")

    across = get_arg('across', args)
    by = get_arg('by', args)
    filters = get_arg('filters', args)
    reg = get_arg('reg', args)

    if not filename:
        # NOTE(review): str(None) is the truthy string 'None', so a missing
        # 'across' or 'by' presumably ends up in the name -- confirm that
        # this is intended.
        filename = '_'.join(
            filter(None, [
                get_name(itemfile),
                get_name(featurefile),
                str(on),
                str(across),
                str(by)
            ]))

    # ``taskfile`` used to stay None when not supplied, and None was then
    # handed to the distance and score steps; default it like the others.
    if not taskfile:
        taskfile = filename + '.abx'

    if not distancefile:
        distancefile = filename + '.distance'

    if not scorefile:
        scorefile = filename + '.score'

    if not analyzefile:
        analyzefile = filename + '.csv'

    task = ABXpy.task.Task(itemfile,
                           on,
                           across,
                           by,
                           filters,
                           reg,
                           features=featurefile)
    # Write the triplets to the very file the following steps read.
    task.generate_triplets(taskfile)

    if not distance:
        distance = dtw_cosine_distance
    distances.compute_distances(featurefile, '/features/', taskfile,
                                distancefile, distance)

    score.score(taskfile, distancefile, scorefile)

    analyze.analyze(scorefile, taskfile, analyzefile)
Ejemplo n.º 17
0
def test_no_across():
    """Check the stats of a task without 'across' and generate its triplets.

    The task is built on 'c0' by 'c2' with ``None`` as the across argument.
    """
    items.generate_testitems(2, 3, name='data.item')
    try:
        task = ABXpy.task.Task('data.item', 'c0', None, 'c2')
        stats = task.stats
        assert stats['nb_blocks'] == 8
        assert stats['nb_triplets'] == 16
        assert stats['nb_by_levels'] == 2
        task.generate_triplets()
    finally:
        # Remove each file on its own: a missing 'data.abx' must not prevent
        # 'data.item' from being deleted.
        for path in ('data.abx', 'data.item'):
            try:
                os.remove(path)
            except OSError:
                pass
Ejemplo n.º 18
0
def test_no_across():
    """Verify a task on 'c0' by 'c2' that has no 'across' column.

    The stats are checked first, then the triplets are generated.
    """
    items.generate_testitems(2, 3, name='data.item')
    try:
        task = ABXpy.task.Task('data.item', 'c0', None, 'c2')
        stats = task.stats
        assert stats['nb_blocks'] == 8
        assert stats['nb_triplets'] == 16
        assert stats['nb_by_levels'] == 2
        task.generate_triplets()
    finally:
        # Independent removals: one failing os.remove() no longer skips the
        # other file, and only OSError is ignored.
        for path in ('data.abx', 'data.item'):
            try:
                os.remove(path)
            except OSError:
                pass
Ejemplo n.º 19
0
def test_multiple_bys():
    """Check the stats of a task with three 'by' columns and no 'across'.

    The task is built on 'c0' by ['c1', 'c2', 'c3']; its stats are checked
    and the triplets are generated.
    """
    items.generate_testitems(3, 4, name='data.item')
    try:
        task = ABXpy.task.Task('data.item', 'c0', None, ['c1', 'c2', 'c3'])
        stats = task.stats
        assert stats['nb_blocks'] == 81
        assert stats['nb_triplets'] == 0
        assert stats['nb_by_levels'] == 27
        task.generate_triplets()
    finally:
        # Remove each file on its own: a missing 'data.abx' must not prevent
        # 'data.item' from being deleted.
        for path in ('data.abx', 'data.item'):
            try:
                os.remove(path)
            except OSError:
                pass
Ejemplo n.º 20
0
def test_multiple_bys():
    """Verify a task on 'c0' with the 'by' columns ['c1', 'c2', 'c3'].

    The stats are checked, then the triplets are generated with warnings
    silenced.
    """
    items.generate_testitems(3, 4, name='data.item')
    try:
        task = ABXpy.task.Task('data.item', 'c0', None, ['c1', 'c2', 'c3'])
        stats = task.stats
        assert stats['nb_blocks'] == 81
        assert stats['nb_triplets'] == 0
        assert stats['nb_by_levels'] == 27
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            task.generate_triplets()
    finally:
        # Independent removals: one failing os.remove() no longer skips the
        # other file, and only OSError is ignored.
        for path in ('data.abx', 'data.item'):
            try:
                os.remove(path)
            except OSError:
                pass
Ejemplo n.º 21
0
def test_sampling_task():
    """Generate triplets twice with different ``sample`` values.

    The task is built on 'c0' across 'c1' by ['c2', 'c3']; triplets are
    generated with ``sample=0.2``, the task file is deleted, and they are
    generated again with ``sample=200``. The test passes when no step
    raises.
    """
    items.generate_testitems(4, 6, name='data.item')
    try:
        task = ABXpy.task.Task('data.item', 'c0', 'c1', ['c2', 'c3'])
        # print() with a single argument behaves the same on Python 2 and 3,
        # whereas the print statement is a syntax error on Python 3.
        print("stats computed")
        # stats = task.stats
        task.generate_triplets(sample=0.2)
        print("first sample")
        os.remove('data.abx')
        task.generate_triplets(sample=200)
        print("second sample")
    finally:
        # Remove each file on its own: a missing 'data.abx' must not prevent
        # 'data.item' from being deleted.
        for path in ('data.abx', 'data.item'):
            try:
                os.remove(path)
            except OSError:
                pass
Ejemplo n.º 22
0
def fullrun():
    """Run the example pipeline and leave its files in ``example_items``.

    Items and features are generated, then the triplets, the distances, the
    scores and the analysis are computed one after the other.
    """
    workdir = 'example_items'
    if not os.path.exists(workdir):
        os.makedirs(workdir)

    stem = workdir + '/data'
    item_file = stem + '.item'
    feature_file = stem + '.features'
    distance_file = stem + '.distance'
    scorefilename = stem + '.score'
    taskfilename = stem + '.abx'
    analyzefilename = stem + '.csv'

    # 1. database and features
    items.generate_db_and_feat(3, 3, 1, item_file, 2, 2, feature_file)

    # 2. task specification and triplets (no explicit output file is given)
    abx_task = ABXpy.task.Task(
        item_file, 'c0', 'c1', 'c2', features=feature_file)
    abx_task.generate_triplets()

    # 3. distances
    distances.compute_distances(
        feature_file, '/features/', taskfilename,
        distance_file, dtw_cosine_distance)

    # 4. scores, then the analysis
    score.score(taskfilename, distance_file, scorefilename)
    analyze.analyze(scorefilename, taskfilename, analyzefilename)
Ejemplo n.º 23
0
def test_filter_on_B():
    """Check a task on 'c0' with the filter ``[attr == 0 for attr in c1_B]``.

    The stats are checked, then block '0' of the generated 'data.abx' must
    hold the expected triplets.
    """
    items.generate_testitems(2, 2, name='data.item')
    try:
        task = ABXpy.task.Task('data.item', 'c0',
                               filters=["[attr == 0 for attr in c1_B]"])
        stats = task.stats
        assert stats['nb_blocks'] == 4, "incorrect stats: number of blocks"
        assert stats['nb_triplets'] == 4
        assert stats['nb_by_levels'] == 1
        task.generate_triplets()
        triplets = np.array([[0, 1, 2], [1, 0, 3], [2, 1, 0], [3, 0, 1]])
        # The ``with`` block closes the HDF5 file even if the assertion
        # fails.
        with h5py.File('data.abx', 'r') as f:
            triplets_block0 = get_triplets(f, '0')
            assert tables_equivalent(triplets, triplets_block0), error_triplets
    finally:
        # Remove each file on its own: a missing 'data.abx' must not prevent
        # 'data.item' from being deleted.
        for path in ('data.abx', 'data.item'):
            try:
                os.remove(path)
            except OSError:
                pass
Ejemplo n.º 24
0
def test_filter_on_C():
    """Check a task on 'c0' with the filter ``[attr == 0 for attr in c1_X]``.

    The stats are checked, then block '0' of the generated 'data.abx' must
    hold the expected triplets.
    """
    items.generate_testitems(2, 2, name='data.item')
    try:
        task = ABXpy.task.Task('data.item',
                               'c0',
                               filters=["[attr == 0 for attr in c1_X]"])
        stats = task.stats
        assert stats['nb_blocks'] == 4, "incorrect stats: number of blocks"
        assert stats['nb_triplets'] == 4
        assert stats['nb_by_levels'] == 1
        task.generate_triplets()
        triplets = np.array([[2, 1, 0], [2, 3, 0], [3, 0, 1], [3, 2, 1]])
        # The ``with`` block closes the HDF5 file even if the assertion
        # fails.
        with h5py.File('data.abx', 'r') as f:
            triplets_block0 = get_triplets(f, '0')
            assert tables_equivalent(triplets, triplets_block0), error_triplets
    finally:
        # Remove each file on its own: a missing 'data.abx' must not prevent
        # 'data.item' from being deleted.
        for path in ('data.abx', 'data.item'):
            try:
                os.remove(path)
            except OSError:
                pass
Ejemplo n.º 25
0
def memorizable_abx(data_file, on, across, by, njobs, tmpdir=None,
                    distance=cosine_distance, item_features_hash='0'):
    """Wrap the ABXpy functions and compute the scores.

    Args:
        data_file: common stem of all files; ``<data_file>.item`` and
            ``<data_file>.features`` must already exist.
        on: 'on' argument of the ABX task.
        across: 'across' argument of the ABX task.
        by: 'by' argument of the ABX task.
        njobs: number of CPUs given to ``distances.compute_distances``.
        tmpdir: temporary directory given to ``generate_triplets``.
        distance: distance function given to ``compute_distances``.
        item_features_hash: not used in the body (presumably a cache key
            for a memoizing caller -- TODO confirm).

    Returns:
        The content of the analysis file ``<data_file>.csv`` as a string.

    Raises:
        ValueError: if the item file or the feature file does not exist.
    """
    def _remove_existing(paths):
        # Explicit loop: on Python 3 ``map`` is lazy, so the former
        # ``map(os.remove, ...)`` never removed anything.
        for path in paths:
            if os.path.exists(path):
                os.remove(path)

    item_file = '{}.item'.format(data_file)
    feature_file = '{}.features'.format(data_file)
    if not os.path.isfile(item_file) or not os.path.isfile(feature_file):
        raise ValueError('item_file or feature_file doesnt exist')

    distance_file = '{}.distance'.format(data_file)
    score_file = '{}.score'.format(data_file)
    task_file = '{}.abx'.format(data_file)
    analyze_file = '{}.csv'.format(data_file)

    # clean up before computing ABX
    _remove_existing([distance_file, score_file, task_file, analyze_file])

    # running the evaluation
    task = ABXpy.task.Task(item_file, on, across=across, by=by, verbose=False)
    task.generate_triplets(task_file, tmpdir=tmpdir)
    distances.compute_distances(feature_file, '/features/', task_file,
                                distance_file, distance, normalized=True,
                                n_cpu=njobs)
    score.score(task_file, distance_file, score_file)
    analyze.analyze(task_file, score_file, analyze_file)

    # only the analysis file is kept
    _remove_existing([distance_file, score_file, task_file])

    # Read through a context manager so the file handle is closed.
    with open(analyze_file, 'r') as handle:
        return handle.read()
Ejemplo n.º 26
0
def test_analyze(
    itemfile,
    featurefile,
    args,
    taskfile=None,
    distance=None,
    distancefile=None,
    scorefile=None,
    analyzefile=None,
    filename=None,
):
    """Run task generation, distances, scoring and analysis in sequence.

    Args:
        itemfile: item file given to ``ABXpy.task.Task``.
        featurefile: feature file given to the task and to
            ``distances.compute_distances``.
        args: container from which "on", "across", "by", "filters" and
            "reg" are looked up through ``get_arg``.
        taskfile: task output file; defaults to ``filename + ".abx"``.
        distance: distance function; defaults to ``dtw_cosine_distance``.
        distancefile: distance output; defaults to ``filename + ".distance"``.
        scorefile: score output; defaults to ``filename + ".score"``.
        analyzefile: analysis output; defaults to ``filename + ".csv"``.
        filename: common stem of the default output names; when omitted it
            is built from the names of the item and feature files and the
            "on", "across" and "by" values.

    Raises:
        AssertionError: if the "on" argument is not found.
    """
    on = get_arg("on", args)
    # A space was missing between the two halves of the message.
    assert on, "The 'on' argument was not found, this argument is mandatory for the task"
    # Every lookup reads from ``args``, like the "on" lookup above; the
    # calls used to omit it.
    across = get_arg("across", args)
    by = get_arg("by", args)
    filters = get_arg("filters", args)
    reg = get_arg("reg", args)

    if not filename:
        # NOTE(review): str(None) is the truthy string "None", so a missing
        # "across" or "by" presumably ends up in the name -- confirm that
        # this is intended.
        filename = "_".join(filter(None, [get_name(itemfile), get_name(featurefile), str(on), str(across), str(by)]))
    # ``taskfile`` used to stay None when not supplied, and None was then
    # handed to the distance and score steps; default it like the others.
    if not taskfile:
        taskfile = filename + ".abx"
    if not distancefile:
        distancefile = filename + ".distance"
    if not scorefile:
        scorefile = filename + ".score"
    if not analyzefile:
        analyzefile = filename + ".csv"

    task = ABXpy.task.Task(itemfile, on, across, by, filters, reg, features=featurefile)
    # Write the triplets to the very file the following steps read.
    task.generate_triplets(taskfile)

    if not distance:
        distance = dtw_cosine_distance
    distances.compute_distances(featurefile, "/features/", taskfile, distancefile, distance)
    score.score(taskfile, distancefile, scorefile)
    analyze.analyze(scorefile, taskfile, analyzefile)
Ejemplo n.º 27
0
def fullrun():
    """Run the ABX evaluation described by the module-level settings.

    Reads the globals ``ON``, ``ACROSS``, ``BY``, ``input_folder``,
    ``feature``, ``distance``, ``NB_CPU``, ``out_res`` and ``ponderate``.
    The task, distance, score and analysis stages are each skipped when
    their output file already exists; ``eval_abx.avg`` is then called on the
    csv file.

    Raises:
        ValueError: if the distance file has to be computed and ``distance``
            is neither 'cosine' nor 'kl'.
    """
    # The first two 'by' columns take part in the name, exactly as before;
    # slicing with [:2] also accepts a list with a single column, which used
    # to raise IndexError.
    if isinstance(BY, list):
        by_tag = '_'.join(column[0:2] for column in BY[:2])
    else:
        by_tag = BY[0:2]
    out = '/' + 'on_' + ON[0:2] + '_ac_' + ACROSS[0:2] + '_by_' + by_tag
    output_folder = input_folder + out

    print("the input folder is " + input_folder + "\n")
    print("the ABX task id done :" + out + "\n")
    print(feature)

    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    item_file = input_folder + '/' + ON + '.item'
    feature_file = input_folder + '/' + feature
    distance_file = output_folder + '/' + out + '.distance'
    scorefilename = output_folder + '/' + out + '.score'
    taskfilename = output_folder + '/' + out + '.abx'
    analyzefilename = output_folder + '/' + out + '.csv'
    statsfilename = output_folder + '/' + out + '.stats'

    # running the evaluation:
    if not os.path.exists(taskfilename):
        # "na" means that the corresponding argument is not passed at all.
        task_options = {}
        if ACROSS != "na":
            task_options['across'] = ACROSS
        if BY != "na":
            task_options['by'] = BY
        task = ABXpy.task.Task(item_file, ON, **task_options)

        task.generate_triplets(taskfilename)

        try:
            task.print_stats(statsfilename)
        except Exception as error:
            # The stats are optional: report the failure and carry on,
            # instead of hiding every exception with a bare ``except``.
            print("could not write the stats file: " + str(error))
    print("the abx task file is created")

    print("number of cpu used is " + str(NB_CPU))

    if not os.path.exists(distance_file):
        if distance == 'cosine':
            distance_function = dtw_cosine_distance
        elif distance == 'kl':
            distance_function = dtw_kl_divergence
        else:
            raise ValueError('distance must be either cosine or kl')
        distances.compute_distances(feature_file,
                                    '/features/',
                                    taskfilename,
                                    distance_file,
                                    distance_function,
                                    normalized=True,
                                    n_cpu=NB_CPU)
        # The placeholder used to be printed verbatim because the string
        # was never interpolated.
        print("%s distance has been computed" % distance)
    else:
        print("distance has already been computed")

    if not os.path.exists(scorefilename):
        score.score(taskfilename, distance_file, scorefilename)
        print("Score is computed")

    if not os.path.exists(analyzefilename):
        analyze.analyze(taskfilename, scorefilename, analyzefilename)
        print("Raw results are available in the csv file !")

    eval_abx.avg(analyzefilename, out_res, ON, ACROSS, ponderate)
    print('evaluation done')