Esempio n. 1
0
def test_frozen_analyze():
    """Compare a fresh pipeline run against a previously "frozen" run.

    The task, distance, score and analyze steps are run on the frozen item
    and feature files; the resulting score and csv files must match the
    frozen ones, asserting that the code did not change in behaviour.
    """
    # Output paths are defined before the ``try`` so the cleanup in
    # ``finally`` can always refer to them, even after an early failure.
    distance_file = 'test_items/data.distance'
    scorefilename = 'test_items/data.score'
    taskfilename = 'test_items/data.abx'
    analyzefilename = 'test_items/data.csv'

    try:
        if not os.path.exists('test_items'):
            os.makedirs('test_items')
        item_file = frozen_file('item')
        feature_file = frozen_file('features')

        task = ABXpy.task.Task(item_file, 'c0', 'c1', 'c2')
        task.generate_triplets(taskfilename)
        distances.compute_distances(feature_file, '/features/', taskfilename,
                                    distance_file, dtw_cosine_distance)
        score.score(taskfilename, distance_file, scorefilename)
        analyze.analyze(taskfilename, scorefilename, analyzefilename)

        # Only the score and csv outputs are compared with the frozen run.
        assert items.h5cmp(scorefilename, frozen_file('score'))
        assert items.cmp(analyzefilename, frozen_file('csv'))

    finally:
        # Best-effort cleanup: remove each output independently so that one
        # missing file does not prevent the removal of the others.
        for generated in (taskfilename, distance_file,
                          scorefilename, analyzefilename):
            try:
                os.remove(generated)
            except OSError:
                pass
Esempio n. 2
0
def fullrun():
    """Run the whole ABX pipeline on generated data in ``example_items``.

    Any output left over from a previous run is deleted first, then the
    item/feature files are generated and the task, distance, score and
    analyze steps are executed in sequence.
    """
    workdir = 'example_items'
    if not os.path.exists(workdir):
        os.makedirs(workdir)

    items_path = 'example_items/data.item'
    features_path = 'example_items/data.features'
    distances_path = 'example_items/data.distance'
    scores_path = 'example_items/data.score'
    triplets_path = 'example_items/data.abx'
    results_path = 'example_items/data.csv'

    # Start from a clean slate; a file that is not there is simply skipped.
    leftovers = (items_path, features_path, distances_path,
                 scores_path, triplets_path, results_path)
    for leftover in leftovers:
        try:
            os.remove(leftover)
        except OSError:
            continue

    # Step 1: item database and features.
    items.generate_db_and_feat(3, 3, 5, items_path, 2, 2, features_path)

    # Step 2: task definition and triplets.
    abx_task = ABXpy.task.Task(items_path, 'c0', across='c1', by='c2')
    abx_task.generate_triplets(triplets_path)

    # Step 3: distances (single process, normalized).
    distances.compute_distances(features_path, '/features/', triplets_path,
                                distances_path, dtw_cosine_distance,
                                normalized=True, n_cpu=1)

    # Step 4: scores, then the final analysis written to the csv file.
    score.score(triplets_path, distances_path, scores_path)
    analyze.analyze(triplets_path, scores_path, results_path)
Esempio n. 3
0
def test_analyze():
    """Smoke-test the full pipeline up to and including the analyze step.

    Item and feature files are generated in ``test_items``, then the task,
    distance, score and analyze steps are run. The test passes when no step
    raises; every generated file is removed afterwards.
    """
    # Paths are defined before the ``try`` so the cleanup in ``finally`` can
    # always refer to them, even after an early failure.
    item_file = 'test_items/data.item'
    feature_file = 'test_items/data.features'
    distance_file = 'test_items/data.distance'
    scorefilename = 'test_items/data.score'
    taskfilename = 'test_items/data.abx'
    analyzefilename = 'test_items/data.csv'

    try:
        if not os.path.exists('test_items'):
            os.makedirs('test_items')

        items.generate_db_and_feat(3, 3, 1, item_file, 2, 3, feature_file)
        task = ABXpy.task.Task(item_file, 'c0', 'c1', 'c2')
        task.generate_triplets(taskfilename)
        distances.compute_distances(feature_file, '/features/', taskfilename,
                                    distance_file, dtw_cosine_distance)
        score.score(taskfilename, distance_file, scorefilename)
        analyze.analyze(taskfilename, scorefilename, analyzefilename)
    finally:
        # Best-effort cleanup: remove each file independently so that one
        # missing file does not prevent the removal of the others.
        for generated in (item_file, feature_file, taskfilename,
                          distance_file, scorefilename, analyzefilename):
            try:
                os.remove(generated)
            except OSError:
                pass
Esempio n. 4
0
def test_threshold_analyze():
    """Check that the ``threshold`` passed to ``generate_triplets`` shows up
    in the analyze output.

    The pipeline is run on generated data with ``threshold=2``; the last
    column of the resulting csv file must equal the threshold on every row.
    """
    try:
        if not os.path.exists('test_items'):
            os.makedirs('test_items')
        item_file = 'test_items/data.item'
        feature_file = 'test_items/data.features'
        distance_file = 'test_items/data.distance'
        scorefilename = 'test_items/data.score'
        taskfilename = 'test_items/data.abx'
        analyzefilename = 'test_items/data.csv'
        threshold = 2

        items.generate_db_and_feat(3, 3, 1, item_file, 2, 3, feature_file)
        task = ABXpy.task.Task(item_file, 'c0', 'c1', 'c2')
        task.generate_triplets(taskfilename, threshold=threshold)
        distances.compute_distances(
            feature_file, '/features/', taskfilename,
            distance_file, dtw_cosine_distance,
            normalized=True, n_cpu=1)
        score.score(taskfilename, distance_file, scorefilename)
        analyze.analyze(taskfilename, scorefilename, analyzefilename)
        # Skip the header row and read only the last tab-separated column.
        number_triplets = np.loadtxt(analyzefilename, dtype=int,
                                     delimiter='\t', skiprows=1, usecols=[-1])
        assert np.all(number_triplets == threshold)
    finally:
        # Best-effort removal of the whole working directory; filesystem
        # errors are ignored so they never mask the test outcome.
        shutil.rmtree('test_items', ignore_errors=True)
Esempio n. 5
0
def fullrun():
    """Generate example data in ``example_items`` and run every ABX step.

    The item and feature files are generated first, then the task,
    distance, score and analyze steps are executed one after the other.
    """
    workdir = 'example_items'
    if not os.path.exists(workdir):
        os.makedirs(workdir)

    items_path = 'example_items/data.item'
    features_path = 'example_items/data.features'
    distances_path = 'example_items/data.distance'
    scores_path = 'example_items/data.score'
    triplets_path = 'example_items/data.abx'
    results_path = 'example_items/data.csv'

    items.generate_db_and_feat(3, 3, 1, items_path, 2, 2, features_path)

    abx_task = ABXpy.task.Task(
        items_path, 'c0', 'c1', 'c2', features=features_path)
    # No output path is given here; presumably the task file ends up at
    # ``triplets_path`` by default -- TODO confirm against ABXpy.
    abx_task.generate_triplets()

    distances.compute_distances(
        features_path, '/features/', triplets_path,
        distances_path, dtw_cosine_distance)
    score.score(triplets_path, distances_path, scores_path)
    # NOTE(review): the score file is passed before the task file here;
    # confirm this matches the signature of the installed ABXpy version.
    analyze.analyze(scores_path, triplets_path, results_path)
Esempio n. 6
0
def run_ABX(feat_file, task_file, dis_file, score_file, result_file, distance,
            normalized):
    """
    Run the distance, score and analyze ABXpy steps on existing feature
    and task files.

    The three steps run in order, each one reading the file produced by
    the previous one:
        distances -> written to dis_file
        scores    -> written to score_file
        results   -> written to result_file

    `distance` is the distance function and `normalized` the flag handed
    to the distance computation, which runs on a single CPU.
    """
    features_group = '/features/'
    dis.compute_distances(
        feat_file, features_group, task_file, dis_file, distance,
        normalized=normalized, n_cpu=1)

    sco.score(task_file, dis_file, score_file)

    ana.analyze(task_file, score_file, result_file)
Esempio n. 7
0
def test_threshold_analyze():
    """Check that the ``threshold`` passed to ``generate_triplets`` shows up
    in the analyze output.

    The pipeline is run on generated data with ``threshold=2``; the last
    column of the resulting csv file must equal the threshold on every row.
    """
    try:
        if not os.path.exists('test_items'):
            os.makedirs('test_items')
        item_file = 'test_items/data.item'
        feature_file = 'test_items/data.features'
        distance_file = 'test_items/data.distance'
        scorefilename = 'test_items/data.score'
        taskfilename = 'test_items/data.abx'
        analyzefilename = 'test_items/data.csv'
        threshold = 2

        items.generate_db_and_feat(3, 3, 1, item_file, 2, 3, feature_file)
        task = ABXpy.task.Task(item_file, 'c0', 'c1', 'c2')
        task.generate_triplets(taskfilename, threshold=threshold)
        distances.compute_distances(feature_file,
                                    '/features/',
                                    taskfilename,
                                    distance_file,
                                    dtw_cosine_distance,
                                    normalized=True,
                                    n_cpu=1)
        score.score(taskfilename, distance_file, scorefilename)
        analyze.analyze(taskfilename, scorefilename, analyzefilename)
        # Skip the header row and read only the last tab-separated column.
        number_triplets = np.loadtxt(analyzefilename,
                                     dtype=int,
                                     delimiter='\t',
                                     skiprows=1,
                                     usecols=[-1])
        assert np.all(number_triplets == threshold)
    finally:
        # Best-effort removal of the whole working directory; filesystem
        # errors are ignored so they never mask the test outcome.
        shutil.rmtree('test_items', ignore_errors=True)
Esempio n. 8
0
def test_frozen_analyze():
    """Compare a fresh pipeline run against a previously "frozen" run.

    The task, distance, score and analyze steps are run on the frozen item
    and feature files; the resulting csv file must match the frozen one,
    asserting that the code did not change in behaviour.
    """
    try:
        if not os.path.exists('test_items'):
            os.makedirs('test_items')
        item_file = frozen_file('item')
        feature_file = frozen_file('features')
        distance_file = 'test_items/data.distance'
        scorefilename = 'test_items/data.score'
        taskfilename = 'test_items/data.abx'
        analyzefilename = 'test_items/data.csv'

        task = ABXpy.task.Task(item_file, 'c0', 'c1', 'c2')
        task.generate_triplets(taskfilename)
        distances.compute_distances(feature_file,
                                    '/features/',
                                    taskfilename,
                                    distance_file,
                                    dtw_cosine_distance,
                                    normalized=True,
                                    n_cpu=1)
        score.score(taskfilename, distance_file, scorefilename)
        analyze.analyze(taskfilename, scorefilename, analyzefilename)

        # Only the final csv output is compared with the frozen run.
        assert items.csv_cmp(analyzefilename, frozen_file('csv'))

    finally:
        # Best-effort removal of the whole working directory; filesystem
        # errors are ignored so they never mask the test outcome.
        shutil.rmtree('test_items', ignore_errors=True)
Esempio n. 9
0
def fullrun():
    """Run the whole ABX pipeline on generated data in ``example_items``.

    Any output left over from a previous run is deleted first, then the
    item/feature files are generated and the task, distance, score and
    analyze steps are executed in sequence.
    """
    workdir = 'example_items'
    if not os.path.exists(workdir):
        os.makedirs(workdir)

    items_path = 'example_items/data.item'
    features_path = 'example_items/data.features'
    distances_path = 'example_items/data.distance'
    scores_path = 'example_items/data.score'
    triplets_path = 'example_items/data.abx'
    results_path = 'example_items/data.csv'

    # Start from a clean slate; a file that is not there is simply skipped.
    leftovers = (items_path, features_path, distances_path,
                 scores_path, triplets_path, results_path)
    for leftover in leftovers:
        try:
            os.remove(leftover)
        except OSError:
            continue

    # Step 1: item database and features.
    items.generate_db_and_feat(3, 3, 5, items_path, 2, 2, features_path)

    # Step 2: task definition and triplets.
    abx_task = ABXpy.task.Task(items_path, 'c0', across='c1', by='c2')
    abx_task.generate_triplets(triplets_path)

    # Step 3: distances (single process, normalized), read from the
    # 'features' group of the feature file.
    distances.compute_distances(
        features_path, 'features', triplets_path, distances_path,
        dtw_cosine_distance, normalized=True, n_cpu=1)

    # Step 4: scores, then the final analysis written to the csv file.
    score.score(triplets_path, distances_path, scores_path)
    analyze.analyze(triplets_path, scores_path, results_path)
Esempio n. 10
0
def test_analyze(
    itemfile,
    featurefile,
    args,
    taskfile=None,
    distance=None,
    distancefile=None,
    scorefile=None,
    analyzefile=None,
    filename=None,
):
    """Run the task, distance, score and analyze steps for one configuration.

    ``on`` is read from ``args`` and is mandatory; ``across``, ``by``,
    ``filters`` and ``reg`` are fetched through ``get_arg``. When
    ``filename`` is not given, a base name is built from the item file, the
    feature file and the on/across/by values; the distance, score and
    analyze file names default to that base name plus ``.distance``,
    ``.score`` and ``.csv``. ``distance`` defaults to
    ``dtw_cosine_distance``.

    Raises AssertionError when the ``on`` argument is missing.
    """
    on = get_arg("on", args)
    # The two message fragments were previously concatenated without a
    # separating space ("mandatoryfor the task").
    assert on, (
        "The 'on' argument was not found, this argument is mandatory "
        "for the task"
    )
    # NOTE(review): unlike the "on" lookup above, these calls do not pass
    # ``args`` -- confirm that get_arg has a usable default for it.
    across = get_arg("across")
    by = get_arg("by")
    filters = get_arg("filters")
    reg = get_arg("reg")

    if not filename:
        name_parts = [
            get_name(itemfile),
            get_name(featurefile),
            str(on),
            str(across),
            str(by),
        ]
        filename = "_".join(filter(None, name_parts))
    if not distancefile:
        distancefile = filename + ".distance"
    if not scorefile:
        scorefile = filename + ".score"
    if not analyzefile:
        analyzefile = filename + ".csv"

    task = ABXpy.task.Task(itemfile, on, across, by, filters, reg, features=featurefile)
    # NOTE(review): no output path is passed here and ``taskfile`` may still
    # be None below -- confirm where the triplets are written by default.
    task.generate_triplets()

    if not distance:
        distance = dtw_cosine_distance
    distances.compute_distances(featurefile, "/features/", taskfile, distancefile, distance)
    score.score(taskfile, distancefile, scorefile)
    # NOTE(review): the score file is passed before the task file here;
    # confirm this matches the signature of the installed ABXpy version.
    analyze.analyze(scorefile, taskfile, analyzefile)
Esempio n. 11
0
def memorizable_abx(data_file, on, across, by, njobs, tmpdir=None,
                    distance=cosine_distance, item_features_hash='0'):
    '''Wrap the ABXpy functions and compute the ABX scores.

    Expects ``<data_file>.item`` and ``<data_file>.features`` to exist and
    runs the task, distance, score and analyze steps on them. The
    intermediate ``.distance``, ``.score`` and ``.abx`` files are deleted
    afterwards; only ``<data_file>.csv`` is kept.

    ``item_features_hash`` is not used in the body (judging by the function
    name it may serve as a cache key for a memoizing caller -- TODO confirm).

    Returns the content of the ``.csv`` analyze file as a string.
    Raises ValueError when the item or feature file is missing.
    '''
    item_file = '{}.item'.format(data_file)
    feature_file = '{}.features'.format(data_file)
    if not os.path.isfile(item_file) or not os.path.isfile(feature_file):
        raise ValueError('item_file or feature_file doesnt exist')

    distance_file = '{}.distance'.format(data_file)
    score_file = '{}.score'.format(data_file)
    task_file = '{}.abx'.format(data_file)
    analyze_file = '{}.csv'.format(data_file)

    # Clean up before computing ABX. An explicit loop is required: on
    # Python 3 ``map`` is lazy, so ``map(os.remove, ...)`` deletes nothing.
    for stale in (distance_file, score_file, task_file, analyze_file):
        if os.path.exists(stale):
            os.remove(stale)

    # running the evaluation
    task = ABXpy.task.Task(item_file, on, across=across, by=by, verbose=False)
    task.generate_triplets(task_file, tmpdir=tmpdir)
    distances.compute_distances(feature_file, '/features/', task_file,
                                distance_file, distance, normalized=True,
                                n_cpu=njobs)
    score.score(task_file, distance_file, score_file)
    analyze.analyze(task_file, score_file, analyze_file)

    # Keep only the ABX scores (the csv file); drop the intermediates.
    for intermediate in (distance_file, score_file, task_file):
        if os.path.exists(intermediate):
            os.remove(intermediate)

    # Read through a context manager so the file handle is always closed.
    with open(analyze_file, 'r') as analyze_handle:
        analyze_data = analyze_handle.read()
    return analyze_data
Esempio n. 12
0
def fullrun(task,
            data_folder,
            feature_folder,
            h5,
            file_sizes,
            corpus,
            distance,
            outputdir,
            normalized,
            doall=True,
            ncpus=None):
    """Run the distance, score and analyze steps for one task.

    Parameters
    ----------
    task : configuration section, read through ``lookup`` and
        ``task['section']``.
    data_folder : root folder under which the existing ``.abx`` task file
        is looked up (``<data_folder>/test/<corpus>/<file_sizes>s/...``).
    feature_folder : folder holding the features. When ``h5`` is false its
        content is converted to an h5 feature file; otherwise the file
        ``<file_sizes>s.h5f`` inside it is used directly.
    h5 : true when the features are already in h5 format.
    file_sizes, corpus : used to build the task and feature file paths.
    distance : optional ``'<module path>.<function name>'`` string; when
        empty, module and function are read from the task configuration.
    outputdir : folder receiving the feature, distance, score and analyze
        files.
    normalized : forwarded to ``distances.compute_distances``.
    doall : not used by this function.
    ncpus : number of CPUs; when falsy it is read from the task
        configuration (default 1).

    Returns
    -------
    The value of ``avg(analyzefilename, task)``.

    Raises
    ------
    AssertionError if the task file does not exist. Any error raised while
    importing the distance, converting the features or computing is
    re-raised; the generated files are removed in every case.
    """
    print("Processing task {}".format(task['section']))

    feature_file = os.path.join(outputdir, lookup('featurefile', task))

    # Resolve the distance function, either from the explicit ``distance``
    # argument or from the task configuration.
    try:
        if distance:
            distancepair = distance.split('.')
            distancemodule = distancepair[0]
            distancefunction = distancepair[1]
        else:
            distancemodule = lookup('distancemodule', task,
                                    os.path.join(CURDIR, 'distance'))
            distancefunction = lookup('distancefunction', task, 'distance')
        # The module is imported by name after putting its folder first on
        # the import path.
        path, mod = os.path.split(distancemodule)
        sys.path.insert(0, path)
        distancefun = getattr(__import__(mod), distancefunction)
    except Exception:
        sys.stderr.write('distance not found\n')
        raise

    distance_file = os.path.join(outputdir, lookup('distancefile', task))
    scorefilename = os.path.join(outputdir, lookup('scorefile', task))

    taskfilename = os.path.join(
        data_folder, 'test', corpus, '{}s'.format(file_sizes),
        '{}s_{}.abx'.format(file_sizes, lookup('type', task)))
    print('Task file is {}'.format(taskfilename))
    assert os.path.isfile(taskfilename), 'Task file unknown'

    analyzefilename = os.path.join(outputdir, lookup('analyzefile', task))
    if not ncpus:
        ncpus = int(lookup('ncpus', task, 1))

    makedirs([feature_file, distance_file, scorefilename, analyzefilename])

    # Preprocessing
    if not h5:
        try:
            print("Preprocessing... Writing the features in h5 format")
            tryremove(feature_file)
            any2h5features.convert(feature_folder,
                                   h5_filename=feature_file,
                                   load=loadfeats)
            # Open explicitly in append mode: recent h5py versions default
            # to read-only, which would make the attribute write fail.
            with h5py.File(feature_file, 'a') as fh:
                fh.attrs.create('done', True)
        except Exception:
            sys.stderr.write('Error when writing the features from {} to {}\n'
                             'Check the paths availability\n'.format(
                                 os.path.realpath(feature_folder),
                                 os.path.realpath(feature_file)))
            raise
    else:
        feature_file = os.path.join(feature_folder,
                                    '{}s.h5f'.format(file_sizes))

    # computing
    try:
        print("Computing the distances")
        tryremove(distance_file)
        distances.compute_distances(feature_file,
                                    '/features/',
                                    taskfilename,
                                    distance_file,
                                    distancefun,
                                    normalized=normalized,
                                    n_cpu=ncpus)

        tryremove(scorefilename)
        print("Computing the scores")
        score.score(taskfilename, distance_file, scorefilename)

        tryremove(analyzefilename)
        print("Collapsing the results")
        analyze.analyze(taskfilename, scorefilename, analyzefilename)

        # The average is computed before the ``finally`` clause removes the
        # analyze file.
        return avg(analyzefilename, task)
    finally:
        tryremove(distance_file)
        tryremove(scorefilename)
        tryremove(analyzefilename)
        # The h5 feature file is only removed when it was generated here.
        if not h5:
            tryremove(feature_file)
Esempio n. 13
0
def fullrun():
    """Run the ABX evaluation configured by the module-level settings.

    Reads the globals ``ON``, ``ACROSS``, ``BY``, ``input_folder``,
    ``feature``, ``distance``, ``NB_CPU``, ``out_res`` and ``ponderate``.
    Each step (task, distances, scores, analysis) is skipped when its
    output file already exists in the output folder, then the results are
    averaged with ``eval_abx.avg``.

    Raises ValueError when the distances have to be computed and
    ``distance`` is neither ``'cosine'`` nor ``'kl'``.
    """
    # Sub-folder / file stem built from the first two characters of the
    # on/across/by settings; BY may be a list of two column names.
    if isinstance(BY, list):
        out = '/' + 'on_' + ON[0:2] + '_ac_' + ACROSS[0:2] + '_by_' + BY[0][
            0:2] + '_' + BY[1][0:2]
    else:
        out = '/' + 'on_' + ON[0:2] + '_ac_' + ACROSS[0:2] + '_by_' + BY[0:2]
    output_folder = input_folder + out

    print("the input folder is " + input_folder + "\n")
    print("the ABX task id done :" + out + "\n")
    print(feature)

    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    item_file = input_folder + '/' + ON + '.item'
    feature_file = input_folder + '/' + feature
    distance_file = output_folder + '/' + out + '.distance'
    scorefilename = output_folder + '/' + out + '.score'
    taskfilename = output_folder + '/' + out + '.abx'
    analyzefilename = output_folder + '/' + out + '.csv'
    statsfilename = output_folder + '/' + out + '.stats'

    # running the evaluation:
    if not os.path.exists(taskfilename):
        # "na" means that the corresponding option is not used.
        task_options = {}
        if ACROSS != "na":
            task_options['across'] = ACROSS
        if BY != "na":
            task_options['by'] = BY
        task = ABXpy.task.Task(item_file, ON, **task_options)

        task.generate_triplets(taskfilename)

        # The statistics are optional: a failure here must not abort the run.
        try:
            task.print_stats(statsfilename)
        except Exception:
            pass
    print("the abx task file is created")

    print("number of cpu used is " + str(NB_CPU))

    if not os.path.exists(distance_file):
        if distance == 'cosine':
            distance_function = dtw_cosine_distance
        elif distance == 'kl':
            distance_function = dtw_kl_divergence
        else:
            raise ValueError('distance must be either cosine or kl')
        distances.compute_distances(feature_file,
                                    '/features/',
                                    taskfilename,
                                    distance_file,
                                    distance_function,
                                    normalized=True,
                                    n_cpu=NB_CPU)
        # The distance name is now interpolated; the message used to print
        # the literal "%(distance)s" placeholder.
        print("%s distance has been computed" % distance)
    else:
        print("distance has already been computed")

    if not os.path.exists(scorefilename):
        score.score(taskfilename, distance_file, scorefilename)
        print("Score is computed")

    if not os.path.exists(analyzefilename):
        analyze.analyze(taskfilename, scorefilename, analyzefilename)
        print("Raw results are available in the csv file !")

    eval_abx.avg(analyzefilename, out_res, ON, ACROSS, ponderate)
    print('evaluation done')