Esempio n. 1
0
def test_intersect(project):
    am = AnnotationManager(project)

    input_annotations = pd.read_csv("examples/valid_raw_data/annotations/intersect.csv")
    am.import_annotations(input_annotations)

    intersection = AnnotationManager.intersection(
        am.annotations[am.annotations["set"].isin(["textgrid", "vtc_rttm"])]
    ).convert_dtypes()

    a = intersection[intersection["set"] == "textgrid"]
    b = intersection[intersection["set"] == "vtc_rttm"]

    columns = a.columns.tolist()
    columns.remove("imported_at")
    columns.remove("package_version")
    columns.remove("merged_from")

    pd.testing.assert_frame_equal(
        standardize_dataframe(a, columns),
        standardize_dataframe(
            pd.read_csv("tests/truth/intersect_a.csv"), columns
        ).convert_dtypes(),
    )

    pd.testing.assert_frame_equal(
        standardize_dataframe(b, columns),
        standardize_dataframe(
            pd.read_csv("tests/truth/intersect_b.csv"), columns
        ).convert_dtypes(),
    )
def test_intersect(project):
    am = AnnotationManager(project)

    input_annotations = pd.read_csv(
        'examples/valid_raw_data/raw_annotations/intersect.csv')
    am.import_annotations(input_annotations)
    am.read()

    a, b = am.intersection(am.annotations[am.annotations['set'] == 'textgrid'],
                           am.annotations[am.annotations['set'] == 'vtc_rttm'])

    pd.testing.assert_frame_equal(
        a.sort_index(axis=1).sort_values(a.columns.tolist()).reset_index(
            drop=True).drop(columns=['imported_at']),
        pd.read_csv('tests/truth/intersect_a.csv').sort_index(
            axis=1).sort_values(a.columns.tolist()).reset_index(
                drop=True).drop(columns=['imported_at']))

    pd.testing.assert_frame_equal(
        b.sort_index(axis=1).sort_values(b.columns.tolist()).reset_index(
            drop=True).drop(columns=['imported_at']),
        pd.read_csv('tests/truth/intersect_b.csv').sort_index(
            axis=1).sort_values(b.columns.tolist()).reset_index(
                drop=True).drop(columns=['imported_at']))
Esempio n. 3
0
import argparse

parser = argparse.ArgumentParser(
    description=
    'compute agreement measures for all given annotators over a whole dataset')
parser.add_argument('path', help='path to the dataset')
parser.add_argument('--sets', nargs='+', help='sets to include')
args = parser.parse_args()

speakers = ['CHI', 'OCH', 'FEM', 'MAL']

project = ChildProject(args.path)
am = AnnotationManager(project)
am.read()

intersection = AnnotationManager.intersection(am.annotations, args.sets)
segments = am.get_collapsed_segments(intersection)

segments = segments[segments['speaker_type'].isin(speakers)]

vectors = [
    grid_to_vector(
        segments_to_grid(segments[segments['set'] == s],
                         0,
                         segments['segment_offset'].max(),
                         100,
                         'speaker_type',
                         speakers,
                         none=False), speakers) for s in args.sets
]