def test_import(project):
    am = AnnotationManager(project)

    input_annotations = pd.read_csv(
        'examples/valid_raw_data/raw_annotations/input.csv')
    am.import_annotations(input_annotations)
    am.read()

    assert am.annotations.shape[0] == input_annotations.shape[
        0], "imported annotations length does not match input"

    assert all([
        os.path.exists(os.path.join(project.path, 'annotations', f))
        for f in am.annotations['annotation_filename'].tolist()
    ]), "some annotations are missing"

    errors, warnings = am.validate()
    assert len(errors) == 0 and len(
        warnings) == 0, "malformed annotations detected"

    for dataset in ['eaf', 'textgrid', 'eaf_solis']:
        annotations = am.annotations[am.annotations['set'] == dataset]
        segments = am.get_segments(annotations)
        segments.drop(columns=annotations.columns, inplace=True)

        pd.testing.assert_frame_equal(
            segments.sort_index(axis=1).sort_values(
                segments.columns.tolist()).reset_index(drop=True),
            pd.read_csv('tests/truth/{}.csv'.format(dataset)).sort_index(
                axis=1).sort_values(
                    segments.columns.tolist()).reset_index(drop=True),
            check_less_precise=True)
Esempio n. 2
0
def import_annotations(args):
    """convert and import a set of annotations"""

    project = ChildProject(args.source)
    errors, warnings = project.validate_input_data()

    if len(errors) > 0:
        print("validation failed, {} error(s) occured".format(len(errors)),
              file=sys.stderr)
        sys.exit(1)

    if args.annotations:
        annotations = pd.read_csv(args.annotations)
    else:
        annotations = pd.DataFrame([{
            col.name: getattr(args, col.name)
            for col in AnnotationManager.INDEX_COLUMNS if not col.generated
        }])

    am = AnnotationManager(project)
    am.import_annotations(annotations)

    errors, warnings = am.validate()

    if len(am.errors) > 0:
        print("importation completed with {} errors and {} warnings".format(
            len(am.errors) + len(errors), len(warnings)),
              file=sys.stderr)
        print("\n".join(am.errors), file=sys.stderr)
        print("\n".join(errors), file=sys.stderr)
        print("\n".join(warnings))
Esempio n. 3
0
def test_rename(project):
    am = AnnotationManager(project)

    input_annotations = pd.read_csv("examples/valid_raw_data/annotations/input.csv")
    am.import_annotations(input_annotations[input_annotations["set"] == "textgrid"])
    am.read()
    tg_count = am.annotations[am.annotations["set"] == "textgrid"].shape[0]

    am.rename_set("textgrid", "renamed")
    am.read()

    errors, warnings = am.validate()
    assert len(errors) == 0 and len(warnings) == 0, "malformed annotations detected"

    assert am.annotations[am.annotations["set"] == "textgrid"].shape[0] == 0
    assert am.annotations[am.annotations["set"] == "renamed"].shape[0] == tg_count
Esempio n. 4
0
def test_import(project):
    am = AnnotationManager(project)

    input_annotations = pd.read_csv("examples/valid_raw_data/annotations/input.csv")
    am.import_annotations(input_annotations)
    am.read()

    assert (
        am.annotations.shape[0] == input_annotations.shape[0]
    ), "imported annotations length does not match input"

    assert all(
        [
            os.path.exists(
                os.path.join(
                    project.path,
                    "annotations",
                    a["set"],
                    "converted",
                    a["annotation_filename"],
                )
            )
            for a in am.annotations.to_dict(orient="records")
        ]
    ), "some annotations are missing"

    errors, warnings = am.validate()
    assert len(errors) == 0 and len(warnings) == 0, "malformed annotations detected"
    
    errors, warnings = am.read()
    assert len(errors) == 0 and len(warnings) == 0, "malformed annotation indexes detected"

    for dataset in ["eaf_basic", "textgrid", "eaf_solis"]:
        annotations = am.annotations[am.annotations["set"] == dataset]
        segments = am.get_segments(annotations)
        segments.drop(columns=set(annotations.columns) - {"raw_filename"}, inplace=True)
        truth = pd.read_csv("tests/truth/{}.csv".format(dataset))

        print(segments)
        print(truth)

        pd.testing.assert_frame_equal(
            standardize_dataframe(segments, set(truth.columns.tolist())),
            standardize_dataframe(truth, set(truth.columns.tolist())),
            check_less_precise=True,
        )
Esempio n. 5
0
def test_custom_importation(project):
    am = AnnotationManager(project)
    input = pd.DataFrame(
        [
            {
                "set": "vtc_rttm",
                "range_onset": 0,
                "range_offset": 4000,
                "recording_filename": "sound.wav",
                "time_seek": 0,
                "raw_filename": "example.rttm",
                "format": "custom",
            }
        ]
    )

    am.import_annotations(input, import_function=custom_function)
    am.read()

    errors, warnings = am.validate()
    assert len(errors) == 0