Example #1
def test_aclew(project):
    data = pd.read_csv("tests/data/aclew.csv")

    am = AnnotationManager(project)
    am.import_annotations(
        pd.DataFrame([{
            "set": set,
            "raw_filename": "file.rttm",
            "time_seek": 0,
            "recording_filename": "sound.wav",
            "range_onset": 0,
            "range_offset": 4000,
            "format": "rttm",
        } for set in ["aclew_vtc", "aclew_alice", "aclew_vcm"]]),
        import_function=partial(fake_vocs, data),
    )

    aclew = AclewMetrics(project,
                         by="child_id",
                         rec_cols='date_iso',
                         child_cols='experiment,child_dob',
                         vtc='aclew_vtc',
                         alice='aclew_alice',
                         vcm='aclew_vcm')
    aclew.extract()

    truth = pd.read_csv("tests/truth/aclew_metrics.csv")

    pd.testing.assert_frame_equal(aclew.metrics, truth, check_like=True)
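The fake_vocs helper used above (through partial(fake_vocs, data)) is defined elsewhere in the test suite and not shown here; a minimal sketch consistent with its usage would simply hand back the pre-loaded segments:

def fake_vocs(data, filename):
    # ignore the raw file and return the segments read from the CSV,
    # so the test exercises the import pipeline without real RTTM parsing
    return data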
Example #2
def import_annotations(args):
    """convert and import a set of annotations"""

    project = ChildProject(args.source)
    errors, warnings = project.validate_input_data()

    if len(errors) > 0:
        print("validation failed, {} error(s) occured".format(len(errors)),
              file=sys.stderr)
        sys.exit(1)

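    # either load a full annotations index from a CSV file, or build a
    # single-row index from the CLI arguments (one value per non-generated
    # column of AnnotationManager.INDEX_COLUMNS)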
    if args.annotations:
        annotations = pd.read_csv(args.annotations)
    else:
        annotations = pd.DataFrame([{
            col.name: getattr(args, col.name)
            for col in AnnotationManager.INDEX_COLUMNS if not col.generated
        }])

    am = AnnotationManager(project)
    am.import_annotations(annotations)

    errors, warnings = am.validate()

    if len(am.errors) > 0 or len(errors) > 0:
        print("importation completed with {} errors and {} warnings".format(
            len(am.errors) + len(errors), len(warnings)),
              file=sys.stderr)
        print("\n".join(am.errors), file=sys.stderr)
        print("\n".join(errors), file=sys.stderr)
        print("\n".join(warnings))
Example #3
def test_lena(project):
    data = pd.read_csv("tests/data/lena_its.csv")

    am = AnnotationManager(project)
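    # range_onset/range_offset are expressed in milliseconds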
    am.import_annotations(
        pd.DataFrame([{
            "set": "lena_its",
            "raw_filename": "file.its",
            "time_seek": 0,
            "recording_filename": "sound.wav",
            "range_onset": 0,
            "range_offset": 100000000,
            "format": "its",
        }]),
        import_function=partial(fake_vocs, data),
    )

    lena = LenaMetrics(project,
                       set="lena_its",
                       period='1h',
                       from_time='10:00:00',
                       to_time='16:00:00')
    lena.extract()

    truth = pd.read_csv("tests/truth/lena_metrics.csv")

    pd.testing.assert_frame_equal(lena.metrics, truth, check_like=True)
Example #4
def test_intersect(project):
    am = AnnotationManager(project)

    input_annotations = pd.read_csv("examples/valid_raw_data/annotations/intersect.csv")
    am.import_annotations(input_annotations)

    intersection = AnnotationManager.intersection(
        am.annotations[am.annotations["set"].isin(["textgrid", "vtc_rttm"])]
    ).convert_dtypes()

    a = intersection[intersection["set"] == "textgrid"]
    b = intersection[intersection["set"] == "vtc_rttm"]

    columns = a.columns.tolist()
    columns.remove("imported_at")
    columns.remove("package_version")
    columns.remove("merged_from")

    pd.testing.assert_frame_equal(
        standardize_dataframe(a, columns),
        standardize_dataframe(
            pd.read_csv("tests/truth/intersect_a.csv"), columns
        ).convert_dtypes(),
    )

    pd.testing.assert_frame_equal(
        standardize_dataframe(b, columns),
        standardize_dataframe(
            pd.read_csv("tests/truth/intersect_b.csv"), columns
        ).convert_dtypes(),
    )
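standardize_dataframe is another suite-level helper not shown in these examples; judging from its call sites, it apparently restricts a frame to the given columns and normalizes row and column order, roughly:

def standardize_dataframe(df, columns):
    # hypothetical reconstruction: keep only the requested columns, then
    # sort columns and rows so frames compare independently of order
    df = df[list(columns)]
    return df.sort_index(axis=1).sort_values(
        df.columns.tolist()).reset_index(drop=True)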
Example #5
def test_specs(project):
    data = pd.read_csv("tests/data/lena_its.csv")

    am = AnnotationManager(project)
    am.import_annotations(
        pd.DataFrame([{
            "set": "specs_its",
            "raw_filename": "file.its",
            "time_seek": 0,
            "recording_filename": "sound.wav",
            "range_onset": 0,
            "range_offset": 100000000,
            "format": "its",
        }]),
        import_function=partial(fake_vocs, data),
    )

    msp = MetricsSpecificationPipeline()

    parameters = "tests/data/parameters_metrics.yml"
    msp.run(parameters)

    output = pd.read_csv(msp.destination)
    truth = pd.read_csv("tests/truth/specs_metrics.csv")

    pd.testing.assert_frame_equal(output, truth, check_like=True)

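    # re-running the pipeline from the parameters file it produced should
    # yield the same output (round-trip/reproducibility check)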
    new_params = msp.parameters_path
    msp.run(new_params)

    output = pd.read_csv(msp.destination)

    pd.testing.assert_frame_equal(output, truth, check_like=True)
Example #6
def test_custom(project):
    am = AnnotationManager(project)

    data = pd.read_csv("tests/data/lena_its.csv")

    am.import_annotations(
        pd.DataFrame([{
            "set": "custom_its",
            "raw_filename": "file.its",
            "time_seek": 0,
            "recording_filename": "sound.wav",
            "range_onset": 0,
            "range_offset": 100000000,
            "format": "its",
        }]),
        import_function=partial(fake_vocs, data),
    )

    parameters = "tests/data/list_metrics.csv"
    cmm = CustomMetrics(project, parameters)
    cmm.extract()

    truth = pd.read_csv("tests/truth/custom_metrics.csv")

    pd.testing.assert_frame_equal(cmm.metrics, truth, check_like=True)
Example #7
def test_random_vocalization(project):
    segments = [{
        'segment_onset': 1000,
        'segment_offset': 2000,
        'speaker_type': speaker
    } for speaker in ['CHI', 'FEM', 'MAL']]

    segments = pd.DataFrame(segments)

    am = AnnotationManager(project)
    am.import_annotations(
        pd.DataFrame([{
            "set": "random",
            "raw_filename": "file.rttm",
            "time_seek": 0,
            "recording_filename": "sound.wav",
            "range_onset": 0,
            "range_offset": 4000,
            "format": "rttm",
        }]),
        import_function=partial(fake_conversation, segments),
    )

    sampler = RandomVocalizationSampler(project=project,
                                        annotation_set="random",
                                        target_speaker_type=["CHI"],
                                        sample_size=1,
                                        threads=1)
    sampler.sample()

    chi_segments = segments[segments["speaker_type"] == "CHI"]
    pd.testing.assert_frame_equal(
        sampler.segments[["segment_onset", "segment_offset"]].astype(int),
        chi_segments[["segment_onset", "segment_offset"]].astype(int))
Example #8
def test_import(project):
    am = AnnotationManager(project)

    input_annotations = pd.read_csv(
        'examples/valid_raw_data/raw_annotations/input.csv')
    am.import_annotations(input_annotations)
    am.read()

    assert am.annotations.shape[0] == input_annotations.shape[0], \
        "imported annotations length does not match input"

    assert all([
        os.path.exists(os.path.join(project.path, 'annotations', f))
        for f in am.annotations['annotation_filename'].tolist()
    ]), "some annotations are missing"

    errors, warnings = am.validate()
    assert len(errors) == 0 and len(warnings) == 0, "malformed annotations detected"

    for dataset in ['eaf', 'textgrid', 'eaf_solis']:
        annotations = am.annotations[am.annotations['set'] == dataset]
        segments = am.get_segments(annotations)
        segments.drop(columns=annotations.columns, inplace=True)

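        # note: check_less_precise is deprecated since pandas 1.1 (removed
        # in 2.0); newer code would pass rtol/atol instead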
        pd.testing.assert_frame_equal(
            segments.sort_index(axis=1).sort_values(
                segments.columns.tolist()).reset_index(drop=True),
            pd.read_csv('tests/truth/{}.csv'.format(dataset)).sort_index(
                axis=1).sort_values(
                    segments.columns.tolist()).reset_index(drop=True),
            check_less_precise=True)
Example #9
def test_conversation_sampler(project):
    conversations = [
        {
            "onset": 0,
            "vocs": 5
        },
        {
            "onset": 60 * 1000,
            "vocs": 10
        },
        {
            "onset": 1800 * 1000,
            "vocs": 15
        },
    ]
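    # build each conversation's vocalizations: 2 s segments spaced 2.5 s
    # apart, alternating FEM and CHI speakers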
    segments = []
    for conversation in conversations:
        segments += [{
            "segment_onset": conversation["onset"] + i * (2000 + 500),
            "segment_offset": conversation["onset"] + i * (2000 + 500) + 2000,
            "speaker_type": ["FEM", "CHI"][i % 2],
        } for i in range(conversation["vocs"])]
    segments = pd.DataFrame(segments)

    am = AnnotationManager(project)
    am.import_annotations(
        pd.DataFrame([{
            "set": "conv",
            "raw_filename": "file.rttm",
            "time_seek": 0,
            "recording_filename": "sound.wav",
            "range_onset": 0,
            "range_offset": 3600 * 1000 * 1000,
            "format": "rttm",
        }]),
        import_function=partial(fake_conversation, segments),
    )
    sampler = ConversationSampler(
        project,
        "conv",
        count=5,
        interval=1000,
        speakers=["FEM", "CHI"],
    )
    sampler.sample()

    assert len(sampler.segments) == len(conversations)
    assert sampler.segments["segment_onset"].tolist() == [
        conv["onset"] for conv in sorted(
            conversations, key=lambda c: c["vocs"], reverse=True)
    ]
Example #10
def test_rename(project):
    am = AnnotationManager(project)

    input_annotations = pd.read_csv("examples/valid_raw_data/annotations/input.csv")
    am.import_annotations(input_annotations[input_annotations["set"] == "textgrid"])
    am.read()
    tg_count = am.annotations[am.annotations["set"] == "textgrid"].shape[0]

    am.rename_set("textgrid", "renamed")
    am.read()

    errors, warnings = am.validate()
    assert len(errors) == 0 and len(warnings) == 0, "malformed annotations detected"

    assert am.annotations[am.annotations["set"] == "textgrid"].shape[0] == 0
    assert am.annotations[am.annotations["set"] == "renamed"].shape[0] == tg_count
Example #11
def test_import(project):
    am = AnnotationManager(project)

    input_annotations = pd.read_csv("examples/valid_raw_data/annotations/input.csv")
    am.import_annotations(input_annotations)
    am.read()

    assert (
        am.annotations.shape[0] == input_annotations.shape[0]
    ), "imported annotations length does not match input"

    assert all(
        [
            os.path.exists(
                os.path.join(
                    project.path,
                    "annotations",
                    a["set"],
                    "converted",
                    a["annotation_filename"],
                )
            )
            for a in am.annotations.to_dict(orient="records")
        ]
    ), "some annotations are missing"

    errors, warnings = am.validate()
    assert len(errors) == 0 and len(warnings) == 0, "malformed annotations detected"
    
    errors, warnings = am.read()
    assert len(errors) == 0 and len(warnings) == 0, "malformed annotation indexes detected"

    for dataset in ["eaf_basic", "textgrid", "eaf_solis"]:
        annotations = am.annotations[am.annotations["set"] == dataset]
        segments = am.get_segments(annotations)
        segments.drop(columns=set(annotations.columns) - {"raw_filename"}, inplace=True)
        truth = pd.read_csv("tests/truth/{}.csv".format(dataset))

        pd.testing.assert_frame_equal(
            standardize_dataframe(segments, set(truth.columns.tolist())),
            standardize_dataframe(truth, set(truth.columns.tolist())),
            check_less_precise=True,
        )
Example #12
def test_vc_stats(project, turntakingthresh):
    am = AnnotationManager(project)
    am.import_annotations(
        pd.read_csv('examples/valid_raw_data/raw_annotations/input.csv'))

    raw_rttm = 'example_metrics.rttm'
    segments = am.annotations[am.annotations['raw_filename'] == raw_rttm]

    vc = am.get_vc_stats(am.get_segments(segments),
                         turntakingthresh=turntakingthresh).reset_index()
    truth_vc = pd.read_csv(
        'tests/truth/vc_truth_{:.1f}.csv'.format(turntakingthresh))

    pd.testing.assert_frame_equal(
        vc.reset_index().sort_index(axis=1).sort_values(vc.columns.tolist()),
        truth_vc.reset_index().sort_index(axis=1).sort_values(
            vc.columns.tolist()),
        atol=3)
Example #13
def test_clipping(project):
    am = AnnotationManager(project)

    input_annotations = pd.read_csv(
        'examples/valid_raw_data/raw_annotations/input.csv')
    am.import_annotations(input_annotations)
    am.read()

    start = 1981
    stop = 1984
    segments = am.get_segments(
        am.annotations[am.annotations['set'] == 'vtc_rttm'])
    segments = am.clip_segments(segments, start, stop)

    assert (
        segments['segment_onset'].between(start, stop).all()
        and segments['segment_offset'].between(start, stop).all()
    ), "segments not properly clipped"
    assert segments.shape[0] == 2, "got {} segments, expected 2".format(
        segments.shape[0])
Example #14
def test_clipping(project):
    am = AnnotationManager(project)

    input_annotations = pd.read_csv("examples/valid_raw_data/annotations/input.csv")
    input_annotations = input_annotations[input_annotations["recording_filename"] == "sound.wav"]
    am.import_annotations(input_annotations[input_annotations["set"] == "vtc_rttm"])
    am.read()

    start = 1981000
    stop = 1984000
    segments = am.get_segments(am.annotations[am.annotations["set"] == "vtc_rttm"])
    segments = am.clip_segments(segments, start, stop)

    assert (
        segments["segment_onset"].between(start, stop).all()
        and segments["segment_offset"].between(start, stop).all()
    ), "segments not properly clipped"
    assert segments.shape[0] == 2, "got {} segments, expected 2".format(
        segments.shape[0]
    )
Example #15
def test_custom_importation(project):
    am = AnnotationManager(project)
    input = pd.DataFrame(
        [
            {
                "set": "vtc_rttm",
                "range_onset": 0,
                "range_offset": 4000,
                "recording_filename": "sound.wav",
                "time_seek": 0,
                "raw_filename": "example.rttm",
                "format": "custom",
            }
        ]
    )

    am.import_annotations(input, import_function=custom_function)
    am.read()

    errors, warnings = am.validate()
    assert len(errors) == 0
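custom_function is the user-supplied converter under test and is not shown here. A sketch of what such a converter might look like for RTTM input (the column names below are assumed from the standard RTTM layout, not taken from the source):

def custom_function(filename):
    # hypothetical converter: parse a raw RTTM file and return segments
    # with the columns ChildProject expects (onsets/offsets in ms)
    rttm = pd.read_csv(
        filename, sep=" ", header=None,
        names=['type', 'file', 'chnl', 'tbeg', 'tdur',
               'ortho', 'stype', 'name', 'conf', 'slat'])
    return pd.DataFrame({
        'segment_onset': (rttm['tbeg'] * 1000).astype(int),
        'segment_offset': ((rttm['tbeg'] + rttm['tdur']) * 1000).astype(int),
        'speaker_type': rttm['name'],
    })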
Example #16
def test_metrics_segments(project):
    data = pd.read_csv("tests/data/aclew.csv")

    am = AnnotationManager(project)
    am.import_annotations(
        pd.DataFrame([{
            "set": set,
            "raw_filename": "file.rttm",
            "time_seek": 0,
            "recording_filename": "sound.wav",
            "range_onset": 0,
            "range_offset": 4000,
            "format": "rttm",
        } for set in ["segments_vtc", "segments_alice", "segments_vcm"]]),
        import_function=partial(fake_vocs, data),
    )
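    # each row of the metrics list gives a metric callable, the set to
    # compute it on, and optionally the speaker it applies to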
    lm = pd.DataFrame(np.array([
        ["voc_speaker", "segments_vtc", 'FEM'],
        ["voc_speaker", "segments_vtc", 'CHI'],
        ["voc_speaker_ph", "segments_vtc", 'FEM'],
        ["voc_speaker_ph", "segments_vtc", 'CHI'],
        ["wc_speaker_ph", "segments_alice", 'FEM'],
        ["lp_n", "segments_vcm", pd.NA],
        ["lp_dur", "segments_vcm", pd.NA],
    ]),
                      columns=["callable", "set", "speaker"])
    metrics = Metrics(project,
                      metrics_list=lm,
                      by="segments",
                      rec_cols='date_iso',
                      child_cols='experiment,child_dob',
                      segments='tests/data/segments.csv')
    metrics.extract()

    truth = pd.read_csv("tests/truth/segments_metrics.csv")

    pd.testing.assert_frame_equal(metrics.metrics, truth, check_like=True)
Example #17
def test_intersect(project):
    am = AnnotationManager(project)

    input_annotations = pd.read_csv(
        'examples/valid_raw_data/raw_annotations/intersect.csv')
    am.import_annotations(input_annotations)
    am.read()

    a, b = am.intersection(am.annotations[am.annotations['set'] == 'textgrid'],
                           am.annotations[am.annotations['set'] == 'vtc_rttm'])

    pd.testing.assert_frame_equal(
        a.sort_index(axis=1).sort_values(a.columns.tolist()).reset_index(
            drop=True).drop(columns=['imported_at']),
        pd.read_csv('tests/truth/intersect_a.csv').sort_index(
            axis=1).sort_values(a.columns.tolist()).reset_index(
                drop=True).drop(columns=['imported_at']))

    pd.testing.assert_frame_equal(
        b.sort_index(axis=1).sort_values(b.columns.tolist()).reset_index(
            drop=True).drop(columns=['imported_at']),
        pd.read_csv('tests/truth/intersect_b.csv').sort_index(
            axis=1).sort_values(b.columns.tolist()).reset_index(
                drop=True).drop(columns=['imported_at']))
Example #18
def test_periodic(project):
    """
    os.makedirs('output/eaf', exist_ok = True)

    project = ChildProject('examples/valid_raw_data')
    project.read()
    
    am = AnnotationManager(project)
    am.read()
    """
    data = pd.read_csv("tests/data/eaf_segments.csv")

    am = AnnotationManager(project)
    am.import_annotations(
        pd.DataFrame([{
            "set": "vtc",
            "raw_filename": "file.rttm",
            "time_seek": 0,
            "recording_filename": "sound.wav",
            "range_onset": 0,
            "range_offset": 4000,
            "format": "vtc_rttm",
        }]),
        import_function=partial(fake_vocs, data),
    )

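    # the positional arguments appear to be length, period, offset:
    # 500 ms windows every 500 ms, starting 250 ms into the recording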
    sampler = PeriodicSampler(project, 500, 500, 250, recordings=['sound.wav'])
    sampler.sample()
    sampler.segments.to_csv('output/eaf/segments.csv')

    ranges = sampler.segments.rename(columns={
        "segment_onset": "range_onset",
        "segment_offset": "range_offset",
    })
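    # IMP_FROM is a module-level constant in the test suite; given the set
    # imported above and import_speech_from='vtc' below, presumably "vtc"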
    annotations = am.get_within_ranges(ranges, [IMP_FROM], 'warn')
    #annotations = am.annotations[am.annotations["set"] == IMP_FROM].drop_duplicates(['set', 'recording_filename', 'time_seek', 'range_onset', 'range_offset', 'raw_filename', 'format', 'filter'],ignore_index=True)
    annot_segments = am.get_segments(annotations)

    eaf_builder = EafBuilderPipeline()
    eaf_builder.run(
        destination='output/eaf',
        segments='output/eaf/segments.csv',
        eaf_type='periodic',
        template='basic',
        context_onset=250,
        context_offset=250,
        path='output/eaf',
        import_speech_from='vtc',
    )

    eaf = Eaf('output/eaf/sound/sound_periodic_basic.eaf')

    def tier_segments(tier_name):
        # collect (onset, offset) pairs from an EAF tier into a DataFrame
        segments = []
        for _pid, (start_ts, end_ts, value, svg_ref) in eaf.tiers[tier_name][0].items():
            segments.append({
                'segment_onset': int(eaf.timeslots[start_ts]),
                'segment_offset': int(eaf.timeslots[end_ts])
            })
        return pd.DataFrame(segments)

    def sorted_segments(df):
        # keep only the onset/offset columns, in a deterministic row order
        return df[['segment_onset', 'segment_offset']].sort_values(
            ['segment_onset', 'segment_offset']).reset_index(drop=True)

    # the periodic code tier must reproduce the sampled segments
    pd.testing.assert_frame_equal(
        sorted_segments(tier_segments('code_periodic')),
        sorted_segments(sampler.segments))

    # each VTC tier must reproduce the imported segments of its speaker type
    speaker_segments = {
        'VTC-SPEECH': annot_segments[annot_segments['speaker_type'].isnull()],
        'VTC-CHI': annot_segments[annot_segments['speaker_type'] == 'CHI'],
        'VTC-OCH': annot_segments[annot_segments['speaker_type'] == 'OCH'],
        'VTC-FEM': annot_segments[annot_segments['speaker_type'] == 'FEM'],
    }

    for tier_name, expected in speaker_segments.items():
        pd.testing.assert_frame_equal(
            sorted_segments(tier_segments(tier_name)),
            sorted_segments(expected))

    assert eaf.media_descriptors[0]['MEDIA_URL'] == 'sound.wav'
Example #19
def test_merge(project):
    am = AnnotationManager(project)

    input_annotations = pd.read_csv("examples/valid_raw_data/annotations/input.csv")
    input_annotations = input_annotations[
        input_annotations["set"].isin(["vtc_rttm", "alice"])
    ]
    am.import_annotations(input_annotations)
    am.read()
    am.merge_sets(
        left_set="vtc_rttm",
        right_set="alice",
        left_columns=["speaker_type"],
        right_columns=["phonemes", "syllables", "words"],
        output_set="alice_vtc",
        full_set_merge=False,
        recording_filter={'sound.wav'}
    )
    am.read()

    anns = am.annotations[am.annotations['set'] == 'alice_vtc']
    assert anns.shape[0] == 1
    assert anns.iloc[0]['recording_filename'] == 'sound.wav'
    
    time.sleep(2)  # sleep 2 s so the two merges get distinct 'imported_at' values, letting the assertions below tell them apart
    
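    # with skip_existing=True, the second merge should only process the
    # recording left out by the first (sound2.wav)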
    am.merge_sets(
        left_set="vtc_rttm",
        right_set="alice",
        left_columns=["speaker_type"],
        right_columns=["phonemes", "syllables", "words"],
        output_set="alice_vtc",
        full_set_merge=False,
        skip_existing=True
    )
    am.read()
    
    anns = am.annotations[am.annotations['set'] == 'alice_vtc']
    assert anns.shape[0] == 2
    assert set(anns['recording_filename'].unique()) == {'sound.wav','sound2.wav'}
    assert anns.iloc[0]['imported_at'] != anns.iloc[1]['imported_at']
    
    segments = am.get_segments(am.annotations[am.annotations["set"] == "alice_vtc"])
    vtc_segments = am.get_segments(am.annotations[am.annotations["set"] == "vtc_rttm"])
    assert segments.shape[0] == vtc_segments.shape[0]
    assert segments.shape[1] == vtc_segments.shape[1] + 3

    adult_segments = (
        segments[segments["speaker_type"].isin(["FEM", "MAL"])]
        .sort_values(["segment_onset", "segment_offset"])
        .reset_index(drop=True)
    )
    alice = (
        am.get_segments(am.annotations[am.annotations["set"] == "alice"])
        .sort_values(["segment_onset", "segment_offset"])
        .reset_index(drop=True)
    )

    pd.testing.assert_frame_equal(
        adult_segments[["phonemes", "syllables", "words"]],
        alice[["phonemes", "syllables", "words"]],
    )
Example #20
import argparse
import os

from ChildProject.projects import ChildProject
from ChildProject.annotations import AnnotationManager

parser = argparse.ArgumentParser(
    description='import and convert VTC annotations into the project')
parser.add_argument("--source", help="project path", required=True)
parser.add_argument("--overwrite",
                    help="project path",
                    dest='overwrite',
                    action='store_true')
args = parser.parse_args()

project = ChildProject(args.source)
am = AnnotationManager(project)

if args.overwrite:
    am.remove_set('vtc')

input = project.recordings[['filename']].copy()
input = input.rename(columns={'filename': 'recording_filename'})
input = input[input['recording_filename'] != 'NA']
input['set'] = 'vtc'
input['time_seek'] = 0
input['range_onset'] = 0
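# note: range_offset is left at 0 here; Example #21 below sets it to the
# recording duration instead, which later versions of the package appear to expect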
input['range_offset'] = 0
input['raw_filename'] = input['recording_filename'].apply(
    lambda s: os.path.join('vtc', s + '.rttm'))
input['format'] = 'vtc_rttm'

am.import_annotations(input)
Example #21
#!/usr/bin/env python3
from ChildProject.projects import ChildProject
from ChildProject.annotations import AnnotationManager

import argparse
import os

parser = argparse.ArgumentParser(description='import and convert VTC annotations into the project')
parser.add_argument("--source", help="project path", required=True)
parser.add_argument("--set", help="annotation set. the rttm files should lie in <source>/annotations/<set>/raw/", default='vtc')
parser.add_argument("--overwrite", help="remove the existing set before importing", dest='overwrite', action='store_true')
args = parser.parse_args()

project = ChildProject(args.source)
am = AnnotationManager(project)

if args.overwrite:
    am.remove_set(args.set)

input = project.recordings[['recording_filename', 'duration']].copy()
input = input[input['recording_filename'] != 'NA']
input['set'] = args.set
input['time_seek'] = 0
input['range_onset'] = 0
input['range_offset'] = input['duration']
input['raw_filename'] = input['recording_filename'].apply(lambda s: os.path.splitext(s)[0] + '.rttm')
input['format'] = 'vtc_rttm'

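# convert the raw files with up to four parallel workers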
am.import_annotations(input, threads=4)