def test_intersect(project):
    """Compare AnnotationManager.intersection output with the truth CSVs.

    The annotations listed in the ``intersect.csv`` example index are
    imported, the ``textgrid`` and ``vtc_rttm`` sets are intersected, and
    each set's share of the result is compared with its reference table
    under ``tests/truth``.
    """
    manager = AnnotationManager(project)
    manager.import_annotations(
        pd.read_csv("examples/valid_raw_data/annotations/intersect.csv")
    )

    selected = manager.annotations[
        manager.annotations["set"].isin(["textgrid", "vtc_rttm"])
    ]
    result = AnnotationManager.intersection(selected).convert_dtypes()

    # Row filtering keeps the column set, so the columns of the full result
    # are the columns of each per-set subset.
    compared_columns = result.columns.tolist()
    # These columns are left out of the comparison (presumably because they
    # differ from one run/installation to the next -- confirm).
    for excluded in ("imported_at", "package_version", "merged_from"):
        compared_columns.remove(excluded)

    truth_files = (
        ("textgrid", "tests/truth/intersect_a.csv"),
        ("vtc_rttm", "tests/truth/intersect_b.csv"),
    )
    for set_name, truth_path in truth_files:
        pd.testing.assert_frame_equal(
            standardize_dataframe(
                result[result["set"] == set_name], compared_columns
            ),
            standardize_dataframe(
                pd.read_csv(truth_path), compared_columns
            ).convert_dtypes(),
        )
def test_intersect(project):
    """Check the pairwise intersection of two annotation sets.

    Imports the annotations listed in the ``intersect.csv`` example index,
    intersects the ``textgrid`` set with the ``vtc_rttm`` set, and compares
    both returned frames with the reference tables under ``tests/truth``.
    """
    manager = AnnotationManager(project)
    raw_index = pd.read_csv(
        'examples/valid_raw_data/raw_annotations/intersect.csv')
    manager.import_annotations(raw_index)
    manager.read()

    left, right = manager.intersection(
        manager.annotations[manager.annotations['set'] == 'textgrid'],
        manager.annotations[manager.annotations['set'] == 'vtc_rttm'],
    )

    def _normalised(frame, sort_keys):
        # Order columns alphabetically, order rows by `sort_keys`, reset the
        # index, then discard `imported_at` so it is not compared.
        # `imported_at` is dropped only after sorting, so it still takes
        # part in the row ordering when it is one of the sort keys.
        ordered = frame.sort_index(axis=1).sort_values(sort_keys)
        return ordered.reset_index(drop=True).drop(columns=['imported_at'])

    cases = (
        (left, 'tests/truth/intersect_a.csv'),
        (right, 'tests/truth/intersect_b.csv'),
    )
    for actual, truth_path in cases:
        # Both sides are sorted by the columns of the computed frame.
        pd.testing.assert_frame_equal(
            _normalised(actual, actual.columns.tolist()),
            _normalised(pd.read_csv(truth_path), actual.columns.tolist()),
        )
import argparse

# Command-line interface: a dataset path plus the annotation sets to compare.
parser = argparse.ArgumentParser(
    description=
    'compute agreement measures for all given annotators over a whole dataset')
parser.add_argument('path', help='path to the dataset')
# `--sets` is iterated over below to build one vector per set; when it was
# omitted, `args.sets` was None and the script died with a TypeError after
# loading the whole dataset.  Requiring it makes argparse report the problem
# immediately with a usage message.
parser.add_argument('--sets', nargs='+', required=True,
                    help='sets to include')
args = parser.parse_args()

# Speaker labels kept in the analysis; segments with any other
# `speaker_type` are filtered out below.
speakers = ['CHI', 'OCH', 'FEM', 'MAL']

project = ChildProject(args.path)
am = AnnotationManager(project)
am.read()

# Restrict the annotation index to the requested sets, then load the
# matching segments.
intersection = AnnotationManager.intersection(am.annotations, args.sets)
segments = am.get_collapsed_segments(intersection)
segments = segments[segments['speaker_type'].isin(speakers)]

# One vector per requested set.  Every set is gridded over the same range,
# from 0 to the largest `segment_offset` across all kept segments, with a
# step argument of 100 (units not visible here -- presumably the same as
# `segment_offset`; confirm against segments_to_grid).
vectors = [
    grid_to_vector(
        segments_to_grid(segments[segments['set'] == set_name], 0,
                         segments['segment_offset'].max(), 100,
                         'speaker_type', speakers, none=False),
        speakers)
    for set_name in args.sets
]