def test_from_df(annotation):
    """Round-trip an annotation through a pandas dataframe of its tracks."""
    # One record per track, as yielded by `itertracks` called with True.
    records = annotation.itertracks(True)
    frame = pd.DataFrame.from_records(
        records,
        columns=[PYANNOTE_SEGMENT, PYANNOTE_TRACK, PYANNOTE_LABEL],
    )

    rebuilt = Annotation.from_df(frame)

    # The rebuilt annotation must compare equal to the one we started from.
    assert rebuilt == annotation
def read(self, path, uri=None, modality=None, **kwargs):
    """Load a whitespace-delimited annotation file into `self._loaded`.

    The file is parsed with pandas using the column names, comment marker
    and converters returned by `self.fields()`, `self.comment()` and
    `self.converters()`. One `Annotation` is then built per
    (uri, modality) pair and stored in `self._loaded[uri, modality]`.

    Parameters
    ----------
    path : str
        Passed to `pandas.read_table` as the file to parse.
    uri : str, optional
        Value assigned to every entry when the parsed file has no uri
        column.
    modality : str, optional
        Force all entries to be considered as coming from this modality.
        Only taken into account when file format does not provide
        any field related to modality (e.g. .seg files)
    **kwargs
        Extra keyword arguments are accepted but not used by this method.

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If the parsed file has no uri column and `uri` is None, or has no
        modality column and `modality` is None.
    """

    # load whole file
    # * `sep=r'\s+'` is the documented equivalent of the deprecated
    #   `delim_whitespace=True`.
    # * uri and label columns are read as plain objects and NaN detection
    #   is disabled, so values such as "001" or "NA" are kept verbatim
    #   instead of being coerced to numbers or NaN.
    df = pandas.read_table(path, sep=r'\s+',
                           header=None, names=self.fields(),
                           comment=self.comment(),
                           converters=self.converters(),
                           dtype={PYANNOTE_URI: object,
                                  PYANNOTE_LABEL: object},
                           keep_default_na=False,
                           na_values=[])

    # remove comment lines
    # (i.e. lines for which all fields are either None or NaN)
    keep = [not all(pandas.isnull(item) for item in row[1:])
            for row in df.itertuples()]
    # explicit copy: the column assignments below must act on an
    # independent dataframe, not on a selection of the parsed one
    df = df[keep].copy()

    # add 'segment' column built from each row by `self.get_segment`
    df[PYANNOTE_SEGMENT] = [self.get_segment(row)
                            for row in df.itertuples()]

    # add unique track numbers if they are not read from file
    if PYANNOTE_TRACK not in self.fields():
        df[PYANNOTE_TRACK] = range(df.shape[0])

    # add uri column in case it does not exist
    if PYANNOTE_URI not in df:
        if uri is None:
            raise ValueError('missing uri -- use uri=')
        df[PYANNOTE_URI] = uri

    # obtain list of resources
    uris = list(df[PYANNOTE_URI].unique())

    # add modality column in case it does not exist
    if PYANNOTE_MODALITY not in df:
        if modality is None:
            raise ValueError('missing modality -- use modality=')
        df[PYANNOTE_MODALITY] = modality

    # obtain list of modalities
    modalities = list(df[PYANNOTE_MODALITY].unique())

    self._loaded = {}

    # Every (uri, modality) combination gets an entry, even when no row
    # matches both. Loop variables are named so that they do not shadow
    # the `uri` and `modality` parameters.
    for current_uri in uris:

        # filter based on resource
        uri_df = df[df[PYANNOTE_URI] == current_uri]

        for current_modality in modalities:

            # a None modality is stored under the empty string
            if current_modality is None:
                current_modality = ""

            # filter based on modality
            modality_df = uri_df[
                uri_df[PYANNOTE_MODALITY] == current_modality]

            self._loaded[current_uri, current_modality] = \
                Annotation.from_df(modality_df,
                                   modality=current_modality,
                                   uri=current_uri)

    return self
def read(self, path, uri=None, modality=None, **kwargs):
    """Load a whitespace-delimited annotation file into `self._loaded`.

    The file is parsed with pandas using the column names, comment marker
    and converters returned by `self.fields()`, `self.comment()` and
    `self.converters()`. One `Annotation` is then built per
    (uri, modality) pair and stored in `self._loaded[uri, modality]`.

    Parameters
    ----------
    path : str
        Passed to `pandas.read_table` as the file to parse.
    uri : str, optional
        Value assigned to every entry when the parsed file has no uri
        column.
    modality : str, optional
        Force all entries to be considered as coming from this modality.
        Only taken into account when file format does not provide
        any field related to modality (e.g. .seg files)
    **kwargs
        Extra keyword arguments are accepted but not used by this method.

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If the parsed file has no uri column and `uri` is None, or has no
        modality column and `modality` is None.
    """

    # load whole file
    # * `sep=r'\s+'` is the documented equivalent of the deprecated
    #   `delim_whitespace=True`.
    # * uri and label columns are read as plain objects and NaN detection
    #   is disabled (`keep_default_na=False`, empty `na_values`), so their
    #   text is kept verbatim.
    df = pandas.read_table(path, sep=r'\s+',
                           header=None, names=self.fields(),
                           comment=self.comment(),
                           converters=self.converters(),
                           dtype={PYANNOTE_URI: object,
                                  PYANNOTE_LABEL: object},
                           keep_default_na=False,
                           na_values=[])

    # remove comment lines
    # (i.e. lines for which all fields are either None or NaN)
    keep = [not all(pandas.isnull(item) for item in row[1:])
            for row in df.itertuples()]
    # explicit copy: the column assignments below must act on an
    # independent dataframe, not on a selection of the parsed one
    df = df[keep].copy()

    # add 'segment' column built from each row by `self.get_segment`
    df[PYANNOTE_SEGMENT] = [self.get_segment(row)
                            for row in df.itertuples()]

    # add unique track numbers if they are not read from file
    if PYANNOTE_TRACK not in self.fields():
        df[PYANNOTE_TRACK] = range(df.shape[0])

    # add uri column in case it does not exist
    if PYANNOTE_URI not in df:
        if uri is None:
            raise ValueError('missing uri -- use uri=')
        df[PYANNOTE_URI] = uri

    # obtain list of resources
    uris = list(df[PYANNOTE_URI].unique())

    # add modality column in case it does not exist
    if PYANNOTE_MODALITY not in df:
        if modality is None:
            raise ValueError('missing modality -- use modality=')
        # `modality` cannot be None past the guard above, so it is
        # assigned as is
        df[PYANNOTE_MODALITY] = modality

    # obtain list of modalities
    modalities = list(df[PYANNOTE_MODALITY].unique())

    self._loaded = {}

    # Every (uri, modality) combination gets an entry, even when no row
    # matches both. Loop variables are named so that they do not shadow
    # the `uri` and `modality` parameters.
    for current_uri in uris:

        # filter based on resource
        uri_df = df[df[PYANNOTE_URI] == current_uri]

        for current_modality in modalities:

            # a None modality is stored under the empty string
            if current_modality is None:
                current_modality = ""

            # filter based on modality
            modality_df = uri_df[
                uri_df[PYANNOTE_MODALITY] == current_modality]

            self._loaded[current_uri, current_modality] = \
                Annotation.from_df(modality_df,
                                   modality=current_modality,
                                   uri=current_uri)

    return self