def __init__(self,
             labels: Union[List[str], List[int]],
             sequence_field: SequenceField,
             label_namespace: str = 'labels') -> None:
    """
    Store one label per element of ``sequence_field`` and validate them.

    Parameters
    ----------
    labels :
        Either all ``str`` or all ``int``.  All-int labels are recorded
        in ``_indexed_labels`` as given.
    sequence_field :
        Field whose ``sequence_length()`` must equal ``len(labels)``.
    label_namespace :
        Stored as ``_label_namespace``.  A warning is logged when it ends
        with neither ``"tags"`` nor ``"labels"``.

    Raises
    ------
    ConfigurationError
        If the label count differs from the sequence length, or the labels
        are neither all ints nor all strings.
    """
    self.labels = labels
    self.sequence_field = sequence_field
    self._label_namespace = label_namespace
    self._indexed_labels = None

    # ``str.endswith`` accepts a tuple, so one call covers both suffixes.
    recommended_suffixes = ("tags", "labels")
    if not self._label_namespace.endswith(recommended_suffixes):
        logger.warning(
            "Your sequence label namespace was '%s'. We recommend you use a namespace "
            "ending with 'tags' or 'labels', so we don't add UNK and PAD tokens by "
            "default to your vocabulary. See documentation for "
            "`non_padded_namespaces` parameter in Vocabulary.",
            self._label_namespace,
        )

    expected_length = sequence_field.sequence_length()
    if len(labels) != expected_length:
        raise ConfigurationError(
            "Label length and sequence length "
            "don't match: %d and %d" % (len(labels), expected_length)
        )

    # All-int labels are kept as the indexed labels; nothing more to check.
    if all(isinstance(label, int) for label in labels):
        self._indexed_labels = labels
        return

    if any(not isinstance(label, str) for label in labels):
        raise ConfigurationError(
            "SequenceLabelFields must be passed either all "
            "strings or all ints. Found labels {} with "
            "types: {}.".format(labels, [type(label) for label in labels])
        )
def __init__(
    self,
    labels: Union[List[str], List[int]],
    sequence_field: SequenceField,
    label_namespace: str = "labels",
) -> None:
    """
    Store one label per element of ``sequence_field`` and validate them.

    When every label is an ``int`` the labels are stored in
    ``_indexed_labels`` and ``_skip_indexing`` is set to ``True``;
    otherwise every label must be a ``str``.

    Raises
    ------
    ConfigurationError
        If the label count differs from ``sequence_field.sequence_length()``,
        or the labels are neither all ints nor all strings.
    """
    self.labels = labels
    self.sequence_field = sequence_field
    self._label_namespace = label_namespace
    self._indexed_labels = None
    self._maybe_warn_for_namespace(label_namespace)

    expected_length = sequence_field.sequence_length()
    if len(labels) != expected_length:
        raise ConfigurationError(
            "Label length and sequence length "
            "don't match: %d and %d" % (len(labels), expected_length)
        )

    ints_only = all(isinstance(label, int) for label in labels)
    self._skip_indexing = ints_only
    if ints_only:
        self._indexed_labels = labels
        return

    strings_only = all(isinstance(label, str) for label in labels)
    if not strings_only:
        raise ConfigurationError(
            "SequenceLabelFields must be passed either all "
            "strings or all ints. Found labels {} with "
            "types: {}.".format(labels, [type(label) for label in labels])
        )
def __init__(
    self,
    indices: List[Tuple[int, int]],
    sequence_field1: SequenceField,
    sequence_field2: SequenceField,
    labels: List[str] = None,
    label_namespace: str = "labels",
    padding_value: int = -1,
) -> None:
    """
    Store index pairs spanning two sequence fields, with optional labels.

    Each index is read as ``(index[0], index[1])``; ``index[0]`` is bounded
    by ``sequence_field1.sequence_length()`` and ``index[1]`` by
    ``sequence_field2.sequence_length()``.

    Raises
    ------
    ConfigurationError
        If ``indices`` contains duplicates, if any index falls outside the
        two sequence lengths, or if ``labels`` is given and its length
        differs from that of ``indices``.
    """
    self.indices = indices
    self.labels = labels
    self.sequence_field1 = sequence_field1
    self.sequence_field2 = sequence_field2
    self._label_namespace = label_namespace
    self._padding_value = padding_value
    self._indexed_labels: Optional[List[int]] = None

    self._maybe_warn_for_namespace(label_namespace)

    row_count = sequence_field1.sequence_length()
    column_count = sequence_field2.sequence_length()

    has_duplicates = len(set(indices)) != len(indices)
    if has_duplicates:
        raise ConfigurationError(f"Indices must be unique, but found {indices}")

    # The column (index[1]) is tested before the row (index[0]).
    if any(
        not (0 <= index[1] < column_count and 0 <= index[0] < row_count)
        for index in indices
    ):
        raise ConfigurationError(
            f"Label indices and sequence length "
            f"are incompatible: {indices} and {row_count}, {column_count}"
        )

    if labels is None:
        return
    if len(indices) != len(labels):
        raise ConfigurationError(
            f"Labelled indices were passed, but their lengths do not match: "
            f" {labels}, {indices}"
        )
def __init__(self,
             labels: List[Dict],
             sequence_field: SequenceField,
             ontology: set) -> None:
    """
    Build one label vector and one mask vector per sequence element.

    Parameters
    ----------
    labels :
        One dict per sequence element.  Each key found in ``ontology`` must
        map to a dict with ``'value'`` and ``'confidence'`` entries; keys
        outside the ontology are ignored.
    sequence_field :
        Field whose ``sequence_length()`` must equal ``len(labels)``.
    ontology :
        Collection of keys; de-duplicated and sorted to fix the vector layout.

    Raises
    ------
    ConfigurationError
        If ``len(labels)`` differs from ``sequence_field.sequence_length()``.
    """
    self.sequence_field = sequence_field

    # Validate the length before filling anything.  Checking after the fill
    # loop let over-long ``labels`` fail with an IndexError first.
    sequence_length = sequence_field.sequence_length()
    if len(labels) != sequence_length:
        raise ConfigurationError(
            "Label length and sequence length "
            "don't match: %d and %d" % (len(labels), sequence_length)
        )

    # Sorting gives every ontology key a stable position in the vectors.
    self.ontology = sorted(set(ontology))
    self.ontology_to_idx = {key: idx for idx, key in enumerate(self.ontology)}
    self.idx_to_ontology = dict(enumerate(self.ontology))

    ontology_size = len(self.ontology)
    self.labels = []
    self.masks = []
    for label_dict in labels:
        label_vector = np.zeros(ontology_size)
        mask_vector = np.zeros(ontology_size)
        for key, entry in label_dict.items():
            # Dict lookup replaces the linear scan of the sorted list.
            if key not in self.ontology_to_idx:
                continue
            key_idx = self.ontology_to_idx[key]
            label_vector[key_idx] = entry['value']
            mask_vector[key_idx] = entry['confidence']
        self.labels.append(label_vector)
        self.masks.append(mask_vector)
def __init__(
    self,
    labels: Sequence[Sequence[Union[str, int]]],
    sequence_field: SequenceField,
    label_namespace: str = "labels",
    skip_indexing: bool = False,
    num_labels: Optional[int] = None,
) -> None:
    """
    Store a list of labels for every element of ``sequence_field``.

    Parameters
    ----------
    labels :
        One sub-sequence of labels per sequence element.  Each sub-sequence
        must be all ``str`` or all ``int``.
    sequence_field :
        Field whose ``sequence_length()`` must equal ``len(labels)``.
    label_namespace :
        Stored as ``_label_namespace`` and passed to
        ``_maybe_warn_for_namespace``.
    skip_indexing :
        When ``True`` (and ``labels`` is non-empty) every label must be an
        ``int`` smaller than ``num_labels``; the labels are then stored in
        ``_label_ids``.  When ``False`` every label must be a ``str``.
    num_labels :
        Required (and non-zero) when ``skip_indexing`` is ``True``.

    Raises
    ------
    ConfigurationError
        On a length mismatch, on a sub-sequence mixing types, or when the
        labels do not satisfy the ``skip_indexing`` requirements above.
    """
    self.labels = labels
    self.sequence_field = sequence_field
    self._label_namespace = label_namespace
    self._indexed_labels = None
    self._label_ids = None
    self._maybe_warn_for_namespace(label_namespace)
    self._num_labels = num_labels

    sequence_length = sequence_field.sequence_length()
    if len(labels) != sequence_length:
        raise ConfigurationError(
            "Label length and sequence length "
            "don't match: %d and %d" % (len(labels), sequence_length)
        )

    # Each sub-sequence must be homogeneous: all ints or all strings.
    for label_list in labels:
        ints_only = all(isinstance(x, int) for x in label_list)
        strings_only = all(isinstance(x, str) for x in label_list)
        if not (ints_only or strings_only):
            raise ConfigurationError(
                "SequenceLabelFields must be passed either all "
                "strings or all ints. Found labels {} with "
                "types: {}.".format(label_list, [type(x) for x in label_list])
            )

    # Decide "already indexed" over all labels together.  Testing one
    # sub-sequence at a time lets an empty one (vacuously all-int) flip the
    # flag for string labels.
    every_label_is_int = all(
        isinstance(label, int) for label_list in labels for label in label_list
    )
    self._skip_indexing = len(labels) > 0 and every_label_is_int
    if self._skip_indexing:
        self._indexed_labels = labels

    if skip_indexing and self.labels:
        # The labels are nested, so the checks must look inside each
        # sub-sequence; comparing the sub-sequences themselves never passes.
        if not every_label_is_int:
            raise ConfigurationError(
                "In order to skip indexing, your labels must be integers. "
                "Found labels = {}".format(labels)
            )
        if not num_labels:
            raise ConfigurationError(
                "In order to skip indexing, num_labels can't be None."
            )
        if not all(
            cast(int, label) < num_labels
            for label_list in labels
            for label in label_list
        ):
            raise ConfigurationError(
                "All labels should be < num_labels. "
                "Found num_labels = {} and labels = {} ".format(num_labels, labels)
            )
        self._label_ids = labels
    else:
        if any(
            not isinstance(label, str)
            for label_list in labels
            for label in label_list
        ):
            raise ConfigurationError(
                "SequenceMultiLabelFields expects string labels if skip_indexing=False. "
                "Found labels: {}".format(labels)
            )
def __init__(self,
             indices: List[Tuple[int, int, int]],
             sequence_field: SequenceField,
             labels: List[str] = None,
             label_namespace: str = 'labels',
             padding_value: int = -1) -> None:
    """
    Store index tuples over ``sequence_field``, with optional labels.

    Only the first two components of each index (``index[0]`` and
    ``index[1]``) are bounds-checked against
    ``sequence_field.sequence_length()``.

    Raises
    ------
    ConfigurationError
        If ``indices`` contains duplicates, if either of the first two
        components of an index is outside ``[0, sequence length)``, or if
        ``labels`` is given and its length differs from that of ``indices``.
    """
    self.indices = indices
    self.labels = labels
    self.sequence_field = sequence_field
    self._label_namespace = label_namespace
    self._padding_value = padding_value
    self._indexed_labels: List[int] = None

    self._maybe_warn_for_namespace(label_namespace)

    length = sequence_field.sequence_length()

    unique_indices = set(indices)
    if len(unique_indices) != len(indices):
        raise ConfigurationError(f"Indices must be unique, but found {indices}")

    # index[1] is tested before index[0].
    for index in indices:
        within_bounds = 0 <= index[1] < length and 0 <= index[0] < length
        if not within_bounds:
            raise ConfigurationError(
                f"Label indices and sequence length "
                f"are incompatible: {indices} and {length}"
            )

    if labels is None:
        return
    if len(indices) != len(labels):
        raise ConfigurationError(
            f"Labelled indices were passed, but their lengths do not match: "
            f" {labels}, {indices}"
        )
def __init__(self,
             labels: Union[List[str], List[int]],
             sequence_field: SequenceField,
             label_namespace: str = 'labels') -> None:
    """
    Store one label per element of ``sequence_field`` and validate them.

    The namespace is passed to ``_maybe_warn_for_namespace``.  All-int
    labels are recorded in ``_indexed_labels`` as given; any other input
    must consist solely of strings.

    Raises
    ------
    ConfigurationError
        If the label count differs from ``sequence_field.sequence_length()``,
        or the labels are neither all ints nor all strings.
    """
    self.labels = labels
    self.sequence_field = sequence_field
    self._label_namespace = label_namespace
    self._indexed_labels = None
    self._maybe_warn_for_namespace(label_namespace)

    sequence_length = sequence_field.sequence_length()
    if len(labels) != sequence_length:
        message = ("Label length and sequence length "
                   "don't match: %d and %d" % (len(labels), sequence_length))
        raise ConfigurationError(message)

    if all(isinstance(item, int) for item in labels):
        self._indexed_labels = labels
    elif any(not isinstance(item, str) for item in labels):
        found_types = [type(item) for item in labels]
        raise ConfigurationError("SequenceLabelFields must be passed either all "
                                 "strings or all ints. Found labels {} with "
                                 "types: {}.".format(labels, found_types))
def __init__(self,
             tags: List[str],
             sequence_field: SequenceField,
             tag_namespace: str = 'tags') -> None:
    """
    Store one tag per element of ``sequence_field``.

    ``_indexed_tags`` and ``_num_tags`` start out as ``None``.  A warning
    is logged when ``tag_namespace`` does not end with ``"tags"``.

    Raises
    ------
    ConfigurationError
        If ``len(tags)`` differs from ``sequence_field.sequence_length()``.
    """
    self._tags = tags
    self._sequence_field = sequence_field
    self._tag_namespace = tag_namespace
    self._indexed_tags = None  # type: Optional[List[int]]
    self._num_tags = None  # type: Optional[int]

    namespace_is_recommended = self._tag_namespace.endswith("tags")
    if not namespace_is_recommended:
        logger.warning(
            "Your tag namespace was '%s'. We recommend you use a namespace "
            "ending with 'tags', so we don't add UNK and PAD tokens by "
            "default to your vocabulary. See documentation for "
            "`non_padded_namespaces` parameter in Vocabulary.",
            self._tag_namespace,
        )

    expected_length = sequence_field.sequence_length()
    if len(tags) == expected_length:
        return
    raise ConfigurationError(
        "Tag length and sequence length "
        "don't match: %d and %d" % (len(tags), expected_length)
    )
def __init__(
    self,
    labels: Sequence[Sequence[Union[str, int]]],
    sequence_field: SequenceField,
    label_namespace: str = "labels",
    skip_indexing: bool = False,
    num_labels: Optional[int] = None,
) -> None:
    """
    Store a list of labels for every element of ``sequence_field``.

    Parameters
    ----------
    labels :
        A list of lists, one inner list per sequence element.  If every
        label is an ``int`` the labels are stored in ``_label_ids`` and
        ``_skip_indexing`` is set; otherwise every label must be a ``str``.
    sequence_field :
        Field whose ``sequence_length()`` must equal ``len(labels)``.
    label_namespace :
        Stored as ``_label_namespace`` and passed to
        ``_maybe_warn_for_namespace``.
    skip_indexing :
        Not consulted by this constructor; whether indexing is skipped is
        decided from the label types alone.
    num_labels :
        Stored as ``_num_labels``.

    Raises
    ------
    ConfigurationError
        On a length mismatch, if an element of ``labels`` is not a list, or
        if the labels are neither all ints nor all strings.
    """
    self.labels = labels
    self._sequence_field = sequence_field
    self._label_namespace = label_namespace
    self._label_ids = None
    self._maybe_warn_for_namespace(label_namespace)
    self._num_labels = num_labels

    sequence_length = sequence_field.sequence_length()
    if len(labels) != sequence_length:
        raise ConfigurationError(
            "Label length and sequence length "
            "don't match: %d and %d" % (len(labels), sequence_length)
        )

    if not all(isinstance(label_list, list) for label_list in labels):
        raise ConfigurationError(
            "SequenceMultiLabelFields expects a list-of-lists where each sublist contains the labels. "
            "Found labels: {}".format(labels)
        )

    # Decide "already indexed" over all labels together.  Setting the flag
    # as soon as one sub-list is all-int (an empty one is, vacuously) lets
    # mixed or string labels skip the string check below.
    every_label_is_int = all(
        isinstance(label, int) for label_list in labels for label in label_list
    )
    self._skip_indexing = len(labels) > 0 and every_label_is_int
    if self._skip_indexing:
        self._label_ids = labels
        return

    if any(
        not isinstance(label, str)
        for label_list in labels
        for label in label_list
    ):
        # A trailing space separates the two sentences of the message.
        raise ConfigurationError(
            "SequenceMultiLabelFields expects string labels if skip_indexing=False. "
            "Found labels: {}".format(labels)
        )