def remove_punctuation(reader, inst, punctuation=PUNCTUATION):
    """Return a new instance with punctuation tokens and their labels removed.

    Args:
        reader: Object exposing ``token_indexers``; these are used to build
            the filtered text field.
        inst: Instance whose ``fields['sentence']`` iterates over tokens
            exposing ``.text`` and whose ``fields['labels']`` exposes a
            parallel ``.labels`` sequence.
        punctuation: Container of token texts to drop (membership is tested
            with ``in``).

    Returns:
        A new ``Instance`` with ``"sentence"`` and ``"labels"`` fields holding
        only the non-punctuation tokens and their labels. If ``inst`` has an
        ``index`` attribute it is copied onto the result.
    """
    kept = [
        (word, label)
        for word, label in zip(inst.fields['sentence'],
                               inst.fields['labels'].labels)
        if word.text not in punctuation
    ]
    # Split the pairs explicitly rather than via ``zip(*kept)``: unpacking an
    # empty zip raises ValueError when every token is punctuation (or the
    # sentence is empty). Lists (not tuples) are built for the fields.
    words = [word for word, _ in kept]
    labels = [label for _, label in kept]

    sentence = TextField(words, reader.token_indexers)
    label_field = SequenceLabelField(labels=labels, sequence_field=sentence)
    inst_out = Instance({"sentence": sentence, 'labels': label_field})
    # Carry over the optional example identifier when the source has one.
    if hasattr(inst, 'index'):
        inst_out.index = inst.index
    return inst_out
def text_to_instance(sentence: List[str],
                     tags: List[str] = None,
                     idx: str = None,
                     token_indexers=None) -> Instance:
    """Build an ``Instance`` from a list of words and optional per-word tags.

    Args:
        sentence: Words of the sentence; each one is wrapped in a ``Token``.
        tags: Labels aligned with ``sentence``. When falsy (``None`` or
            empty) no ``"labels"`` field is added.
        idx: Identifier stored on the returned instance as ``index``.
        token_indexers: Indexers for the text field. When falsy, a single
            ``SingleIdTokenIndexer`` under the key ``"tokens"`` is used.

    Returns:
        An ``Instance`` with a ``"sentence"`` field, a ``"labels"`` field when
        tags were given, and an ``index`` attribute set to ``idx``.
    """
    if not token_indexers:
        token_indexers = {"tokens": SingleIdTokenIndexer()}

    text_field = TextField([Token(word) for word in sentence], token_indexers)
    fields = {"sentence": text_field}
    if tags:
        fields["labels"] = SequenceLabelField(
            labels=tags,
            sequence_field=text_field,
        )

    instance = Instance(fields)
    instance.index = idx
    return instance
    def text_to_instance(  # type: ignore
            self,
            tokens: List[Token],
            tags: List[str] = None,
            idx: str = None) -> Instance:
        """
        Build an ``Instance`` from `pre-tokenized` input (this class has no
        tokenizer of its own).

        The instance holds a ``"tokens"`` text field indexed with
        ``self._token_indexers``, a ``"metadata"`` field carrying the token
        texts under the key ``"words"``, and, when ``tags`` is not ``None``, a
        ``"tags"`` sequence-label field aligned with the tokens. ``idx`` is
        stored on the returned instance as ``index``.
        """
        text_field = TextField(tokens, self._token_indexers)
        fields: Dict[str, Field] = {
            "tokens": text_field,
            "metadata": MetadataField({"words": [tok.text for tok in tokens]}),
        }
        if tags is not None:
            fields["tags"] = SequenceLabelField(tags, text_field)

        instance = Instance(fields)
        instance.index = idx
        return instance