def from_csv(cls, file_path: str):
    """Build an instance from a CSV file with ``text`` and ``label`` columns.

    Every ``text`` cell is split on single spaces into tokens and every
    ``label`` cell into labels. Tokens and labels are accumulated into
    fresh dictionaries while the longest token sequence is tracked.

    Args:
        file_path: Path of the CSV file, handed to ``pd.read_csv``.

    Returns:
        The result of ``cls(data_df, max_seq_len, seq_vocab, label_dict)``,
        where ``data_df`` contains the ``text`` and ``label`` columns.
    """
    # Both columns must be loaded: the loop below reads 'label' as well as
    # 'text', so restricting usecols to 'text' alone raised KeyError.
    data_df = pd.read_csv(file_path, usecols=['text', 'label'])

    seq_vocab = TokenDictionary()
    label_dict = Dictionary()

    max_seq_len = 0
    # Walk the two columns in lockstep instead of iterrows(), which builds
    # a Series per row and yields an index that was never used.
    for text, label in zip(data_df['text'], data_df['label']):
        tokens = text.split(' ')
        max_seq_len = max(max_seq_len, len(tokens))

        seq_vocab.add_items(tokens)
        label_dict.add_items(label.split(' '))

    return cls(data_df, max_seq_len, seq_vocab, label_dict)
# Example no. 2
0
def get_label_dict(labels: List[str], delimiter=' '):
    """Collect the individual labels found in *labels* into a ``Dictionary``.

    Newline characters are removed from each entry, the entry is split on
    *delimiter*, and the resulting pieces are added to the dictionary with
    one ``add_items`` call per entry.

    Args:
        labels: Label strings, each holding delimiter-separated labels.
        delimiter: Separator between labels inside one string.

    Returns:
        The populated ``Dictionary``.
    """
    vocabulary = Dictionary()
    for raw_entry in labels:
        pieces = raw_entry.replace('\n', '').split(delimiter)
        vocabulary.add_items(pieces)
    return vocabulary