Code example #1
0
        X_text.append(s_)

# Dev-set inputs in two parallel representations: one-hot sequences for the
# model's encoded branch, and padded token lists for the text-input branch.
X_val = [
    value for value in tqdm(
        sem_eval_dataset_dev.iterate_x(max_len=MAX_LEN, one_hot=True))
]
X_val_text = []
for value in tqdm(
        sem_eval_dataset_dev.iterate_x(max_len=MAX_LEN, one_hot=False)):
    # Each sample is one string with utterances joined by ' <eou> '.
    splitted = value.split(' <eou> ')
    for s_ in splitted:
        # BUG FIX: the original did `s_ + ['_<PAD>_'] * (161 - len(s_))`
        # while `s_` was still a *string* — str + list raises TypeError.
        # The pad token and the fixed length 161 indicate token sequences
        # were intended: tokenize first, then right-pad to 161 tokens.
        tokens = s_.split(' ')
        tokens = tokens + ['_<PAD>_'] * (161 - len(tokens))
        X_val_text.append(tokens)

# One-hot labels for the dev split (with a progress bar) and the train split.
Y_val = list(tqdm(sem_eval_dataset_dev.iterate_y(one_hot=True)))
Y = list(sem_eval_dataset.iterate_train_y(one_hot=True))

# Validation feed for the model: padded text tokens in, one-hot labels out.
# (An earlier variant also fed 'one_hot_input': np.array(X_val); disabled.)
val_data = ({'text_input': np.array(X_val_text)}, {'output': np.array(Y_val)})

# Build a descriptive tag for the checkpoint filename out of the
# hyper-parameter configuration.
filepath = "_{}_{}_{}_{}_{}_{}_".format(
    'LSTM' if RNN_TYPE == LSTM else 'GRU',
    'X'.join(str(x) for x in LAYERS_SIZE),
    'ATT' if ATTENTION else 'NO_ATT',
    'DROPOUT_' + 'X'.join(str(x) for x in SPATIAL_DROPOUT),
    str(DENSE) if DENSE != 0 else 'NO_DENSE',
    # NOTE(review): the original expression was
    # `str() if DROPOUT_DENSE else ''` — both branches evaluate to '',
    # so DROPOUT_DENSE never appears in the filename. Most likely
    # `str(DROPOUT_DENSE)` was intended; kept as '' here to preserve
    # the existing checkpoint naming. TODO confirm and fix.
    '')
# Keras ModelCheckpoint template: epoch, F1 and val accuracy are filled in
# at save time from the training logs.
filepath = ("/data/spc_{epoch:02d}_F1_{f1_micro_score:.4f}"
            "_catAcc_{val_categorical_accuracy:.4f}_trainable"
            + filepath + ".hdf5")
Code example #2
0
import env
import itertools
from dataset.dataset import FixedSplitDataSet
from dataset.reader import CSVReader
from preprocessor.ekhprasis import EkhprasisPreprocessor

# Collapse the four emotion classes into a binary sentiment task:
# happy / sad / angry -> 'sentiment', everything else -> 'nosentiment'.
labels_map = {emotion: 'sentiment' for emotion in ('happy', 'sad', 'angry')}
labels_map['others'] = 'nosentiment'
labels = ['sentiment', 'nosentiment']

ekhprasis_preprocessor = EkhprasisPreprocessor(verbose=1)

# Both splits are read the same way: the three dialogue turns are merged
# into a single string separated by ' eou ', labels remapped via labels_map.
_read_kwargs = dict(sents_cols=['turn1', 'turn2', 'turn3'],
                    label_col="label",
                    merge_with=' eou ')
sem_eval_dataset = FixedSplitDataSet(
    train_dataset=CSVReader(
        env.TRAIN_FILE_PATH,
        preprocessor=ekhprasis_preprocessor).read(**_read_kwargs),
    test_dataset=CSVReader(
        env.DEV_FILE_PATH,
        preprocessor=ekhprasis_preprocessor).read(**_read_kwargs),
    labels_map=labels_map,
)


with open('train.csv', 'w', encoding='utf8') as f:
    for xy in itertools.zip_longest(sem_eval_dataset.iterate_train_x(), sem_eval_dataset.iterate_train_y()):
        f.write(str(labels.index(xy[1])) + ',' + xy[0] + '\n')


with open('val.csv', 'w', encoding='utf8') as f:
    for xy in itertools.zip_longest(sem_eval_dataset.iterate_test_x(), sem_eval_dataset.iterate_test_y()):
        f.write(str(labels.index(xy[1])) + ',' + xy[0] + '\n')