# NOTE(review): this chunk starts mid-loop — `X_text.append(s_)` is the tail of
# a padding loop over the training split whose header precedes this excerpt.
X_text.append(s_)

# One-hot encoded dev inputs; truncation/padding to MAX_LEN is done by the
# dataset iterator itself.
X_val = [value for value in tqdm(
    sem_eval_dataset_dev.iterate_x(max_len=MAX_LEN, one_hot=True))]

PAD_TOKEN = '_<PAD>_'
# Fixed per-utterance token length; presumably tied to the model's text input
# shape — TODO confirm against the model definition.
UTTERANCE_LEN = 161

# Token-level dev inputs: each conversation string is split into utterances on
# ' <eou> ', each utterance tokenized and right-padded to UTTERANCE_LEN.
X_val_text = []
for value in tqdm(sem_eval_dataset_dev.iterate_x(max_len=MAX_LEN, one_hot=False)):
    for utterance in value.split(' <eou> '):
        # BUG FIX: the original did `s_ + ['_<PAD>_'] * (161 - len(s_))`,
        # i.e. str + list, which raises TypeError unconditionally.
        # Tokenize first, then truncate/pad so every row has the same length.
        tokens = utterance.split()[:UTTERANCE_LEN]
        tokens += [PAD_TOKEN] * (UTTERANCE_LEN - len(tokens))
        X_val_text.append(tokens)

Y_val = [y for y in tqdm(sem_eval_dataset_dev.iterate_y(one_hot=True))]
Y = [y for y in sem_eval_dataset.iterate_train_y(one_hot=True)]

# Text-only input variant (the combined one-hot + text input was disabled).
val_data = ({'text_input': np.array(X_val_text)},
            {'output': np.array(Y_val)})

# Encode the hyper-parameter configuration into the checkpoint file name.
filepath = "_{}_{}_{}_{}_{}_{}_".format(
    'LSTM' if RNN_TYPE == LSTM else 'GRU',
    'X'.join(str(x) for x in LAYERS_SIZE),
    'ATT' if ATTENTION else 'NO_ATT',
    'DROPOUT_' + 'X'.join(str(x) for x in SPATIAL_DROPOUT),
    str(DENSE) if DENSE != 0 else 'NO_DENSE',
    # BUG FIX: original was `str() if DROPOUT_DENSE else ''`, which is '' in
    # BOTH branches (str() == ''); record the actual dense-dropout value.
    str(DROPOUT_DENSE) if DROPOUT_DENSE else '')
# Keras-style checkpoint template: {epoch}/{f1_micro_score}/{val_categorical_accuracy}
# are filled in by the checkpoint callback at save time.
filepath = ("/data/spc_{epoch:02d}_F1_{f1_micro_score:.4f}"
            "_catAcc_{val_categorical_accuracy:.4f}_trainable" + filepath + ".hdf5")
import itertools

import env
from dataset.dataset import FixedSplitDataSet
from dataset.reader import CSVReader
from preprocessor.ekhprasis import EkhprasisPreprocessor

# Collapse the four SemEval emotion labels onto a binary sentiment task.
labels_map = {'happy': 'sentiment',
              'sad': 'sentiment',
              'angry': 'sentiment',
              'others': 'nosentiment'}
labels = ['sentiment', 'nosentiment']

ekhprasis_preprocessor = EkhprasisPreprocessor(verbose=1)

# The three conversation turns are merged into a single string per example,
# joined with ' eou ' as the utterance separator.
sem_eval_dataset = FixedSplitDataSet(
    train_dataset=CSVReader(env.TRAIN_FILE_PATH,
                            preprocessor=ekhprasis_preprocessor).read(
        sents_cols=['turn1', 'turn2', 'turn3'],
        label_col="label",
        merge_with=' eou '),
    test_dataset=CSVReader(env.DEV_FILE_PATH,
                           preprocessor=ekhprasis_preprocessor).read(
        sents_cols=['turn1', 'turn2', 'turn3'],
        label_col="label",
        merge_with=' eou '),
    labels_map=labels_map
)


def _write_split(path, xs, ys):
    """Write one data split to *path* as '<label_index>,<text>' lines.

    The output format intentionally matches the original script exactly:
    no CSV quoting, label index first, raw text after a single comma.
    """
    with open(path, 'w', encoding='utf8') as f:
        # zip_longest (rather than zip) surfaces a length mismatch between
        # xs and ys: labels.index(None) raises instead of silently truncating.
        for x, y in itertools.zip_longest(xs, ys):
            f.write(str(labels.index(y)) + ',' + x + '\n')


# Export both splits; the duplicated write loops were factored into one helper.
_write_split('train.csv',
             sem_eval_dataset.iterate_train_x(),
             sem_eval_dataset.iterate_train_y())
_write_split('val.csv',
             sem_eval_dataset.iterate_test_x(),
             sem_eval_dataset.iterate_test_y())