from keras import Input, Sequential
from keras.layers import Lambda, Bidirectional, LSTM, Dense
import tensorflow as tf

import env
from ai.layer import WeightedConcatLayer
from dataset.dataset import SimpleDataSet
from dataset.reader import CSVReader
from feature.pretrained import GensimPretrainedFeature
from preprocessor.ekhprasis import EkhprasisPreprocessor

ekhprasis_preprocessor = EkhprasisPreprocessor(verbose=1)

sem_eval_dataset_dev = SimpleDataSet(
    dataset=CSVReader(env.DEV_FILE_PATH,
                      preprocessor=ekhprasis_preprocessor).read(
        sents_cols=['turn1', 'turn2', 'turn3'],
        label_col="label",
        merge_with=' <eou> '))

input = Input(shape=(161,), name='one_hot_input')
glove_features: GensimPretrainedFeature = GensimPretrainedFeature(
    word2index=sem_eval_dataset_dev.word2index,
    input=input,
    gensim_pretrained_embedding='glove-twitter-25',
    embedding_vector_length=25,
    max_len=161)

model = Sequential()
model.add(input)
model.add(glove_features.embedding_layer(trainable=False))
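# A minimal sketch of how the frozen-embedding model above might be finished,
# assuming a BiLSTM encoder followed by a softmax classifier; the layer sizes
# and the number of output classes here are illustrative assumptions, not the
# project's confirmed architecture.
model.add(Bidirectional(LSTM(64, return_sequences=False)))
model.add(Dense(32, activation='relu'))
model.add(Dense(4, activation='softmax'))  # e.g. happy / sad / angry / others
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()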
import env
from dataset.dataset import FixedSplitDataSet
from dataset.reader import CSVReader

sem_eval_dataset = FixedSplitDataSet(
    train_dataset=CSVReader(env.TRAIN_TAGS_FILE_PATH_,
                            header=None).read(sents_cols=None,
                                              label_col=None,
                                              merge_with=' <eou> '),
    test_dataset=CSVReader(env.DEV_TAGS_FILE_PATH_,
                           header=None).read(sents_cols=None,
                                             label_col=None,
                                             merge_with=' <eou> '))

dict_ = {}
for sentence in sem_eval_dataset.iterate_x():
    for word in sentence.split(' '):
        dict_.setdefault(word, 0)
        dict_[word] = dict_[word] + 1

sorted_by_value = sorted(dict_.items(), key=lambda kv: kv[1])
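# An equivalent sketch using collections.Counter to inspect the vocabulary
# counted above; it reproduces dict_/sorted_by_value and is not part of the
# original script.
from collections import Counter

word_counts = Counter(word
                      for sentence in sem_eval_dataset.iterate_x()
                      for word in sentence.split(' '))
print(word_counts.most_common(20))   # 20 most frequent tokens
print(len(word_counts))              # vocabulary size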
#########################
import env
from dataset.dataset import FixedSplitDataSet
from dataset.reader import CSVReader
from preprocessor.ekhprasis import EkhprasisPreprocessor

labels_map = {
    'happy': 'sentiment',
    'sad': 'sentiment',
    'angry': 'sentiment',
    'others': 'nosentiment'
}

ekhprasis_preprocessor = EkhprasisPreprocessor(verbose=1)

## old
sem_eval_dataset_spc = FixedSplitDataSet(
    train_dataset=CSVReader(env.TRAIN_FILE_PATH,
                            preprocessor=ekhprasis_preprocessor).read(
        sents_cols=['turn1', 'turn2', 'turn3'],
        label_col="label",
        merge_with=' <eou> '),
    test_dataset=CSVReader(env.DEV_FILE_PATH,
                           preprocessor=ekhprasis_preprocessor).read(
        sents_cols=['turn1', 'turn2', 'turn3'],
        label_col="label",
        merge_with=' <eou> '),
    skip_labels=['others'])

# `others` and `emoticons_original_old` are assumed to be preprocessing
# dictionaries defined elsewhere in the project.
sem_eval_dataset_sent = FixedSplitDataSet(
    train_dataset=CSVReader(env.TRAIN_FILE_PATH,
                            preprocessor=EkhprasisPreprocessor(
                                verbose=1,
                                dicts=[others, emoticons_original_old])).read(
        sents_cols=['turn1', 'turn2', 'turn3'],
import env
from dataset.dataset import FixedSplitDataSet, SimpleDataSet
from dataset.reader import CSVReader
from preprocessor.ekhprasis import EkhprasisPreprocessor

BATCH_SIZE = 64
EPOCHS = 25

labels_map = {
    'happy': 'sentiment',
    'sad': 'sentiment',
    'angry': 'sentiment',
    'others': 'nosentiment'
}

ekhprasis_preprocessor = EkhprasisPreprocessor(verbose=1)

sem_eval_dataset = FixedSplitDataSet(
    train_dataset=CSVReader(env.TRAIN_FILE_PATH,
                            preprocessor=ekhprasis_preprocessor).read(
        sents_cols=['turn1', 'turn2', 'turn3'],
        label_col="label",
        merge_with=' <eou> '),
    test_dataset=CSVReader(env.DEV_FILE_PATH,
                           preprocessor=ekhprasis_preprocessor).read(
        sents_cols=['turn1', 'turn2', 'turn3'],
        label_col="label",
        merge_with=' <eou> '),
    labels_map=labels_map)

sem_eval_dataset_dev = SimpleDataSet(
    dataset=CSVReader(env.DEV_FILE_PATH,
                      preprocessor=ekhprasis_preprocessor).read(
        sents_cols=['turn1', 'turn2', 'turn3'],
        label_col="label",
        merge_with=' <eou> '),
    labels_map=labels_map,
import env
from dataset.dataset import FixedSplitDataSet
from dataset.reader import CSVReader
from preprocessor.ekhprasis import EkhprasisPreprocessor
# Assumed import path for SimplePreprocessor; adjust to the project layout.
from preprocessor.simple import SimplePreprocessor

EPOCHS = 25

labels_map = {
    'happy': 'sentiment',
    'sad': 'sentiment',
    'angry': 'sentiment',
    'others': 'nosentiment'
}

ekhprasis_preprocessor = EkhprasisPreprocessor(verbose=1)
simple_preprocessor = SimplePreprocessor()

sem_eval_dataset_all = FixedSplitDataSet(
    train_dataset=CSVReader('/data/dataset/all.csv',
                            preprocessor=simple_preprocessor,
                            header=None).read(sents_cols=None,
                                              label_col=None,
                                              merge_with=' <eou> '),
    test_dataset=CSVReader(env.DEV_FILE_PATH_BL,
                           preprocessor=simple_preprocessor,
                           header=None).read(sents_cols=None,
                                             label_col=None,
                                             merge_with=' <eou> '))

sem_eval_dataset = FixedSplitDataSet(
    train_dataset=CSVReader(env.DEV_FILE_PATH_BL,
                            preprocessor=simple_preprocessor,
                            header=None).read(sents_cols=None,
                                              label_col=None,
                                              merge_with=' <eou> '),
    test_dataset=CSVReader(env.TEST_FILE_PATH_BL,
import itertools

import env
from dataset.dataset import FixedSplitDataSet
from dataset.reader import CSVReader
from preprocessor.ekhprasis import EkhprasisPreprocessor

labels_map = {'happy': 'sentiment',
              'sad': 'sentiment',
              'angry': 'sentiment',
              'others': 'nosentiment'}
labels = ['sentiment', 'nosentiment']

ekhprasis_preprocessor = EkhprasisPreprocessor(verbose=1)

sem_eval_dataset = FixedSplitDataSet(
    train_dataset=CSVReader(env.TRAIN_FILE_PATH,
                            preprocessor=ekhprasis_preprocessor).read(
        sents_cols=['turn1', 'turn2', 'turn3'],
        label_col="label",
        merge_with=' eou '),
    test_dataset=CSVReader(env.DEV_FILE_PATH,
                           preprocessor=ekhprasis_preprocessor).read(
        sents_cols=['turn1', 'turn2', 'turn3'],
        label_col="label",
        merge_with=' eou '),
    labels_map=labels_map)

with open('train.csv', 'w', encoding='utf8') as f:
    for xy in itertools.zip_longest(sem_eval_dataset.iterate_train_x(),
                                    sem_eval_dataset.iterate_train_y()):
        f.write(str(labels.index(xy[1])) + ',' + xy[0] + '\n')

with open('val.csv', 'w', encoding='utf8') as f:
    for xy in itertools.zip_longest(sem_eval_dataset.iterate_test_x(),
                                    sem_eval_dataset.iterate_test_y()):
        f.write(str(labels.index(xy[1])) + ',' + xy[0] + '\n')
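# Hypothetical helper for reading the files written above, assuming the
# "<label_index>,<text>" layout produced by the export loop; the line is split
# on the first comma only because the utterances themselves may contain commas.
def read_label_text_csv(path):
    examples = []
    with open(path, encoding='utf8') as f:
        for line in f:
            label_idx, text = line.rstrip('\n').split(',', 1)
            examples.append((labels[int(label_idx)], text))
    return examples

train_examples = read_label_text_csv('train.csv')
print(len(train_examples), train_examples[0])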