コード例 #1
0
from keras import Input, Sequential
import tensorflow as tf
from keras.layers import Lambda, Bidirectional, LSTM, Dense

import env
from ai.layer import WeightedConcatLayer
from dataset.dataset import SimpleDataSet
from dataset.reader import CSVReader
from feature.pretrained import GensimPretrainedFeature
from preprocessor.ekhprasis import EkhprasisPreprocessor

# Shared ekphrasis-based preprocessor for the dev split.
ekhprasis_preprocessor = EkhprasisPreprocessor(verbose=1)

# Dev split: the three dialogue turns of each row are merged into a single
# string with the ' <eou> ' separator.
_dev_reader = CSVReader(env.DEV_FILE_PATH, preprocessor=ekhprasis_preprocessor)
sem_eval_dataset_dev = SimpleDataSet(
    dataset=_dev_reader.read(sents_cols=['turn1', 'turn2', 'turn3'],
                             label_col="label",
                             merge_with=' <eou> '))

# Fixed-length (161) model input.
# NOTE(review): the name `input` shadows the builtin; kept as-is because
# later (unseen) code may refer to it.
input = Input(shape=(161,), name='one_hot_input')

# GloVe twitter 25-d embedding lookup built over the dev-set vocabulary.
glove_features: GensimPretrainedFeature = GensimPretrainedFeature(
    word2index=sem_eval_dataset_dev.word2index,
    input=input,
    gensim_pretrained_embedding='glove-twitter-25',
    embedding_vector_length=25,
    max_len=161)

model = Sequential()
model.add(input)
# Embedding weights stay frozen during training.
model.add(glove_features.embedding_layer(trainable=False))
コード例 #2
0
import env
from dataset.dataset import FixedSplitDataSet
from dataset.reader import CSVReader

def _read_tags(path):
    # Both tag files are header-less and read with identical settings;
    # turns are merged with the ' <eou> ' separator.
    return CSVReader(path, header=None).read(sents_cols=None,
                                             label_col=None,
                                             merge_with=' <eou> ')


# Fixed split built from the pre-tagged train and dev CSV dumps.
sem_eval_dataset = FixedSplitDataSet(
    train_dataset=_read_tags(env.TRAIN_TAGS_FILE_PATH_),
    test_dataset=_read_tags(env.DEV_TAGS_FILE_PATH_))

# Count how often each whitespace-separated token occurs over all sentences.
# Counter replaces the manual setdefault/increment bookkeeping; it is a dict
# subclass, so any downstream dict-style use of `dict_` keeps working.
from collections import Counter

dict_ = Counter(
    word
    for sentence in sem_eval_dataset.iterate_x()
    for word in sentence.split(' '))

# (word, count) pairs ordered by ascending frequency (ties keep first-seen order).
sorted_by_value = sorted(dict_.items(), key=lambda kv: kv[1])
コード例 #3
0
#########################

# Collapse the four emotion classes into a binary sentiment task:
# everything except 'others' counts as 'sentiment'.
labels_map = {
    emotion: ('nosentiment' if emotion == 'others' else 'sentiment')
    for emotion in ('happy', 'sad', 'angry', 'others')
}

# Ekphrasis-based preprocessor shared by both splits below.
ekhprasis_preprocessor = EkhprasisPreprocessor(verbose=1)

## old
# Sentiment-only dataset: rows labeled 'others' are excluded via skip_labels.
_turn_cols = ['turn1', 'turn2', 'turn3']
sem_eval_dataset_spc = FixedSplitDataSet(
    train_dataset=CSVReader(env.TRAIN_FILE_PATH,
                            preprocessor=ekhprasis_preprocessor).read(
                                sents_cols=_turn_cols,
                                label_col="label",
                                merge_with=' <eou> '),
    test_dataset=CSVReader(env.DEV_FILE_PATH,
                           preprocessor=ekhprasis_preprocessor).read(
                               sents_cols=_turn_cols,
                               label_col="label",
                               merge_with=' <eou> '),
    skip_labels=['others'])

sem_eval_dataset_sent = FixedSplitDataSet(
    train_dataset=CSVReader(env.TRAIN_FILE_PATH,
                            preprocessor=EkhprasisPreprocessor(
                                verbose=1,
                                dicts=[others, emoticons_original_old])).read(
                                    sents_cols=['turn1', 'turn2', 'turn3'],
コード例 #4
0
# Training hyper-parameters.
BATCH_SIZE = 64
EPOCHS = 25

# Collapse the emotion labels to a binary sentiment / no-sentiment task.
labels_map = {emotion: 'sentiment' for emotion in ['happy', 'sad', 'angry']}
labels_map['others'] = 'nosentiment'

# Preprocessor shared by the train and dev readers.
ekhprasis_preprocessor = EkhprasisPreprocessor(verbose=1)


def _read_split(path):
    # Both splits are read with identical column settings; the three turns
    # are merged into one string with the ' <eou> ' separator.
    return CSVReader(path, preprocessor=ekhprasis_preprocessor).read(
        sents_cols=['turn1', 'turn2', 'turn3'],
        label_col="label",
        merge_with=' <eou> ')


# Fixed train/dev split; labels are collapsed through labels_map.
sem_eval_dataset = FixedSplitDataSet(
    train_dataset=_read_split(env.TRAIN_FILE_PATH),
    test_dataset=_read_split(env.DEV_FILE_PATH),
    labels_map=labels_map)

sem_eval_dataset_dev = SimpleDataSet(dataset=CSVReader(
    env.DEV_FILE_PATH, preprocessor=ekhprasis_preprocessor).read(
        sents_cols=['turn1', 'turn2', 'turn3'],
        label_col="label",
        merge_with=' <eou> '),
                                     labels_map=labels_map,
コード例 #5
0
EPOCHS = 25  # number of training epochs

# Binary sentiment mapping: the three emotion classes map to 'sentiment',
# the catch-all 'others' class maps to 'nosentiment'.
labels_map = {}
for _emotion in ('happy', 'sad', 'angry'):
    labels_map[_emotion] = 'sentiment'
labels_map['others'] = 'nosentiment'

# Two preprocessing strategies used by the datasets below:
# an ekphrasis-based one and a simpler baseline preprocessor.
ekhprasis_preprocessor = EkhprasisPreprocessor(verbose=1)
simple_preprocessor = SimplePreprocessor()

# Both CSVs are header-less and read without named sentence/label columns;
# pieces are merged with the ' <eou> ' separator.
_plain_read = dict(sents_cols=None, label_col=None, merge_with=' <eou> ')
sem_eval_dataset_all = FixedSplitDataSet(
    train_dataset=CSVReader('/data/dataset/all.csv',
                            preprocessor=simple_preprocessor,
                            header=None).read(**_plain_read),
    test_dataset=CSVReader(env.DEV_FILE_PATH_BL,
                           preprocessor=simple_preprocessor,
                           header=None).read(**_plain_read))

sem_eval_dataset = FixedSplitDataSet(
    train_dataset=CSVReader(env.DEV_FILE_PATH_BL,
                            preprocessor=simple_preprocessor,
                            header=None).read(sents_cols=None,
                                              label_col=None,
                                              merge_with=' <eou> '),
    test_dataset=CSVReader(env.TEST_FILE_PATH_BL,
コード例 #6
0
import env
import itertools
from dataset.dataset import FixedSplitDataSet
from dataset.reader import CSVReader
from preprocessor.ekhprasis import EkhprasisPreprocessor

# Map the four SemEval emotions onto a binary sentiment task; `labels`
# fixes the index order used when writing the CSV dumps below.
labels_map = dict.fromkeys(('happy', 'sad', 'angry'), 'sentiment')
labels_map['others'] = 'nosentiment'
labels = ['sentiment', 'nosentiment']

ekhprasis_preprocessor = EkhprasisPreprocessor(verbose=1)

# Fixed train/dev split; labels are collapsed to binary sentiment via
# labels_map. Note the separator here is ' eou ' (no angle brackets).
_turn_cols = ['turn1', 'turn2', 'turn3']
sem_eval_dataset = FixedSplitDataSet(
    train_dataset=CSVReader(env.TRAIN_FILE_PATH,
                            preprocessor=ekhprasis_preprocessor).read(
        sents_cols=_turn_cols, label_col="label", merge_with=' eou '),
    test_dataset=CSVReader(env.DEV_FILE_PATH,
                           preprocessor=ekhprasis_preprocessor).read(
        sents_cols=_turn_cols, label_col="label", merge_with=' eou '),
    labels_map=labels_map,
)


# Dump each split as "<label_index>,<text>" lines. zip_longest is kept from
# the original: if x/y iterators disagree in length the shorter side yields
# None and the write fails loudly instead of silently truncating.
with open('train.csv', 'w', encoding='utf8') as train_file:
    train_pairs = itertools.zip_longest(sem_eval_dataset.iterate_train_x(),
                                        sem_eval_dataset.iterate_train_y())
    for text, label in train_pairs:
        train_file.write(f"{labels.index(label)},{text}\n")


with open('val.csv', 'w', encoding='utf8') as val_file:
    val_pairs = itertools.zip_longest(sem_eval_dataset.iterate_test_x(),
                                      sem_eval_dataset.iterate_test_y())
    for text, label in val_pairs:
        val_file.write(f"{labels.index(label)},{text}\n")