labels_map = { 'happy': 'sentiment', 'sad': 'sentiment', 'angry': 'sentiment', 'others': 'nosentiment' } ekhprasis_preprocessor = EkhprasisPreprocessor(verbose=1) ## old sem_eval_dataset_spc = FixedSplitDataSet( train_dataset=CSVReader(env.TRAIN_FILE_PATH, preprocessor=ekhprasis_preprocessor).read( sents_cols=['turn1', 'turn2', 'turn3'], label_col="label", merge_with=' <eou> '), test_dataset=CSVReader(env.DEV_FILE_PATH, preprocessor=ekhprasis_preprocessor).read( sents_cols=['turn1', 'turn2', 'turn3'], label_col="label", merge_with=' <eou> '), skip_labels=['others']) sem_eval_dataset_sent = FixedSplitDataSet( train_dataset=CSVReader(env.TRAIN_FILE_PATH, preprocessor=EkhprasisPreprocessor( verbose=1, dicts=[others, emoticons_original_old])).read( sents_cols=['turn1', 'turn2', 'turn3'], label_col="label", merge_with=' <eou> '), test_dataset=CSVReader(env.DEV_FILE_PATH,
from collections import Counter

import env
from dataset.dataset import FixedSplitDataSet
from dataset.reader import CSVReader

# Build the SemEval dataset from the pre-tagged train/dev CSV files.
# Files are header-less; whole rows are read (no sentence/label columns)
# and turns are merged with an explicit end-of-utterance token.
sem_eval_dataset = FixedSplitDataSet(
    train_dataset=CSVReader(env.TRAIN_TAGS_FILE_PATH_, header=None).read(
        sents_cols=None, label_col=None, merge_with=' <eou> '),
    test_dataset=CSVReader(env.DEV_TAGS_FILE_PATH_, header=None).read(
        sents_cols=None, label_col=None, merge_with=' <eou> '))

# Word-frequency table over every sentence (train + test).
# Counter is a dict subclass, so downstream dict-style access still works;
# it replaces the manual setdefault/increment loop with one C-level pass.
dict_ = Counter(
    word
    for sentence in sem_eval_dataset.iterate_x()
    for word in sentence.split(' '))

# Vocabulary sorted by ascending frequency (rarest words first).
sorted_by_value = sorted(dict_.items(), key=lambda kv: kv[1])
# Collapse the four emotion labels into a binary sentiment scheme.
labels_map = {'happy': 'sentiment',
              'sad': 'sentiment',
              'angry': 'sentiment',
              'others': 'nosentiment'}

ekhprasis_preprocessor = EkhprasisPreprocessor(verbose=1)

# Fixed train/dev split over the three-turn dialogue columns, with the
# turns joined by an end-of-utterance marker and labels remapped above.
sem_eval_dataset = FixedSplitDataSet(
    train_dataset=CSVReader(env.TRAIN_FILE_PATH,
                            preprocessor=ekhprasis_preprocessor).read(
        sents_cols=['turn1', 'turn2', 'turn3'],
        label_col="label",
        merge_with=' <eou> '),
    test_dataset=CSVReader(env.DEV_FILE_PATH,
                           preprocessor=ekhprasis_preprocessor).read(
        sents_cols=['turn1', 'turn2', 'turn3'],
        label_col="label",
        merge_with=' <eou> '),
    labels_map=labels_map)

# Bail out early if either required artifact is missing: first the itos
# (vocabulary) file, then the trained classifier, in that order.
# NOTE(review): `os`, `itos_filename` and `trained_classifier_filename`
# are expected to be defined elsewhere in this file — confirm.
for required_path in (itos_filename, trained_classifier_filename):
    if not os.path.exists(required_path):
        print("Could not find " + required_path)
        exit(-1)
# Map each emotion class onto a coarse sentiment / no-sentiment label.
labels_map = {'happy': 'sentiment',
              'sad': 'sentiment',
              'angry': 'sentiment',
              'others': 'nosentiment'}

ekhprasis_preprocessor = EkhprasisPreprocessor(verbose=1)

# Main train/dev dataset: three conversation turns per example, merged
# with an <eou> separator, labels collapsed via labels_map.
sem_eval_dataset = FixedSplitDataSet(
    train_dataset=CSVReader(env.TRAIN_FILE_PATH,
                            preprocessor=ekhprasis_preprocessor).read(
        sents_cols=['turn1', 'turn2', 'turn3'],
        label_col="label",
        merge_with=' <eou> '),
    test_dataset=CSVReader(env.DEV_FILE_PATH,
                           preprocessor=ekhprasis_preprocessor).read(
        sents_cols=['turn1', 'turn2', 'turn3'],
        label_col="label",
        merge_with=' <eou> '),
    labels_map=labels_map)

# Standalone dev-only dataset, class-balanced by downsampling.
sem_eval_dataset_dev = SimpleDataSet(
    dataset=CSVReader(env.DEV_FILE_PATH,
                      preprocessor=ekhprasis_preprocessor).read(
        sents_cols=['turn1', 'turn2', 'turn3'],
        label_col="label",
        merge_with=' <eou> '),
    labels_map=labels_map,
    balancing='downsample')

# Share the training vocabulary so dev examples index into the same
# embedding space as the main dataset.
sem_eval_dataset_dev.word2index = sem_eval_dataset.word2index
import env
from dataset.dataset import FixedSplitDataSet
from dataset.reader import CSVReader
from feature.glove import GensimPretrainedFeature
from preprocessor.ekhprasis import EkhprasisPreprocessor

ekhprasis_preprocessor = EkhprasisPreprocessor(verbose=1)

# Fixed train/dev split; each example is the three dialogue turns glued
# together with an end-of-utterance token. No label remapping here.
SemEvalDataSet = FixedSplitDataSet(
    train_dataset=CSVReader(env.TRAIN_FILE_PATH,
                            preprocessor=ekhprasis_preprocessor).read(
        sents_cols=['turn1', 'turn2', 'turn3'],
        label_col="label",
        merge_with=' <eou> '),
    test_dataset=CSVReader(env.DEV_FILE_PATH,
                           preprocessor=ekhprasis_preprocessor).read(
        sents_cols=['turn1', 'turn2', 'turn3'],
        label_col="label",
        merge_with=' <eou> '))

# Turn the dataset into pretrained-embedding features.
# NOTE(review): "SimpleFeature" appears to name the feature set —
# confirm against GensimPretrainedFeature's constructor.
simple_feature = GensimPretrainedFeature("SimpleFeature")
features = simple_feature.transform(SemEvalDataSet, )
labels_map = { 'happy': 'sentiment', 'sad': 'sentiment', 'angry': 'sentiment', 'others': 'nosentiment' } ekhprasis_preprocessor = EkhprasisPreprocessor(verbose=1) simple_preprocessor = SimplePreprocessor() sem_eval_dataset_all = FixedSplitDataSet( train_dataset=CSVReader('/data/dataset/all.csv', preprocessor=simple_preprocessor, header=None).read(sents_cols=None, label_col=None, merge_with=' <eou> '), test_dataset=CSVReader(env.DEV_FILE_PATH_BL, preprocessor=simple_preprocessor, header=None).read(sents_cols=None, label_col=None, merge_with=' <eou> ')) sem_eval_dataset = FixedSplitDataSet( train_dataset=CSVReader(env.DEV_FILE_PATH_BL, preprocessor=simple_preprocessor, header=None).read(sents_cols=None, label_col=None, merge_with=' <eou> '), test_dataset=CSVReader(env.TEST_FILE_PATH_BL, preprocessor=simple_preprocessor, header=None).read(sents_cols=None,
import env
import itertools
from dataset.dataset import FixedSplitDataSet
from dataset.reader import CSVReader
from preprocessor.ekhprasis import EkhprasisPreprocessor

# Binary sentiment scheme: collapse the three emotions into 'sentiment'.
labels_map = {'happy': 'sentiment',
              'sad': 'sentiment',
              'angry': 'sentiment',
              'others': 'nosentiment'}
# Index order defines the numeric class id written to the CSVs.
labels = ['sentiment', 'nosentiment']

ekhprasis_preprocessor = EkhprasisPreprocessor(verbose=1)

# Train/dev split over the three-turn conversations; turns are joined
# with a plain ' eou ' marker (no angle brackets in this variant).
sem_eval_dataset = FixedSplitDataSet(
    train_dataset=CSVReader(env.TRAIN_FILE_PATH,
                            preprocessor=ekhprasis_preprocessor).read(
        sents_cols=['turn1', 'turn2', 'turn3'],
        label_col="label",
        merge_with=' eou '),
    test_dataset=CSVReader(env.DEV_FILE_PATH,
                           preprocessor=ekhprasis_preprocessor).read(
        sents_cols=['turn1', 'turn2', 'turn3'],
        label_col="label",
        merge_with=' eou '),
    labels_map=labels_map
)


def _export(path, texts, gold_labels):
    """Write one '<label_index>,<text>' line per example to *path*.

    NOTE(review): texts are written unquoted, so a comma inside a
    sentence would break the CSV — confirm downstream reader tolerates
    this (behavior kept identical to the original export loop).
    """
    with open(path, 'w', encoding='utf8') as out:
        for text, gold in itertools.zip_longest(texts, gold_labels):
            out.write(str(labels.index(gold)) + ',' + text + '\n')


_export('train.csv',
        sem_eval_dataset.iterate_train_x(),
        sem_eval_dataset.iterate_train_y())
_export('val.csv',
        sem_eval_dataset.iterate_test_x(),
        sem_eval_dataset.iterate_test_y())