# Shared dependencies for the run_dnn variants below; the data helpers
# (load_data_iter, load_domain_iter, data_loader, data_gen, domain_data_gen)
# are project-specific and assumed to be defined or imported elsewhere.
import numpy as np
import keras
from keras.layers import (Input, Embedding, Dense, Dropout, Flatten,
                          Conv1D, MaxPool1D, LSTM, Bidirectional)
from keras.models import Model
from sklearn.metrics import f1_score, classification_report

import flipGradientTF  # gradient reversal layer used for domain-adversarial training


# DANN variant: multi-domain BiLSTM model; the embedding and one BiLSTM are
# shared between the sentiment branch and every domain branch.
def run_dnn(data_name, dkeys):
    print('Working on: ' + data_name)

    # parameters
    sent_len = 50  # the max length of sentence
    wt_path = '../../data/weight/' + data_name + '.npy'
    epoch_num = 10
    dp_rate = 0.2

    """Preprocess"""
    # training data
    train_data = load_data_iter(data_name, suffix='train')
    # domain data
    domain_data, dlabel_encoder = load_domain_iter(data_name, dkeys)
    # load weights
    weights = np.load(wt_path)

    """Model, share layers between domain inputs and sentiment inputs"""
    # embedding
    embedding = Embedding(
        weights.shape[0], weights.shape[1],  # size of data embedding
        weights=[weights], input_length=sent_len,
        trainable=True, name='embedding'
    )
    # Bi-LSTM
    bilstm_shared = Bidirectional(LSTM(200, dropout=dp_rate), name='shared_lstm')
    bilstm_senti = Bidirectional(LSTM(200, dropout=dp_rate), name='senti_lstm')
    bilstm_domain = Bidirectional(LSTM(200, dropout=dp_rate), name='domain_lstm')

    '''for sentiment clfs'''
    # input
    text_input = Input(shape=(sent_len,), dtype='int32', name='text_input')
    # define sentiment task layers
    emb_senti = embedding(text_input)
    lstm_shared = bilstm_shared(emb_senti)
    lstm_senti = bilstm_senti(emb_senti)
    merge_lstm = keras.layers.concatenate([lstm_senti, lstm_shared], axis=-1)
    dense_1 = Dense(128, activation='relu')(merge_lstm)
    dense_dp = Dropout(dp_rate)(dense_1)
    senti_preds = Dense(1, activation='sigmoid', name='senti')(dense_dp)  # binary

    # for domain prediction
    hp_lambda = 0.01

    """Obtain the number of domain labels, share layers with the sentiment task"""
    domain_inputs = dict()
    for dkey in dkeys:
        domain_inputs[dkey] = [
            Input(shape=(sent_len,), dtype='int32',
                  name='domain' + str(dkey) + '_input')
        ]
        # shared layers start
        domain_inputs[dkey].append(embedding(domain_inputs[dkey][-1]))
        domain_inputs[dkey].append(bilstm_shared(domain_inputs[dkey][-1]))
        # shared layers end
        domain_inputs[dkey].append(
            bilstm_domain(domain_inputs[dkey][-2])  # embedding as input
        )
        domain_inputs[dkey].append(
            keras.layers.concatenate(
                [domain_inputs[dkey][-2], domain_inputs[dkey][-1]], axis=-1)
        )
        domain_inputs[dkey].append(
            flipGradientTF.GradientReversal(
                hp_lambda, name='domain' + str(dkey) + '_flip'
            )(domain_inputs[dkey][-1])
        )
        # domain_inputs[dkey].append(
        #     Dense(
        #         128, activation='relu', name='domain'+str(dkey)+'_dense'
        #     )(domain_inputs[dkey][-1])
        # )
        # domain_inputs[dkey].append(Dropout(dp_rate)(domain_inputs[dkey][-1]))

        dim_size = len(dlabel_encoder[dkey])
        print(dim_size)
        if dim_size == 2:
            dim_size = 1

        # check the label size
        if dim_size == 1:
            domain_inputs[dkey].append(
                Dense(dim_size, activation='sigmoid',
                      name='domain' + str(dkey) + '_pred')(domain_inputs[dkey][-1])
            )
        else:
            domain_inputs[dkey].append(
                Dense(dim_size, activation='softmax',
                      name='domain' + str(dkey) + '_pred')(domain_inputs[dkey][-1])
            )

    model_sentiment = Model(
        # the first element of each domain task is the input layer
        inputs=[text_input] + [domain_inputs[dkey][0] for dkey in sorted(dkeys)],
        # the last layer of each domain task is the prediction layer
        outputs=[senti_preds] + [domain_inputs[dkey][-1] for dkey in sorted(dkeys)],
    )

    # build loss (and loss weight) for each domain
    loss_dict = {'senti': 'binary_crossentropy'}
    loss_w_dict = {'senti': 1}
    for dkey in dkeys:
        loss_w_dict['domain' + str(dkey) + '_pred'] = 0.1 / len(dkeys)
        if len(dlabel_encoder[dkey]) > 2:
            loss_dict['domain' + str(dkey) + '_pred'] = 'categorical_crossentropy'
        else:
            loss_dict['domain' + str(dkey) + '_pred'] = 'binary_crossentropy'

    model_sentiment.compile(
        loss=loss_dict, loss_weights=loss_w_dict, optimizer='adam')
    print(model_sentiment.summary())

    # fit the model
    cls_w = {'senti': 'auto'}
    for dkey in dkeys:
        cls_w['domain' + str(dkey) + '_pred'] = 'auto'

    # load the development set
    dev_data = load_data_iter(data_name, suffix='dev')
    best_dev = 0
    # test data
    test_data = load_data_iter(data_name, suffix='test')

    for e in range(epoch_num):
        accuracy = 0.0
        loss = 0.0
        step = 1
        print('--------------Epoch: {}--------------'.format(e))
        train_iter = data_gen(train_data)

        # train sentiment, on batches
        for x_train, y_labels in train_iter:
            batch_docs, batch_labels = domain_data_gen(domain_data, dkeys, len(x_train))
            batch_docs['text_input'] = x_train
            batch_labels['senti'] = y_labels

            # skip batches with only 1 class in the training data
            if len(np.unique(batch_labels['senti'])) == 1:
                continue

            # train sentiment model
            tmp_senti = model_sentiment.train_on_batch(
                batch_docs, batch_labels,
                class_weight=cls_w,
            )

            # track the running loss
            loss += tmp_senti[0]
            loss_avg = loss / step
            if step % 40 == 0:
                print('Step: {}'.format(step))
                print('\tLoss: {}.'.format(loss_avg))
                print('-------------------------------------------------')
            step += 1

        # validation process
        y_preds_dev = []
        y_devs = []
        dev_iter = data_gen(dev_data)
        for x_dev, y_dev in dev_iter:
            x_dev = np.asarray(x_dev)
            tmp_preds = model_sentiment.predict([x_dev for _ in range(len(dkeys) + 1)])
            for item_tmp in tmp_preds[0]:
                y_preds_dev.append(np.round(item_tmp[0]))
            for item_tmp in y_dev:
                y_devs.append(int(item_tmp))
        cur_dev = f1_score(y_true=y_devs, y_pred=y_preds_dev, average='weighted')

        # if we get a better dev result, run the test set
        if cur_dev > best_dev:
            best_dev = cur_dev
            test_iter = data_gen(test_data)
            y_preds = []
            y_tests = []
            for x_test, y_test in test_iter:
                x_test = np.asarray(x_test)
                tmp_preds = model_sentiment.predict([x_test for _ in range(len(dkeys) + 1)])
                for item_tmp in tmp_preds[0]:
                    y_preds.append(np.round(item_tmp[0]))
                for item_tmp in y_test:
                    y_tests.append(int(item_tmp))

            test_result = open(
                './DANN_keras_sample_single_domain_lstm3_' + str(dkeys) + '.txt', 'a')
            test_result.write(
                data_name + '\t' + ','.join(map(str, dkeys)) + '\t' + str(e) + '\n')
            test_result.write(
                str(f1_score(y_true=y_tests, y_pred=y_preds, average='weighted')) + '\n')
            test_result.write(
                classification_report(y_true=y_tests, y_pred=y_preds, digits=3))
            test_result.write(
                '...............................................................\n\n')
            test_result.flush()
            test_result.close()
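# --------------------------------------------------------------------------
# Note: flipGradientTF.GradientReversal used above is an external helper that
# implements the core DANN trick: identity in the forward pass, gradient
# multiplied by -hp_lambda in the backward pass, so the shared features are
# trained to confuse the domain classifier. A minimal sketch of such a layer,
# assuming a TensorFlow 2.x / tf.keras backend (the project's flipGradientTF
# module may be implemented differently):
import tensorflow as tf


class GradientReversalSketch(tf.keras.layers.Layer):
    """Identity in the forward pass; scales gradients by -hp_lambda on the way back."""

    def __init__(self, hp_lambda, **kwargs):
        super().__init__(**kwargs)
        self.hp_lambda = hp_lambda

    def call(self, inputs):
        @tf.custom_gradient
        def _reverse(x):
            def grad(dy):
                return -self.hp_lambda * dy  # flip and scale the gradient
            return tf.identity(x), grad
        return _reverse(inputs)
# --------------------------------------------------------------------------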
# DANN variant: shared-CNN model with a separate domain input branch; the
# embedding and the convolutional stack are shared between the two branches.
def run_dnn(data_pair):
    print('Working on: ' + data_pair[1])
    wt_path = './weights/' + data_pair[1] + '.npy'
    train_path = './data/' + data_pair[1] + '_source.txt'
    valid_path = './data/' + data_pair[1] + '_valid.txt'
    test_path = './data/' + data_pair[1] + '_target.txt'
    epoch_num = 15

    # parameters
    sent_len = 60  # the max length of sentence

    # load the data
    domain_data, train_data, valid_data, test_data, label_encoder, domain_encoder = data_loader(
        data_pair[1])
    label_encoder = list(sorted(label_encoder))
    domain_encoder = list(sorted(domain_encoder))

    """Preprocess"""
    # load weights
    weights = np.load(wt_path)

    # inputs
    text_input = Input(shape=(sent_len,), dtype='int32', name='text_input')
    domain_input = Input(shape=(sent_len,), dtype='int32', name='domain_input')

    # shared embedding
    embedding = Embedding(
        weights.shape[0], weights.shape[1],  # size of data embedding
        weights=[weights], input_length=sent_len,
        trainable=False, name='embedding')

    # shared CNN
    conv1 = Conv1D(filters=300, kernel_size=5, padding='valid', strides=1)
    conv2 = Conv1D(filters=200, kernel_size=7, padding='valid', strides=1)
    max_pool = MaxPool1D()
    flatten = Flatten()

    # start to share
    sent_embed = embedding(text_input)
    domain_embed = embedding(domain_input)
    sent_conv1 = conv1(sent_embed)
    domain_conv1 = conv1(domain_embed)
    sent_conv2 = conv2(sent_conv1)
    domain_conv2 = conv2(domain_conv1)
    sent_pool = max_pool(sent_conv2)
    domain_pool = max_pool(domain_conv2)
    sent_flat = flatten(sent_pool)
    domain_flat = flatten(domain_pool)

    # for sentiment clf
    dense_1 = Dense(128, activation='relu')(sent_flat)
    dense_dp = Dropout(0.2)(dense_1)

    # for domain prediction
    hp_lambda = 0.01
    flip = flipGradientTF.GradientReversal(hp_lambda)(domain_flat)
    dense_da = Dense(128, activation='relu')(flip)
    dense_da_dp = Dropout(0.2)(dense_da)
    da_preds = Dense(len(domain_encoder), activation='softmax',
                     name='domain')(dense_da_dp)  # multiple domains

    if 'dianping' in data_pair[1] or 'amazon' in data_pair[1] or 'yelp' in data_pair[1]:
        sentiment_preds = Dense(3, activation='softmax', name='senti')(dense_dp)  # multi-class
        model_sent = Model(
            inputs=[text_input, domain_input],
            outputs=[sentiment_preds, da_preds],
        )
        model_sent.compile(
            loss={'senti': 'categorical_crossentropy',
                  'domain': 'categorical_crossentropy'},
            loss_weights={'senti': 1, 'domain': 0.001},
            optimizer='adam')
    else:
        sentiment_preds = Dense(1, activation='sigmoid', name='senti')(dense_dp)  # binary
        model_sent = Model(
            inputs=[text_input, domain_input],
            outputs=[sentiment_preds, da_preds],
        )
        model_sent.compile(
            loss={'senti': 'binary_crossentropy',
                  'domain': 'categorical_crossentropy'},
            loss_weights={'senti': 1, 'domain': 0.001},
            optimizer='adam')
    print(model_sent.summary())

    best_valid_f1 = 0.0

    # fit the model
    for e in range(epoch_num):
        accuracy = 0.0
        loss = 0.0
        step = 1
        print('--------------Epoch: {}--------------'.format(e))
        train_iter = data_gen(train_data)

        # train sentiment, on batches
        for x_train, y_train in train_iter:
            # skip batches with only 1 class in the training data
            if len(np.unique(y_train)) == 1:
                continue

            batch_docs, batch_labels = domain_data_gen(domain_data, len(x_train))
            batch_docs['text_input'] = x_train

            # one-hot encode the sentiment labels when there are more than two classes
            if len(label_encoder) > 2:
                y_train_tmp = []
                for idx in range(len(y_train)):
                    dlabel = [0] * len(label_encoder)
                    dlabel[label_encoder.index(y_train[idx])] = 1
                    y_train_tmp.append(dlabel)
                y_train = y_train_tmp

            # one-hot encode the domain labels
            dlabels = []
            for idx in range(len(batch_labels['domain'])):
                dlabel = [0] * len(domain_encoder)
                dlabel[domain_encoder.index(batch_labels['domain'][idx])] = 1
                dlabels.append(dlabel)
            batch_labels['domain'] = dlabels
            batch_labels['senti'] = y_train

            # convert to arrays
            for key in batch_docs:
                batch_docs[key] = np.asarray(batch_docs[key])
            for key in batch_labels:
                batch_labels[key] = np.asarray(batch_labels[key])

            # train sentiment model
            tmp_senti = model_sent.train_on_batch(
                batch_docs, batch_labels,
                class_weight={'senti': 'auto', 'domain': 'auto'})

            # track the running loss
            loss += tmp_senti[0]
            loss_avg = loss / step
            if step % 40 == 0:
                print('Step: {}'.format(step))
                print('\tLoss: {}.'.format(loss_avg))
                print('-------------------------------------------------')
            step += 1

        # each epoch, evaluate on the valid data and keep the best weighted F1
        print('Validating....................................................')
        valid_iter = data_gen(valid_data)
        y_preds_valids = []
        y_valids = []
        for x_valid, y_valid in valid_iter:
            x_valid = np.asarray(x_valid)
            tmp_preds_valid = model_sent.predict([x_valid, x_valid])
            for item_tmp in tmp_preds_valid[0]:
                y_preds_valids.append(item_tmp)
            for item_tmp in y_valid:
                y_valids.append(int(item_tmp))

        if len(y_preds_valids[0]) > 2:
            y_preds_valids = np.argmax(y_preds_valids, axis=1)
        else:
            y_preds_valids = [np.round(item[0]) for item in y_preds_valids]

        f1_valid = f1_score(y_true=y_valids, y_pred=y_preds_valids, average='weighted')
        print('Validating f1-weighted score: ' + str(f1_valid))

        # if the validation f1 score improves, run the test set
        if f1_valid > best_valid_f1:
            best_valid_f1 = f1_valid
            test_iter = data_gen(test_data)
            y_preds = []
            y_tests = []
            for x_test, y_test in test_iter:
                x_test = np.asarray(x_test)
                tmp_preds = model_sent.predict([x_test, x_test])
                for item_tmp in tmp_preds[0]:
                    y_preds.append(item_tmp)
                for item_tmp in y_test:
                    y_tests.append(int(item_tmp))

            if len(y_preds[0]) > 2:
                y_preds = np.argmax(y_preds, axis=1)
            else:
                y_preds = [np.round(item[0]) for item in y_preds]

            test_result = open('./results_shared.txt', 'a')
            test_result.write(data_pair[1] + '\n')
            test_result.write(
                'Epoch ' + str(e) + '..................................................\n')
            test_result.write(
                str(f1_score(y_true=y_tests, y_pred=y_preds, average='weighted')) + '\n')
            test_result.write('#####\n\n')
            test_result.write(
                classification_report(y_true=y_tests, y_pred=y_preds, digits=3))
            test_result.write(
                '...............................................................\n\n')
            test_result.flush()
            test_result.close()
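# --------------------------------------------------------------------------
# The manual one-hot loops above could also be written with
# keras.utils.to_categorical. A hedged sketch (the helper name and the
# assumption that raw labels can be mapped to integer indices via the sorted
# encoder lists are illustrative, not part of the original code):
from keras.utils import to_categorical


def one_hot_sketch(labels, encoder):
    """Map raw labels to their index in `encoder` and one-hot encode them."""
    idx = [encoder.index(label) for label in labels]
    return to_categorical(idx, num_classes=len(encoder))

# e.g. batch_labels['domain'] = one_hot_sketch(batch_labels['domain'], domain_encoder)
# --------------------------------------------------------------------------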
# DANN variant: single text input, CNN encoder, and one adversarial prediction
# head per domain factor in dkeys.
def run_dnn(data_name, dkeys):
    print('Working on: ' + data_name)

    # parameters
    sent_len = 50  # the max length of sentence
    wt_path = '../../data/weight/' + data_name + '.npy'
    epoch_num = 20

    """Preprocess"""
    # training data
    train_data, dlabel_encoder = load_data_iter(data_name, dkeys, suffix='train')
    # load weights
    weights = np.load(wt_path)

    """Model"""
    # input
    text_input = Input(shape=(sent_len,), dtype='int32', name='text_input')
    # embedding
    embedding = Embedding(
        weights.shape[0], weights.shape[1],  # size of data embedding
        weights=[weights], input_length=sent_len,
        trainable=True, name='embedding')(text_input)
    # CNN
    conv1 = Conv1D(filters=300, kernel_size=5, padding='valid', strides=1)(embedding)
    conv2 = Conv1D(filters=200, kernel_size=7, padding='valid', strides=1)(conv1)
    max_pool = MaxPool1D()(conv2)
    flatten = Flatten()(max_pool)

    # for sentiment clfs
    dense_1 = Dense(128, activation='relu')(flatten)
    dense_dp = Dropout(0.2)(dense_1)
    sentiment_preds = Dense(1, activation='sigmoid', name='senti')(dense_dp)  # binary

    # for domain prediction
    hp_lambda = 0.01

    """Obtain the number of domain labels"""
    domain_inputs = dict()
    for dkey in dkeys:
        domain_inputs[dkey] = []
        domain_inputs[dkey].append(
            flipGradientTF.GradientReversal(
                hp_lambda, name='domain' + str(dkey) + '_flip')(flatten))
        domain_inputs[dkey].append(
            Dense(128, activation='relu',
                  name='domain' + str(dkey) + '_dense')(domain_inputs[dkey][-1]))
        domain_inputs[dkey].append(Dropout(0.2)(domain_inputs[dkey][-1]))

        # check the label size
        dim_size = len(dlabel_encoder[dkey])
        print(dim_size)
        if dim_size == 2:
            dim_size = 1

        # use sigmoid for the binary case, softmax otherwise
        domain_activation = 'sigmoid' if dim_size == 1 else 'softmax'
        domain_inputs[dkey].append(
            Dense(dim_size, activation=domain_activation,
                  name='domain' + str(dkey) + '_pred')(domain_inputs[dkey][-1]))

    model_sentiment = Model(
        inputs=[text_input],
        # the last layer of each domain task is the prediction layer
        outputs=[sentiment_preds] + [domain_inputs[dkey][-1] for dkey in sorted(dkeys)],
    )

    # build loss (and loss weight) for each domain
    loss_dict = {'senti': 'binary_crossentropy'}
    loss_w_dict = {'senti': 1}
    for dkey in dkeys:
        loss_w_dict['domain' + str(dkey) + '_pred'] = 0.1
        if len(dlabel_encoder[dkey]) > 2:
            loss_dict['domain' + str(dkey) + '_pred'] = 'categorical_crossentropy'
        else:
            loss_dict['domain' + str(dkey) + '_pred'] = 'binary_crossentropy'

    model_sentiment.compile(
        loss=loss_dict, loss_weights=loss_w_dict, optimizer='adam')
    print(model_sentiment.summary())

    # fit the model
    cls_w = {'senti': 'auto'}
    for dkey in dkeys:
        cls_w['domain' + str(dkey) + '_pred'] = 'auto'

    # load the development set
    dev_data, _ = load_data_iter(data_name, dkeys, suffix='dev')
    best_dev = 0
    # test data
    test_data, _ = load_data_iter(data_name, dkeys, suffix='test')

    for e in range(epoch_num):
        # shuffle the data
        np.random.shuffle(train_data)
        accuracy = 0.0
        loss = 0.0
        step = 1
        print('--------------Epoch: {}--------------'.format(e))
        train_iter = data_gen(train_data, dkeys)

        # train sentiment, on batches
        for x_train, train_labels in train_iter:
            # skip batches with only 1 class in the training data
            if len(np.unique(train_labels['senti'])) == 1:
                continue

            # train sentiment model
            tmp_senti = model_sentiment.train_on_batch(
                x_train, train_labels,
                class_weight=cls_w,
            )

            # track the running loss
            loss += tmp_senti[0]
            loss_avg = loss / step
            if step % 40 == 0:
                print('Step: {}'.format(step))
                print('\tLoss: {}.'.format(loss_avg))
                print('-------------------------------------------------')
            step += 1

        # validation process
        y_preds_dev = []
        y_devs = []
        dev_iter = data_gen(dev_data, dkeys)
        for x_dev, y_dev in dev_iter:
            x_dev = np.asarray(x_dev)
            tmp_preds = model_sentiment.predict(x_dev)
            for item_tmp in tmp_preds[0]:
                y_preds_dev.append(np.round(item_tmp[0]))
            for item_tmp in y_dev['senti']:
                y_devs.append(int(item_tmp))
        cur_dev = f1_score(y_true=y_devs, y_pred=y_preds_dev, average='weighted')

        # if we get a better dev result, run the test set
        if cur_dev > best_dev:
            best_dev = cur_dev
            test_iter = data_gen(test_data, dkeys)
            y_preds = []
            y_tests = []
            for x_test, y_test in test_iter:
                x_test = np.asarray(x_test)
                tmp_preds = model_sentiment.predict(x_test)
                for item_tmp in tmp_preds[0]:
                    y_preds.append(np.round(item_tmp[0]))
                for item_tmp in y_test['senti']:
                    y_tests.append(int(item_tmp))

            test_result = open('./results_dann.txt', 'a')
            test_result.write(data_name + '\t' + ','.join(map(str, dkeys)) + '\n')
            test_result.write(
                str(f1_score(y_true=y_tests, y_pred=y_preds, average='weighted')) + '\n')
            test_result.write(
                classification_report(y_true=y_tests, y_pred=y_preds, digits=3))
            test_result.write(
                '...............................................................\n\n')
            test_result.flush()
            test_result.close()
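# --------------------------------------------------------------------------
# Effective per-batch objective of the model above, as configured by loss_dict
# and loss_w_dict: L = L_senti + sum_k 0.1 * L_domain_k, where L_senti is
# binary cross-entropy and each L_domain_k is binary or categorical
# cross-entropy depending on the size of that domain's label set. Because every
# domain head sits behind a GradientReversal layer, minimizing this combined
# loss pushes the shared convolutional features to stay predictive of sentiment
# while becoming uninformative about the domain factors.
# --------------------------------------------------------------------------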
# DANN variant: single text input, CNN encoder, and a single domain head; the
# number of domain labels is read directly from the training file.
def run_dnn(data_pair):
    print('Working on: ' + data_pair[1])
    wt_path = './weights/' + data_pair[1] + '.npy'
    train_path = './data/' + data_pair[1] + '_source.txt'
    valid_path = './data/' + data_pair[1] + '_valid.txt'
    test_path = './data/' + data_pair[1] + '_target.txt'
    epoch_num = 15

    # parameters
    sent_len = 60  # the max length of sentence

    """Preprocess"""
    # load weights
    weights = np.load(wt_path)

    # input
    text_input = Input(shape=(sent_len,), dtype='int32', name='text_input')
    # embedding
    embedding = Embedding(
        weights.shape[0], weights.shape[1],  # size of data embedding
        weights=[weights], input_length=sent_len,
        trainable=False, name='embedding'
    )(text_input)
    # CNN
    conv1 = Conv1D(filters=300, kernel_size=3, padding='valid', strides=1)(embedding)
    conv2 = Conv1D(filters=200, kernel_size=5, padding='valid', strides=1)(conv1)
    max_pool = MaxPool1D()(conv2)
    flatten = Flatten()(max_pool)

    # for sentiment clf
    dense_1 = Dense(128, activation='relu')(flatten)
    dense_dp = Dropout(0.2)(dense_1)

    # for domain prediction
    hp_lambda = 0.01

    """Obtain the number of domain labels"""
    da_num = set()
    with open(train_path) as data_file:
        for line in data_file:
            da_num.add(line.strip().split('\t')[1])  # domain label position

    flip = flipGradientTF.GradientReversal(hp_lambda)(flatten)
    dense_da = Dense(128, activation='relu')(flip)
    dense_da_dp = Dropout(0.2)(dense_da)
    da_preds = Dense(len(da_num), activation='softmax',
                     name='domain')(dense_da_dp)  # multiple domains

    if 'dianping' in data_pair[1] or 'amazon' in data_pair[1] or 'yelp' in data_pair[1]:
        sentiment_preds = Dense(3, activation='softmax', name='senti')(dense_dp)  # multi-class
        model_sentiment = Model(
            inputs=[text_input],
            outputs=[sentiment_preds, da_preds],
        )
        model_sentiment.compile(
            loss={'senti': 'categorical_crossentropy',
                  'domain': 'categorical_crossentropy'},
            loss_weights={'senti': 1, 'domain': 0.01},
            optimizer='adam')
    else:
        sentiment_preds = Dense(1, activation='sigmoid', name='senti')(dense_dp)  # binary
        model_sentiment = Model(
            inputs=[text_input],
            outputs=[sentiment_preds, da_preds],
        )
        model_sentiment.compile(
            loss={'senti': 'binary_crossentropy',
                  'domain': 'categorical_crossentropy'},
            loss_weights={'senti': 1, 'domain': 0.01},
            optimizer='adam')
    print(model_sentiment.summary())

    best_valid_f1 = 0.0

    # fit the model
    for e in range(epoch_num):
        accuracy = 0.0
        loss = 0.0
        step = 1
        print('--------------Epoch: {}--------------'.format(e))
        train_iter = load_data_iter(train_path)

        # train sentiment, on batches
        for x_train, time_labels, y_train in train_iter:
            # skip batches with only 1 class in the training data
            if len(np.unique(y_train)) == 1:
                continue
            if time_labels.shape[0] != y_train.shape[0]:
                continue

            # train sentiment model
            tmp_senti = model_sentiment.train_on_batch(
                x_train,
                {'senti': y_train, 'domain': time_labels},
                class_weight={'senti': 'auto', 'domain': 'auto'}
            )

            # track the running loss
            loss += tmp_senti[0]
            loss_avg = loss / step
            if step % 40 == 0:
                print('Step: {}'.format(step))
                print('\tLoss: {}.'.format(loss_avg))
                print('-------------------------------------------------')
            step += 1

        # each epoch, evaluate on the valid data and keep the best weighted F1
        print('Validating....................................................')
        valid_iter = load_data_iter(valid_path, train=False)
        y_preds_valids = []
        y_valids = []
        for x_valid, y_valid in valid_iter:
            x_valid = np.asarray(x_valid)
            tmp_preds_valid = model_sentiment.predict(x_valid)
            for item_tmp in tmp_preds_valid[0]:
                y_preds_valids.append(item_tmp)
            for item_tmp in y_valid:
                y_valids.append(int(item_tmp))

        if len(y_preds_valids[0]) > 2:
            y_preds_valids = np.argmax(y_preds_valids, axis=1)
        else:
            y_preds_valids = [np.round(item[0]) for item in y_preds_valids]

        f1_valid = f1_score(y_true=y_valids, y_pred=y_preds_valids, average='weighted')
        print('Validating f1-weighted score: ' + str(f1_valid))

        # if the validation f1 score improves, run the test set
        if f1_valid > best_valid_f1:
            best_valid_f1 = f1_valid
            test_iter = load_data_iter(test_path, train=False)
            y_preds = []
            y_tests = []
            for x_test, y_test in test_iter:
                x_test = np.asarray(x_test)
                tmp_preds = model_sentiment.predict(x_test)
                for item_tmp in tmp_preds[0]:
                    y_preds.append(item_tmp)
                for item_tmp in y_test:
                    y_tests.append(int(item_tmp))

            if len(y_preds[0]) > 2:
                y_preds = np.argmax(y_preds, axis=1)
            else:
                y_preds = [np.round(item[0]) for item in y_preds]

            test_result = open('./results.txt', 'a')
            test_result.write(data_pair[1] + '\n')
            test_result.write(
                'Epoch ' + str(e) + '..................................................\n')
            test_result.write(
                str(f1_score(y_true=y_tests, y_pred=y_preds, average='weighted')) + '\n')
            test_result.write('#####\n\n')
            test_result.write(
                classification_report(y_true=y_tests, y_pred=y_preds, digits=3))
            test_result.write(
                '...............................................................\n\n')
            test_result.flush()
            test_result.close()
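# --------------------------------------------------------------------------
# The class_weight={'senti': 'auto', ...} arguments used in the training loops
# above rely on older Keras behaviour; current tf.keras documents class_weight
# as an explicit mapping from class indices to float weights. A hedged sketch
# of computing balanced weights with scikit-learn (illustrative helper, not
# part of the original code; whether per-output class-weight dicts are accepted
# depends on the Keras version in use):
from sklearn.utils.class_weight import compute_class_weight


def balanced_weights_sketch(y):
    """Return a {class_index: weight} dict with inverse-frequency weights."""
    classes = np.unique(y)
    weights = compute_class_weight(class_weight='balanced', classes=classes, y=y)
    return dict(zip(classes.astype(int), weights))
# --------------------------------------------------------------------------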