## Resume support: if a results CSV from a previous run exists, read it back so
## already-finished splits can be skipped and their micro-F1 scores re-accumulated.
## NOTE(review): `microF1s`, `checkpoint`, `record_file`, `classifier`,
## `parameter_list` and `features` are defined earlier in this script (outside
## this view) -- presumably microF1s is a running float total and checkpoint
## defaults to False; confirm against the full file.
if os.path.exists(constant.save_path + record_file):
    checkpoint = True
    ## read checkpoint
    with open(constant.save_path + record_file, newline='') as csvfile:
        mLines = csvfile.readlines()
    ## get current split: first CSV column of the last record line
    targetLine = mLines[-1]
    ## FIX: split() yields a string, but currentSplit is used below for list
    ## slicing, `+= 1`, and an int comparison -- it must be converted to int.
    currentSplit = int(targetLine.split(',')[0])
    ## read F1 score records: the last (currentSplit + 1) lines cover every
    ## split completed so far, one record per split
    rLines = mLines[-currentSplit - 1:]
    for line in rLines:
        microF1s += float(line.split(',')[1])
    ## resume from the split after the last recorded one
    currentSplit += 1

model = get_classifier(ty=classifier,
                       c=parameter_list[0],
                       n_estimators=parameter_list[1],
                       max_depth=parameter_list[2])
for i in range(constant.num_split):
    ## confirm checkpoint: skip splits already present in the record file
    if checkpoint and i < currentSplit:
        print("Split {} is skipped because it has been run!".format(i))
        continue
    ## prepare feature for model
    X_train, y_train, X_val, y_val, X_test, ind, X_text = get_features_for_prediction(
        features, i, use_pca=False)
    print('shape of X_train', X_train.shape)
    print('shape of X_test', X_test.shape)
    print("###### Running folder %d ######" % (i + 1))
## NOTE(review): the line below is whitespace-mangled source that was collapsed
## into a single `##` comment line. It appears to (1) derive `ty`/`feature`
## from an `item` name ('glove-*' -> twitter/common glove, '<name>-*' ->
## common), then (2) sweep the LR regularization strength c = j/1000 for
## j in 1..9, accumulating micro-F1 over `constant.num_split` folds.
## NOTE(review): `item[:(item.find('-') - 1)]` looks like an off-by-one --
## slicing already excludes the end index, so this drops the last character of
## the feature name (e.g. 'bert-large' -> 'ber'); confirm against the original.
## NOTE(review): the trailing `get_feature(...)` call is truncated
## mid-argument-list in this view, so the runnable form cannot be safely
## reconstructed here -- restore it from the original file before un-mangling.
## distinguish twitter glove and common glove if item[:5] == 'glove': ty = item[6:] feature = item[:5] elif item.find('-') > 0: ty = 'common' feature = item[:(item.find('-') - 1)] else: ty = 'common' feature = item ## compute Micro F1 score for each feature for j in range(1, 10): c = j / 1000 model = get_classifier(ty='LR', c=c) microF1s = 0 for i in range(constant.num_split): ## prepare data for feature-10 folders vocab = generate_vocab() train, val, dev_no_lab = read_data(is_shuffle=True, random_state=i) ## feature_list: glove emoji elmo bert deepmoji emo2vec ## if you want twitter glove or common glove use ty='twitter' and ty='common' X_train, y_train = get_feature( train, vocab, feature_list=[feature], mode=['sum'], split="train",