Example #1
import os

## resume from a checkpoint if a record file from a previous run exists;
## each record line is assumed to look like "<split index>,<micro F1>"
checkpoint = False
currentSplit = 0
microF1s = 0.0
if os.path.exists(constant.save_path + record_file):
    checkpoint = True
    ## read the checkpoint record
    with open(constant.save_path + record_file, newline='') as csvfile:
        mLines = csvfile.readlines()
        ## the last line holds the index of the most recently finished split
        targetLine = mLines[-1]
        currentSplit = int(targetLine.split(',')[0])
        ## re-accumulate the F1 scores recorded for the completed splits
        rLines = mLines[-currentSplit - 1:]
        for line in rLines:
            microF1s += float(line.split(',')[1])
    currentSplit += 1
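
## get_classifier is project-specific and not shown in this snippet. Below is
## a minimal sketch of what it might look like, assuming scikit-learn and that
## ty picks between logistic regression ('LR') and a random forest; the
## parameter handling is an assumption, not the project's confirmed behaviour.
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

def get_classifier(ty='LR', c=1.0, n_estimators=100, max_depth=None):
    if ty == 'LR':
        ## scikit-learn's C is the inverse regularization strength
        return LogisticRegression(C=c)
    return RandomForestClassifier(n_estimators=n_estimators,
                                  max_depth=max_depth)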

## build the classifier; parameter_list is assumed to hold
## [C, n_estimators, max_depth] in that order
model = get_classifier(ty=classifier,
                       c=parameter_list[0],
                       n_estimators=parameter_list[1],
                       max_depth=parameter_list[2])

for i in range(constant.num_split):
    ## confirm checkpoint
    if checkpoint and i < currentSplit:
        print("Split {} is skipped because it has already been run!".format(i))
        continue
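
    ## a matching write step (appending "<split index>,<micro F1>" to the
    ## record file after each finished split) is assumed to happen later in
    ## this loop; it is not shown in this excerpt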

    ## prepare the features for the model
    X_train, y_train, X_val, y_val, X_test, ind, X_text = get_features_for_prediction(
        features, i, use_pca=False)
    print('shape of X_train', X_train.shape)
    print('shape of X_test', X_test.shape)
    print("###### Running folder %d ######" % (i + 1))
    ## distinguish twitter GloVe from common GloVe
    if item[:5] == 'glove':
        ty = item[6:]
        feature = item[:5]
    elif item.find('-') > 0:
        ty = 'common'
        feature = item[:item.find('-')]
    else:
        ty = 'common'
        feature = item
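    ## illustrative item -> (feature, ty) mappings implied by the branches
    ## above (the exact item names are assumptions):
    ##   'glove-twitter' -> feature='glove', ty='twitter'
    ##   'bert-large'    -> feature='bert',  ty='common'
    ##   'elmo'          -> feature='elmo',  ty='common'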

    ## sweep the LR regularization strength C over 0.001 ... 0.009 and
    ## accumulate the micro F1 across splits for each value
    for j in range(1, 10):
        c = j / 1000
        model = get_classifier(ty='LR', c=c)

        microF1s = 0.0
        for k in range(constant.num_split):  ## k avoids shadowing the outer loop's i

            ## prepare the data for this split
            vocab = generate_vocab()
            train, val, dev_no_lab = read_data(is_shuffle=True, random_state=k)
            ## feature_list options: glove, emoji, elmo, bert, deepmoji, emo2vec
            ## pass ty='twitter' or ty='common' to choose between twitter GloVe
            ## and common GloVe
            X_train, y_train = get_feature(
                train,
                vocab,
                feature_list=[feature],
                mode=['sum'],
                split="train",