def stacking_cross_validate(raw_df, add_sentiment=True):
    """Run 10-fold cross-validation of a three-model stacking classifier.

    For every fold, three base predictions are obtained through the
    file-level ``predict`` helper (one LinearSVC run and two SGD runs with
    different n-gram / POS settings).  The three predicted labels for each
    sample are joined into a single space-separated string, vectorised with
    ``CountVectorizer`` + ``TfidfTransformer``, and used to train a
    ``LinearSVC`` meta-classifier.  Per-fold accuracies are printed, followed
    by the mean accuracy over all folds.

    Args:
        raw_df: DataFrame holding the labelled samples.  The ``'ID'`` column
            is used as the target label; any other columns are consumed by
            ``predict`` (not visible here).
        add_sentiment: Forwarded to ``predict`` as ``addsentiment`` for all
            three base models.

    Returns:
        None.  Results are reported on stdout only.
    """
    kfold = KFold(n_splits=10, random_state=90051, shuffle=True)

    # The stop-word list does not depend on the fold, so load it once.
    stop_words = stopwords.words('english')

    fold_accuracies = []
    for train_index, test_index in kfold.split(raw_df):
        train_df = raw_df.iloc[train_index].reset_index(drop=True)
        test_df = raw_df.iloc[test_index].reset_index(drop=True)
        y_train, y_test = train_df['ID'], test_df['ID']

        # Fresh estimators per fold so no state leaks between folds.
        sgd_model = SGDClassifier(loss='hinge', penalty="l2", max_iter=10000, n_jobs=-1, tol=1e-6)
        svm_model = svm.LinearSVC(C=0.68, max_iter=1000, tol=1e-6)

        # NOTE(review): the same sgd_model instance is handed to `predict`
        # twice; this presumes `predict` refits the model on every call.
        train_1, test_1 = predict(svm_model, train_df, test_df, wordngram=[1], pos=True, posngram=[1], addsentiment=add_sentiment, min_tf_idf=1)
        train_2, test_2 = predict(sgd_model, train_df, test_df, wordngram=[2], pos=False, posngram=[1], addsentiment=add_sentiment, min_tf_idf=1)
        train_3, test_3 = predict(sgd_model, train_df, test_df, wordngram=[1], pos=True, posngram=[1, 1000], addsentiment=add_sentiment, min_tf_idf=1)

        # Meta-features: the three base predictions of a sample become one
        # whitespace-separated "document".
        # NOTE(review): assumes the three prediction sequences of a split
        # have equal length (one entry per sample).
        meta_train = [
            str(p1) + ' ' + str(p2) + ' ' + str(p3)
            for p1, p2, p3 in zip(train_1, train_2, train_3)
        ]
        meta_test = [
            str(p1) + ' ' + str(p2) + ' ' + str(p3)
            for p1, p2, p3 in zip(test_1, test_2, test_3)
        ]

        # Named distinctly from the fold splitter to avoid shadowing it.
        count_vectorizer = CountVectorizer(max_df=0.57, stop_words=stop_words, decode_error='ignore')
        train_wc_vec = count_vectorizer.fit_transform(meta_train)
        test_wc_vec = count_vectorizer.transform(meta_test)

        # Re-weight the raw counts with tf-idf (fitted on the training part only).
        transformer = TfidfTransformer(smooth_idf=True, use_idf=True)
        X_train = transformer.fit_transform(train_wc_vec)
        X_test = transformer.transform(test_wc_vec)

        h_model = svm.LinearSVC(C=0.9, max_iter=1000)
        h_model.fit(X_train, y_train)

        predicted_labels = h_model.predict(X_test)
        acc = accuracy_score(y_test, predicted_labels)

        # Held-out accuracy of each base model, for comparison with the stack.
        sub_acc_1 = accuracy_score(y_test, test_1)
        sub_acc_2 = accuracy_score(y_test, test_2)
        sub_acc_3 = accuracy_score(y_test, test_3)
        # The label says "error" but the values printed are accuracies.
        print("####INFO test error: ", acc, sub_acc_1, sub_acc_2, sub_acc_3)

        fold_accuracies.append(acc)

    # Average over the folds actually evaluated instead of a literal 10.
    avg_acc = sum(fold_accuracies) / len(fold_accuracies)
    print("####INFO: trainning", 'Stacking', avg_acc)