return (mse(y_t, y_p))**0.5


# Labels of the datasets to process: the pooled set, the six numbered
# clusters, and then every individual district.
data_list = (
    ['All Districts']
    + ['Cluster {}'.format(k) for k in range(6)]
    + [
        'Ariyalur', 'Chennai', 'Coimbatore', 'Cuddalore', 'Dharmapuri',
        'Dindigul', 'Erode', 'Kancheepuram', 'Karur', 'Madurai',
        'Nagapattinam', 'Namakkal', 'Perambalur', 'Pudukkottai',
        'Ramanathapuram', 'Salem', 'Sivaganga', 'Thanjavur', 'Theni',
        'The Nilgiris', 'Thiruvallur', 'Thiruvarur', 'Thoothukkudi',
        'Tiruchirapalli', 'Tirunelveli', 'Tiruvannamalai', 'Vellore',
        'Viluppuram', 'Virudhunagar',
    ]
)

# Parameter table loaded from a machine-specific absolute path.
parameters = pd.read_csv(
    'C:\\Users\\Preetham G\\Documents\\Research Projects\\Forecast of Rainfall Quantity and its variation using Envrionmental Features\\Results\\Parameters\\Parameters.csv'
)

# NOTE: this rebinds `rkf` from the callable to the object it returns, so the
# callable itself is no longer reachable under this name afterwards.
# (presumably `rkf` is sklearn's RepeatedKFold -- the import is not visible
# here; no random_state is passed.)
rkf = rkf(n_repeats=10, n_splits=10)

# Accumulators for the columns of the result table.
# (presumably `dl` holds dataset labels and `m` method names -- confirm
# against the loop body that fills them.)
dl, m = [], []
mse_ts, rmse_ts = [], []
mae_ts, mdae_ts = [], []
evs_ts, r2_ts = [], []

# Names of the regression methods.
method = [
    'Multiple Linear Regression',
    'Support Vector Regression',
    'Decision Tree Regression',
    'Polynomial Regression',
]

# Iterate over every dataset label.
for i in data_list:
Exemple #2
0
from sklearn.model_selection import RepeatedKFold as rkf
from sklearn.model_selection import train_test_split as tts

# Machine-specific location of the dataset files and how many numbered
# files (indices 0 .. NUM_FILES - 1) are expected there.
DATA_FOLDER = r"E:\ML Projects\Tennessee Eastman\TE_process_dataset"
NUM_FILES = 22

# File names follow "dNN.dat" (training) and "dNN_te.dat" (test), with NN
# being the zero-padded two-digit file index.
training_files = [
    os.path.join(DATA_FOLDER, f"d{idx:02d}.dat") for idx in range(NUM_FILES)
]
test_files = [
    os.path.join(DATA_FOLDER, f"d{idx:02d}_te.dat") for idx in range(NUM_FILES)
]

# One DataFrame per file, in file-index order.
# NOTE(review): the separator is a single space and no `header=` is given, so
# pandas uses the first line of each file as column names -- confirm that this
# matches the actual layout of the .dat files.
training_data = [
    pd.read_csv(path, sep=' ', index_col=None)
    for path in training_files
]
test_data = [
    pd.read_csv(path, sep=' ')
    for path in test_files
]

# -----------------------------------------------------------------------------
#   Rationale: the training data is relatively small (490 samples per fault),
#   so a K-fold strategy per fault seems necessary. On top of the K-fold,
#   carving a validation set out of the training data is probably wise: it
#   avoids the data leakage that tuning on the provided test set would cause.
# -----------------------------------------------------------------------------
seed = 10
split_size = 0.30

# Repeated K-fold splitter; note that this rebinds `rkf` from the imported
# RepeatedKFold alias to the configured instance.
rkf = rkf(n_splits=5, n_repeats=10, random_state=seed)