def func():
    """Compute SHAP interaction values for one PBS array job's slice of X_train.

    The PBS array index selects which contiguous chunk of the training set
    this job processes; results are memoised on disk via SimpleCache.
    """
    # Used to re-compute specific failed jobs, `None` otherwise.
    failed_jobs = [7, 8]

    job_index = int(os.environ["PBS_ARRAY_INDEX"])
    if failed_jobs is not None:
        # Map the raw array index onto the list of jobs being re-run.
        job_index = failed_jobs[job_index]
    print("Index:", job_index)

    X_train, X_test, y_train, y_test = data_split_cache.load()
    results, rf = cross_val_cache.load()

    samples_per_job = 50
    tree_path_dependent_shap_interact_cache = SimpleCache(
        f"tree_path_dependent_shap_interact_{job_index}_{samples_per_job}",
        cache_dir=os.path.join(CACHE_DIR, "shap_interaction"),
    )

    @tree_path_dependent_shap_interact_cache
    def get_interact_shap_values(model, X):
        return get_shap_values(model, X, interaction=True)

    # Process only this job's slice of the training data.
    start = job_index * samples_per_job
    get_interact_shap_values(rf, X_train[start : start + samples_per_job])
def func():
    """Render and save one 2D partial-dependence plot, chosen by PBS array index."""

    def save_pdp_plot_2d(model, X_train, features, n_jobs):
        """Compute and save a 2D PDP figure for the given feature pair."""
        model.n_jobs = n_jobs
        # Compute the interaction PDP using a threaded joblib backend.
        with parallel_backend("threading", n_jobs=n_jobs):
            pdp_interact_out = pdp.pdp_interact(
                model=model,
                dataset=X_train,
                model_features=X_train.columns,
                features=features,
                num_grid_points=[20, 20],
            )
        fig, axes = pdp.pdp_interact_plot(
            pdp_interact_out, features, x_quantile=True, figsize=(7, 8)
        )
        # Rotate x tick labels so long feature names remain legible.
        axes["pdp_inter_ax"].xaxis.set_tick_params(rotation=45)
        figure_saver.save_figure(fig, "__".join(features), sub_directory="pdp_2d")

    X_train, X_test, y_train, y_test = data_split_cache.load()
    results, rf = cross_val_cache.load()

    # All unordered feature pairs; the array index picks one pair per job.
    feature_pairs = list(combinations(X_train.columns, 2))
    array_index = int(os.environ["PBS_ARRAY_INDEX"])
    print("Index:", array_index)
    print("Columns:", feature_pairs[array_index])

    n_cpus = get_ncpus()
    print("NCPUS:", n_cpus)

    # Use the array index to select the desired columns.
    save_pdp_plot_2d(rf, X_train, feature_pairs[array_index], n_cpus)
def func():
    """Compute SHAP interaction values for one PBS array job's slice of X_train.

    The PBS array index selects which contiguous chunk of the training set
    this job processes; results are memoised on disk via SimpleCache.
    """
    # Used to re-compute specific failed jobs, `None` otherwise.
    failed_jobs = [
        115, 989, 1102, 1163, 1236, 1275, 1276, 1277, 1377, 1378,
        1514, 1515, 1516, 1716, 1771, 1772, 1773, 1774, 1867, 1940,
        1955, 2095, 2174, 2301, 2827, 2908, 2947, 2958, 2960, 3138,
        3168, 3169, 3782, 3783, 3784, 3785, 3786, 3787, 4108, 4128,
        4129, 4130, 4131, 4132, 4224, 4281, 4302, 4357, 4435, 4436,
        4579, 4673, 4865, 4904,
    ]

    job_index = int(os.environ["PBS_ARRAY_INDEX"])
    if failed_jobs is not None:
        # Map the raw array index onto the list of jobs being re-run.
        job_index = failed_jobs[job_index]
    print("Index:", job_index)

    X_train, X_test, y_train, y_test = data_split_cache.load()
    results, rf = cross_val_cache.load()

    samples_per_job = 50
    tree_path_dependent_shap_interact_cache = SimpleCache(
        f"tree_path_dependent_shap_interact_{job_index}_{samples_per_job}",
        cache_dir=os.path.join(CACHE_DIR, "shap_interaction"),
    )

    @tree_path_dependent_shap_interact_cache
    def get_interact_shap_values(model, X):
        return get_shap_values(model, X, interaction=True)

    # Process only this job's slice of the training data.
    start = job_index * samples_per_job
    get_interact_shap_values(rf, X_train[start : start + samples_per_job])
def func():
    """Compute plain (non-interaction) SHAP values for one PBS array job's
    slice of X_train, memoising the result on disk via SimpleCache.
    """
    # Used to re-compute specific failed jobs, `None` otherwise.
    indices = [
        5, 6, 7, 8, 9, 10, 11, 12, 13, 17, 22, 23, 24, 26, 27, 28, 32, 35,
        36, 42, 46, 48, 58, 59, 60, 61, 62, 64, 75, 76, 77, 78, 79, 80, 81,
        82, 83, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102,
        110, 113, 114, 115, 116, 117, 118, 119, 120, 121, 137, 138, 139, 140,
        141, 142, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156,
        182, 183, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
        225, 226, 227, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
        252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265,
        332, 338, 339, 340, 344, 345, 346, 348, 351, 352, 353, 354, 355, 356,
        357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 375,
        390, 391, 392, 393, 394, 395, 396, 399, 400, 401, 402, 403, 404, 406,
        407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420,
        421, 426, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455,
        456, 484, 487, 488, 527, 529, 530, 531, 533, 534, 535, 536, 537, 538,
        539, 540, 572, 574, 575, 577, 578, 579, 580, 581, 584, 585, 586, 587,
        588, 603, 617, 618, 619, 620, 621, 622, 623, 624, 628, 629, 630, 631,
        632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 643, 644, 647, 648,
        649, 650, 651, 652, 666, 679, 685, 686, 695, 696, 697, 698, 699, 700,
        701, 702, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715,
        717, 718, 719, 720, 721, 722, 724, 725, 727, 728, 729, 730, 731, 732,
        733, 734, 745, 746, 769, 770, 771, 782, 784, 785, 786, 787, 788, 789,
        790, 791, 792, 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803,
        804, 805, 806, 807, 808, 809, 818, 834, 843, 845, 846, 847, 849, 850,
        851, 852, 856, 857, 868, 869, 892, 893, 894, 895, 941, 942, 963, 965,
        972, 973, 974, 988, 989,
    ]

    index = int(os.environ["PBS_ARRAY_INDEX"])
    if indices is not None:
        # Map the raw array index onto the list of failed jobs being re-run.
        index = indices[index]
    print("Index:", index)

    X_train, X_test, y_train, y_test = data_split_cache.load()
    results, rf = cross_val_cache.load()

    job_samples = 2000
    tree_path_dependent_shap_cache = SimpleCache(
        f"tree_path_dependent_shap_{index}_{job_samples}",
        cache_dir=os.path.join(CACHE_DIR, "shap"),
    )

    # Fix: the inner function was misleadingly named `get_interact_shap_values`
    # although it computes plain SHAP values (interaction=False). Renamed to
    # match its behavior. The cache identity is the explicit name string passed
    # to SimpleCache above, which is unchanged — presumably SimpleCache does not
    # also key on the wrapped function's __name__; verify against SimpleCache.
    @tree_path_dependent_shap_cache
    def get_job_shap_values(model, X):
        return get_shap_values(model, X, interaction=False)

    # Process only this job's slice of the training data.
    get_job_shap_values(rf, X_train[index * job_samples : (index + 1) * job_samples])
def func():
    """Compute plain (non-interaction) SHAP values for one PBS array job's
    slice of X_train, memoising the result on disk via SimpleCache.
    """
    # Used to re-compute specific failed jobs, `None` otherwise.
    indices = [
        3, 4, 154, 155, 211, 240, 251, 289, 293, 294, 330, 331, 334, 345,
        380, 381, 392, 410, 421, 422, 423, 439, 460, 461, 467, 468, 483,
        503, 516, 522, 523, 528, 536, 539, 540, 541, 542, 543, 558, 560,
        564, 567, 568, 580, 585, 586, 590, 591, 617, 618, 619, 620, 625,
        626, 633, 658, 659, 660, 661, 662, 663, 668, 687, 688, 689, 691,
        692, 693, 694, 695, 696, 697, 698, 700, 702, 703, 721, 746, 754,
        755, 762, 763, 790, 791, 792, 793, 794, 798, 806, 814, 818, 827,
        828, 829, 832, 844, 845, 861, 862, 863, 864, 866, 884, 888, 889,
        890, 891, 892, 897, 900, 902, 903, 904, 905, 906, 910, 911, 912,
        913, 927, 928, 929, 930, 937, 938, 948, 949, 950, 952, 953, 956,
        960, 962, 974, 975, 976, 977, 978, 980, 982, 983,
    ]

    index = int(os.environ["PBS_ARRAY_INDEX"])
    if indices is not None:
        # Map the raw array index onto the list of failed jobs being re-run.
        index = indices[index]
    print("Index:", index)

    X_train, X_test, y_train, y_test = data_split_cache.load()
    results, rf = cross_val_cache.load()

    job_samples = 2000
    tree_path_dependent_shap_cache = SimpleCache(
        f"tree_path_dependent_shap_{index}_{job_samples}",
        cache_dir=os.path.join(CACHE_DIR, "shap"),
    )

    # Fix: the inner function was misleadingly named `get_interact_shap_values`
    # although it computes plain SHAP values (interaction=False). Renamed to
    # match its behavior. The cache identity is the explicit name string passed
    # to SimpleCache above, which is unchanged — presumably SimpleCache does not
    # also key on the wrapped function's __name__; verify against SimpleCache.
    @tree_path_dependent_shap_cache
    def get_job_shap_values(model, X):
        return get_shap_values(model, X, interaction=False)

    # Process only this job's slice of the training data.
    get_job_shap_values(rf, X_train[index * job_samples : (index + 1) * job_samples])
def func():
    """Compute plain (non-interaction) SHAP values for one PBS array job's
    slice of X_train, memoising the result on disk via SimpleCache.
    """
    # Used to re-compute specific failed jobs, `None` otherwise.
    indices = [
        14, 43, 45, 59, 62, 73, 79, 94, 104, 110, 122, 140, 142, 150, 157,
        161, 187, 196, 232, 236, 247, 264, 298, 306, 311, 312, 398, 402,
        408, 410, 446, 454, 456, 459, 460, 461, 462, 464, 467, 469, 470,
        471, 482, 483, 484, 492, 493, 494, 495, 496, 497, 516, 517, 518,
        523, 535, 537, 543, 554, 557, 565, 572, 573, 644, 645, 646, 647,
        648, 671, 673, 674, 676, 677, 678, 679, 700, 701, 715, 750, 751,
        752, 753, 803, 804, 805, 814, 816, 817, 820, 822, 827, 829, 830,
        831, 952, 953, 954, 989,
    ]

    index = int(os.environ["PBS_ARRAY_INDEX"])
    if indices is not None:
        # Map the raw array index onto the list of failed jobs being re-run.
        index = indices[index]
    print("Index:", index)

    X_train, X_test, y_train, y_test = data_split_cache.load()
    results, rf = cross_val_cache.load()

    job_samples = 2000
    tree_path_dependent_shap_cache = SimpleCache(
        f"tree_path_dependent_shap_{index}_{job_samples}",
        cache_dir=os.path.join(CACHE_DIR, "shap"),
    )

    # Fix: the inner function was misleadingly named `get_interact_shap_values`
    # although it computes plain SHAP values (interaction=False). Renamed to
    # match its behavior. The cache identity is the explicit name string passed
    # to SimpleCache above, which is unchanged — presumably SimpleCache does not
    # also key on the wrapped function's __name__; verify against SimpleCache.
    @tree_path_dependent_shap_cache
    def get_job_shap_values(model, X):
        return get_shap_values(model, X, interaction=False)

    # Process only this job's slice of the training data.
    get_job_shap_values(rf, X_train[index * job_samples : (index + 1) * job_samples])
def func():
    """Compute SHAP interaction values for one PBS array job's slice of X_train.

    The PBS array index selects which contiguous chunk of the training set
    this job processes; results are memoised on disk via SimpleCache.
    """
    # Used to re-compute specific failed jobs, `None` otherwise.
    failed_jobs = [
        32, 71, 277, 278, 279, 280, 281, 282, 283, 284, 339, 340, 341, 342,
        343, 344, 379, 481, 483, 498, 500, 515, 516, 517, 518, 519, 520,
        521, 522, 523, 524, 564, 565, 566, 589, 903, 904, 905, 906, 907,
        908, 914, 915, 994, 995, 1016, 1023, 1024, 1025, 1026, 1085, 1103,
        1107, 1123, 1126, 1127, 1128, 1243, 1271, 1272, 1324, 1378, 1385,
        1386, 1387, 1388, 1389, 1390, 1391, 1403, 1404, 1416, 1418, 1438,
        1440, 1455, 1456, 1457, 1589, 1592, 1626, 1627, 1629, 1675, 1676,
        2008, 2009, 2010, 2153, 2160, 2164, 2167, 2169, 2205, 2208, 2248,
        2249, 2250, 2277, 2297, 2298, 2299, 2328, 2337, 2342, 2343, 2379,
        2381, 2382, 2383, 2604, 2919, 2920, 3036, 3048, 3049, 3050, 3051,
        3052, 3059, 3065, 3066, 3067, 3068, 3075, 3076, 3077, 3078, 3079,
        3080, 3081, 3082, 3083, 3085, 3091, 3092, 3093, 3094, 3095, 3102,
        3103, 3104, 3105, 3106, 3107, 3108, 3109, 3110, 3111, 3112, 3113,
        3114, 3115, 3124, 3126, 3170, 3171, 3299, 3301, 3307, 3343, 3716,
        3733, 3734, 3735, 3736, 3751, 3753, 3757, 3817, 3818, 3828, 4046,
        4047, 4048, 4049, 4050, 4051, 4052, 4053, 4054, 4055, 4056, 4057,
        4058, 4059, 4060, 4061, 4062, 4063, 4064, 4065, 4066, 4067, 4074,
        4075, 4080, 4081, 4082, 4083, 4084, 4085, 4086, 4087, 4088, 4089,
        4090, 4091, 4092, 4093, 4094, 4095, 4096, 4097, 4098, 4099, 4100,
        4101, 4102, 4336, 4346, 4347, 4384, 4385, 4386, 4387, 4388, 4389,
        4404, 4420, 4422, 4475, 4483, 4508, 4512, 4527, 4528, 4607, 4608,
        4609, 4610, 4611, 4612, 4613, 4614, 4615, 4616, 4694, 4695, 4696,
        4697, 4729, 4730, 4740, 4761, 4837, 4838, 4880, 4881, 4882, 4991,
        4992, 5032, 5061, 5087, 5096, 5186, 5187, 5188, 5189, 5190, 5191,
        5192, 5193, 5199, 5407, 5408, 5451, 5452, 5511, 5598, 5599, 5600,
        5601, 5602, 5603, 5604, 5605, 5606, 5607, 5608, 5620, 5662, 5667,
        5699, 5700, 5757, 5763, 5852, 5853, 5854, 5855, 5856, 5857, 5941,
        5991,
    ]

    job_index = int(os.environ["PBS_ARRAY_INDEX"])
    if failed_jobs is not None:
        # Map the raw array index onto the list of jobs being re-run.
        job_index = failed_jobs[job_index]
    print("Index:", job_index)

    X_train, X_test, y_train, y_test = data_split_cache.load()
    results, rf = cross_val_cache.load()

    samples_per_job = 50
    tree_path_dependent_shap_interact_cache = SimpleCache(
        f"tree_path_dependent_shap_interact_{job_index}_{samples_per_job}",
        cache_dir=os.path.join(CACHE_DIR, "shap_interaction"),
    )

    @tree_path_dependent_shap_interact_cache
    def get_interact_shap_values(model, X):
        return get_shap_values(model, X, interaction=True)

    # Process only this job's slice of the training data.
    start = job_index * samples_per_job
    get_interact_shap_values(rf, X_train[start : start + samples_per_job])