# NOTE: these fragments assume `import pickle` and `from os.path import join as pjoin`.

            flag_unexpected = True
            incomplete_processing[base_feature][weight_method][ds_name].append(sample)
        else:
            # processing did not finish for this sample; record it
            flag_incomplete = True
            incomplete_processing[base_feature][weight_method][ds_name].append(sample)
            # print('processing incomplete for {} {} {}'.format(ds_name, weight_method, sample))

    if flag_nan_exists or flag_incomplete or flag_unexpected:
        pass
        # print('{:20} {:25} - processing unusable; totally skipping it.'.format(base_feature, weight_method))
    else:
        print('{:20} {:5} {:25} - fully usable.'.format(base_feature, ds_name, weight_method))
        dataset.description = '{}_{}'.format(base_feature, weight_method)
        out_path = pjoin(out_dir, '{}_{}.MLDataset.pkl'.format(base_feature, weight_method))
        dataset.save(out_path)

# saving the bookkeeping on incomplete/unusable processing
with open(pjoin(out_dir, 'incomplete_unusable_processing.pkl'), 'wb') as ipf:
    pickle.dump([incomplete_processing, comb_nan_values], ipf)

# reading it back
with open(pjoin(out_dir, 'incomplete_unusable_processing.pkl'), 'rb') as ipf:
    incomplete_processing, comb_nan_values = pickle.load(ipf)

# results
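# A minimal sketch of how the nested `incomplete_processing` structure used
# above could be initialized, so the chained [][][] lookups never raise a
# KeyError. The iterable names (base_features, weight_methods, ds_names) are
# hypothetical stand-ins for however the combinations are enumerated here;
# plain dicts (rather than defaultdicts built from lambdas) keep the whole
# structure picklable for the dump below.
incomplete_processing = {bf: {wm: {ds: [] for ds in ds_names}
                              for wm in weight_methods}
                         for bf in base_features}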
            incomplete_processing[multi_feature][weight_method][ds_name].append(sample)
        else:
            flag_incomplete = True
            sys.stdout.write('-')  # progress marker for each incomplete sample
            incomplete_processing[multi_feature][weight_method][ds_name].append(sample)
            # print('processing incomplete for {} {} {}'.format(ds_name, weight_method, sample))

    if flag_nan_exists or flag_incomplete or flag_unexpected:
        pass
        # print('{:20} {:25} - processing unusable; totally skipping it.'.format(base_feature, weight_method))
    else:
        print('{:20} {:5} {:25} : fully usable.'.format(multi_feature, ds_name, weight_method))
        dataset.description = '{}_{}'.format(summary_stat, weight_method)
        out_path = pjoin(out_dir, '{}_{}.MLDataset.pkl'.format(summary_stat, weight_method))
        dataset.save(out_path)

# saving
with open(pjoin(out_dir, 'incomplete_unusable_processing.pkl'), 'wb') as ipf:
    pickle.dump([incomplete_processing, comb_nan_values], ipf)

# reading
with open(pjoin(out_dir, 'incomplete_unusable_processing.pkl'), 'rb') as ipf:
    incomplete_processing, comb_nan_values = pickle.load(ipf)

# results
            flag_unexpected = True
            incomplete_processing[base_feature][stat_method][ds_name].append(sample)
        else:
            flag_incomplete = True
            incomplete_processing[base_feature][stat_method][ds_name].append(sample)
            # print('processing incomplete for {} {} {}'.format(ds_name, stat_method, sample))

    if flag_nan_exists or flag_incomplete or flag_unexpected:
        pass
        # print('{:20} {:25} - processing unusable; totally skipping it.'.format(base_feature, stat_method))
    else:
        print('{:20} {} \t- fully usable.'.format(base_feature, stat_method))
        dataset.description = '{}'.format(stat_method)
        out_path = pjoin(out_dir, '{}_roi_stats_{}.MLDataset.pkl'.format(base_feature, stat_method))
        dataset.save(out_path)

# saving
with open(pjoin(out_dir, 'incomplete_unusable_processing.pkl'), 'wb') as ipf:
    pickle.dump([incomplete_processing, comb_nan_values], ipf)

# reading
with open(pjoin(out_dir, 'incomplete_unusable_processing.pkl'), 'rb') as ipf:
    incomplete_processing, comb_nan_values = pickle.load(ipf)
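# Sketch: after reloading the pickle above, report how many samples were
# unusable per feature/stat combination. This is pure dict-of-lists traversal
# over the structure populated above; no additional API is assumed.
for feature, per_stat in incomplete_processing.items():
    for stat, per_ds in per_stat.items():
        n_bad = sum(len(samples) for samples in per_ds.values())
        if n_bad > 0:
            print('{:20} {:25} : {} unusable samples'.format(feature, stat, n_bad))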
# requires: import os; import numpy as np; and the MLDataset class (e.g. from pyradigm)

class_set = np.array(['C{:05d}'.format(x) for x in range(num_classes)])
feat_names = np.array([str(x) for x in range(num_features)])

# build a random dataset: one sample per subject, random features
test_dataset = MLDataset()
for class_index, class_id in enumerate(class_set):
    for sub_ix in range(class_sizes[class_index]):
        subj_id = '{}_S{:05d}'.format(class_set[class_index], sub_ix)
        feat = np.random.random(num_features)
        test_dataset.add_sample(subj_id, feat, class_index, class_id, feat_names)

out_file = os.path.join(out_dir, 'random_example_dataset.pkl')
test_dataset.save(out_file)

test_dataset.description = 'test dataset'
print(test_dataset)
print('default format:\n {}'.format(test_dataset))
print('full repr     :\n {:full}'.format(test_dataset))
print('string/short  :\n {:s}'.format(test_dataset))

class_set, label_set, class_sizes = test_dataset.summarize_classes()

# round trip: reload from disk, and copy-construct in memory
reloaded_dataset = MLDataset(filepath=out_file, description='reloaded test_dataset')
copy_dataset = MLDataset(in_dataset=test_dataset)

# pull out all samples of one randomly chosen class
rand_index = np.random.randint(0, len(class_set), 1)[0]
random_class_name = class_set[rand_index]
random_class_ds = test_dataset.get_class(random_class_name)
out_file = os.path.join(out_dir, 'random_example_dataset.pkl')
test_dataset.save(out_file)

# same IDs, new features
same_ids_new_feat = MLDataset()
for sub_id in test_dataset.keys:
    feat = np.random.random(num_features)
    same_ids_new_feat.add_sample(sub_id, feat,
                                 test_dataset.labels[sub_id],
                                 test_dataset.classes[sub_id])
same_ids_new_feat.feature_names = np.array(['new_f{}'.format(x)
                                            for x in range(num_features)])

test_dataset.description = 'test dataset'
print(test_dataset)
print('default format:\n {}'.format(test_dataset))
print('full repr     :\n {:full}'.format(test_dataset))
print('string/short  :\n {:s}'.format(test_dataset))

class_set, label_set, class_sizes = test_dataset.summarize_classes()

reloaded_dataset = MLDataset(filepath=out_file, description='reloaded test_dataset')
copy_dataset = MLDataset(in_dataset=test_dataset)

rand_index = np.random.randint(0, len(class_set), 1)[0]
random_class_name = class_set[rand_index]
random_class_ds = test_dataset.get_class(random_class_name)
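# Sketch of a quick consistency check on the round trips above, using only
# attributes already exercised in these tests (.keys as the iterable of
# sample IDs). A sanity check, not part of the original test flow.
assert set(reloaded_dataset.keys) == set(test_dataset.keys)
assert set(copy_dataset.keys) == set(test_dataset.keys)
# same_ids_new_feat was built from test_dataset.keys, so the IDs must match
# even though its features and feature_names are new.
assert set(same_ids_new_feat.keys) == set(test_dataset.keys)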