def intra_patient_split(patient_id, train_frac=0.8, val_frac=0.1):
    """Split one patient's seizure clips into train/val/test partitions.

    Seizures are counted and the clip list is cut into three consecutive,
    disjoint slices sized by the requested fractions. At least one seizure
    is always reserved for validation and one for test.

    Parameters
    ----------
    patient_id : int
        Identifier forwarded to ``Patient``.
    train_frac : float, optional
        Fraction of seizures assigned to training.
    val_frac : float, optional
        Fraction of seizures assigned to validation; the remainder
        (``1 - train_frac - val_frac``) goes to test.

    Returns
    -------
    tuple
        ``(train_clips, val_clips, test_clips)`` — slices of
        ``Patient.get_seizure_clips()``.
        NOTE(review): slicing clips by seizure counts assumes one clip per
        seizure — confirm against ``get_seizure_clips``.
    """
    patient = Patient(patient_id)
    n_seizures = len(patient.get_seizures())
    clips = patient.get_seizure_clips()

    test_frac = 1.0 - train_frac - val_frac
    # Always keep at least one seizure in val and one in test.
    n_val = max(1, int(round(val_frac * n_seizures)))
    n_test = max(1, int(round(test_frac * n_seizures)))
    # BUG FIX: with very few seizures the original train count could go
    # negative; a negative slice bound silently dropped clips from the end
    # of the training slice and misaligned the other partitions. Clamp to 0.
    n_train = max(0, n_seizures - n_val - n_test)

    return (
        clips[:n_train],                  # training clips
        clips[n_train:n_train + n_val],   # validation clips
        clips[n_train + n_val:],          # test clips
    )
def load_dataset_chbmit(path_save: str, n_samples=200, random_state=42,
                        pre_load=False):
    """Read the CHB-MIT database and return data vectors and class labels.

    On the first call the per-class parquet caches are built under
    ``path_save/as_dataset`` from patients 1-10; later calls read those
    caches. Either way, a balanced random sample of ``n_samples`` epochs
    per class is drawn, persisted as ``sampled_dataset.parquet`` and
    returned. With ``pre_load=True`` the previously sampled dataset is
    loaded directly instead.

    Parameters
    ----------
    path_save : str
        Root directory holding (or receiving) the cached parquet files.
    n_samples : int, optional
        Number of epochs sampled from each class.
    random_state : int, optional
        Seed forwarded to ``DataFrame.sample`` for reproducibility.
    pre_load : bool, optional
        If True, skip building/sampling and read ``sampled_dataset.parquet``.

    Returns
    -------
    X : array-like, shape (n_samples, n_features)
        Epoch feature vectors.
    y : array-like, shape (n_samples,)
        Binary targets (0 = non-seizure, 1 = seizure).
    """
    # Local import: DataFrame.append was removed in pandas 2.0; concat is
    # its supported replacement.
    from pandas import concat

    path_dataset = join(path_save, "as_dataset")
    name_dataset_non = join(path_dataset, "data_frame_non.parquet")
    name_dataset_seiz = join(path_dataset, "data_frame_seiz.parquet")

    if not check_exist(path_save, "as_dataset") and not pre_load:
        print("Loading the files to create dataset")
        data_frame_non = []
        data_frame_seiz = []
        for person_id in range(1, 11):
            # BUG FIX: the original literal was mojibake ("nÂș") — restored
            # the intended "nº".
            print("Loading Patients nº {}".format(person_id))
            pat = Patient(person_id, path_save)
            non_epoch_array = list(map(split_4096, pat.get_non_seizures()))
            data_frame_non.append(concatenate(non_epoch_array))
            s_clips = pat.get_seizure_clips()
            if s_clips:  # skip patients with no usable seizure clips
                seiz_epoch = list(filter_empty(list(map(split_4096, s_clips))))
                data_frame_seiz.append(concatenate(seiz_epoch))

        data_frame_non = DataFrame(concatenate(data_frame_non))
        data_frame_non["class"] = [0] * len(data_frame_non)
        # parquet requires string column names
        data_frame_non.columns = data_frame_non.columns.astype(str)
        data_frame_non.to_parquet(name_dataset_non, engine="pyarrow")

        data_frame_seiz = DataFrame(concatenate(data_frame_seiz))
        data_frame_seiz["class"] = [1] * len(data_frame_seiz)
        data_frame_seiz.columns = data_frame_seiz.columns.astype(str)
        data_frame_seiz.to_parquet(name_dataset_seiz, engine="pyarrow")
    elif not pre_load:
        print("Reading as dataframe")
        data_frame_non = read_parquet(name_dataset_non, engine="pyarrow")
        data_frame_seiz = read_parquet(name_dataset_seiz, engine="pyarrow")
    else:
        # NOTE(review): plain "+" path concatenation kept byte-identical for
        # cache compatibility, although join() is used everywhere else.
        data_frame = read_parquet(path_save + "sampled_dataset.parquet",
                                  engine="pyarrow")
        # BUG FIX: drop("class", 1) used the positional-axis form removed in
        # pandas 2.0.
        return (data_frame.drop(columns="class").to_numpy(),
                data_frame["class"].values)

    # Balanced sample: n_samples epochs from each class, fixed seed.
    sample_non = data_frame_non.sample(n=n_samples, random_state=random_state)
    sample_seiz = data_frame_seiz.sample(n=n_samples, random_state=random_state)
    # BUG FIX: sample_non.append(sample_seiz) was removed in pandas 2.0.
    data_frame = concat([sample_non, sample_seiz])
    data_frame.columns = data_frame.columns.astype(str)
    data_frame.to_parquet(path_save + "sampled_dataset.parquet",
                          engine="pyarrow")
    return (data_frame.drop(columns="class").to_numpy(),
            data_frame["class"].values)