def class_balancing(self):
    """
    Repeat label-1 ("malignant") entries until both classes are balanced.

    The number of label-1 rows to add is ``total - 2 * positives``. It is
    split with integer arithmetic into whole copies of the label-1 set plus
    a remainder that is drawn at random without replacement.

    Two modes, selected by ``len(self.params["distance"])``:

    * fewer than 3 entries: an ``(n, 2)`` array is built from
      ``self.list_IDs_original`` (string IDs are parsed first when
      ``self.params["partition"] == 'read'``), oversampled and returned.
      No attribute of ``self`` is reassigned in this mode.
    * otherwise: ``self.list_IDs`` is extended with the label-1 rows of
      fresh ``self.make_list()`` results and then returned.

    When there are no label-1 entries, or they already outnumber the other
    entries, the list is returned without any oversampling: the ratio
    would otherwise divide by zero or ask for a negative sample size.

    Returns:
        The array of IDs after oversampling.
    """
    if len(self.params["distance"]) < 3:
        patient_list = np.empty(shape=(0, 2))
        for ID in self.list_IDs_original:
            if self.params["partition"] == 'read':
                # Textual form "['name', 3]" -> ['name', '3']
                ID = ID.replace("]", '').replace("[", '')
                ID = ID.replace("'", '').split(", ")
            ID = np.asarray(ID)
            ID = ID.reshape((1, 2))
            # Round-trip through int: raises ValueError when the second
            # field is not an integer literal.
            ID[0, 1] = int(ID[0, 1])
            patient_list = np.append(patient_list, ID, axis=0)

        labels = yReadFunction(patient_list, self.params)
        malignant = patient_list[labels == 1]
        mal_num = int(np.sum(labels))
        deficit = len(labels) - 2 * mal_num
        if mal_num == 0 or deficit < 0:
            # Nothing to repeat, or label 1 is already the majority.
            return patient_list

        full_copies, remainder = divmod(deficit, mal_num)
        picks = random.sample(range(mal_num), remainder)
        return np.concatenate(
            [patient_list] + [malignant] * full_copies + [malignant[picks]],
            axis=0,
        )

    labels = yReadFunction(self.list_IDs, self.params)
    mal_num = int(np.sum(labels))
    deficit = len(labels) - 2 * mal_num
    if mal_num == 0 or deficit < 0:
        # Nothing to repeat, or label 1 is already the majority.
        return self.list_IDs

    def fresh_malignant():
        # Label-1 rows of a newly generated list.
        candidates = self.make_list()
        candidate_labels = yReadFunction(candidates, self.params)
        return candidates[candidate_labels == 1]

    full_copies, remainder = divmod(deficit, mal_num)
    for _ in range(full_copies):
        # list_IDs is updated on every pass, before the next make_list().
        self.list_IDs = np.append(self.list_IDs, fresh_malignant(), axis=0)

    # Draw the indices before generating the last list so the order of
    # random-state consumption stays: sample first, make_list second.
    picks = random.sample(range(mal_num), remainder)
    # NOTE(review): the picks index into a fresh make_list() result, which
    # assumes it holds at least mal_num label-1 rows - confirm.
    self.list_IDs = np.append(self.list_IDs, fresh_malignant()[picks], axis=0)
    return self.list_IDs
def check_class_balance(partition, params, balance=(0.1, 0.1)):
    """
    Check that train and validation have a label rate close to the overall one.

    For each of ``partition["train"]`` and ``partition["validation"]`` the
    mean label (sum of labels / number of labels) is compared with the mean
    over both partitions together. The relative deviation
    ``abs((overall - part) / overall)`` must not exceed the tolerance.

    Args:
        partition: mapping with "train" and "validation" entries. Both
            entries are replaced in place by ``np.asarray`` of themselves.
        params: passed through to ``yReadFunction`` to obtain the labels.
        balance: tolerances as ``(train_tolerance, validation_tolerance)``.
            If a third element is supplied it is used as the training
            tolerance instead of the first one.

    Returns:
        ``True`` when both deviations are within tolerance, else ``False``.
    """
    partition["train"] = np.asarray(partition["train"])
    partition["validation"] = np.asarray(partition["validation"])
    train_label = yReadFunction(partition["train"], params)
    val_label = yReadFunction(partition["validation"], params)

    train_sum = np.sum(train_label)
    val_sum = np.sum(val_label)
    train = train_sum / len(train_label)
    val = val_sum / len(val_label)
    tot = (val_sum + train_sum) / (len(val_label) + len(train_label))

    # Index 2 does not exist on the two-element default, so fall back to the
    # first element; callers that do pass a third element keep getting it.
    train_tolerance = balance[2] if len(balance) > 2 else balance[0]
    val_tolerance = balance[1]

    train_off = abs((tot - train) / tot) > train_tolerance
    val_off = abs((tot - val) / tot) > val_tolerance
    return not (train_off or val_off)
def prepare_batch(self, list_IDs):
    """
    Prepare a batch of data: read, augment and stack the samples of list_IDs.

    For every ID the image and mask are read with
    ``self.params['xReadFunction']``, passed through ``self.imaugment`` and,
    when ``self.params["only"] == "both"``, concatenated along the last
    axis. The label comes from ``self.params['yReadFunction']``. When
    ``self.params["scalars"] is True`` an extra scalar record is read per ID
    with ``self.params['scalarsReadFunction']``.

    After stacking, ``self.save_images`` is called once with the
    ``[..., 0:1]`` and ``[..., 1:2]`` slices of the stacked array.

    Args:
        list_IDs: iterable of IDs. An entry that is not an ``np.ndarray`` is
            treated as the text form ``"['name', 3]"`` and parsed into a
            two-element array.

    Returns:
        ``(X, Y)`` when ``self.params["scalars"] is False``, otherwise
        ``([X, S], Y)``, all as NumPy arrays.
    """
    params = self.params
    xReadFunction = params['xReadFunction']
    yReadFunction = params['yReadFunction']
    # With 3 or more distance entries the reader returns a third value,
    # which is stored back into ID[1].
    reader_returns_index = len(params["distance"]) >= 3

    X, Y, S = [], [], []
    for ID in list_IDs:
        if not isinstance(ID, np.ndarray):
            ID = ID.replace("]", '').replace("[", '')
            ID = ID.replace("'", '').split(", ")
            # Raises ValueError when the second field is not an integer.
            ID[1] = int(ID[1])
            ID = np.asarray(ID)

        if reader_returns_index:
            x, seg, ID[1] = xReadFunction(ID, params, im_mask="both",
                                          data=params["data"])
        else:
            x, seg = xReadFunction(ID, params, im_mask="both",
                                   data=params["data"])

        x, seg = self.imaugment(x, seg)
        if params["only"] == "both":
            x = np.concatenate([x, seg], axis=-1)

        X.append(x)
        Y.append(yReadFunction(ID, params))

        if params["scalars"] is True:
            scalarsReadFunction = params['scalarsReadFunction']
            S.append(scalarsReadFunction(ID, params["source"],
                                         data=params["data"]))

    S = np.asarray(S)
    X = np.asarray(X)
    Y = np.asarray(Y)

    # NOTE(review): the second slice is only the mask channel when image and
    # mask were concatenated above - confirm for other "only" settings.
    self.save_images(X[..., 0:1], X[..., 1:2], list_IDs, overlay=False)

    if params["scalars"] is False:
        return X, Y
    return [X, S], Y
def prepare_batch(self, list_IDs):
    """
    Build one batch: read, augment and stack the samples named by list_IDs.

    Each ID is read with ``self.params['xReadFunction']`` (image and mask),
    run through ``self.imaugment``, and the two results are concatenated
    along the last axis. The label is read with
    ``self.params['yReadFunction']``.

    Args:
        list_IDs: iterable of IDs. Entries that are not ``np.ndarray`` are
            parsed from the text form ``"['name', 3]"`` into a two-element
            array.

    Returns:
        ``(X, Y)``: the stacked inputs and the stacked labels as arrays.
    """
    read_x = self.params['xReadFunction']
    read_y = self.params['yReadFunction']

    inputs = []
    targets = []
    for entry in list_IDs:
        if not isinstance(entry, np.ndarray):
            stripped = entry.replace("]", '').replace("[", '').replace("'", '')
            fields = stripped.split(", ")
            fields[1] = int(fields[1])
            entry = np.asarray(fields)

        # With 3 or more distance entries the reader hands back a third
        # value, which is written into the ID itself.
        three_outputs = not (len(self.params["distance"]) < 3)
        loaded = read_x(entry, self.params, im_mask="both",
                        data=self.params["data"])
        if three_outputs:
            image, mask, entry[1] = loaded
        else:
            image, mask = loaded

        image, mask = self.imaugment(image, mask)
        inputs.append(np.concatenate([image, mask], axis=-1))
        targets.append(read_y(entry, self.params))

    return np.asarray(inputs), np.asarray(targets)
def class_balancing(self):
    """
    Repeat label-1 ("malignant") entries until both classes are balanced.

    Labels for ``self.list_IDs_original`` are read once with
    ``yReadFunction``. The number of label-1 rows to add is
    ``total - 2 * positives``. It is split with integer arithmetic into
    whole copies of the label-1 rows plus a remainder drawn at random
    without replacement.

    When there are no label-1 entries, or they already outnumber the other
    entries, ``self.list_IDs_original`` is returned as is: the ratio would
    otherwise divide by zero or ask for a negative sample size.

    Returns:
        The array of IDs after oversampling. ``self.list_IDs_original``
        itself is not reassigned.
    """
    patient_list = self.list_IDs_original
    # The labels do not depend on any single ID, so they are read once
    # rather than inside a loop over the entries.
    labels = yReadFunction(patient_list, self.params)
    mal_num = int(np.sum(labels))
    deficit = len(labels) - 2 * mal_num
    if mal_num == 0 or deficit < 0:
        # Nothing to repeat, or label 1 is already the majority.
        return patient_list

    malignant = patient_list[labels == 1]
    full_copies, remainder = divmod(deficit, mal_num)
    picks = random.sample(range(mal_num), remainder)
    return np.concatenate(
        [patient_list] + [malignant] * full_copies + [malignant[picks]],
        axis=0,
    )