# --- Training-epoch setup and positive-batch accumulation (non-augmented variant) ---
# NOTE(review): this fragment is truncated — it ends inside an unfinished
# np.zeros(shape=[...], call; the dtype argument and everything after it are
# outside this view. Outer-scope names (BATCH_SIZE, positive_train_num,
# positive_train_indices, ptfiles, mt, negative_probabilities) are defined
# elsewhere in the file.
train_num = 0
train_loss = 0.0
train_accuracy = 0.0
# Half the batch is reserved for positive (nodule) samples; the other half
# is filled with negatives below.
positive_batch_size = int(BATCH_SIZE / 2)
for pbi in range(0, positive_train_num, positive_batch_size):
    #print("training process:%d/%d" %(pbi, positive_train_num))
    # Accumulate one mini-batch worth of positive volumes.
    for pti in range(
            pbi, min(pbi + positive_batch_size, positive_train_num)):
        data_index = positive_train_indices[pti]
        pfile = ptfiles[data_index]
        # Look up the nodule diameter for this patient from the LUNA16
        # annotation CSV (used by extract_volumes below).
        patient_uid, nodule_diameter = mt.get_annotation_informations(
            pfile, "LUNA16/csvfiles/annotations.csv")
        positive_data = np.load(pfile)
        # Fragile first-iteration check: dir() tests whether the local name
        # exists yet. Presumably positive_batch is deleted between outer
        # iterations elsewhere in the file — TODO confirm, otherwise the
        # batch grows across iterations.
        if "positive_batch" not in dir():
            positive_batch = mt.extract_volumes(
                positive_data, nodule_diameter=nodule_diameter)
        else:
            positive_batch = np.concatenate(
                (positive_batch,
                 mt.extract_volumes(
                     positive_data, nodule_diameter=nodule_diameter)),
                axis=0)
    # Match the negative batch to the positive batch size, capped by how
    # many negatives still have non-zero sampling probability.
    negative_batch_size = min(
        positive_batch.shape[0], negative_probabilities.nonzero()[0].size)
    if negative_batch_size > 0:
        negative_batch = np.zeros(shape=[
            negative_batch_size, positive_batch.shape[1],
            positive_batch.shape[2], positive_batch.shape[3]
        ],
# --- Positive/negative mini-batch construction, augmentation-aware variant ---
# Outer-scope names (AUGMENTATION, positive_train_num, positive_batch_size,
# positive_train_indices, tpfiles, nfiles, negative_indices,
# negative_probabilities, np_proportion, mt, math) come from elsewhere in
# the file.
for pbi in range(0, positive_train_num, positive_batch_size):
    if AUGMENTATION:
        print("training process:%d/%d %s" %(pbi, positive_train_num, tpfiles[positive_train_indices[pbi]]))
    # Accumulate one mini-batch worth of positive (nodule) volumes.
    for pti in range(pbi, min(pbi+positive_batch_size, positive_train_num)):
        data_index = positive_train_indices[pti]
        pfile = tpfiles[data_index]
        # Pick the annotation CSV based on which dataset the file path
        # belongs to (LUNA vs Tianchi); otherwise fall back to a diameter
        # of 0.
        if pfile.split('/')[0].find("luna")>=0:
            patient_uid, nodule_diameter = mt.get_annotation_informations(pfile, "luna_cubes_56_overbound/luna_annotations.csv")
        elif pfile.split('/')[0].find("tianchi")>=0:
            patient_uid, nodule_diameter = mt.get_annotation_informations(pfile, "tianchi_cubes_56_overbound/tianchi_annotations.csv")
        else:
            patient_uid = mt.get_volume_informations(pfile)[0]
            nodule_diameter = 0
        positive_data = np.load(pfile)
        # Fragile first-iteration check via dir(); presumably
        # positive_batch is deleted between outer iterations elsewhere —
        # TODO confirm.
        if "positive_batch" not in dir():
            positive_batch = mt.extract_volumes(positive_data, nodule_diameter=nodule_diameter, scale_augment=AUGMENTATION, translation_augment=AUGMENTATION, rotation_augment=AUGMENTATION)
        else:
            positive_batch = np.concatenate((positive_batch, mt.extract_volumes(positive_data, nodule_diameter=nodule_diameter, scale_augment=AUGMENTATION, translation_augment=AUGMENTATION, rotation_augment=AUGMENTATION)), axis=0)
    # Negative count is np_proportion times the nominal positive batch size
    # (old rule, keyed to the actual positive batch, kept commented out),
    # capped by how many negatives still have non-zero probability.
    #negative_batch_size = min(positive_batch.shape[0], negative_probabilities.nonzero()[0].size)
    negative_batch_size = min(int(math.ceil(positive_batch_size*np_proportion)), negative_probabilities.nonzero()[0].size)
    if negative_batch_size > 0:
        negative_batch = np.zeros(shape=[negative_batch_size, positive_batch.shape[1], positive_batch.shape[2], positive_batch.shape[3]], dtype=positive_batch.dtype)
        # Sample negatives without replacement according to the current
        # probability vector, then zero out the chosen entries and
        # renormalize so each negative is used at most once per epoch.
        negative_candidate = np.random.choice(negative_indices, size=negative_batch_size, replace=False, p=negative_probabilities)
        for ni in range(negative_candidate.size):
            negative_batch[ni] = np.load(nfiles[negative_candidate[ni]])
            negative_probabilities[negative_candidate[ni]] = 0
        if negative_probabilities.sum() > 0:
            negative_probabilities /= negative_probabilities.sum()
    # Combined batch buffers: positives followed by negatives, with 2-class
    # one-hot labels (filled elsewhere — the filling code is outside this
    # fragment).
    train_data = np.zeros(shape=(positive_batch.shape[0]+negative_batch_size, positive_batch.shape[1], positive_batch.shape[2], positive_batch.shape[3]), dtype=float)
    train_label = np.zeros(shape=(positive_batch.shape[0]+negative_batch_size, 2), dtype=float)
# NOTE(review): this fragment begins mid-conditional — the `if` branch that
# governs the first assignment (presumably the "luna" path check matching the
# `elif` below) is outside this view, and the fragment ends right after the
# negative_batch_size computation. It duplicates the dataset-dispatch logic
# of the augmented loop above; likely an alternative version of the same
# code — TODO confirm which copy is live.
        patient_uid, nodule_diameter = mt.get_annotation_informations(
            pfile, "luna_cubes_56_overbound/luna_annotations.csv")
    elif pfile.split('/')[0].find("tianchi") >= 0:
        patient_uid, nodule_diameter = mt.get_annotation_informations(
            pfile, "tianchi_cubes_56_overbound/tianchi_annotations.csv"
        )
    else:
        # Unknown dataset: no annotation CSV, so no diameter available.
        patient_uid = mt.get_volume_informations(pfile)[0]
        nodule_diameter = 0
    positive_data = np.load(pfile)
    # Fragile first-iteration check via dir(); presumably positive_batch is
    # deleted between batches elsewhere — TODO confirm.
    if "positive_batch" not in dir():
        positive_batch = mt.extract_volumes(
            positive_data, nodule_diameter=nodule_diameter,
            scale_augment=AUGMENTATION, translation_augment=AUGMENTATION,
            rotation_augment=AUGMENTATION)
    else:
        positive_batch = np.concatenate(
            (positive_batch,
             mt.extract_volumes(
                 positive_data, nodule_diameter=nodule_diameter,
                 scale_augment=AUGMENTATION,
                 translation_augment=AUGMENTATION,
                 rotation_augment=AUGMENTATION)),
            axis=0)
# Negative count matches the accumulated positive batch, capped by remaining
# non-zero-probability negatives.
negative_batch_size = min(
    positive_batch.shape[0], negative_probabilities.nonzero()[0].size)
# --- Per-sample batch builder: mixed positive/negative indexing scheme ---
# Indices below num_positive select positive (nodule) files; the rest map
# into the negative file list. NOTE(review): this fragment ends at a bare
# `for bi ...:` header (body outside view), and the accumulation section
# presumably sits inside a loop over data_index whose header is also outside
# this view — TODO confirm.
if data_index < num_positive:
    pfile = pfiles[data_index]
    isnodule = True
else:
    pfile = nfiles[data_index - num_positive]
    isnodule = False
data_volume = np.load(pfile)
if isnodule:
    # Positive sample: dataset-specific annotation lookup, then nodule
    # volume extraction with optional augmentation.
    if pfile.split('/')[0].find("luna")>=0:
        patient_uid, nodule_diameter = mt.get_annotation_informations(pfile, "luna_cubes_56_overbound/luna_annotations.csv")
    elif pfile.split('/')[0].find("tianchi")>=0:
        patient_uid, nodule_diameter = mt.get_annotation_informations(pfile, "tianchi_cubes_56_overbound/tianchi_annotations.csv")
    else:
        patient_uid = mt.get_volume_informations(pfile)[0]
        nodule_diameter = 0
    data_volume = mt.extract_volumes(data_volume, nodule_diameter=nodule_diameter, scale_augment=AUGMENTATION, translation_augment=AUGMENTATION, rotation_augment=AUGMENTATION)
else:
    # Negative sample: promote the single 3-D volume to a batch of one.
    data_volume = data_volume.reshape((1, data_volume.shape[0], data_volume.shape[1], data_volume.shape[2]))
# One-hot 2-class label: column 0 = nodule, column 1 = non-nodule
# (1 - int(isnodule) selects the column).
data_label = np.zeros(shape=(data_volume.shape[0], 2), dtype=float)
data_label[:,1-int(isnodule)] = 1
# Fragile first-iteration check via dir(); presumably data_volumes is
# deleted between batches elsewhere — TODO confirm.
if "data_volumes" not in dir():
    data_volumes = data_volume
    data_labels = data_label
else:
    data_volumes = np.concatenate((data_volumes, data_volume), axis=0)
    data_labels = np.concatenate((data_labels, data_label), axis=0)
# Shuffle buffers for the assembled batch; the permutation copy happens in
# the loop below (its body is outside this fragment).
train_data = np.zeros(shape=data_volumes.shape, dtype=float)
train_label = np.zeros(shape=data_labels.shape, dtype=float)
batch_random = np.random.permutation(data_volumes.shape[0])
for bi in range(batch_random.size):