def loadImageAndTarget(sample, augmentation):
    """Load one sample's image and build its one-hot target vector.

    Args:
        sample: Indexable pair. ``sample[0]`` is handed to
            ``image.openImage`` and ``sample[1]`` is the class label that is
            looked up in ``cfg.CLASSES``.
        augmentation: When truthy, ``image.augment`` is applied to the
            resized image before normalization.

    Returns:
        Tuple ``(img, target)``: ``img`` is the result of ``image.prepare``
        and ``target`` is a float32 vector of length ``len(cfg.CLASSES)``
        that is 1.0 at the label's index and 0.0 elsewhere.
    """
    # Open and bring the image to the configured network input size
    net_input = image.openImage(sample[0], cfg.IM_DIM)
    net_input = image.resize(
        net_input,
        cfg.IM_SIZE[0],
        cfg.IM_SIZE[1],
        mode=cfg.RESIZE_MODE,
    )

    # Augmentation is optional and happens before normalization
    if augmentation:
        net_input = image.augment(
            net_input,
            cfg.IM_AUGMENTATION,
            cfg.AUGMENTATION_COUNT,
            cfg.AUGMENTATION_PROBABILITY,
        )

    # Normalize, then convert to the layout produced by image.prepare
    net_input = image.prepare(
        image.normalize(net_input, cfg.ZERO_CENTERED_NORMALIZATION)
    )

    # One-hot encode the label; .index() fails if the label is unknown
    # (presumably ValueError, assuming cfg.CLASSES is a list -- TODO confirm)
    class_index = cfg.CLASSES.index(sample[1])
    one_hot = np.zeros((len(cfg.CLASSES)), dtype='float32')
    one_hot[class_index] = 1.0

    return net_input, one_hot
def getSpecBatches(split):
    """Yield one stacked, shuffled batch of prepared spectrograms per file.

    Args:
        split: Iterable of indexable entries. ``entry[0]`` is the path given
            to ``audio.specsFromFile``; ``entry[1]`` is yielded back
            unchanged (presumably the file's label(s) -- TODO confirm).

    Yields:
        Tuple ``(batch, entry[1], filename)``. ``batch`` is sliced to at
        most ``cfg.MAX_SPECS_PER_FILE`` rows and ``filename`` is the last
        ``os.sep``-separated component of ``entry[0]``. When a file produces
        no spectrograms, the batch is built from a single array of standard
        normal noise instead.
    """
    # NOTE(review): nesting was reconstructed from a source whose line breaks
    # were lost; confirm against the upstream file.

    # A single random state drives both the noise fallback and the shuffling
    rng = cfg.getRandomState()

    for entry in split:
        path = entry[0]

        # Starts as an empty list and becomes an ndarray with the first spec
        batch = []

        spec_source = audio.specsFromFile(
            path,
            cfg.SAMPLE_RATE,
            cfg.SPEC_LENGTH,
            cfg.SPEC_OVERLAP,
            cfg.SPEC_MINLEN,
            shape=(cfg.IM_SIZE[1], cfg.IM_SIZE[0]),
            fmin=cfg.SPEC_FMIN,
            fmax=cfg.SPEC_FMAX,
            spec_type=cfg.SPEC_TYPE,
        )

        for raw_spec in spec_source:
            # Resize -> normalize -> prepare, same pipeline as for images
            resized = image.resize(
                raw_spec, cfg.IM_SIZE[0], cfg.IM_SIZE[1], mode=cfg.RESIZE_MODE
            )
            normalized = image.normalize(
                resized, cfg.ZERO_CENTERED_NORMALIZATION
            )
            prepared = image.prepare(normalized)

            # The first spec seeds the batch; later ones are stacked below it
            if len(batch) == 0:
                batch = prepared
            else:
                batch = np.vstack((batch, prepared))

            # Stop reading the file once the per-file cap is reached
            if batch.shape[0] >= cfg.MAX_SPECS_PER_FILE:
                break

        # Fall back to random noise so that every file yields something
        if len(batch) == 0:
            noise = rng.normal(0.0, 1.0, (cfg.IM_SIZE[1], cfg.IM_SIZE[0]))
            batch = image.prepare(noise)

        batch = shuffle(batch, random_state=rng)

        yield batch[:cfg.MAX_SPECS_PER_FILE], entry[1], path.split(os.sep)[-1]
def getSpecBatches(split):
    """Yield timestamped groups of prepared spectrograms for each file.

    Spectrograms of a file are stacked until ``cfg.SPECS_PER_PREDICTION``
    rows are collected; the group is then yielded together with a timestamp
    and a fresh group is started.

    Args:
        split: Iterable of indexable entries. ``entry[0]`` is the path given
            to ``audio.specsFromFile``; ``entry[1]`` is yielded back
            unchanged (presumably the file's label(s) -- TODO confirm).

    Yields:
        Tuple ``(batch, entry[1], timestamp, filename)``. ``timestamp`` is
        the value returned by ``getTimestamp(int(start), int(end))`` and
        ``filename`` is the last ``os.sep``-separated component of
        ``entry[0]``.
    """
    # NOTE(review): nesting was reconstructed from a source whose line breaks
    # were lost; confirm against the upstream file. As reconstructed, specs
    # left over at the end of a file (fewer than cfg.SPECS_PER_PREDICTION)
    # are never yielded.

    # The returned state is not used below; the call is kept because
    # cfg.getRandomState() may have side effects -- TODO confirm
    rng = cfg.getRandomState()

    for entry in split:
        path = entry[0]

        # Starts as an empty list and becomes an ndarray with the first spec
        batch = []

        # Start time of the next prediction window within this file
        window_start = 0

        spec_source = audio.specsFromFile(
            path,
            cfg.SAMPLE_RATE,
            cfg.SPEC_LENGTH,
            cfg.SPEC_OVERLAP,
            cfg.SPEC_MINLEN,
            shape=(cfg.IM_SIZE[1], cfg.IM_SIZE[0]),
            fmin=cfg.SPEC_FMIN,
            fmax=cfg.SPEC_FMAX,
        )

        for raw_spec in spec_source:
            # Resize -> normalize -> prepare
            resized = image.resize(
                raw_spec, cfg.IM_SIZE[0], cfg.IM_SIZE[1], mode=cfg.RESIZE_MODE
            )
            normalized = image.normalize(
                resized, cfg.ZERO_CENTERED_NORMALIZATION
            )
            prepared = image.prepare(normalized)

            # The first spec seeds the batch; later ones are stacked below it
            if len(batch) == 0:
                batch = prepared
            else:
                batch = np.vstack((batch, prepared))

            # Abandon the rest of the file once the per-file cap is reached
            if batch.shape[0] >= cfg.MAX_SPECS_PER_FILE:
                break

            # Not enough specs for a prediction yet: keep collecting
            if len(batch) < cfg.SPECS_PER_PREDICTION:
                continue

            # Window end = start + one full spec + one hop per additional spec
            window_end = window_start + cfg.SPEC_LENGTH + (
                (len(batch) - 1) * (cfg.SPEC_LENGTH - cfg.SPEC_OVERLAP))
            timestamp = getTimestamp(int(window_start), int(window_end))

            # The next window begins one overlap before this one ended
            window_start = window_end - cfg.SPEC_OVERLAP

            yield batch, entry[1], timestamp, path.split(os.sep)[-1]

            # Begin a new group for the next prediction
            batch = []
# NOTE(review): this line is a fragment of a larger definition (presumably the
# body of a per-frame loop -- the enclosing header is not visible) whose line
# breaks and indentation were lost. In particular it cannot be determined
# which of the three nested `if`s encloses `counter_switch = 0`,
# `index_switch += 1` and the print() -- restore the layout from the upstream
# file before running.
#
# What the statements below do, in order:
#   1. teacher_index = counter_read / t_sub (true division). When
#      teacher_index >= trainingset_size + s_sub, counter_switch >=
#      t_sub * s_sub and index_switch < len(weights_names), the network loads
#      the state dict stored at weights_names[index_switch] (mapped to
#      args.device); counter_switch is reset and index_switch is advanced.
#   2. The frame is converted with process.cvToTorch + process.normalize, cast
#      to float and passed through network.forward; squeeze_(0) drops the
#      leading dimension in place (presumably the batch axis -- TODO confirm).
#   3. output.max(dim=0) returns (values, indices); only the indices are kept,
#      then mapped to (1 - index) * 255 as uint8 and moved to a CPU numpy
#      array. Assumes indices are 0/1, i.e. two classes -- TODO confirm.
#   4. The mask is written to
#      save_path + "/predictions_student/mask_<int(counter_read)>.png", and
#      both counter_read and counter_switch are incremented.
#
# NOTE(review): network.forward(...) is called directly rather than
# network(...); torch.cuda.empty_cache() runs on every pass; both `device` and
# `args.device` are used -- confirm these are intentional.
# Check when to switch the weights teacher_index = counter_read / t_sub if teacher_index >= trainingset_size + s_sub: if counter_switch >= t_sub * s_sub: if index_switch < len(weights_names): network.load_state_dict( torch.load(weights_names[index_switch], map_location=args.device)) counter_switch = 0 index_switch += 1 print("Switched for : ", teacher_index) # Process all frames tensor = process.normalize(process.cvToTorch(frame, device)) output = network.forward(tensor.type(torch.float)).squeeze_(0) _, output = output.max(dim=0) output = (1 - output).type(torch.uint8) * 255 output = output.to("cpu").numpy() torch.cuda.empty_cache() # Save the output mask path_mask = save_path + "/predictions_student/mask_" + str( int(counter_read)) + ".png" cv2.imwrite(path_mask, output) counter_read += 1 counter_switch += 1
# NOTE(review): this line is a fragment of a larger definition (the enclosing
# header and the origin of `predictions`, `masks`, `labels`, `field_mask`,
# `frame`, `dataset_images`, `dataset_labels` and `counter_save` are not
# visible) whose line breaks and indentation were lost. Whether
# `final = final.astype("uint8")` sits inside or after the `for` loop cannot
# be determined from here -- restore the layout from the upstream file before
# running.
#
# What the statements below do, in order:
#   1. `final` starts as an all-zero array with the shape of frame[:, :, 0].
#      For every (mask, label, box) whose label is in args.targetclass and
#      whose mask[0] overlaps field_mask (sum of the elementwise product is
#      non-zero), mask[0] is OR-ed into `final`. `box` is unpacked but not
#      otherwise used in this fragment.
#   2. `final` is cast to uint8. The training target is 1 - final as a
#      LongTensor with a leading dimension added by unsqueeze_(0), kept on the
#      CPU; the normalized frame tensor is also moved to the CPU. Both are
#      appended to dataset_images / dataset_labels.
#   3. `final` is scaled by 255 and three output paths under
#      save_path + "/predictions_teacher/" are built from counter_save, which
#      is then incremented. The frame and the mask are written with
#      cv2.imwrite; save_path_field is built but not written in this fragment
#      (presumably used by code that follows -- TODO confirm).
boxes = predictions.bbox.numpy() # Print all segmented players whose bounding boxes intersect the field mask final = np.zeros(frame[:, :, 0].shape) for mask, label, box in zip(masks, labels, boxes): if label in args.targetclass: covering_area = np.sum(mask[0, :, :] * field_mask) if covering_area == 0: continue final = np.logical_or(final, mask[0, :, :]) final = final.astype("uint8") # Transform the image and the targets into the correct format for pytorch target = torch.from_numpy(1 - final).type( torch.LongTensor).unsqueeze_(0).to("cpu") input_tensor = process.normalize( process.cvToTorch(frame, device)).to("cpu") dataset_images.append(input_tensor) dataset_labels.append(target) # Get a numpy image for saving the results to the save folder final = final * 255 save_path_original = save_path + "/predictions_teacher/original_" + str( counter_save) + ".png" save_path_mask = save_path + "/predictions_teacher/mask_" + str( counter_save) + ".png" save_path_field = save_path + "/predictions_teacher/field_" + str( counter_save) + ".png" counter_save += 1 cv2.imwrite(save_path_original, frame) cv2.imwrite(save_path_mask, final)