def process_video(self, framerate=10, skip=0, preview_image=False, save_path=None):
    """
    Apply the image processing pipeline to each frame of the video.

    Arguments:
    - framerate: step between extracted frames (every framerate-th frame
      is processed)
    - skip: number of frames to skip at the start of the segment
    - preview_image: whether to preview the loaded and processed images
    - save_path: folder to save the results in; the filename is taken
      from the video
    """
    for frame in range(self.first_frame + skip, self.last_frame, framerate):
        image = self.get_frame(frame)
        image_transparent = self.process_image(image)
        if preview_image:
            print(f'Frame {frame} ({round(frame / self.fps, 2)} seconds)')
            preview(image)
            preview(image_transparent)
        if save_path is not None:
            # OpenCV expects BGRA channel order when writing PNGs
            cv2.imwrite(
                os.path.join(save_path, f'{self.filename}-{frame}.png'),
                cv2.cvtColor(image_transparent, cv2.COLOR_RGBA2BGRA))
def get_frame(self, frame, preview_image=False):
    """ Fetch a particular frame from the video segment and preprocess it """
    # Seek to the requested frame before reading
    self.vidcap.set(cv2.CAP_PROP_POS_FRAMES, frame)
    _, image = self.vidcap.read()
    image = self._preprocess_image(image)
    if preview_image:
        preview(image)
    return image
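# A minimal usage sketch for the two methods above; the enclosing class
# name (VideoSegment) and the input path are hypothetical stand-ins for
# whatever this repo actually defines.
segment = VideoSegment('videos/drive.mp4')   # hypothetical constructor
segment.process_video(framerate=10, skip=0, preview_image=False,
                      save_path='frames/')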
def train(model, dataset, device, modelName, epochs, batch_size=1,
          saveEachEpoch=False, previewShow=False):
    """
    Train the model and save it under the given path.

    Arguments:
    - model: the model to be trained, of type nn.Module
    - dataset: the dataset to train on, of type torch.utils.data.Dataset
    - modelName: destination path where the model weights are saved
    - epochs: number of epochs to train for
    - batch_size: batch size used by the data loader
    - saveEachEpoch: whether to save the model at the end of each epoch
    - previewShow: whether to show outputs while training progresses;
      if enabled, shows the output for every 100th batch
    """
    trainloader = data.DataLoader(dataset, batch_size=batch_size,
                                  num_workers=10)
    colors = cityscapeColors()

    criterion = nn.MSELoss()              # loss function
    opt = optim.Adam(model.parameters())  # optimizer

    try:
        for epoch in tqdm(range(epochs)):
            for i, (inputs, targets) in enumerate(trainloader):
                inputs = inputs.to(device)
                targets = targets.to(device)

                opt.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                loss.backward()
                opt.step()

                # Show the current output every 100 iterations
                if previewShow and i % 100 == 0:
                    preview(inputs, outputs, targets, epoch)
            # Save at the end of each epoch, as documented
            if saveEachEpoch:
                torch.save(model, modelName)
    except KeyboardInterrupt:
        # Allow training to be interrupted; the final save below still runs
        pass
    torch.save(model, modelName)
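# A minimal usage sketch for train(); MyModel and MyDataset are hypothetical
# stand-ins for the nn.Module and Dataset classes this repo defines.
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MyModel().to(device)          # hypothetical model class
dataset = MyDataset('data/train')     # hypothetical dataset class
train(model, dataset, device, modelName='weights.pth', epochs=10,
      batch_size=4, saveEachEpoch=True)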
def image_kmeans(img, K=2, attempts=50, color_space=None,
                 preview_image=False, plot_results=False):
    # Preprocessing: blur to suppress noise, optionally convert color space
    img = img.copy()
    img = cv2.blur(img, (5, 5))
    if color_space is not None:
        img = cv2.cvtColor(img, color_space)
    img_flat = np.float32(img.reshape((-1, 3)))

    # K-means clustering over the flattened pixels
    _, labels, center = cv2.kmeans(
        img_flat, K, None,
        (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0),
        attempts, cv2.KMEANS_RANDOM_CENTERS)
    center = np.uint8(center)

    # View segmented image
    if preview_image:
        res = center[labels.flatten()]
        result_img = res.reshape(img.shape)
        preview(result_img)

    # View k-means centroids in 3D
    if plot_results:
        fig = plt.figure()
        ax = Axes3D(fig)
        ax.scatter(center.T[0], center.T[1], center.T[2],
                   c=center / 255, s=200, alpha=1)
        for i in range(center.shape[0]):
            ax.text(center.T[0, i], center.T[1, i], center.T[2, i], i,
                    fontsize=14)
        ax.set_xlim(0, 256)
        ax.set_ylim(0, 256)
        ax.set_zlim(0, 256)
        plt.show()

    return labels
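# A minimal usage sketch for image_kmeans(); the image path is hypothetical.
# Clusters pixels into 3 groups in Lab space, previews the segmentation,
# and plots the centroids in 3D.
import cv2

img = cv2.imread('images/street.png')   # hypothetical path
labels = image_kmeans(img, K=3, color_space=cv2.COLOR_BGR2LAB,
                      preview_image=True, plot_results=True)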
def pwy(settings):
    show_diagnostics = True
    settings = utils.process_args(settings, show_diagnostics)

    # OPEN FILE
    settings['filename'] = UPLOAD_FOLDER + '/' + settings['filename']
    UI_instrument_notes = settings['inst1']
    UI_onset_threshold = settings['busy']
    UI_instrument_chords = settings['inst2']
    UI_dynamic_threshold = settings['dyn']
    UI_instrument_beats = settings['inst3']
    UI_beat_windowSize = settings['window']  # 300 msec
    UI_beat_pattern = settings['pattern']
    UI_chord_style = settings['style']
    UI_time_signature = settings['timeSig']
    y, sr = librosa.load(settings['filename'])

    # TRACK BEATS
    onsets, beats, volume_notes, times, tempo, msec_tempo = beatDetection.track_beats(
        y, sr, UI_onset_threshold, UI_dynamic_threshold, UI_beat_windowSize)
    # beatDetection.plot_beats_and_onsets(onsets, beats, times, show_diagnostics)  # breaks the GUI

    # PREDICT CHORDS
    (notes, reg_notes, startTimes_notes, endTimes_notes,
     frameIndex_notes) = chordPrediction.get_chords(
         settings['filename'], times[beats], times)
    (chords, reg_chords, startTimes_chords, endTimes_chords,
     frameIndex_chords, volume_chords) = midiConversion.determine_durations(
         list(notes), list(reg_notes), list(startTimes_notes),
         list(endTimes_notes), frameIndex_notes, list(volume_notes))
    chordPrediction.print_chords_and_times(
        chords, startTimes_chords, endTimes_chords, frameIndex_chords,
        times, show_diagnostics)
    startTimes_beats, endTimes_beats, volume_beats = beatDetection.alter_beats(
        startTimes_notes, endTimes_notes, volume_notes, msec_tempo,
        UI_beat_windowSize, settings['speed'])

    # NOTES TO MIDI
    midi_notes = midiConversion.convert_note_to_midi(notes, reg_notes)
    midi_chords = midiConversion.convert_chord_to_midi(
        chords, reg_chords, UI_chord_style)
    midi_beats = midiConversion.convert_beat_to_midi(
        notes, UI_beat_pattern, UI_time_signature, UI_instrument_beats,
        reg_notes, settings['speed'])

    # WRITE MIDI
    midi_tracks = [midi_notes, midi_chords, midi_beats]
    startTimes = [startTimes_notes, startTimes_chords, startTimes_beats]
    endTimes = [endTimes_notes, endTimes_chords, endTimes_beats]
    UI_instrument = [UI_instrument_notes, UI_instrument_chords,
                     UI_instrument_beats]
    volumes = [volume_notes, volume_chords, volume_beats]
    duration = [0] * len(midi_tracks)
    program = [0] * len(midi_tracks)
    volume = [0] * len(midi_tracks)
    for i in range(len(midi_tracks)):
        duration[i], program[i], volume[i] = midiFileCreation.build_track(
            UI_instrument[i], midi_tracks[i], startTimes[i], endTimes[i],
            volumes[i], msec_tempo, UI_dynamic_threshold)
    midiFileCreation.write_midi_file(
        settings['filename'], midi_tracks, program, duration, tempo[0], volume)

    utils.preview(ntpath.basename(settings['filename']), UPLOAD_FOLDER)
    return render_template('download.html',
                           filename=ntpath.basename(settings['filename'][:-4]),
                           path=UPLOAD_FOLDER)
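# A sketch of the settings dict pwy() expects. The keys mirror the lookups
# above; every value shown here is a hypothetical example, and
# utils.process_args may normalize them further.
example_settings = {
    'filename': 'upload.wav',                # resolved against UPLOAD_FOLDER
    'inst1': 0, 'inst2': 24, 'inst3': 115,   # instruments for notes/chords/beats
    'busy': 0.5,                             # onset threshold
    'dyn': 0.5,                              # dynamic threshold
    'window': 300,                           # beat window size in msec
    'pattern': 'basic',
    'style': 'block',
    'timeSig': '4/4',
    'speed': 1.0,
}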
def draw_bbox(self, image):
    """ Draw the bounding box on the input image """
    image = image.copy()
    kwargs = {'img': image, 'color': (255, 0, 0), 'thickness': 5}
    # Left, top, bottom, and right edges of the box
    cv2.line(pt1=(self.bbox['xmin'], self.bbox['ymin']),
             pt2=(self.bbox['xmin'], self.bbox['ymax']), **kwargs)
    cv2.line(pt1=(self.bbox['xmin'], self.bbox['ymin']),
             pt2=(self.bbox['xmax'], self.bbox['ymin']), **kwargs)
    cv2.line(pt1=(self.bbox['xmax'], self.bbox['ymax']),
             pt2=(self.bbox['xmin'], self.bbox['ymax']), **kwargs)
    cv2.line(pt1=(self.bbox['xmax'], self.bbox['ymax']),
             pt2=(self.bbox['xmax'], self.bbox['ymin']), **kwargs)
    preview(image)
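# Design note (not in the original): the four cv2.line calls above trace an
# axis-aligned box, so a single cv2.rectangle call is equivalent:
# cv2.rectangle(image, (self.bbox['xmin'], self.bbox['ymin']),
#               (self.bbox['xmax'], self.bbox['ymax']),
#               color=(255, 0, 0), thickness=5)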
# NOTES TO MIDI
midi_notes = midiConversion.convert_note_to_midi(notes, reg_notes)
midi_chords = midiConversion.convert_chord_to_midi(
    chords, reg_chords, UI_chord_style)
midi_beats = midiConversion.convert_beat_to_midi(
    notes, UI_beat_pattern, UI_time_signature, UI_instrument_beats,
    reg_notes, settings['speed'])

# WRITE MIDI
midi_tracks = [midi_notes, midi_chords, midi_beats]
startTimes = [startTimes_notes, startTimes_chords, startTimes_beats]
endTimes = [endTimes_notes, endTimes_chords, endTimes_beats]
UI_instrument = [
    UI_instrument_notes, UI_instrument_chords, UI_instrument_beats
]
volumes = [volume_notes, volume_chords, volume_beats]
duration = [0] * len(midi_tracks)
program = [0] * len(midi_tracks)
volume = [0] * len(midi_tracks)
for i in range(len(midi_tracks)):
    duration[i], program[i], volume[i] = midiFileCreation.build_track(
        UI_instrument[i], midi_tracks[i], startTimes[i], endTimes[i],
        volumes[i], msec_tempo, UI_dynamic_threshold)
midiFileCreation.write_midi_file(settings['filename'], midi_tracks, program,
                                 duration, tempo[0], volume)

# PREVIEW
utils.preview(filename=settings['filename'], length=settings['preview'])
utils.clean(filename=settings['filename'])