Example #1

# Imports assumed by this example; Network, Sound, and Webcam are
# project-local modules from the original repository.
import copy
import glob
import time

import cv2
import numpy as np
import torch
import torch.nn.functional as F
from torchvision.transforms import ToTensor


class CPPN:
    def __init__(self,
                 height,
                 width,
                 n_inputs,
                 n_hidden,
                 n_outputs,
                 non_linearity,
                 device,
                 webcam=False):
        self.height = height
        self.width = width

        self.n_inputs = n_inputs
        self.n_hidden = n_hidden
        self.n_outputs = n_outputs
        self.non_linearity = non_linearity
        self.device = device

        self.network = Network(n_inputs, n_hidden, n_outputs, non_linearity)
        self.network.to(device)

        self.network_new = Network(n_inputs, n_hidden, n_outputs,
                                   non_linearity)
        self.network_new.to(device)

        self.sound = Sound()

        self.visualisation_input = self._create_visualisation_tensor()

        if webcam:
            self.webcam = Webcam()
            self.webcam.start()

    def _create_visualisation_tensor(self):
        visualisation_input = np.zeros(
            (self.height, self.width, self.n_inputs))

        for i in range(self.height):
            for j in range(self.width):
                visualisation_input[i, j] = [
                    i / float(self.height), j / float(self.width)
                ] + [0] * (self.n_inputs - 2)

        visualisation_input = torch.tensor(visualisation_input.reshape(
            -1, self.n_inputs),
                                           dtype=torch.float,
                                           device=self.device)

        return visualisation_input

    def _visualise_np(self, bands=None):
        with torch.no_grad():
            if bands is not None:
                o = torch.tensor(bands, dtype=torch.float).repeat(
                    self.visualisation_input.size(0), 1)

                self.visualisation_input[:, 2:] = o

            # The flattened input was built row-major as (height, width), so
            # reshape the output back to (height, width, n_outputs).
            im = self.network(self.visualisation_input).reshape(
                self.height, self.width, self.n_outputs)

            return im.detach().cpu().numpy()

    def _visualise(self, name, save_im=False):
        with torch.no_grad():
            im = self.network(self.visualisation_input).reshape(
                self.height, self.width, 3).detach().cpu().numpy() * 255

            if save_im:
                cv2.imwrite(name + ".png", im)

            return im

    def start(self):
        cv2.namedWindow("CPPN", cv2.WINDOW_NORMAL)
        cv2.setWindowProperty("CPPN", cv2.WND_PROP_FULLSCREEN,
                              cv2.WINDOW_FULLSCREEN)

        audio_generator = self.sound.read_audio()

        t = time.time()
        interpolating = 0
        training = 0
        generator = None

        while True:
            if time.time() - t > 20. and not training and not interpolating:
                # Only option 1 (interpolation) is currently enabled; add 0
                # (train on a target image) or 2 (train on a webcam frame)
                # back to the list to re-enable those modes.
                choice = np.random.choice([1])

                if choice == 0:
                    training = 1
                    target_images = glob.glob("target_images/*")
                    choice = np.random.choice(target_images)
                    image = cv2.imread(choice)
                    image = ToTensor()(image).float().to(self.device)

                    generator = self.train(image,
                                           self.height,
                                           self.width,
                                           0.001,
                                           save_im=False,
                                           image_folder="formation1",
                                           network_name="Santa",
                                           epochs=100,
                                           live=True,
                                           verbose=True)
                elif choice == 1:
                    self._random_network()
                    generator = self.interpolate(60)
                    interpolating = 1
                elif choice == 2:
                    print("webcam")
                    training = 1
                    image = self.webcam.read()

                    image = ToTensor()(image).float().to(self.device)

                    generator = self.train(image,
                                           self.height,
                                           self.width,
                                           0.001,
                                           save_im=False,
                                           image_folder="formation1",
                                           network_name="Santa",
                                           epochs=100,
                                           live=True,
                                           verbose=True)

                if choice == 0:
                    self.sound.sin = not self.sound.sin
                    self.sound.amplitudes = np.abs(np.random.rand(8))
                    self.sound.frequencies = np.abs(np.random.rand(8))
                elif choice == 1:
                    self.sound.calculate_bands = not self.sound.calculate_bands

                if not self.sound.calculate_bands and not self.sound.sin:
                    if np.random.rand() < 0.5:
                        self.sound.calculate_bands = True
                    else:
                        self.sound.sin = True

                t = time.time()

            if interpolating:
                try:
                    self.network = next(generator)
                except StopIteration:
                    interpolating = 0

            if training:
                try:
                    self.network = next(generator)
                except StopIteration:
                    training = 0
                    torch.cuda.empty_cache()

            frame = self._visualise_np(next(audio_generator))
            cv2.imshow(
                "CPPN",
                cv2.copyMakeBorder(frame, 0, 0, 0, 0, cv2.BORDER_CONSTANT, 0))

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        cv2.destroyAllWindows()
        self.sound.stream.stop_stream()
        self.sound.stream.close()
        self.sound.pa.terminate()

    def _resize_image(self, image, height, width):
        # F.interpolate expects the spatial size as (height, width).
        image = F.interpolate(image.unsqueeze(0), size=(height, width))
        image = torch.squeeze(image, 0)
        image = image.permute(1, 2, 0)

        return image

    def _random_network(self):
        self.network_new = Network(self.n_inputs, self.n_hidden,
                                   self.n_outputs, self.non_linearity)
        self.network_new.to(self.device)

    def train(self,
              image,
              height,
              width,
              loss_threshold,
              save_im=False,
              image_folder="",
              save_network=True,
              network_name="",
              epochs=1,
              live=False,
              verbose=False):
        # Note: train() is a generator; it must be driven with next() (or
        # iterated) for any training to happen, even when live=False.
        image = self._resize_image(image, height, width)

        optimiser = torch.optim.Adam(self.network.parameters())
        criterion = torch.nn.MSELoss()

        previous_loss = None

        # Shallow copy: `network` shares parameters with self.network, so the
        # optimiser (built on self.network.parameters()) updates both.
        network = copy.copy(self.network)

        for i in range(epochs):
            loss = criterion(
                network(self.visualisation_input).reshape(height, width, 3),
                image)

            optimiser.zero_grad()
            loss.backward()

            optimiser.step()
            if verbose:
                print(i, loss.item())
            if save_im:
                if previous_loss is None or previous_loss - loss_threshold > loss:
                    previous_loss = loss
                    self._visualise(
                        "{}/image{:06d}.png".format(image_folder, i), save_im)
            if live:
                yield network

        if save_network:
            torch.save(self.network.state_dict(),
                       'trained_networks/network{}.pt'.format(network_name))

        torch.cuda.empty_cache()

    def load(self, network_name, new=False):
        if new:
            self.network_new.load_state_dict(
                torch.load("trained_networks/network" + network_name + ".pt"))
            self.network_new.to(self.device)
        else:
            self.network.load_state_dict(
                torch.load("trained_networks/network" + network_name + ".pt"))
            self.network.to(self.device)

    def interpolate(self, num_interpolation_frames, space="beta", beta=2.0):
        """
        Yield networks that interpolate between self.network and
        self.network_new. The beta parameter alters the steepness of the
        easing curve; space="lin" uses linear spacing instead.
        """
        weights1 = []
        biases1 = []

        weights2 = []
        biases2 = []

        for idx, layer in enumerate(self.network.module_list):
            weights1.append(layer.weight.data)
            biases1.append(layer.bias.data)
            weights2.append(self.network_new.module_list[idx].weight.data)
            biases2.append(self.network_new.module_list[idx].bias.data)

        frame_distribution = np.linspace(0, 1, num=num_interpolation_frames)

        if space != "lin":
            f = lambda x: 1 / (1 + np.power(x / (1 + np.finfo(float).eps - x),
                                            -beta))
            frame_distribution = f(frame_distribution)

        weights = []
        for weight_pair in zip(weights1, weights2):
            difference = weight_pair[0] - weight_pair[1]
            new_weights = torch.zeros(
                num_interpolation_frames,
                list(weight_pair[0].size())[0],
                list(weight_pair[0].size())[1]).float().to(self.device)
            for idx, i in enumerate(frame_distribution):
                new_weights[idx] = weight_pair[0] - i * difference
            weights.append(new_weights)

        biases = []
        for bias_pair in zip(biases1, biases2):
            difference = bias_pair[0] - bias_pair[1]
            new_biases = torch.zeros(num_interpolation_frames,
                                     list(bias_pair[0].size())[0]).float().to(
                                         self.device)
            for idx, i in enumerate(frame_distribution):
                new_biases[idx] = bias_pair[0] - i * difference
            biases.append(new_biases)

        network = copy.deepcopy(self.network)

        for i in range(num_interpolation_frames):
            for idx, layer in enumerate(network.module_list):
                network.module_list[idx].weight.data = weights[idx][i]
                network.module_list[idx].bias.data = biases[idx][i]

            yield network

    def random_walk(self):
        pass

    def animate(self):
        pass

    def alzheimer(self):
        pass
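
A minimal usage sketch for Example #1. The constructor values below (resolution, layer sizes, torch.sin as the non-linearity) are illustrative assumptions, not settings from the original repository:

if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Two coordinate inputs (x, y) plus eight audio-band inputs, matching
    # the eight bands used by Sound in start(); three colour outputs.
    cppn = CPPN(height=512,
                width=512,
                n_inputs=10,
                n_hidden=32,
                n_outputs=3,
                non_linearity=torch.sin,
                device=device)

    # Opens a fullscreen window and renders until 'q' is pressed.
    cppn.start()
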
Example #2
# Imports assumed by this example; Webcam, download_file_from_google_drive,
# get_model_instance_segmentation, get_prediction, and the MODEL_PATH,
# SOURCE, and NUMBER_OF_CLASSES constants are project-local.
import os
import sys

import cv2
import torch
from torchvision import transforms


def main():
    # Defaults so the `finally` block is safe even if argument parsing fails.
    collector = None
    recording = False

    try:
        camera_type = sys.argv[1]

        if len(sys.argv) == 3:
            recording = sys.argv[2] == "record"

        if camera_type == "webcam":

            collector = Webcam(video_width=640, video_height=480)
            collector.start()

        else:
            print(f"No such camera {camera_type}")
            sys.exit(-1)

        if not os.path.isfile(MODEL_PATH):
            print("Downloading model, please wait...")
            download_file_from_google_drive(SOURCE, MODEL_PATH)
            print("Done downloading the model.")

        # get device
        device = torch.device(
            'cuda') if torch.cuda.is_available() else torch.device('cpu')
        # initialise model
        model = get_model_instance_segmentation(NUMBER_OF_CLASSES)
        # Load the downloaded weights; MODEL_PATH is assumed to point at the
        # frcnn_hands checkpoint fetched above.
        model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
        model.to(device)
        model.eval()

        if recording:
            # Make sure the output directory exists before opening the writer.
            os.makedirs('./recordings', exist_ok=True)
            movie = cv2.VideoWriter(
                f'./recordings/hand_frcnn_{camera_type}.avi',
                cv2.VideoWriter_fourcc(*'DIVX'), 8, (640, 480))

        with torch.no_grad():

            while collector.started:

                image, _ = collector.read()

                if image is not None:

                    orig = image.copy()

                    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                    image = transforms.ToTensor()(image).to(device)

                    out = model([image])

                    boxes = get_prediction(pred=out, threshold=.7)

                    try:

                        for box in boxes:
                            cv2.rectangle(img=orig,
                                          pt1=(box[0], box[1]),
                                          pt2=(box[2], box[3]),
                                          color=(0, 255, 255),
                                          thickness=2)

                        if recording:
                            movie.write(orig)

                        cv2.imshow("mask", orig)
                        k = cv2.waitKey(1)

                        if k == ord('q'):
                            collector.stop()

                    except Exception as e:
                        print(e)

    finally:
        print("Stopping stream.")
        if collector is not None:
            collector.stop()
        if recording:
            movie.release()
        cv2.destroyAllWindows()
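
To run Example #2 from the command line, a standard entry-point guard (an assumption, since the snippet ends at main()) is all that is needed; the script filename in the comment is hypothetical:

if __name__ == "__main__":
    # e.g. `python hand_frcnn_demo.py webcam record`
    main()
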
Example #3

# Imports assumed by this example (PyQt5 is an assumption; the original may
# use PySide2 instead). Ui_MainWindow, Webcam, to_str_digits, and
# make_square are project-local.
import os

from PIL import Image
from PyQt5.QtCore import QTimer
from PyQt5.QtGui import QImage, QPixmap
from PyQt5.QtWidgets import QMainWindow


class Studio(QMainWindow, Ui_MainWindow):
    def __init__(self, *args, **kwargs):
        super(Studio, self).__init__(*args, **kwargs)
        self.setupUi(self)

        # Device
        self.device_default = 0
        self.device = self.device_default

        # Webcam
        self.webcam = Webcam()

        # Image
        self.image_dir = 'outputs'
        self.image_ext = 'jpg'
        self.num_images_max_default = 10
        self.num_images_max = self.num_images_max_default
        self.num_images = 0

        self.saved_width_default = 416  # In pixel
        self.saved_height_default = 416
        self.saved_width = self.saved_width_default
        self.saved_height = self.saved_height_default

        self.flip_image = False
        self.cb_flip_image.stateChanged.connect(self.change_flip_image)

        # Filename prefix
        self.filename_prefix = 'class_memo'

        # Recording flag
        self.is_recording = False

        # Timer
        self.timer_is_on = False
        self.timer_duration = 500  # msec
        self.timer = QTimer(self)
        self.timer.timeout.connect(self.process_image)

        # Plot min/max
        self.plot_min = 0.0
        self.plot_max = -1.0

        # Initialize
        self.initialize()

    def open_webcam(self):

        # Release the resource which had been used.
        if self.webcam.is_open():
            self.webcam.release()

        self.webcam.open(self.device)
        self.process_image()

        # Show message
        self.show_message('webcam is opened.')

        # Start the timer
        if not self.timer_is_on:
            self.start_timer()

    def start_timer(self):
        self.timer_is_on = True
        self.timer.start(self.timer_duration)

    def stop_timer(self):
        self.timer_is_on = False
        self.timer.stop()

    def change_flip_image(self):

        if self.cb_flip_image.isChecked():
            self.flip_image = True
        else:
            self.flip_image = False

    def start_recording(self):

        self.is_recording = True
        self.num_images = 0
        self.show_message('recording frames.')

    def finish_recording(self):

        self.is_recording = False
        self.show_message('recording is finished.')

    def show_message(self, msg):
        text = 'Status: ' + msg
        self.lb_status.setText(text)

    def show_num_images(self):

        text = '{}/{}'.format(self.num_images, self.num_images_max)
        self.lb_num_images.setText(text)

    def get_image_path(self, n):

        str_num = to_str_digits(n, num_digits=5)
        filename = self.filename_prefix + '_' + str_num + '.' + self.image_ext

        path = os.path.join(self.image_dir, filename)

        return path

    def save_image(self):
        # Save the image.

        self.num_images += 1

        if self.num_images <= self.num_images_max:
            image_path = self.get_image_path(self.num_images)
            frame = self.webcam.get_frame()
            image = Image.fromarray(frame)
            size = (self.saved_width, self.saved_height)
            image = make_square(image)

            image = image.resize(size)
            image.save(image_path)

        else:
            self.num_images = self.num_images_max
            self.finish_recording()

        # Show the number of images
        self.show_num_images()

    def process_image(self):

        if self.webcam.is_open():

            # Show frame
            frame = self.webcam.read()
            image = QImage(frame.data, frame.shape[1], frame.shape[0],
                           QImage.Format_RGB888)

            # Flip the image horizontally
            image_flipped = image.mirrored(True, False)

            if self.flip_image:
                pixmap = QPixmap.fromImage(image_flipped)
            else:
                pixmap = QPixmap.fromImage(image)

            self.lb_image.setPixmap(pixmap)

            # Record frame
            if self.is_recording:
                self.save_image()

    def initialize(self):

        # Connect the signal and slot
        self.cb_device.activated[str].connect(self.set_device)
        self.edit_num_images_max.textChanged.connect(self.set_num_images_max)
        self.edit_saved_width.textChanged.connect(self.set_saved_width)
        self.edit_saved_height.textChanged.connect(self.set_saved_height)
        self.edit_filename_prefix.textChanged.connect(self.set_filename_prefix)

        self.btn_open.clicked.connect(self.open_webcam)
        self.btn_record.clicked.connect(self.start_recording)

        # UI
        text = str(self.num_images_max)
        self.edit_num_images_max.setText(text)

        text = str(self.saved_width)
        self.edit_saved_width.setText(text)

        text = str(self.saved_height)
        self.edit_saved_height.setText(text)

        text = str(self.filename_prefix)
        self.edit_filename_prefix.setText(text)

    def set_device(self):

        value = self.cb_device.currentIndex()

        try:
            value = int(value)
        except ValueError:
            value = self.device_default

        self.device = value

    def set_num_images_max(self):

        value = self.edit_num_images_max.text()

        try:
            value = int(value)
        except ValueError:
            value = self.num_images_max_default

        self.num_images_max = value

    def set_saved_width(self):

        value = self.edit_saved_width.text()

        try:
            value = int(value)
        except ValueError:
            value = self.saved_width_default

        self.saved_width = value

    def set_saved_height(self):

        value = self.edit_saved_height.text()

        try:
            value = int(value)
        except ValueError:
            value = self.saved_height_default

        self.saved_height = value

    def set_filename_prefix(self):

        value = self.edit_filename_prefix.text()
        self.filename_prefix = value

    def add_widget(self, widget):

        widget.setParent(self.central_widget)
        self.view_layout.addWidget(widget)

    def remove_widget(self, widget):

        self.view_layout.removeWidget(widget)
        widget.setParent(None)

    def refresh_view(self):

        text = 'Remaining time: {} (sec)'.format(self.num_images)
        self.lb_num_images.setText(text)

    def closeEvent(self, event):

        # Stop the refresh timer and release the camera on exit.
        self.stop_timer()
        self.webcam.release()
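
A minimal bootstrap for Example #3, assuming a PyQt5 application (the original may use PySide2; the setup here is illustrative):

if __name__ == "__main__":
    import sys

    from PyQt5.QtWidgets import QApplication

    app = QApplication(sys.argv)
    studio = Studio()
    studio.show()
    sys.exit(app.exec_())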