Ejemplo n.º 1
0
    def __init__(self, cuda: bool):
        """
        Do the one-time setup needed before frames can be converted by calling the instance.

        E.g.
            mm = MakeMask(True)
            new_image = mm(old_image)

        Loads the segmentation model, switches it to eval mode, builds the blur kernel and
        the input normalisation, and moves model and kernel to the GPU when requested and
        available.

        :param cuda: should the process occur on Nvidia GPU?
        """
        self.cuda = cuda
        # Class index looked up in the argmax segmentation; presumably the "person" label
        # of the model's label set -- TODO confirm.
        self.people_class = 15

        segmenter = hub_load('pytorch/vision', 'deeplabv3_resnet101', pretrained=True)
        self.model = segmenter
        segmenter.eval()
        print("Model Loaded")

        # 3x3 smoothing kernel; the weights sum to 16, hence the division.
        # Wrapped twice so the tensor has shape (1, 1, 3, 3).
        kernel_rows = [[1.0, 2.0, 1.0], [2.0, 4.0, 2.0], [1.0, 2.0, 1.0]]
        self.blur = FloatTensor([[kernel_rows]]) / 16.0

        # The GPU is used only when it was asked for AND is actually present.
        run_on_gpu = self.cuda and cuda_available()
        if run_on_gpu:
            print("Using GPU (CUDA) to process the images")
            self.model.to('cuda')
            self.blur = self.blur.to('cuda')

        # Per-channel normalisation applied to every frame before inference.
        channel_norm = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        self.preprocess = Compose([channel_norm])
Ejemplo n.º 2
0
    def get_default_config(cuda=False):
        """
        Assemble the default settings tree and wrap it in a ``NestedNamespace``.

        As a side effect the directories ``~/.saurongaze/dlib`` and ``~/.saurongaze/models``
        are created when missing. Only the *paths* of the model files are computed here;
        nothing in this function checks that the files exist.

        :param cuda: select the "cuda" device instead of "cpu"
        :raises RuntimeError: if ``cuda`` is truthy but ``cuda_available()`` is false
        :return: ``NestedNamespace`` built from the nested settings dict
        """
        if cuda and not cuda_available():
            raise RuntimeError("CUDA not available")

        package_dir = Path(__file__).resolve().parent
        calib_dir = package_dir / "ptgaze" / "data" / "calib"

        # Per-user cache directories for the downloaded model files.
        user_root = Path('~/.saurongaze').expanduser()
        dlib_dir = user_root / 'dlib'
        dlib_dir.mkdir(parents=True, exist_ok=True)
        weights_dir = user_root / 'models'
        weights_dir.mkdir(parents=True, exist_ok=True)

        landmark_file = dlib_dir / 'shape_predictor_68_face_landmarks.dat'
        checkpoint_file = weights_dir / 'mpiigaze_resnet_preact.pth'

        demo_section = {
            "display_on_screen": True,
            "gaze_visualization_length": 0.05,
            "head_pose_axis_length": 0.05,
            "image_path": None,
            "output_dir": None,
            "output_file_extension": "avi",
            "show_bbox": True,
            "show_head_pose": True,
            "show_landmarks": False,
            "show_normalized_image": False,
            "show_template_model": False,
            "use_camera": True,
            "video_path": None,
            "wait_time": 1,
        }
        face_detector_section = {
            "dlib": {
                "model": str(landmark_file),
            },
            "mode": "dlib",
        }
        gaze_estimator_section = {
            "camera_params": str(calib_dir / "sample_params.yaml"),
            "checkpoint": str(checkpoint_file),
            "normalized_camera_distance": 0.6,
            "normalized_camera_params": str(calib_dir / "normalized_camera_params_eye.yaml"),
        }
        model_section = {
            "backbone": {
                "name": "resnet_simple",
                "pretrained": "resnet18",
                "resnet_block": "basic",
                "resnet_layers": [2, 2, 2],
            },
            "name": "resnet_preact",
        }
        transform_section = {
            "mpiifacegaze_face_size": 224,
            "mpiifacegaze_gray": False,
        }

        if cuda:
            device_name = "cuda"
        else:
            device_name = "cpu"

        # Key order matches the original literal.
        settings = {
            "demo": demo_section,
            "device": device_name,
            "face_detector": face_detector_section,
            "gaze_estimator": gaze_estimator_section,
            "mode": "MPIIGaze",
            "model": model_section,
            "transform": transform_section,
        }
        return NestedNamespace(settings)
Ejemplo n.º 3
0
    def __call__(self, img: ndarray) -> ndarray:
        """
        Transform a given frame to a black and white one, representing a mask for editors.

        E.g.
            mm = MakeMask(True)
            new_image = mm(old_image)

        :param img: frame to convert. It is divided by 255 and permuted with (2, 0, 1), so a
            (height, width, channels) array with 0-255 values is assumed -- confirm with callers.
        :return: uint8 array laid out (height, width, 3); the same mask value is repeated on
            all three channels.
        """
        frame_data = FloatTensor(img) / 255.0

        # Channels-first for the model, then a leading batch dimension of size 1.
        input_batch = self.preprocess(frame_data.permute(2, 0, 1)).unsqueeze(0)

        # Only the input is moved here; the model and blur kernel are moved in __init__.
        if self.cuda and cuda_available():
            input_batch = input_batch.to('cuda')

        with no_grad():
            output = self.model(input_batch)['out'][0]

        # Winning class index per pixel.
        segmentation = output.argmax(0)

        # Soft weight computed from the channel-0 scores (the original code names this
        # the background output); the slice keeps a leading dim, i.e. shape (1, H, W).
        bgout = output[0:1]
        a = (1.0 - relu(tanh(bgout * 0.30 - 1.0))).pow(0.5) * 2.0

        # 1.0 where the winning class equals people_class. Comparing against the scalar
        # avoids allocating a filled tensor per frame. Shaped (1, 1, H, W) for conv2d.
        people = segmentation.eq(self.people_class).float().unsqueeze(0).unsqueeze(0)

        # Three passes of the 3x3 kernel soften the mask edges; padding=1 keeps H and W.
        for _ in range(3):
            people = conv2d(people, self.blur, stride=1, padding=1)

        # Drop only the batch dim -> (1, H, W); explicit dims instead of a blanket
        # squeeze() so a size-1 height or width cannot be collapsed by accident.
        combined_mask = relu(hardtanh(a * people.squeeze(0).pow(1.5)))
        combined_mask = combined_mask.expand(3, -1, -1)

        # Scale to 0-255, bring back to host memory, and return channels-last.
        return (combined_mask * 255.0).cpu().byte().permute(1, 2, 0).numpy()
Ejemplo n.º 4
0
import torch
from data import *
from model import *
import random
import time
import math
from tqdm import tqdm
from torch.cuda import is_available as cuda_available
import matplotlib.pyplot as plt
import torch.optim as optim

# Run-wide switches: the GPU is used only when requested AND reported as available.
use_gpu = True
cuda = use_gpu and cuda_available()
filename = './data/combined_technology_news_stocks.csv'
# NOTE(review): this repeats the identical import a few lines above; it is redundant here.
from torch.cuda import is_available as cuda_available

# `Data` is not defined in this file; presumably it comes from one of the star imports
# (`data` / `model`) -- TODO confirm. It is constructed at import time.
data = Data(filename, cuda)
n_letters = data.n_letters
n_categories = data.n_categories
n_hidden = 144

def categoryFromOutput(output):
    """Return the index of the highest-scoring entry in the first row of ``output``."""
    scores = output.data  # plain tensor taken out of the Variable
    _, best_indices = scores.topk(1)
    first_row = best_indices[0]
    return first_row[0]

def randomChoice(l):
    """Return one element of the sequence ``l`` picked at a uniformly random index."""
    last_index = len(l) - 1
    picked = random.randint(0, last_index)
    return l[picked]

def randomTrainingPair():                                                                                                               
    category = randomChoice(all_categories)
 def __init__(self, mode: str, **kwargs):
     """
     Build the run configuration: apply overrides, probe the GPU, derive log/weight paths.

     Side effects visible in this method: reads the file at ``self.classes_path`` and
     creates the ``log_root``, ``log_root/visdom`` and weight-save directories.

     :param mode: 'train' or 'inference'; any other value only triggers a warning and
         leaves ``enable_grad`` False
     :param kwargs: overrides for attributes that already exist on the instance
         (presumably class-level defaults declared outside this view -- TODO confirm);
         unknown keys are reported with a warning and not set
     :raises AssertionError: if ``classes_path`` is not a file, ``batch_size`` is not
         divisible by the GPU count, ``log_root`` is not a directory after creation, or
         ``loss_type`` / ``optimizer`` is not one of the accepted names
     """
     if mode not in ['train', 'inference']:
         warn(
             "Invalid argument mode, expect 'train' or 'inference' but got '%s'"
             % mode)
     self.mode = mode
     self.enable_grad = mode == 'train'
     # Timestamp reused below in every per-run file name.
     self.init_time = timestr('%Y%m%d.%H%M%S')
     # Overrides are applied before any derived value is computed.
     for key, value in kwargs.items():
         if hasattr(self, key):
             setattr(self, key, value)
         else:
             warn("{} has no attribute {}:{}".format(
                 type(self), key, value))
     # data config
     assert os.path.isfile(
         self.classes_path), "%s is not a valid file" % self.classes_path
     # One class name per line; surrounding whitespace is stripped.
     self.classes = []
     with open(self.classes_path, "r") as f:
         for cls in f.readlines():
             self.classes.append(cls.strip())
     self.num_classes = len(self.classes)
     # efficiency config
     if self.use_gpu:
         from torch.cuda import is_available as cuda_available, device_count
         if cuda_available():
             self.num_gpu = device_count()
             self.gpu_list = list(range(self.num_gpu))
             # The batch must divide evenly across all detected GPUs.
             assert self.batch_size % self.num_gpu == 0, \
                 "Can't split a batch of data with batch_size {} averagely into {} gpu(s)" \
                     .format(self.batch_size, self.num_gpu)
         else:
             # GPU requested but none found: fall back to CPU instead of failing.
             warn(
                 "Can't find available cuda devices, use_gpu will be automatically set to False."
             )
             self.use_gpu = False
             self.num_gpu = 0
             self.gpu_list = []
     else:
         from torch.cuda import is_available as cuda_available
         # CPU requested: only hint that a GPU exists, do not switch to it.
         if cuda_available():
             warn(
                 "Available cuda devices were found, please switch use_gpu to True for acceleration."
             )
         self.num_gpu = 0
         self.gpu_list = []
     # use_gpu may have been reset above, so it is re-read here.
     # The lambda returns the storage unchanged; presumably both values are meant as a
     # `map_location` argument when loading weights -- TODO confirm at the call site.
     if self.use_gpu:
         self.map_location = lambda storage, loc: storage
     else:
         self.map_location = "cpu"
     # weight S/L config
     self.vis_env_path = os.path.join(self.log_root, 'visdom')
     os.makedirs(os.path.dirname(self.weight_save_path), exist_ok=True)
     os.makedirs(self.log_root, exist_ok=True)
     os.makedirs(self.vis_env_path, exist_ok=True)
     assert os.path.isdir(self.log_root)
     self.temp_weight_path = os.path.join(
         self.log_root, 'tmpmodel{}.pth'.format(self.init_time))
     # NOTE(review): self.optimizer is used here BEFORE it is lower-cased at the end of
     # this method, so the file name carries the caller's original spelling.
     self.temp_optim_path = os.path.join(
         self.log_root, 'tmp{}{}.pth'.format(self.optimizer,
                                             self.init_time))
     self.log_file = os.path.join(
         self.log_root, '{}.{}.log'.format(self.mode, self.init_time))
     self.val_result = os.path.join(
         self.log_root, 'validation_result{}.txt'.format(self.init_time))
     self.train_record_file = os.path.join(self.log_root,
                                           'train.record.jsonlist')
     self.debug_flag_file = os.path.abspath(self.debug_flag_file)
     # The bare string below is a no-op expression statement used as a block comment.
     """
    record training process by core.make_checkpoint() with corresponding arguments of
    [epoch, start time, elapsed time, loss value, train accuracy, validate accuracy]
    DO NOT CHANGE IT unless you know what you're doing!!!
    """
     self.__record_fields__ = [
         'init', 'epoch', 'start', 'elapsed', 'loss', 'train_acc', 'val_acc'
     ]
     # Build a template of the form {{"init":"{}","epoch":"{}",...}}. The per-field
     # piece is formatted here (so its inner braces become a single {} placeholder),
     # while the outer doubled braces are kept verbatim -- it therefore looks intended
     # to be passed through str.format once more by the consumer (not visible here).
     if len(self.__record_fields__) == 0:
         warn(
             '{}.__record_fields__ is empty, this may cause unknown issues when save checkpoint into {}' \
                 .format(type(self), self.train_record_file))
         self.__record_dict__ = '{{}}'
     else:
         self.__record_dict__ = '{{'
         for field in self.__record_fields__:
             self.__record_dict__ += '"{}":"{{}}",'.format(field)
         # Drop the trailing comma before closing the braces.
         self.__record_dict__ = self.__record_dict__[:-1] + '}}'
     # module config
     # A single int is expanded to a two-element [n, n] list.
     if isinstance(self.image_resize, int):
         self.image_resize = [self.image_resize, self.image_resize]
     # Names are normalised to lower case before being validated.
     self.loss_type = self.loss_type.lower()
     assert self.loss_type in [
         "mse", "cross_entropy", "crossentropy", "cross", "ce"
     ]
     self.optimizer = self.optimizer.lower()
     assert self.optimizer in ["sgd", "adam"]
Ejemplo n.º 6
0
    def __init__(self, **kwargs):
        """
        Build the run configuration from existing attribute defaults plus ``kwargs``.

        Steps, in order: stamp the start time, apply overrides, validate the mode, probe
        the GPU, create the log / weight directories and derive per-run file paths, build
        the training-record template, and pick the default visualisation port.

        :param kwargs: overrides for attributes that already exist on the instance;
            unknown keys are reported with a warning and not set
        :raises AssertionError: if ``batch_size`` is not divisible by the GPU count or
            ``log_root`` is not a directory after creation
        :raises RuntimeError: if ``visual_engine`` is not a recognised name
        """
        self.init_time = timestr('%Y%m%d.%H%M%S')

        # Apply overrides; anything that is not an existing attribute is only reported.
        for name, override in kwargs.items():
            if not hasattr(self, name):
                warn("{} has no attribute {}:{}".format(type(self), name, override))
                continue
            setattr(self, name, override)

        run_mode = self.mode
        if run_mode not in ('train', 'inference'):
            warn("Invalid argument mode, expect 'train' or 'inference' but got '%s'" % run_mode)
        self.enable_grad = run_mode == 'train'

        # efficiency config
        from torch.cuda import is_available as cuda_available, device_count
        cuda_present = cuda_available()
        if self.use_gpu and cuda_present:
            self.num_gpu = device_count()
            self.gpu_list = list(range(self.num_gpu))
            # The batch must divide evenly across all detected GPUs.
            assert self.batch_size % self.num_gpu == 0, \
                "Can't split a batch of data with batch_size {} averagely into {} gpu(s)" \
                    .format(self.batch_size, self.num_gpu)
        else:
            if self.use_gpu:
                # GPU requested but none found: fall back to CPU.
                warn("Can't find available cuda devices, use_gpu will be automatically set to False.")
                self.use_gpu = False
            elif cuda_present:
                # CPU requested while a GPU exists: hint only.
                warn("Available cuda devices were found, please switch use_gpu to True for acceleration.")
            self.num_gpu = 0
            self.gpu_list = []
        self.map_location = (lambda storage, loc: storage) if self.use_gpu else "cpu"

        # weight S/L config
        log_dir = self.log_root
        self.vis_env_path = os.path.join(log_dir, 'visdom')
        for needed_dir in (os.path.dirname(self.weight_save_path), log_dir, self.vis_env_path):
            os.makedirs(needed_dir, exist_ok=True)
        assert os.path.isdir(log_dir)

        stamp = self.init_time
        self.temp_ckpt_path = os.path.join(log_dir, 'ckpt-{time}.pth'.format(time=stamp))
        self.log_file = os.path.join(log_dir, '{}.{}.log'.format(self.mode, stamp))
        self.val_result = os.path.join(log_dir, 'validation_result{}.txt'.format(stamp))
        self.train_record_file = os.path.join(log_dir, 'train.record.jsons')

        # record training process by core.make_checkpoint() with corresponding arguments of
        # [epoch, start time, elapsed time, loss value, train accuracy, validate accuracy]
        # DO NOT CHANGE IT unless you know what you're doing!!!
        self.__record_fields__ = ['epoch', 'start', 'elapsed', 'loss', 'train_score', 'val_score']
        if not self.__record_fields__:
            warn(
                '{}.__record_fields__ is empty, this may cause unknown issues when save checkpoint into {}' \
                    .format(type(self), self.train_record_file))
            self.__record_dict__ = '{{}}'
        else:
            # One '"field":"{}",' piece per field; the last comma is cut off before
            # the closing braces are appended.
            pieces = ''.join('"{}":"{{}}",'.format(field) for field in self.__record_fields__)
            self.__record_dict__ = '{{' + pieces[:-1] + '}}'

        # visualize config
        engine = self.visual_engine
        if engine in ("visdom", "vis"):
            fallback_port = 8097
        elif engine in ("tensorboardx", "tensorboard", "tb"):
            fallback_port = 6006
        else:
            raise RuntimeError("Invalid parameter value of visual_engine :", engine)
        # An explicitly configured port always wins over the engine default.
        if self.port is None:
            self.port = fallback_port
Ejemplo n.º 7
0
def get_args(argv=None) -> Namespace:
    """
    Set up all the relevant command line arguments and return the parsed
    arguments.

    Args:
        argv: optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) keeps the original
            behaviour of reading ``sys.argv[1:]``.

    Returns:
        The ``Namespace`` produced by ``ArgumentParser.parse_args``.
    """
    # The text is passed as ``description``: as the first positional argument
    # it would be taken as ``prog`` and replace the program name in the usage
    # line.
    parser = ArgumentParser(
        description="SpeedRunnersAI running configuration."
    )

    # File args
    parser.add_argument(
        "-x", "--experiment_path", type=str,
        help="the path to save experiment results and models"
    )
    parser.add_argument(
        "--load_path", type=str,
        help="the path of the saved model to load"
    )

    # Environment Arguments
    parser.add_argument(
        "--episode_length", type=int, default=120,
        metavar="NUM_SECONDS", help="the duration of each episode in seconds"
    )
    parser.add_argument(
        "--action_delay", type=float, default=0.2,
        help="the time in seconds between each agent action"
    )
    parser.add_argument(
        "--state_size", type=int, nargs=2, default=(64, 64),
        metavar=("HEIGHT", "WIDTH"),
        help="the size of each state in (height width)"
    )
    parser.add_argument(
        "--rgb", action="store_true",
        help="if the images should be rgb instead of grayscale, default false"
    )
    parser.add_argument(
        "--stacked_frames", type=int, default=4, metavar="NUM_FRAMES",
        help="the number of frames in a row for each model input"
    )
    parser.add_argument(
        "--read_memory", action="store_true",
        help="if this process should attach to the game and read memory from "
             "it, default false"
    )
    parser.add_argument(
        "--window_size", type=int, nargs=4, default=None,
        metavar=("LEFT", "TOP", "RIGHT", "BOTTOM"),
        help="the region of the screen the game is in, in "
             "(left top right bottom), by default your entire screen"
    )

    # Model parameter arguments
    parser.add_argument(
        "--device", type=torch.device,
        default="cuda" if cuda_available() else "cpu",
        help="the device tensors should be stored on, if not given, will use "
             "cuda if available"
    )
    parser.add_argument(
        "--hidden_size", type=int, default=512,
        help="the size of each hidden layer"
    )
    parser.add_argument(
        "--num_layers", type=int, default=2,
        help="the number of layers before the output layers"
    )

    # Algorithm arguments
    parser.add_argument(
        "--exploration", choices=["rnd"],
        help="The type of exploration to use [rnd]"
    )
    parser.add_argument(
        "--discount", type=float, default=0.995,
        help="the next state reward discount factor"
    )
    parser.add_argument(
        "--polyak", type=float, default=5e-3,
        help="the polyak constant for the target network updates"
    )
    # Counts and sizes are parsed as int: with type=float a value given on the
    # command line would arrive as e.g. 32.0 while the untouched default stays
    # an int, so downstream code would see two different types.
    parser.add_argument(
        "--n_quantiles", type=int, default=64,
        help="the number of quantile samples for IQN"
    )
    parser.add_argument(
        "--embedding_dim", type=int, default=64,
        help="the dimension of the quantile distribution for IQN"
    )
    parser.add_argument(
        "--huber_threshold", type=float, default=1,
        help="the threshhold of the huber loss (kappa) for IQN"
    )
    parser.add_argument(
        "--target_update_interval", type=int, default=1,
        help="the number of training steps in-between target network updates"
    )
    parser.add_argument(
        "--lr", type=float, default=3e-4, help="the learning rate"
    )

    # Training/Playing arguments
    parser.add_argument(
        "--episodes", type=int, default=10000,
        help="the number of episodes to play for if playing"
    )
    parser.add_argument(
        "--batch_size", type=int, default=256,
        help="the batch size of the training set"
    )
    parser.add_argument(
        "--start_size", type=int, default=1024,
        help="the size of the replay buffer before training"
    )
    parser.add_argument(
        "--training_steps", type=int, default=200000000,
        help="the number of training steps to train for"
    )
    parser.add_argument(
        "--save_interval", type=int, default=5000,
        help="the number of batches in between saves"
    )

    # Agent arguments
    parser.add_argument(
        "--decay", type=float, default=0.99,
        help="the gamma decay for the target Q-values"
    )
    parser.add_argument(
        "--n_steps", type=int, default=20,
        help="the number of decay steps"
    )
    parser.add_argument(
        "--silent", action="store_true",
        help="will run without standard output from agents"
    )

    # Experience Replay args
    parser.add_argument(
        "--er_capacity", type=int, default=7000,
        help="the maximum amount of episodes in the replay buffer"
    )
    parser.add_argument(
        "--er_alpha", type=float, default=0.6,
        help="the alpha value for PER"
    )
    parser.add_argument(
        "--er_beta", type=float, default=0.4,
        help="the beta value for PER"
    )
    parser.add_argument(
        "--er_beta_increment", type=float, default=1e-6,
        help="the increment of the beta value on each sample for PER"
    )
    parser.add_argument(
        "--er_epsilon", type=float, default=1e-3,
        help="the epsilon value for PER"
    )

    # argv=None makes argparse fall back to sys.argv[1:].
    return parser.parse_args(argv)