Beispiel #1
0
    def __init__(self, backbone: nn.Module, num_modes: int,
                 n_hidden_layers: List[int] = None,
                 input_shape: Tuple[int, int, int] = (3, 500, 500)):
        """
        Inits Covernet.
        :param backbone: Backbone model. Typically ResNetBackBone or MobileNetBackbone
        :param num_modes: Number of modes in the lattice
        :param n_hidden_layers: List of dimensions in the fully connected layers after the backbones.
            If None, set to [4096]
        :param input_shape: Shape of image input. Used to determine the dimensionality of the feature
            vector after the CNN backbone.
        """

        if n_hidden_layers and not isinstance(n_hidden_layers, list):
            raise ValueError(f"Param n_hidden_layers must be a list. Received {type(n_hidden_layers)}")

        super().__init__()

        if not n_hidden_layers:
            n_hidden_layers = [4096]

        self.backbone = backbone

        backbone_feature_dim = calculate_backbone_feature_dim(backbone, input_shape)
        n_hidden_layers = [backbone_feature_dim + ASV_DIM] + n_hidden_layers + [num_modes]

        linear_layers = [nn.Linear(in_dim, out_dim)
                         for in_dim, out_dim in zip(n_hidden_layers[:-1], n_hidden_layers[1:])]

        self.head = nn.ModuleList(linear_layers)
Beispiel #2
0
    def __init__(self, backbone: nn.Module, num_modes: int,
                 seconds: float = 6, frequency_in_hz: float = 2,
                 n_hidden_layers: int = 4096, input_shape: Tuple[int, int, int] = (3, 500, 500)):
        """
        Inits the MTP network.
        :param backbone: CNN Backbone to use.
        :param num_modes: Number of predicted paths to estimate for each agent.
        :param seconds: Number of seconds into the future to predict.
            Default for the challenge is 6.
        :param frequency_in_hz: Frequency between timesteps in the prediction (in Hz).
            Highest frequency is nuScenes is 2 Hz.
        :param n_hidden_layers: Size of fully connected layer after the CNN
            backbone processes the image.
        :param input_shape: Shape of the input expected by the network.
            This is needed because the size of the fully connected layer after
            the backbone depends on the backbone and its version.

        Note:
            Although seconds and frequency_in_hz are typed as floats, their
            product should be an int.
        """

        super().__init__()

        self.backbone = backbone
        self.num_modes = num_modes
        backbone_feature_dim = calculate_backbone_feature_dim(backbone, input_shape)
        self.fc1 = nn.Linear(backbone_feature_dim + ASV_DIM, n_hidden_layers)
        # self.fc1 = nn.Linear(backbone_feature_dim, n_hidden_layers)
        predictions_per_mode = int(seconds * frequency_in_hz) * 2

        self.fc2 = nn.Linear(n_hidden_layers, int(num_modes * predictions_per_mode + num_modes))