Example #1
    def build_cell_stems(self, in_shapes:TensorShapesList, conf_cell:Config,
                   cell_index:int)\
                       ->Tuple[TensorShapes, List[OpDesc]]:

        # expect two stems, both should have the same channels
        # TODO: support multiple stems
        assert len(in_shapes) >= 2, "we must have outputs from at least two previous modules"

        # Get channels for previous two layers.
        # At start we have only one layer, i.e., model stems.
        # Typically model stems should have the same channel count, but for imagenet we do
        #   a reduction at the model stem so stem1 will have twice the channels of stem0.
        p_ch_out = self.get_ch(in_shapes[-1][0])
        pp_ch_out = self.get_ch(in_shapes[-2][0])

        # was the previous layer a reduction layer?
        reduction_p = p_ch_out == pp_ch_out*2 or \
                      in_shapes[-2][0][2] == in_shapes[-1][0][2]*2

        # find out the node channels for this cell
        node_ch_out = self.node_channels[cell_index][0] # init with first node in cell

        # Cell stems will take prev channels and output the same channels as nodes would.
        # If the prev cell was a reduction then we need to increase the channels of prev-prev
        # by 2X. This is done by the prepr_reduce stem.
        s0_op = OpDesc(
            'prepr_reduce' if reduction_p else 'prepr_normal',
            params={'conv': ConvMacroParams(pp_ch_out, node_ch_out)},
            in_len=1,
            trainables=None)

        s1_op = OpDesc('prepr_normal',
                       params={'conv': ConvMacroParams(p_ch_out, node_ch_out)},
                       in_len=1,
                       trainables=None)

        # output two shapes with proper channels setup
        # for default model desc, cell stems have same shapes and channels
        out_shape0 = copy.deepcopy(in_shapes[-1][0])
        # set channels and reset shapes to -1 to indicate unknown
        # for imagenet HxW would be floating point numbers due to one input being reduced
        out_shape0[0], out_shape0[2], out_shape0[3] = node_ch_out, -1, -1
        out_shape1 = copy.deepcopy(out_shape0)

        return [out_shape0, out_shape1], [s0_op, s1_op]
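
Note: the TensorShape values handled above are plain Python lists of the form
[channels, -1, H, W], with -1 marking an unknown dimension. A minimal sketch of
the shape bookkeeping, using made-up stem shapes rather than archai's own:

import copy

# hypothetical outputs of two model stems: 64 channels each, 32x32 spatial
in_shapes = [[[64, -1, 32.0, 32.0]],
             [[64, -1, 32.0, 32.0]]]

node_ch_out = 16
out_shape0 = copy.deepcopy(in_shapes[-1][0])
# set channels and reset H, W to -1 to indicate unknown
out_shape0[0], out_shape0[2], out_shape0[3] = node_ch_out, -1, -1
out_shape1 = copy.deepcopy(out_shape0)
print(out_shape0, out_shape1)   # [16, -1, -1, -1] [16, -1, -1, -1]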
Example #2
    def _build_cell(self, cell_desc: CellDesc, rand_ops: RandOps) -> None:
        # sanity check: we have random ops for each node
        assert len(cell_desc.nodes()) == len(rand_ops.ops_and_ins)

        reduction = (cell_desc.cell_type == CellType.Reduction)

        # Add random op for each edge
        for node, (op_names, to_states) in zip(cell_desc.nodes(),
                                               rand_ops.ops_and_ins):
            # as we want cell to be completely random, remove previous edges
            node.edges.clear()

            # add random edges
            for op_name, to_state in zip(op_names, to_states):
                op_desc = OpDesc(op_name,
                                 params={
                                     'conv': cell_desc.conv_params,
                                     'stride': 2 if reduction and to_state < 2 else 1
                                 },
                                 in_len=1,
                                 trainables=None,
                                 children=None)
                edge = EdgeDesc(op_desc, input_ids=[to_state])
                node.edges.append(edge)
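
RandOps itself is not shown above; a hedged stand-in that produces the same
(op_names, to_states) pairs per node, with hypothetical op names, could be:

import random

class RandOps:
    def __init__(self, n_nodes: int, op_choices: list):
        # each node gets two random ops, each wired to a distinct earlier state
        self.ops_and_ins = [
            (random.choices(op_choices, k=2), random.sample(range(i + 2), 2))
            for i in range(n_nodes)
        ]

rand_ops = RandOps(4, ['max_pool_3x3', 'skip_connect', 'sep_conv_3x3'])
for op_names, to_states in rand_ops.ops_and_ins:
    print(op_names, to_states)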
Example #3
    def build_nodes(self, stem_shapes:TensorShapes, conf_cell:Config,
                    cell_index:int, cell_type:CellType, node_count:int,
                    in_shape:TensorShape, out_shape:TensorShape) \
                        ->Tuple[TensorShapes, List[NodeDesc]]:

        assert in_shape[0]==out_shape[0]

        reduction = (cell_type==CellType.Reduction)
        nodes:List[NodeDesc] = []
        conv_params = ConvMacroParams(in_shape[0], out_shape[0])

        # add xnas op for each edge
        for i in range(node_count):
            edges=[]
            for j in range(i+2):
                op_desc = OpDesc('xnas_op',
                                    params={
                                        'conv': conv_params,
                                        'stride': 2 if reduction and j < 2 else 1
                                    }, in_len=1, trainables=None, children=None)
                edge = EdgeDesc(op_desc, input_ids=[j])
                edges.append(edge)
            nodes.append(NodeDesc(edges=edges, conv_params=conv_params))

        out_shapes = [copy.deepcopy(out_shape) for _ in range(node_count)]

        return out_shapes, nodes
Example #4
    def build_nodes(self, stem_shapes:TensorShapes, conf_cell:Config,
                    cell_index:int, cell_type:CellType, node_count:int,
                    in_shape:TensorShape, out_shape:TensorShape) \
                        ->Tuple[TensorShapes, List[NodeDesc]]:

        assert in_shape[0]==out_shape[0]

        nodes:List[NodeDesc] = []
        conv_params = ConvMacroParams(in_shape[0], out_shape[0])

        for i in range(node_count):
            edges = []
            input_ids = []
            first_proj = False # if input node is connected then it needs projection
            if self._cell_matrix[0, i+1]: # nasbench internal node starts at 1
                input_ids.append(0) # connect to s0
                first_proj = True
            for j in range(i): # look at all internal vertices before us
                if self._cell_matrix[j+1, i+1]: # if there is a connection
                    input_ids.append(j+2) # offset because of s0, s1

            op_desc = OpDesc('nasbench101_op',
                                params={
                                    'conv': conv_params,
                                    'stride': 1,
                                    'vertex_op': self._vertex_ops[i+1], # offset because of input node
                                    'first_proj': first_proj
                                }, in_len=len(input_ids), trainables=None, children=None) # TODO: should we pass children here?
            edge = EdgeDesc(op_desc, input_ids=input_ids)
            edges.append(edge)
            nodes.append(NodeDesc(edges=edges, conv_params=conv_params))

        out_shapes = [copy.deepcopy(out_shape) for _ in range(node_count)]
        return out_shapes, nodes
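
For both build_nodes variants above, the NASBench-101 wiring comes from an
upper-triangular adjacency matrix. A runnable sketch of just the input_ids
derivation, with a made-up 7x7 matrix (row/col 0 is the cell input, 6 the output):

import numpy as np

cell_matrix = np.array([
    [0, 1, 1, 0, 0, 0, 0],
    [0, 0, 0, 1, 0, 0, 0],
    [0, 0, 0, 0, 1, 0, 0],
    [0, 0, 0, 0, 0, 1, 0],
    [0, 0, 0, 0, 0, 1, 0],
    [0, 0, 0, 0, 0, 0, 1],
    [0, 0, 0, 0, 0, 0, 0]])

node_count = 5  # internal vertices only
for i in range(node_count):
    input_ids = []
    if cell_matrix[0, i + 1]:        # connected to the cell input -> s0
        input_ids.append(0)
    for j in range(i):               # earlier internal vertices
        if cell_matrix[j + 1, i + 1]:
            input_ids.append(j + 2)  # offset because of s0, s1
    print(f'node {i}: input_ids={input_ids}')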
Example #5
    def build_logits_op(self, in_shapes:TensorShapesList, conf_model_desc:Config)->OpDesc:
        n_classes = self.get_conf_dataset()['n_classes']

        return OpDesc('linear',
                        params={'n_ch':in_shapes[-1][0][0],
                                'n_classes': n_classes},
                        in_len=1, trainables=None)
Example #6
    def finalize(self) -> Tuple[OpDesc, Optional[float]]:
        with torch.no_grad():  # probably this is not needed
            l = self._flatten_ops_alphas()

            # select 3 largest ops by alpha
            sel = heapq.nlargest(3, l, key=lambda t: t[0])  # TODO: add config

        # multi_op needs to know each input and associated primitive
        final_op_desc = OpDesc(
            name='multi_op',
            params={
                # copy convolution parameters
                'conv': self.desc.params['conv']
            },
            # Number of inputs remains the same although only 3 of
            # them will be used.
            in_len=self.desc.in_len,
            trainables=None,
            # primitive's finalize call also records its
            # weights in description. finalize call returns
            # (desc, rank) where rank for primitive is None
            children=[op.finalize()[0] for a, i, op in sel],
            children_ins=[i for a, i, op in sel])

        # rank=None to indicate no further selection needed as in darts
        return final_op_desc, None
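
The top-3 selection is just heapq.nlargest over (alpha, input_id, op) tuples; a
small self-contained illustration with made-up alphas and op names:

import heapq

l = [(0.40, 0, 'sep_conv_3x3'), (0.05, 1, 'none'),
     (0.30, 1, 'skip_connect'), (0.25, 0, 'max_pool_3x3')]
sel = heapq.nlargest(3, l, key=lambda t: t[0])
print([(i, name) for a, i, name in sel])
# [(0, 'sep_conv_3x3'), (1, 'skip_connect'), (0, 'max_pool_3x3')]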
Example #7
File: divop.py Project: wayne9qiu/archai
    def __init__(self, op_desc: OpDesc, arch_params: Optional[ArchParams],
                 affine: bool):
        super().__init__()

        # assume last PRIMITIVE is 'none'
        assert DivOp.PRIMITIVES[-1] == 'none'

        conf = get_conf()
        trainer = conf['nas']['search']['divnas']['archtrainer']
        finalizer = conf['nas']['search']['finalizer']

        if trainer == 'noalpha' and finalizer == 'default':
            raise NotImplementedError(
                'noalpha trainer is not implemented for the default finalizer')

        if trainer != 'noalpha':
            self._setup_arch_params(arch_params)
        else:
            self._alphas = None

        self._ops = nn.ModuleList()
        for primitive in DivOp.PRIMITIVES:
            op = Op.create(OpDesc(primitive,
                                  op_desc.params,
                                  in_len=1,
                                  trainables=None),
                           affine=affine,
                           arch_params=None)
            self._ops.append(op)

        # various state variables for diversity
        self._collect_activations = False
        self._forward_counter = 0
        self._batch_activs = None
Example #8
    def build_nodes(self, stem_shapes:TensorShapes, conf_cell:Config,
                    cell_index:int, cell_type:CellType, node_count:int,
                    in_shape:TensorShape, out_shape:TensorShape) \
                        ->Tuple[TensorShapes, List[NodeDesc]]:

        assert in_shape[0] == out_shape[0]

        reduction = (cell_type == CellType.Reduction)
        ops = self._reduction_ops if reduction else self._normal_ops
        assert node_count == len(ops.ops_and_ins)

        nodes: List[NodeDesc] = []
        conv_params = ConvMacroParams(in_shape[0], out_shape[0])

        for op_names, to_states in ops.ops_and_ins:
            edges = []
            # add random edges
            for op_name, to_state in zip(op_names, to_states):
                op_desc = OpDesc(op_name,
                                 params={
                                     'conv': conv_params,
                                     'stride': 2 if reduction and to_state < 2 else 1
                                 },
                                 in_len=1,
                                 trainables=None,
                                 children=None)
                edge = EdgeDesc(op_desc, input_ids=[to_state])
                edges.append(edge)
            nodes.append(NodeDesc(edges=edges, conv_params=conv_params))

        out_shapes = [copy.deepcopy(out_shape) for _ in range(node_count)]

        return out_shapes, nodes
Example #9
    def build_model_stems(self, in_shapes:TensorShapesList,
            conf_model_desc:Config)->List[OpDesc]:
        # TODO: why do we need stem_multiplier?
        # TODO: in original paper stems are always affine

        conf_model_stems = self.get_conf_model_stems()

        init_node_ch:int = conf_model_stems['init_node_ch']
        stem_multiplier:int = conf_model_stems['stem_multiplier']
        ops:List[str] = conf_model_stems['ops']

        out_channels = init_node_ch*stem_multiplier

        conv_params = ConvMacroParams(self.get_ch(in_shapes[-1][0]), # channels of first input tensor
                                      out_channels)

        stems = [OpDesc(name=op_name, params={'conv': conv_params},
                          in_len=1, trainables=None) \
                for op_name in ops]

        # get reduction factors done by each stem; typically they should be the same
        # but for imagenet they can differ
        stem_reductions = ModelDescBuilder._stem_reductions(stems)

        # Each cell takes input from the previous and 2nd-previous cells.
        # To be consistent we create two outputs for model stems: [[s1, s0], [s0, s1]].
        # This way when we access the first element of each output we get s1, s0.
        # Normally s0==s1, but for networks like imagenet, s0 will have twice the channels
        #   of s1.
        for stem_reduction in stem_reductions:
            in_shapes.append([[out_channels, -1, -1.0/stem_reduction, -1.0/stem_reduction]])

        return stems
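
The trailing loop records one output shape per stem, encoding each stem's
reduction factor as a negative fraction. A sketch with hypothetical reduction
factors of 1 and 2 (the imagenet-style case):

out_channels = 48
stem_reductions = [1, 2]

in_shapes = []
for stem_reduction in stem_reductions:
    in_shapes.append([[out_channels, -1,
                       -1.0/stem_reduction, -1.0/stem_reduction]])
print(in_shapes)   # [[[48, -1, -1.0, -1.0]], [[48, -1, -0.5, -0.5]]]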
Example #10
    def _build_cell(self, cell_desc: CellDesc) -> None:
        for i, node in enumerate(cell_desc.nodes()):
            input_ids = []
            first_proj = False  # if input node is connected then it needs projection
            if self._cell_matrix[0, i + 1]:  # nasbench internal node starts at 1
                input_ids.append(0)  # connect to s0
                first_proj = True

            for j in range(i):  # look at all internal vertices before us
                if self._cell_matrix[j + 1, i + 1]:  # if there is a connection
                    input_ids.append(j + 2)  # offset because of s0, s1

            op_desc = OpDesc(
                'nasbench101_op',
                params={
                    'conv': cell_desc.conv_params,
                    'stride': 1,
                    'vertex_op': self._vertex_ops[i + 1],  # offset because of input node
                    'first_proj': first_proj
                },
                in_len=len(input_ids),
                trainables=None,
                children=None)  # TODO: should we pass children here?
            edge = EdgeDesc(op_desc, input_ids=input_ids)
            node.edges.append(edge)
Example #11
    def build_nodes(self, stem_shapes:TensorShapes, conf_cell:Config,
                    cell_index:int, cell_type:CellType, node_count:int,
                    in_shape:TensorShape, out_shape:TensorShape) \
                        ->Tuple[TensorShapes, List[NodeDesc]]:

        # For petridish we add one node with an identity op to s1.
        # This will be our seed model to start with.
        # Later in PetridishSearcher, we will add one more node to the parent after each sampling.

        assert in_shape[0] == out_shape[0]

        reduction = (cell_type == CellType.Reduction)

        # channels for conv filters
        conv_params = ConvMacroParams(in_shape[0], out_shape[0])

        # identity op to connect S1 to the node
        op_desc = OpDesc('skip_connect',
                         params={
                             'conv': conv_params,
                             'stride': 2 if reduction else 1
                         },
                         in_len=1,
                         trainables=None,
                         children=None)
        edge = EdgeDesc(op_desc, input_ids=[1])
        new_node = NodeDesc(edges=[edge], conv_params=conv_params)
        nodes = [new_node]

        # each node has same out channels as in channels
        out_shapes = [copy.deepcopy(out_shape) for _ in nodes]

        return out_shapes, nodes
Example #12
    def _build_cell(self, cell_desc: CellDesc) -> None:
        reduction = (cell_desc.cell_type == CellType.Reduction)

        self._ensure_nonempty_nodes(cell_desc)

        # we operate on the last node, inserting another node before it
        new_nodes = [n.clone() for n in cell_desc.nodes()]
        petridish_node = NodeDesc(edges=[])
        new_nodes.insert(len(new_nodes) - 1, petridish_node)

        input_ids = list(range(len(new_nodes)))  # s0, s1 and every node before the inserted one
        assert len(input_ids) >= 2
        op_desc = OpDesc('petridish_reduction_op' if reduction else 'petridish_normal_op',
                            params={
                                'conv': cell_desc.conv_params,
                                # specify strides for each input, later we will
                                # give this to each primitive
                                '_strides':[2 if reduction and j < 2 else 1 \
                                           for j in input_ids],
                            }, in_len=len(input_ids), trainables=None, children=None)
        edge = EdgeDesc(op_desc, input_ids=input_ids)
        petridish_node.edges.append(edge)

        # note that the post op will be recreated, which means there is no
        # warm start for the post op when the number of nodes changes
        cell_desc.reset_nodes(new_nodes, cell_desc.node_ch_out,
                              cell_desc.post_op.name)
Example #13
    def seed(self, model_desc: ModelDesc) -> None:
        # for petridish we add one node with an identity op to s1;
        # this will be our seed model
        for cell_desc in model_desc.cell_descs():
            node_count = len(cell_desc.nodes())
            assert node_count >= 1
            first_node = cell_desc.nodes()[0]
            # if there are no edges for 1st node, add identity to s1
            if len(first_node.edges) == 0:
                op_desc = OpDesc(
                    'skip_connect',
                    params={
                        'conv': cell_desc.conv_params,
                        'stride': 2 if cell_desc.cell_type == CellType.Reduction else 1
                    },
                    in_len=1,
                    trainables=None,
                    children=None)
                edge = EdgeDesc(op_desc, input_ids=[1])
                first_node.edges.append(edge)

            # remove empty nodes
            new_nodes = [
                n.clone() for n in cell_desc.nodes() if len(n.edges) > 0
            ]
            if len(new_nodes) != len(cell_desc.nodes()):
                cell_desc.reset_nodes(new_nodes, cell_desc.node_ch_out,
                                      cell_desc.post_op.name)

            self._ensure_nonempty_nodes(cell_desc)
Example #14
    def build_nodes(self, stem_shapes:TensorShapes, conf_cell:Config,
                    cell_index:int, cell_type:CellType, node_count:int,
                    in_shape:TensorShape, out_shape:TensorShape) \
                        ->Tuple[TensorShapes, List[NodeDesc]]:

        assert in_shape[0] == out_shape[0]

        reduction = (cell_type == CellType.Reduction)

        nodes: List[NodeDesc] = []
        conv_params = ConvMacroParams(in_shape[0], out_shape[0])

        # Add a div op for each edge in each node.
        # How does the stride work? For all ops connected to s0 and s1, we apply
        # reduction in WxH. All ops connected elsewhere automatically get
        # reduced WxH (because all subsequent states are derived from s0 and s1).
        # Note that channel count is increased via conv_params for the cell.
        for i in range(node_count):
            edges = []
            for j in range(i + 2):
                op_desc = OpDesc('div_op',
                                 params={
                                     'conv': conv_params,
                                     'stride': 2 if reduction and j < 2 else 1
                                 },
                                 in_len=1,
                                 trainables=None,
                                 children=None)
                edge = EdgeDesc(op_desc, input_ids=[j])
                edges.append(edge)
            nodes.append(NodeDesc(edges=edges, conv_params=conv_params))

        out_shapes = [copy.deepcopy(out_shape) for _ in range(node_count)]

        return out_shapes, nodes
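
The stride rule in the comment can be seen in isolation: in a reduction cell,
only edges that read directly from s0 or s1 (input ids 0 and 1) get stride 2.
A tiny sketch:

reduction, node_count = True, 4
for i in range(node_count):
    strides = [2 if reduction and j < 2 else 1 for j in range(i + 2)]
    print(f'node {i}: strides={strides}')
# node 0: strides=[2, 2]
# node 1: strides=[2, 2, 1]
# ...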
Example #15
    def build_model_pool(self, in_shapes:TensorShapesList, conf_model_desc:Config)\
            ->OpDesc:

        model_post_op = conf_model_desc['model_post_op']
        last_shape = in_shapes[-1][0]

        in_shapes.append([copy.deepcopy(last_shape)])

        return OpDesc(model_post_op,
                         params={'conv': ConvMacroParams(last_shape[0], last_shape[0])},
                         in_len=1, trainables=None)
Example #16
    def _build_cell(self, cell_desc:CellDesc)->None:
        reduction = (cell_desc.cell_type==CellType.Reduction)

        # add xnas op for each edge
        for i, node in enumerate(cell_desc.nodes()):
            for j in range(i+2):
                op_desc = OpDesc('xnas_op',
                                    params={
                                        'conv': cell_desc.conv_params,
                                        'stride': 2 if reduction and j < 2 else 1
                                    }, in_len=1, trainables=None, children=None)
                edge = EdgeDesc(op_desc, input_ids=[j])
                node.edges.append(edge)
Example #17
    def __init__(self, op_desc: OpDesc, arch_params: Optional[ArchParams],
                 affine: bool):
        super().__init__()

        vertex_op_name = op_desc.params['vertex_op']
        proj_first = op_desc.params['proj_first']  # first input needs projection

        self._vertex_op = Op.create(OpDesc(vertex_op_name,
                                           params=op_desc.params,
                                           in_len=1,
                                           trainables=None),
                                    affine=affine,
                                    arch_params=None)

        self._in_len = op_desc.in_len

        self._proj_op = Op.create(OpDesc('convbnrelu_1x1', params=op_desc.params,
                                         in_len=1, trainables=None),
                                  affine=affine, arch_params=None) \
                        if proj_first else None
Example #18
    def __init__(self, op_desc:OpDesc, arch_params:Optional[ArchParams],
                 affine:bool):
        super().__init__()

        # assume last PRIMITIVE is 'none'
        assert GsOp.PRIMITIVES[-1] == 'none'

        self._gs_num_sample = op_desc.params['gs_num_sample']

        self._ops = nn.ModuleList()
        for primitive in GsOp.PRIMITIVES:
            op = Op.create(
                OpDesc(primitive, op_desc.params, in_len=1, trainables=None),
                affine=affine, arch_params=None)
            self._ops.append(op)
        # we do this at the end so that we can capture all arch params registered by
        # any previous child modules
        self._setup_arch_params(arch_params)
Example #19
    def __init__(self, op_desc: OpDesc, arch_params: Optional[ArchParams],
                 reduction: bool, affine: bool):
        super().__init__()

        # assume last PRIMITIVE is 'none' (this is used for finalize)
        assert PetridishOp.PRIMITIVES[-1] == 'none'

        # create edges for the op; each edge connects to an input state,
        # and within each edge we will have all N primitives
        self._edges = nn.ModuleList()

        for i in range(op_desc.in_len):
            # edge contains all primitives with alphas
            edge = nn.ModuleList()
            self._edges.append(edge)

            # the stride could be different for each input,
            # so we make a copy of our params and then set the stride for this input
            params = deepcopy(op_desc.params)
            params['stride'] = op_desc.params['_strides'][i]

            # create primitives for the edge
            for primitive in PetridishOp.PRIMITIVES:
                primitive_op = Op.create(OpDesc(primitive,
                                                params=params,
                                                in_len=1,
                                                trainables=None),
                                         affine=affine,
                                         arch_params=None)
                # wrap primitive with stop-gradient
                op = nn.Sequential(StopGradient(), primitive_op)
                edge.append(op)

        # TODO: check with Dey: Do we really need StopForwardReductionOp
        #   or StopGradientReductionOp because these two will only make sense
        #   for cell stems.
        # NOTE: Consider the case where prev_prev is normal, prev is reduction
        # then s_0 is twice as big in each dimension as s_1 and the number of channels
        # won't match. So you have to use StopGradientReductionOp on s_1 to make it match.
        self._sf = StopForward()

        # we do this at the end so that we can capture all arch params registered by
        # any previous child modules
        self._setup_arch_params(arch_params, op_desc.in_len)
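
StopGradient wraps each primitive so activations flow forward while gradients
are blocked from reaching the input states. archai's own op may be implemented
differently; the common detach-based pattern looks like this (a sketch, not
archai's code):

import torch
import torch.nn as nn

class StopGradient(nn.Module):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return x.detach()   # identity forward, blocks gradient backward

sg_op = nn.Sequential(StopGradient(), nn.Conv2d(3, 8, 3, padding=1))
x = torch.randn(1, 3, 8, 8, requires_grad=True)
sg_op(x).sum().backward()
print(x.grad)   # None: the gradient never reaches the input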
Example #20
    def build_cell_post_op(self, stem_shapes:TensorShapes,
            node_shapes:TensorShapes, conf_cell:Config, cell_index:int)\
                -> Tuple[TensorShape, OpDesc]:

        post_op_name = conf_cell['cell_post_op']
        op_ch_in, cell_ch_out, out_states = self._post_op_ch(post_op_name,
                                                             node_shapes)

        post_op_desc = OpDesc(post_op_name,
            {
                'conv': ConvMacroParams(op_ch_in, cell_ch_out),
                'out_states': out_states
            },
            in_len=1, trainables=None, children=None)

        out_shape = copy.deepcopy(node_shapes[-1])
        out_shape[0] = cell_ch_out

        return out_shape, post_op_desc
Example #21
    def _build_cell(self, cell_desc: CellDesc) -> None:
        reduction = (cell_desc.cell_type == CellType.Reduction)

        # Add a mixed op for each edge in each node.
        # How does the stride work? For all ops connected to s0 and s1, we apply
        # reduction in WxH. All ops connected elsewhere automatically get
        # reduced WxH (because all subsequent states are derived from s0 and s1).
        # Note that channel count is increased via conv_params for the cell.
        for i, node in enumerate(cell_desc.nodes()):
            for j in range(i + 2):
                op_desc = OpDesc('mixed_op',
                                 params={
                                     'conv': cell_desc.conv_params,
                                     'stride': 2 if reduction and j < 2 else 1
                                 },
                                 in_len=1,
                                 trainables=None,
                                 children=None)
                edge = EdgeDesc(op_desc, input_ids=[j])
                node.edges.append(edge)
Example #22
File: gs_op.py Project: wayne9qiu/archai
    def finalize(self, sampled_weights) -> Tuple[OpDesc, Optional[float]]:
        # finalization where each edge gets to keep as many
        # unique operations as are **sampled at the node level**
        assert sampled_weights.shape[0] == len(GsOp.PRIMITIVES)

        # we can't handle empty op
        assert sampled_weights.bool().any()

        greater_than_0 = sampled_weights > 0
        children = []
        children_ins = []
        selected_alphas = []

        for i in range(greater_than_0.size()[0]):
            if greater_than_0[i]:
                children.append(self._ops[i].finalize()[0])
                selected_alphas.append(self._alphas[0][i].item())
                # all the ops will operate on the single node input
                children_ins.append(0)

        final_op_desc = OpDesc(
            name='multi_op',
            params={
                # copy convolution parameters
                'conv': self.desc.params['conv']
            },
            # number of inputs remains the same and in this
            # case should be 1
            in_len=self.desc.in_len,
            trainables=None,
            # primitive's finalize call also records its
            # weights in description. finalize call returns
            # (desc, rank) where rank for primitive is None
            children=children,
            children_ins=children_ins)

        max_alpha = 0.0
        if selected_alphas:
            max_alpha = max(selected_alphas)

        return final_op_desc, max_alpha
Example #23
    def __init__(self, op_desc: OpDesc, affine: bool) -> None:
        """MultiOp combines multiple ops to one op. The set of ops to combine
            if passed through op_desc.children and each of children's inputs are
            passed through op_desc.children_ins. This op will receive list of
            inputs in forward() and each of the children works on one of these
            inputs and generates an output. All outputs of children are then
            combined using projection operation to produce final output of the
            overall op.
        """
        super().__init__()

        # get list of inputs and associated primitives
        iop_descs = op_desc.children
        ins = op_desc.children_ins
        assert iop_descs is not None and ins is not None and \
               len(iop_descs) == len(ins)

        # conv params typically specified by macro builder
        conv_params: ConvMacroParams = op_desc.params['conv']

        self._ops = nn.ModuleList()
        self._ins: List[int] = []

        for i, iop_desc in zip(ins, iop_descs):
            iop_desc.params['conv'] = conv_params
            self._ops.append(Op.create(iop_desc, affine=affine))
            self._ins.append(i)

        # number of channels as we will concatenate outputs of the ops
        ch_out_sum = conv_params.ch_out * len(self._ins)
        ch_adj_desc = OpDesc(
            'proj_channels', {
                'conv': ConvMacroParams(ch_out_sum, conv_params.ch_out),
                'out_states': len(self._ins)
            },
            in_len=1,
            trainables=None,
            children=None)
        self._ch_adj = Op.create(ch_adj_desc, affine=affine)
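
The channel adjustment at the end exists because the children's outputs get
concatenated before projection. What 'proj_channels' amounts to, sketched with
a plain 1x1 conv (the real op's details may differ):

import torch
import torch.nn as nn

ch_out, k = 16, 3   # made-up: each of k children emits ch_out channels
children_outs = [torch.randn(1, ch_out, 8, 8) for _ in range(k)]
cat = torch.cat(children_outs, dim=1)          # 1 x (k*ch_out) x 8 x 8
proj = nn.Conv2d(k * ch_out, ch_out, kernel_size=1)
print(proj(cat).shape)   # torch.Size([1, 16, 8, 8])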
Example #24
    def _add_node(self, model_desc: ModelDesc,
                  model_desc_builder: ModelDescBuilder) -> None:
        for ci, cell_desc in enumerate(model_desc.cell_descs()):
            reduction = (cell_desc.cell_type == CellType.Reduction)

            nodes = cell_desc.nodes()

            # petridish must seed with one node
            assert len(nodes) > 0
            # input/output channels for all nodes are same
            conv_params = nodes[0].conv_params

            # Assign input IDs: s0 and s1 have IDs 0 and 1. Since the new node
            # is inserted before the last one, it can take input from s0, s1
            # and every existing node before the last.
            input_ids = list(range(len(nodes) + 1))
            assert len(input_ids) >= 2  # 2 stem inputs
            op_desc = OpDesc('petridish_reduction_op' if reduction else 'petridish_normal_op',
                                params={
                                    'conv': conv_params,
                                    # specify strides for each input, later we will
                                    # give this to each primitive
                                    '_strides':[2 if reduction and j < 2 else 1 \
                                            for j in input_ids],
                                }, in_len=len(input_ids), trainables=None, children=None)
            edge = EdgeDesc(op_desc, input_ids=input_ids)
            new_node = NodeDesc(edges=[edge], conv_params=conv_params)
            nodes.insert(len(nodes) - 1, new_node)

            # output shape of all nodes are same
            node_shapes = cell_desc.node_shapes
            new_node_shape = copy.deepcopy(node_shapes[-1])
            node_shapes.insert(len(node_shapes) - 1, new_node_shape)

            # post op needs rebuilding because the number of inputs to it has
            # changed, so input/output channels may be different
            post_op_shape, post_op_desc = model_desc_builder.build_cell_post_op(
                cell_desc.stem_shapes, node_shapes, cell_desc.conf_cell, ci)
            cell_desc.reset_nodes(nodes, node_shapes, post_op_desc,
                                  post_op_shape)
Example #25
    def finalize(self) -> Tuple[OpDesc, Optional[float]]:
        # finalization where each edge gets to keep as many
        # unique operations as are sampled
        sample_storage = []
        for i in range(self._gs_num_sample):
            sampled = F.gumbel_softmax(self._alphas[0], tau=1, hard=True, eps=1e-10, dim=-1)
            sample_storage.append(sampled)

        samples_summed = torch.sum(torch.stack(sample_storage, dim=0), dim=0)
        greater_than_0 = samples_summed > 0
        children = []
        children_ins = []

        for i in range(greater_than_0.size()[0]):
            if greater_than_0[i]:
                children.append(self._ops[i].finalize()[0])
                # all the ops will operate on the single node input
                children_ins.append(0)

        final_op_desc = OpDesc(name='multi_op',
                                params={
                                    # copy convolution parameters
                                    'conv': self.desc.params['conv']
                                },
                                # number of inputs remains the same and in this
                                # case should be 1
                                in_len=self.desc.in_len,
                                trainables=None,
                                # primitive's finalize call also records its
                                # weights in description. finalize call returns
                                # (desc, rank) where rank for primitive is None
                                children=children,
                                children_ins=children_ins
                               )

        return final_op_desc, None
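
The sampling above draws gs_num_sample one-hot vectors and keeps any primitive
that was picked at least once. A standalone sketch with made-up alphas over
five primitives:

import torch
import torch.nn.functional as F

alphas = torch.tensor([1.0, 0.2, 0.5, 0.1, 0.3])
gs_num_sample = 4

sample_storage = [F.gumbel_softmax(alphas, tau=1, hard=True, dim=-1)
                  for _ in range(gs_num_sample)]
samples_summed = torch.sum(torch.stack(sample_storage, dim=0), dim=0)
print(samples_summed > 0)   # mask of primitives the finalizer would keep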
Example #26
    def __init__(self, op_desc: OpDesc, arch_params: Optional[ArchParams],
                 affine: bool):
        super().__init__()

        # assume last PRIMITIVE is 'none'
        assert XnasOp.PRIMITIVES[-1] == 'none'

        self._ops = nn.ModuleList()
        for primitive in XnasOp.PRIMITIVES:
            op = Op.create(OpDesc(primitive,
                                  op_desc.params,
                                  in_len=1,
                                  trainables=None),
                           affine=affine,
                           arch_params=None)
            self._ops.append(op)

        # for getting gradients to non-leaf node
        self._is_first_call = True
        self._avg_grad_meter = AverageMeter()

        # we do this at the end so that we can capture all arch params registered by
        # any previous child modules
        self._setup_arch_params(arch_params)