Esempio n. 1
0
 def JsonSchemaToSearchSpaceHelper(
     self,
     longName: str,
     path: str,
     schema: JsonSchema,
     relevantFields: Optional[Set[str]],
     pgo_freqs: pgo_part = None,
     sub_space: bool = True,
 ) -> Dict[str, SearchSpace]:
     """Build one search space per relevant property of an object schema.

     Args:
         longName: Forwarded unchanged to ``schemaToSearchSpaceHelper_``.
         path: Path of the enclosing schema; each property is compiled
             under ``path + "_" + <property name>``.
         schema: Schema whose ``"properties"`` entry (if any) is compiled.
         relevantFields: Property names to compile; ``None`` means every
             property.  Properties outside the set are skipped with a
             debug log message.
         pgo_freqs: Frequencies for the enclosing schema; the per-property
             part is obtained with ``freqs_wrapper_lookup``.
         sub_space: Accepted for signature compatibility; not read here.

     Returns:
         A dict from property name to its search space.  A property whose
         schema compiles to ``None`` is mapped to
         ``SearchSpaceConstant(None)``.  The dict is empty when the schema
         has no ``"properties"`` entry.
     """
     spaces: Dict[str, SearchSpace] = {}
     for p, prop_schema in schema.get("properties", {}).items():
         if relevantFields is not None and p not in relevantFields:
             logger.debug(
                 f"schemaToSearchSpace: skipping not relevant field {p}")
             continue

         # We would need to specify what is correct in that case
         prop_freqs = freqs_wrapper_lookup(pgo_freqs, p)
         # Nested properties are compiled without a relevance filter.
         prop_space = self.schemaToSearchSpaceHelper_(
             longName,
             path + "_" + p,
             prop_schema,
             None,
             pgo_freqs=prop_freqs,
         )
         # if it is a required field, this entire thing should be None
         spaces[p] = (SearchSpaceConstant(None)
                      if prop_space is None else prop_space)
     return spaces
Esempio n. 2
0
    def visitSearchSpaceSum(self,
                            op: SearchSpaceSum) -> SearchSpaceGridInternalType:
        """Convert a sum (choice) of search spaces into grids.

        Every alternative in ``op.sub_spaces`` is visited with this visitor.
        If there is exactly one alternative, its result is returned as-is.
        Otherwise each alternative's result is passed through
        ``fixupDegenerateSearchSpaces``, nested with
        ``nest_choice_all_HPparams`` (or replaced by a single empty grid when
        there is nothing to nest), and every resulting grid gets a
        ``discriminant_name`` entry holding the alternative's index as a
        ``SearchSpaceConstant``.
        """
        alternatives: List[SearchSpace] = op.sub_spaces
        visited: List[SearchSpaceGridInternalType] = [
            accept(alternative, self) for alternative in alternatives
        ]

        # A sum with a single alternative needs no discriminant.
        if len(alternatives) == 1:
            return visited[0]

        tagged: List[SearchSpaceGrid] = []
        for index, raw_grid in enumerate(visited):
            grids = SearchSpaceToGridVisitor.fixupDegenerateSearchSpaces(
                raw_grid)
            if grids:
                # we need to add in this nesting
                # in case a higher order operator directly contains
                # another
                grids = nest_choice_all_HPparams(grids)
            else:
                grids = [{}]

            tagged.extend([{
                **grid, discriminant_name:
                SearchSpaceConstant(index)
            } for grid in grids])
        return tagged
Esempio n. 3
0
    def _searchSpaceList(self, space: SearchSpaceArray, *,
                         size: int) -> List[SearchSpaceGrid]:
        """Build the grids for an array search space with at most ``size`` items.

        Each element space returned by ``space.items(max=size)`` is visited,
        passed through ``fixupDegenerateSearchSpaces`` and nested under its
        positional index (as a string) with ``nest_all_HPparams``.  The
        cartesian product of the per-position grids is then merged into
        single grids, and every merged grid gets a ``structure_type_name``
        entry marking it as a tuple or a list depending on
        ``space.is_tuple``.
        """
        per_position: List[List[SearchSpaceGrid]] = []
        for position, element_space in enumerate(space.items(max=size)):
            element_grids = self.fixupDegenerateSearchSpaces(
                accept(element_space, self))
            per_position.append(
                nest_all_HPparams(str(position), element_grids))

        # ChainMap gives earlier positions precedence on key collisions.
        merged: List[SearchSpaceGrid] = [
            dict(ChainMap(*combination))
            for combination in itertools.product(*per_position)
        ]

        structure_tag = (structure_type_tuple
                         if space.is_tuple else structure_type_list)

        return [{
            **grid, structure_type_name:
            SearchSpaceConstant(structure_tag)
        } for grid in merged]
Esempio n. 4
0
def add_sub_space(space, k, v):
    """Add key ``k`` to a search-space object as the constant ``v``.

    If ``space`` is a ``SearchSpaceObject`` that does not yet have ``k``
    among its keys, ``k`` is appended to ``space.keys`` and every choice
    tuple in ``space.choices`` is extended with ``SearchSpaceConstant(v)``.
    In every other case (a different kind of space, or ``k`` already
    present) the space is left untouched.

    The extended choices are stored as a list rather than a generator, so
    they can be iterated more than once; a generator would be exhausted
    after the first traversal and later traversals would see no choices.

    Returns:
        None; ``space`` is modified in place.
    """
    # TODO: parse "__"-separated keys and walk down the nested schema
    # instead of only handling the top-level object.
    if not isinstance(space, SearchSpaceObject):
        return
    if k in space.keys:
        return

    space.keys.append(k)
    space.choices = [c + (SearchSpaceConstant(v),) for c in space.choices]
Esempio n. 5
0
    def visitSearchSpaceDict(
            self, op: SearchSpaceDict) -> SearchSpaceGridInternalType:
        """Convert a dictionary of named search spaces into grids.

        Each ``(name, space)`` pair of ``op.space_dict`` is visited, passed
        through ``fixupDegenerateSearchSpaces`` and nested under ``name``
        with ``nest_all_HPparams``.  The cartesian product of the per-name
        grids is merged into single grids, and every merged grid gets a
        ``structure_type_name`` entry set to
        ``SearchSpaceConstant(structure_type_dict)``.
        """
        per_name: List[List[SearchSpaceGrid]] = []
        for name, space in op.space_dict.items():
            name_grids = self.fixupDegenerateSearchSpaces(accept(space, self))
            per_name.append(nest_all_HPparams(name, name_grids))

        # ChainMap gives earlier entries precedence on key collisions.
        merged: List[SearchSpaceGrid] = [
            dict(ChainMap(*combination))
            for combination in itertools.product(*per_name)
        ]

        return [{
            **grid, structure_type_name:
            SearchSpaceConstant(structure_type_dict)
        } for grid in merged]
Esempio n. 6
0
    def schemaToSearchSpaceHelper_(
        self,
        longName,
        path: str,
        schema: JsonSchema,
        relevantFields: Optional[Set[str]],
        pgo_freqs: pgo_part = None,
        sub_space: bool = True,
    ) -> Optional[SearchSpace]:
        """Compile a JSON schema into a search space.

        The schema's type is taken from ``"laleType"`` when present and from
        ``"type"`` otherwise.  Dispatch order:

        * a false schema (per ``is_false_schema``) gives ``None``;
        * an ``"enum"`` on anything but an operator gives ``SearchSpaceEnum``;
        * ``boolean`` gives ``SearchSpaceBool``;
        * ``number`` / ``integer`` give ``SearchSpaceNumber``, preferring the
          ``...ForOptimizer`` bounds over the plain ones;
        * ``array`` / ``tuple`` give ``SearchSpaceArray`` (or a constant
          empty collection when the maximum number of items is 0);
        * ``object`` gives ``SearchSpaceDict`` when ``sub_space`` is true and
          ``SearchSpaceObject`` otherwise;
        * ``operator`` gives a constant, a ``SearchSpaceSum`` of constants,
          or a ``SearchSpaceOperator``;
        * ``string`` is not handled by type and falls through to the
          ``anyOf`` / ``allOf`` handling;
        * schemas without a type are handled through ``anyOf`` (object
          alternatives only) or ``allOf`` (exactly one non-negated member).

        Args:
            longName: Passed to ``SearchSpaceObject`` and to recursive calls.
            path: Location of this schema, used in error reports and
                extended for nested schemas.
            schema: The schema to compile.
            relevantFields: Forwarded to nested compilation; see
                ``JsonSchemaToSearchSpaceHelper``.
            pgo_freqs: Frequencies for this schema; converted with
                ``asFreqs`` where a ``pgo`` argument is needed.  Not
                forwarded to the item schemas of arrays.
            sub_space: Selects the representation of object schemas.

        Returns:
            The compiled search space, or ``None`` for a false schema, for an
            operator schema without an ``"enum"``, or for an array one of
            whose positional item schemas compiles to ``None``.

        Raises:
            OperatorSchemaError: If the schema has an unknown or
                unsearchable type, lacks information needed for arrays
                (item schema, maximum size), or has a shape this method does
                not know how to compile.
        """
        # TODO: handle degenerate cases
        # right now, this handles only a very fixed form

        if is_false_schema(schema):
            return None

        # "laleType" takes precedence over the plain JSON-schema "type".
        typ: Optional[str] = None
        typ = schema.get("laleType", None)
        if typ is None:
            typ = schema.get("type", None)
        else:
            # no-op: the laleType value is already in typ
            typ = typ

        # Enumerations are handled uniformly regardless of their type,
        # except for operators, whose enum lists the candidate operators.
        if "enum" in schema and typ != "operator":
            vals = schema["enum"]
            return SearchSpaceEnum(vals,
                                   pgo=asFreqs(pgo_freqs),
                                   default=get_default(schema))

        if typ is not None:
            if typ == "boolean":
                return SearchSpaceBool(pgo=asFreqs(pgo_freqs),
                                       default=get_default(schema))
            elif typ == "number" or typ == "integer":
                # The exclusivity flag is read from the same family
                # (ForOptimizer or plain) as the bound that was found.
                exclusive_minimum = False
                minimum = schema.get("minimumForOptimizer", None)
                if minimum is not None:
                    exclusive_minimum = schema.get(
                        "exclusiveMinimumForOptimizer", False)
                else:
                    minimum = schema.get("minimum", None)
                    if minimum is not None:
                        exclusive_minimum = schema.get("exclusiveMinimum",
                                                       False)

                exclusive_maximum = False
                maximum = schema.get("maximumForOptimizer", None)
                if maximum is not None:
                    exclusive_maximum = schema.get(
                        "exclusiveMaximumForOptimizer", False)
                else:
                    maximum = schema.get("maximum", None)
                    if maximum is not None:
                        exclusive_maximum = schema.get("exclusiveMaximum",
                                                       False)

                distribution = schema.get("distribution", None)

                laleType = schema.get("laleType", None)
                if laleType is None:
                    laleType = typ

                if laleType == "number":
                    discrete = False
                elif laleType == "integer":
                    discrete = True
                else:
                    # NOTE(review): this looks unreachable, since typ was
                    # itself derived from laleType above -- confirm.
                    raise OperatorSchemaError(
                        path,
                        f"specified laleType should be a number or integer, not: {laleType}.",
                    )

                # NOTE(review): bare declaration; this local is never
                # assigned or read in this method.
                pgo: Freqs

                return SearchSpaceNumber(
                    minimum=minimum,
                    exclusiveMinimum=exclusive_minimum,
                    maximum=maximum,
                    exclusiveMaximum=exclusive_maximum,
                    discrete=discrete,
                    distribution=distribution,
                    pgo=asFreqs(pgo_freqs),
                    default=get_default(schema),
                )
            elif typ == "array" or typ == "tuple":
                laleType = schema.get("laleType", None)
                if laleType is None:
                    laleType = typ

                is_tuple: bool = laleType == "tuple"

                # Size limits: ForOptimizer variants win; the minimum
                # defaults to 0, the maximum may stay None for now.
                min_items = schema.get("minItemsForOptimizer", None)
                if min_items is None:
                    min_items = schema.get("minItems", None)
                    if min_items is None:
                        min_items = 0
                max_items = schema.get("maxItemsForOptimizer", None)
                if max_items is None:
                    max_items = schema.get("maxItems", None)

                items_schema = schema.get("itemsForOptimizer", None)
                if items_schema is None:
                    items_schema = schema.get("items", None)
                    if items_schema is None:
                        raise OperatorSchemaError(
                            path,
                            f"An array type was found without a provided schema for the items in the schema {schema}. Please provide a schema for the items (consider using itemsForOptimizer)",
                        )

                # we can search an empty list even without schemas
                if max_items == 0:
                    if is_tuple:
                        return SearchSpaceConstant([()])
                    else:
                        return SearchSpaceConstant([[]])

                # prefix: spaces for positional item schemas (items given as
                # a list); additional: space for any remaining items.
                prefix: Optional[List[SearchSpace]] = None
                additional: Optional[SearchSpace] = None
                if isinstance(items_schema, list):
                    prefix = []
                    for i, sub_schema in enumerate(items_schema):
                        sub = self.schemaToSearchSpaceHelper_(
                            longName, path + "_" + str(i), sub_schema,
                            relevantFields)
                        if sub is None:
                            # one uncompilable position makes the whole
                            # array uncompilable
                            return None
                        else:
                            prefix.append(sub)
                    prefix_len = len(prefix)
                    additional_items_schema = schema.get(
                        "additionalItemsForOptimizer", None)
                    if additional_items_schema is None:
                        additional_items_schema = schema.get(
                            "additionalItems", None)
                    if additional_items_schema is None:
                        # Without a schema for extra items, the array may
                        # not be longer than the positional schemas.
                        if max_items is None or max_items > prefix_len:
                            raise OperatorSchemaError(
                                path,
                                f"An array type was found with provided schemas for {prefix_len} elements, but either an unspecified or too high a maxItems, and no schema for the additionalItems.  Please constraing maxItems to <= {prefix_len} (you can set maxItemsForOptimizer), or provide a schema for additionalItems",
                            )
                    elif additional_items_schema is False:
                        # Extra items are forbidden: cap the size at the
                        # number of positional schemas.
                        if max_items is None:
                            max_items = prefix_len
                        else:
                            max_items = min(max_items, prefix_len)
                    else:
                        additional = self.schemaToSearchSpaceHelper_(
                            longName,
                            path + "-",
                            additional_items_schema,
                            relevantFields,
                        )
                        # if items_schema is None:
                        #     raise ValueError(f"an array type was found without a provided schema for the items in the schema {schema}.  Please provide a schema for the items (consider using itemsForOptimizer)")
                else:
                    # A single item schema applies to every element.
                    additional = self.schemaToSearchSpaceHelper_(
                        longName, path + "-", items_schema, relevantFields)

                if max_items is None:
                    raise OperatorSchemaError(
                        path,
                        f"An array type was found without a provided maximum number of items in the schema {schema}, and it is not a list with 'additionalItems' set to False.  Please provide a maximum (consider using maxItemsForOptimizer), or, if you are using a list, set additionalItems to False",
                    )

                return SearchSpaceArray(
                    prefix=prefix,
                    minimum=min_items,
                    maximum=max_items,
                    additional=additional,
                    is_tuple=is_tuple,
                )

            elif typ == "object":
                if "properties" not in schema:
                    return SearchSpaceObject(longName, [], [])
                o = self.JsonSchemaToSearchSpaceHelper(
                    longName,
                    path,
                    schema,
                    relevantFields,
                    pgo_freqs=pgo_freqs,
                    sub_space=sub_space,
                )
                if sub_space:
                    return SearchSpaceDict(o)
                else:
                    # Sort the keys so the single choice tuple has a
                    # deterministic order matching all_keys.
                    all_keys = list(o.keys())
                    all_keys.sort()
                    o_choice = tuple([o.get(k, None) for k in all_keys])
                    return SearchSpaceObject(longName, all_keys, [o_choice])

            elif typ == "string":
                # Not handled by type: falls through to the anyOf / allOf
                # handling below (and to the final error if neither applies).
                pass
            elif typ == "operator":
                # TODO: If there is a default, we could use it
                vals = schema.get("enum", None)
                if vals is None:
                    logger.error(
                        "An operator is required by the schema but was not provided"
                    )
                    return None

                # Operator instances are compiled with this visitor; any
                # other enum value is kept as a constant.
                sub_schemas: List[SearchSpace] = [
                    accept(op, self)
                    if isinstance(op, Operator) else SearchSpaceConstant(op)
                    for op in vals
                ]
                combined_sub_schema: SearchSpace
                if len(sub_schemas) == 1:
                    combined_sub_schema = sub_schemas[0]
                    if isinstance(combined_sub_schema, SearchSpaceConstant):
                        return combined_sub_schema
                else:
                    combined_sub_schema = SearchSpaceSum(sub_schemas)
                    # A sum made only of constants is returned without the
                    # SearchSpaceOperator wrapper.
                    if all((isinstance(x, SearchSpaceConstant)
                            for x in sub_schemas)):
                        return combined_sub_schema
                return SearchSpaceOperator(combined_sub_schema)

            elif typ == "Any":
                raise OperatorSchemaError(
                    path,
                    f"A search space was found with laleType ({typ}), which is not searchable.  Please mark the relevant hyperparameter as not relevant for the optimizer.  schema: {schema}",
                )
            else:
                raise OperatorSchemaError(
                    path,
                    f"An unknown type ({typ}) was found in the schema {schema}"
                )

        if "anyOf" in schema:
            # Only alternatives whose "type" is "object" are compiled; other
            # alternatives, and those yielding an empty dict, are skipped.
            objs = []
            for s_obj in schema["anyOf"]:
                if "type" in s_obj and s_obj["type"] == "object":
                    o = self.JsonSchemaToSearchSpaceHelper(
                        longName,
                        path,
                        s_obj,
                        relevantFields,
                        pgo_freqs=pgo_freqs,
                        sub_space=sub_space,
                    )
                    if o:
                        objs.append(o)
            if objs:
                # First, gather a list of all the properties
                keys_list = [set(o.keys()) for o in objs]
                # make sure the iterator is deterministic
                all_keys = list(set.union(*keys_list))
                # and we might as well make it sorted
                all_keys.sort()

                def as_str(k, c):
                    # String form of one choice entry, used only to build
                    # the de-duplication key below.
                    if c is None:
                        return "None"
                    else:
                        return search_space_to_str_for_comparison(
                            c, path + "_" + k)

                # Maps the string form of a choice tuple to the tuple, so
                # alternatives with the same string form collapse to one.
                anys: Dict[str, Any] = {}
                for o in objs:
                    # Properties missing from this alternative become None.
                    o_choice = tuple([o.get(k, None) for k in all_keys])
                    k = str([
                        as_str(all_keys[idx], c)
                        for idx, c in enumerate(o_choice)
                    ])
                    if k in anys:
                        logger.info(
                            f"Ignoring Duplicate SearchSpace entry {k}")
                    # On a duplicate key the later tuple replaces the
                    # earlier one.
                    anys[k] = o_choice
                return SearchSpaceObject(longName, all_keys, anys.values())
            else:
                return SearchSpaceObject(longName, [], [])

        if "allOf" in schema:
            # if all but one are negated constraints, we will just ignore them
            pos_sub_schema: List[JsonSchema] = []
            for sub_schema in schema["allOf"]:
                if "not" not in sub_schema:
                    pos_sub_schema.append(sub_schema)

            if len(pos_sub_schema) > 1:
                raise OperatorSchemaError(
                    path,
                    f"schemaToSearchSpaceHelper does not yet know how to compile the given schema {schema}, because it is an allOf with more than one non-negated schemas ({pos_sub_schema})",
                )
            if len(pos_sub_schema) == 0:
                raise OperatorSchemaError(
                    path,
                    f"schemaToSearchSpaceHelper does not yet know how to compile the given schema {schema}, because it is an allOf with only negated schemas",
                )

            logger.debug(
                f"[{path}]: schemaToSearchSpaceHelper: ignoring negated schemas in the conjunction {schema}"
            )
            # Compile the single non-negated member in place of the allOf.
            return self.schemaToSearchSpaceHelper_(
                longName,
                path,
                pos_sub_schema[0],
                relevantFields,
                pgo_freqs=pgo_freqs,
                sub_space=sub_space,
            )
        # TODO: handle degenerate cases
        raise OperatorSchemaError(
            path,
            f"schemaToSearchSpaceHelper does not yet know how to compile the given schema {schema}",
        )