Example #1
0
def SearchSpaceNumberToSMAC(key: str, hp: SearchSpaceNumber) -> Hyperparameter:
    """Convert a numeric search space into a SMAC hyperparameter named ``key``.

    The distribution defaults to "uniform" when the search space does not
    name one.  "uniform" and "integer" produce a linear-scale parameter and
    "loguniform" a log-scale one.  A discrete space is returned as a
    ``UniformIntegerHyperparameter``, any other space as a
    ``UniformFloatHyperparameter``; both are built from the inclusive bounds
    reported by the search space.

    Raises:
        ValueError: if the maximum or the minimum is missing, or if the
            distribution is not one of the three supported names.
    """
    distribution = hp.distribution or "uniform"

    # Both bounds are mandatory; the maximum is validated (and read) first.
    if hp.maximum is None:
        raise ValueError(
            f"maximum not specified for a number with distribution {distribution} for {key}"
        )
    upper = hp.getInclusiveMax()

    if hp.minimum is None:
        raise ValueError(
            f"minimum not specified for a number with distribution {distribution} for {key}"
        )
    lower = hp.getInclusiveMin()

    # Only the scale (linear vs. logarithmic) depends on the distribution.
    use_log_scale: bool
    if distribution == "loguniform":
        use_log_scale = True
    elif distribution in ("uniform", "integer"):
        use_log_scale = False
    else:
        raise ValueError(
            f"unknown/unsupported distribution {distribution} for {key}")

    if hp.discrete:
        return UniformIntegerHyperparameter(key, lower, upper, log=use_log_scale)
    return UniformFloatHyperparameter(key, lower, upper, log=use_log_scale)
Example #2
0
    def visitSearchSpaceNumber(self, space: SearchSpaceNumber, path: str, counter=None):
        """Build the hyperopt expression that samples a numeric search space.

        The label for the expression comes from ``self.mk_label``.  When the
        space carries PGO data, an index in ``[0, len(pgo) - 1]`` is drawn and
        handed to ``scope.pgo_sample`` together with that data.  Otherwise the
        distribution (default "uniform") selects the hyperopt primitive:

        * "integer": ``hp.randint`` on the inclusive maximum (discrete only);
        * "uniform": ``hp.uniform``, or ``hp.quniform`` wrapped in
          ``scope.int`` for discrete spaces;
        * "loguniform": ``hp.loguniform``, or ``hp.qloguniform`` wrapped in
          ``scope.int`` for discrete spaces, using the logs of the bounds.

        Raises:
            SearchSpaceError: on a missing bound, an "integer" distribution
                for a non-discrete space, a non-positive minimum with
                "loguniform", or an unknown distribution name.
        """
        label = self.mk_label(path, counter)

        pgo = space.pgo
        if pgo is not None:
            return scope.pgo_sample(pgo, hp.quniform(label, 0, len(pgo) - 1, 1))

        distribution = space.distribution or "uniform"

        if space.maximum is None:
            raise SearchSpaceError(
                path,
                f"maximum not specified for a number with distribution {distribution}",
            )
        upper = space.getInclusiveMax()
        # a maximum was given, so its inclusive form is expected to exist too
        assert upper is not None

        # "integer" is the only distribution that works without a minimum
        if distribution == "integer":
            if space.discrete:
                return hp.randint(label, upper)
            raise SearchSpaceError(
                path,
                "integer distribution specified for a non discrete numeric type",
            )

        if space.minimum is None:
            raise SearchSpaceError(
                path,
                f"minimum not specified for a number with distribution {distribution}",
            )
        lower = space.getInclusiveMin()
        # a minimum was given, so its inclusive form is expected to exist too
        assert lower is not None

        if distribution == "uniform":
            if space.discrete:
                return scope.int(hp.quniform(label, lower, upper, 1))
            return hp.uniform(label, lower, upper)

        if distribution != "loguniform":
            raise SearchSpaceError(
                path, f"Unknown distribution type: {distribution}")

        # hyperopt takes the log of the bounds for its log distributions
        if lower <= 0:
            raise SearchSpaceError(
                path,
                f"minimum of 0 specified with a {distribution} distribution.  This is not allowed; please set it (possibly using minimumForOptimizer) to be positive",
            )
        log_lower = math.log(lower) if lower > 0 else lower
        log_upper = math.log(upper) if upper > 0 else upper
        if space.discrete:
            return scope.int(hp.qloguniform(label, log_lower, log_upper, 1))
        return hp.loguniform(label, log_lower, log_upper)
Example #3
0
    def visitSearchSpaceNumber(self,
                               space: SearchSpaceNumber,
                               path: str,
                               counter=None,
                               useCounter=True):
        """Render a numeric search space as the source text of a hyperopt expression.

        Args:
            space: the numeric search space to render.
            path: location of the hyperparameter; used to build the label and
                reported in errors.
            counter: forwarded to ``self.mk_label``.
            useCounter: forwarded to ``self.mk_label``.

        Returns:
            A string holding a hyperopt expression such as
            ``hp.uniform('<label>', <min>, <max>)``.  When the space carries
            PGO data, that data is stored in ``self.pgo_dict[label]`` and the
            returned text refers to it as ``pgo_<label>``.

        Raises:
            SearchSpaceError: on a missing bound, an "integer" distribution
                for a non-discrete space, a non-positive minimum with
                "loguniform", or an unknown distribution name.
        """
        label = self.mk_label(path, counter, useCounter=useCounter)

        if space.pgo is not None:
            self.pgo_dict[label] = space.pgo
            return f"scope.pgo_sample(pgo_{label}, hp.quniform('{label}', {0}, {len(space.pgo)-1}, 1))"

        dist = "uniform"
        if space.distribution:
            dist = space.distribution

        if space.maximum is None:
            # The error must actually be raised: without it a missing maximum
            # ends up as the text "None" inside the generated expression.
            raise SearchSpaceError(
                path,
                f"maximum not specified for a number with distribution {dist}")
        upper = space.getInclusiveMax()

        # These distributions need only a maximum
        if dist == "integer":
            if not space.discrete:
                raise SearchSpaceError(
                    path,
                    "integer distribution specified for a non discrete numeric type....",
                )

            return f"hp.randint('{label}', {upper})"

        if space.minimum is None:
            raise SearchSpaceError(
                path,
                f"minimum not specified for a number with distribution {dist}")
        lower = space.getInclusiveMin()

        if dist == "uniform":
            if space.discrete:
                return f"hp.quniform('{label}', {lower}, {upper}, 1)"
            else:
                return f"hp.uniform('{label}', {lower}, {upper})"
        elif dist == "loguniform":
            # for log distributions, hyperopt requires that we provide the log of the min/max
            if lower <= 0:
                raise SearchSpaceError(
                    path,
                    f"minimum of 0 specified with a {dist} distribution.  This is not allowed; please set it (possibly using minimumForOptimizer) to be positive",
                )
            # lower is known to be positive here, so its log is always defined
            lower = math.log(lower)
            # NOTE(review): a non-positive maximum is passed through unchanged,
            # as before; such a range is inconsistent with a positive minimum.
            if upper > 0:
                upper = math.log(upper)

            if space.discrete:
                return f"hp.qloguniform('{label}', {lower}, {upper}, 1)"
            else:
                return f"hp.loguniform('{label}', {lower}, {upper})"
        else:
            raise SearchSpaceError(path, f"Unknown distribution type: {dist}")
Example #4
0
def SearchSpaceNumberToGSValues(
        key: str,
        hp: SearchSpaceNumber,
        num_samples: Optional[int] = None) -> List[GSValue]:
    """Return the list of grid-search values for a numeric search space.

    Args:
        key: name of the hyperparameter; only used in error messages.
        hp: the numeric search space to sample.
        num_samples: number of values to produce; defaults to
            ``DEFAULT_SAMPLES_PER_DISTRIBUTION`` when ``None``.

    Returns:
        When the space carries PGO data, the values from
        ``hp.pgo.samples(samples)``.  Otherwise values spread between the
        inclusive bounds: evenly for "uniform"/"integer" and geometrically
        (evenly on a log scale) for "loguniform".  If the space has a
        default, it takes one of the sample slots and is appended last.

    Raises:
        ValueError: if a bound is missing, the distribution is unknown, or
            a "loguniform" range has a non-positive minimum.
    """
    samples: int
    if num_samples is None:
        samples = DEFAULT_SAMPLES_PER_DISTRIBUTION
    else:
        samples = num_samples

    # Add preliminary support for PGO
    if hp.pgo is not None:
        return list(hp.pgo.samples(samples))

    # if we are not doing PGO
    dist = "uniform"
    if hp.distribution:
        dist = hp.distribution
    if hp.maximum is None:
        raise ValueError(
            f"maximum not specified for a number with distribution {dist} for {key}"
        )
    upper = hp.getInclusiveMax()
    assert upper is not None
    if hp.minimum is None:
        raise ValueError(
            f"minimum not specified for a number with distribution {dist} for {key}"
        )
    lower = hp.getInclusiveMin()
    assert lower is not None

    dt: np.dtype
    if hp.discrete:
        dt = np.dtype(int)
    else:
        dt = np.dtype(float)

    default = hp.default()
    if default is not None:
        # always use the default as one of the samples
        # TODO: ensure that the default is valid according to the schema
        if samples <= 1:
            return [default]
        samples = samples - 1
    if dist == "uniform" or dist == "integer":
        ret = np.linspace(lower, upper, num=samples, dtype=dt).tolist()
    elif dist == "loguniform":
        if lower <= 0:
            raise ValueError(
                f"minimum must be positive for a number with distribution {dist} for {key}"
            )
        # np.logspace would treat the bounds as base-10 exponents and produce
        # values outside [lower, upper]; geomspace takes the bounds themselves.
        grid = np.geomspace(lower, upper, num=samples)
        if hp.discrete:
            # round before the integer cast so float error cannot turn
            # e.g. 999.999... into 999
            grid = np.around(grid)
        ret = grid.astype(dt).tolist()
    else:
        raise ValueError(f"unknown/unsupported distribution {dist} for {key}")
    if default is not None:
        ret.append(default)
    return ret
Example #5
0
    def schemaToSearchSpaceHelper_(
        self,
        longName,
        path: str,
        schema: JsonSchema,
        relevantFields: Optional[Set[str]],
        pgo_freqs: pgo_part = None,
        sub_space: bool = True,
    ) -> Optional[SearchSpace]:
        """Compile one JSON schema fragment into a search space.

        The schema is dispatched on, in order: the false schema, an "enum"
        (unless the type is "operator"), its "laleType"/"type", an "anyOf",
        and finally an "allOf".

        Args:
            longName: passed on to the SearchSpaceObject instances that are
                created and to the recursive/helper calls.
            path: location of the schema; reported in errors and extended
                with "_<i>" or "-" for array item sub-schemas.
            schema: the schema fragment to compile.
            relevantFields: forwarded unchanged to the recursive/helper calls.
            pgo_freqs: PGO information; converted with ``asFreqs`` for enum,
                boolean and numeric schemas and forwarded for object, anyOf
                and allOf schemas.  It is not forwarded to array items.
            sub_space: for "object" schemas, selects a SearchSpaceDict (True)
                or a SearchSpaceObject (False) as the result.

        Returns:
            The search space, or None when the schema is the false schema,
            when a positional array item compiles to None, or when an
            "operator" schema has no "enum".

        Raises:
            OperatorSchemaError: for unknown or unsearchable types, for
                arrays without an item schema or a usable maximum length,
                and for schemas of a shape this method cannot compile.
        """
        # TODO: handle degenerate cases
        # right now, this handles only a very fixed form

        if is_false_schema(schema):
            return None

        # "laleType" takes precedence over the plain JSON schema "type"
        typ: Optional[str] = None
        typ = schema.get("laleType", None)
        if typ is None:
            typ = schema.get("type", None)
        else:
            typ = typ

        # An enum is searchable directly, whatever its declared type;
        # operator enums are handled in the "operator" branch below.
        if "enum" in schema and typ != "operator":
            vals = schema["enum"]
            return SearchSpaceEnum(vals,
                                   pgo=asFreqs(pgo_freqs),
                                   default=get_default(schema))

        if typ is not None:
            if typ == "boolean":
                return SearchSpaceBool(pgo=asFreqs(pgo_freqs),
                                       default=get_default(schema))
            elif typ == "number" or typ == "integer":
                # The "...ForOptimizer" bound wins over the plain bound, and
                # the exclusive flag is read from the same family of keys as
                # the bound that was actually used.
                exclusive_minimum = False
                minimum = schema.get("minimumForOptimizer", None)
                if minimum is not None:
                    exclusive_minimum = schema.get(
                        "exclusiveMinimumForOptimizer", False)
                else:
                    minimum = schema.get("minimum", None)
                    if minimum is not None:
                        exclusive_minimum = schema.get("exclusiveMinimum",
                                                       False)

                exclusive_maximum = False
                maximum = schema.get("maximumForOptimizer", None)
                if maximum is not None:
                    exclusive_maximum = schema.get(
                        "exclusiveMaximumForOptimizer", False)
                else:
                    maximum = schema.get("maximum", None)
                    if maximum is not None:
                        exclusive_maximum = schema.get("exclusiveMaximum",
                                                       False)

                distribution = schema.get("distribution", None)

                laleType = schema.get("laleType", None)
                if laleType is None:
                    laleType = typ

                # "integer" is searched as a discrete range, "number" as a
                # continuous one
                if laleType == "number":
                    discrete = False
                elif laleType == "integer":
                    discrete = True
                else:
                    raise OperatorSchemaError(
                        path,
                        f"specified laleType should be a number or integer, not: {laleType}.",
                    )

                # annotation only; no value is ever assigned to this name
                pgo: Freqs

                return SearchSpaceNumber(
                    minimum=minimum,
                    exclusiveMinimum=exclusive_minimum,
                    maximum=maximum,
                    exclusiveMaximum=exclusive_maximum,
                    discrete=discrete,
                    distribution=distribution,
                    pgo=asFreqs(pgo_freqs),
                    default=get_default(schema),
                )
            elif typ == "array" or typ == "tuple":
                laleType = schema.get("laleType", None)
                if laleType is None:
                    laleType = typ

                is_tuple: bool = laleType == "tuple"

                # Length bounds: the "...ForOptimizer" key wins; the minimum
                # defaults to 0 while the maximum may stay None for now.
                min_items = schema.get("minItemsForOptimizer", None)
                if min_items is None:
                    min_items = schema.get("minItems", None)
                    if min_items is None:
                        min_items = 0
                max_items = schema.get("maxItemsForOptimizer", None)
                if max_items is None:
                    max_items = schema.get("maxItems", None)

                items_schema = schema.get("itemsForOptimizer", None)
                if items_schema is None:
                    items_schema = schema.get("items", None)
                    if items_schema is None:
                        raise OperatorSchemaError(
                            path,
                            f"An array type was found without a provided schema for the items in the schema {schema}. Please provide a schema for the items (consider using itemsForOptimizer)",
                        )

                # we can search an empty list even without schemas
                if max_items == 0:
                    if is_tuple:
                        return SearchSpaceConstant([()])
                    else:
                        return SearchSpaceConstant([[]])

                # prefix: one search space per positional item schema;
                # additional: the search space shared by all remaining items
                prefix: Optional[List[SearchSpace]] = None
                additional: Optional[SearchSpace] = None
                if isinstance(items_schema, list):
                    # A list of item schemas describes the items position by
                    # position.  The recursive calls use the default
                    # pgo_freqs and sub_space.
                    prefix = []
                    for i, sub_schema in enumerate(items_schema):
                        sub = self.schemaToSearchSpaceHelper_(
                            longName, path + "_" + str(i), sub_schema,
                            relevantFields)
                        if sub is None:
                            # one position cannot be compiled, so the whole
                            # array yields no search space
                            return None
                        else:
                            prefix.append(sub)
                    prefix_len = len(prefix)
                    additional_items_schema = schema.get(
                        "additionalItemsForOptimizer", None)
                    if additional_items_schema is None:
                        additional_items_schema = schema.get(
                            "additionalItems", None)
                    if additional_items_schema is None:
                        # no schema for extra items: the array must not be
                        # allowed to grow beyond the positional items
                        if max_items is None or max_items > prefix_len:
                            raise OperatorSchemaError(
                                path,
                                f"An array type was found with provided schemas for {prefix_len} elements, but either an unspecified or too high a maxItems, and no schema for the additionalItems.  Please constraing maxItems to <= {prefix_len} (you can set maxItemsForOptimizer), or provide a schema for additionalItems",
                            )
                    elif additional_items_schema is False:
                        # extra items are forbidden: cap the length at the
                        # number of positional items
                        if max_items is None:
                            max_items = prefix_len
                        else:
                            max_items = min(max_items, prefix_len)
                    else:
                        additional = self.schemaToSearchSpaceHelper_(
                            longName,
                            path + "-",
                            additional_items_schema,
                            relevantFields,
                        )
                        # if items_schema is None:
                        #     raise ValueError(f"an array type was found without a provided schema for the items in the schema {schema}.  Please provide a schema for the items (consider using itemsForOptimizer)")
                else:
                    # a single item schema applies to every element
                    additional = self.schemaToSearchSpaceHelper_(
                        longName, path + "-", items_schema, relevantFields)

                if max_items is None:
                    raise OperatorSchemaError(
                        path,
                        f"An array type was found without a provided maximum number of items in the schema {schema}, and it is not a list with 'additionalItems' set to False.  Please provide a maximum (consider using maxItemsForOptimizer), or, if you are using a list, set additionalItems to False",
                    )

                return SearchSpaceArray(
                    prefix=prefix,
                    minimum=min_items,
                    maximum=max_items,
                    additional=additional,
                    is_tuple=is_tuple,
                )

            elif typ == "object":
                if "properties" not in schema:
                    return SearchSpaceObject(longName, [], [])
                o = self.JsonSchemaToSearchSpaceHelper(
                    longName,
                    path,
                    schema,
                    relevantFields,
                    pgo_freqs=pgo_freqs,
                    sub_space=sub_space,
                )
                if sub_space:
                    return SearchSpaceDict(o)
                else:
                    # a single choice holding the property search spaces in
                    # sorted key order
                    all_keys = list(o.keys())
                    all_keys.sort()
                    o_choice = tuple([o.get(k, None) for k in all_keys])
                    return SearchSpaceObject(longName, all_keys, [o_choice])

            elif typ == "string":
                # No dedicated handling: control falls through to the
                # anyOf/allOf checks below and, failing those, to the final
                # OperatorSchemaError.  (String enums were already returned
                # by the enum check above.)
                pass
            elif typ == "operator":
                # TODO: If there is a default, we could use it
                vals = schema.get("enum", None)
                if vals is None:
                    logger.error(
                        "An operator is required by the schema but was not provided"
                    )
                    return None

                # Operator instances are compiled by visiting them with this
                # object; any other enum value becomes a constant.
                sub_schemas: List[SearchSpace] = [
                    accept(op, self)
                    if isinstance(op, Operator) else SearchSpaceConstant(op)
                    for op in vals
                ]
                combined_sub_schema: SearchSpace
                if len(sub_schemas) == 1:
                    combined_sub_schema = sub_schemas[0]
                    # a lone constant is returned without the operator wrapper
                    if isinstance(combined_sub_schema, SearchSpaceConstant):
                        return combined_sub_schema
                else:
                    combined_sub_schema = SearchSpaceSum(sub_schemas)
                    # a sum of constants only is returned without the
                    # operator wrapper
                    if all((isinstance(x, SearchSpaceConstant)
                            for x in sub_schemas)):
                        return combined_sub_schema
                return SearchSpaceOperator(combined_sub_schema)

            elif typ == "Any":
                raise OperatorSchemaError(
                    path,
                    f"A search space was found with laleType ({typ}), which is not searchable.  Please mark the relevant hyperparameter as not relevant for the optimizer.  schema: {schema}",
                )
            else:
                raise OperatorSchemaError(
                    path,
                    f"An unknown type ({typ}) was found in the schema {schema}"
                )

        if "anyOf" in schema:
            # Only alternatives whose "type" is "object" are compiled; all
            # other alternatives are skipped, as are alternatives whose
            # compiled result is falsy.
            objs = []
            for s_obj in schema["anyOf"]:
                if "type" in s_obj and s_obj["type"] == "object":
                    o = self.JsonSchemaToSearchSpaceHelper(
                        longName,
                        path,
                        s_obj,
                        relevantFields,
                        pgo_freqs=pgo_freqs,
                        sub_space=sub_space,
                    )
                    if o:
                        objs.append(o)
            if objs:
                # First, gather a list of all the properties
                keys_list = [set(o.keys()) for o in objs]
                # make sure the iterator is deterministic
                all_keys = list(set.union(*keys_list))
                # and we might as well make it sorted
                all_keys.sort()

                # string form of one property's search space, used only to
                # detect duplicate alternatives
                def as_str(k, c):
                    if c is None:
                        return "None"
                    else:
                        return search_space_to_str_for_comparison(
                            c, path + "_" + k)

                # Alternatives keyed by their string form: an alternative
                # with the same string form as an earlier one replaces it
                # instead of adding a second choice.
                anys: Dict[str, Any] = {}
                for o in objs:
                    # properties missing from this alternative become None
                    o_choice = tuple([o.get(k, None) for k in all_keys])
                    k = str([
                        as_str(all_keys[idx], c)
                        for idx, c in enumerate(o_choice)
                    ])
                    if k in anys:
                        logger.info(
                            f"Ignoring Duplicate SearchSpace entry {k}")
                    anys[k] = o_choice
                return SearchSpaceObject(longName, all_keys, anys.values())
            else:
                return SearchSpaceObject(longName, [], [])

        if "allOf" in schema:
            # if all but one are negated constraints, we will just ignore them
            pos_sub_schema: List[JsonSchema] = []
            for sub_schema in schema["allOf"]:
                if "not" not in sub_schema:
                    pos_sub_schema.append(sub_schema)

            # exactly one non-negated sub-schema is supported
            if len(pos_sub_schema) > 1:
                raise OperatorSchemaError(
                    path,
                    f"schemaToSearchSpaceHelper does not yet know how to compile the given schema {schema}, because it is an allOf with more than one non-negated schemas ({pos_sub_schema})",
                )
            if len(pos_sub_schema) == 0:
                raise OperatorSchemaError(
                    path,
                    f"schemaToSearchSpaceHelper does not yet know how to compile the given schema {schema}, because it is an allOf with only negated schemas",
                )

            logger.debug(
                f"[{path}]: schemaToSearchSpaceHelper: ignoring negated schemas in the conjunction {schema}"
            )
            # compile the single positive sub-schema in place of the allOf
            return self.schemaToSearchSpaceHelper_(
                longName,
                path,
                pos_sub_schema[0],
                relevantFields,
                pgo_freqs=pgo_freqs,
                sub_space=sub_space,
            )
        # TODO: handle degenerate cases
        raise OperatorSchemaError(
            path,
            f"schemaToSearchSpaceHelper does not yet know how to compile the given schema {schema}",
        )