예제 #1
0
def is_transformer(cls, verbose=False):
    """Determine if `cls` corresponds to something that resembles an sklearn general transformer.

    Each candidate input type is pushed through ``cls().fit_transform`` using
    the matching sample in ``DATA_TYPE_EXAMPLES``, and the result is compared
    against every candidate output type with ``is_data_type``.

    Args:
        cls: Class to probe. It is instantiated with no arguments.
        verbose: When true, exceptions raised while probing are reported
            through ``warnings.warn``; otherwise they are silently ignored.

    Returns:
        ``(True, (input_type, output_type))`` when exactly one output type was
        observed and the accepted input types can be merged by
        ``combine_types``; ``(False, None)`` in every other case.

    Examples:

    >>> from sklearn.feature_extraction.text import CountVectorizer
    >>> is_transformer(CountVectorizer)
    (True, (List(Sentence()), MatrixContinuousSparse()))
    >>> from sklearn.decomposition import PCA
    >>> is_transformer(PCA)
    (True, (MatrixContinuousDense(), MatrixContinuousDense()))

    """
    if not is_algorithm(cls, verbose=verbose):
        return False, None

    # The same three types are tried both as inputs and as outputs.
    candidate_types = [
        kb.MatrixContinuousDense(),
        kb.MatrixContinuousSparse(),
        kb.List(kb.Sentence()),
    ]

    allowed_inputs = set()
    allowed_outputs = set()

    for input_type in candidate_types:
        # The transformation only depends on the input type, so run it once
        # per input instead of once per (input, output) pair.
        try:
            transformed = cls().fit_transform(DATA_TYPE_EXAMPLES[input_type])
        except Exception as e:
            if verbose:
                warnings.warn(str(e))
            continue

        for output_type in candidate_types:
            # Explicit check instead of `assert`: assertions are removed
            # under `python -O`, which would accept every output type.
            try:
                matches = is_data_type(transformed, output_type)
            except Exception as e:
                if verbose:
                    warnings.warn(str(e))
                continue

            if matches:
                allowed_inputs.add(input_type)
                allowed_outputs.add(output_type)

    # An ambiguous (or missing) output type means `cls` is not usable.
    if len(allowed_outputs) != 1:
        return False, None

    inputs = combine_types(*allowed_inputs)

    # Reject input sets that cannot be merged into a single type rather than
    # reporting success with `None` as the input type.
    if inputs is None:
        return False, None

    (output_type,) = allowed_outputs
    return True, (inputs, output_type)
예제 #2
0
def combine_types(*types):
    """Merge several data types into the single type that covers all of them.

    Args:
        *types: The types to merge. When more than one is given they must be
            hashable, because they are de-duplicated through a set.

    Returns:
        * the type itself when only one distinct type was supplied;
        * ``kb.MatrixContinuous()`` when the distinct types are exactly the
          dense and sparse continuous matrices;
        * ``None`` when no types were given or they cannot be merged.
    """
    if len(types) == 1:
        return types[0]

    unique = set(types)

    # Repeated occurrences of the same type are equivalent to a single one.
    if len(unique) == 1:
        return next(iter(unique))

    if unique == {kb.MatrixContinuousDense(), kb.MatrixContinuousSparse()}:
        return kb.MatrixContinuous()

    return None
예제 #3
0
def is_clusterer(cls, verbose=False):
    """Determine if `cls` corresponds to something that resembles an nltk clusterer.

    For each candidate matrix type, a fresh ``cls()`` is trained with
    ``cluster`` on the matching sample from ``DATA_TYPE_EXAMPLES`` and then
    asked to ``classify`` every row; the input type is accepted when the
    resulting labels pass ``is_categorical``.

    Args:
        cls: Class to probe. It is instantiated with no arguments.
        verbose: When true, failures while probing are reported through
            ``warnings.warn``; otherwise they are silently ignored.

    Returns:
        ``(True, (input_type, kb.CategoricalVector()))`` when at least one
        input type was accepted and ``combine_types`` can merge the accepted
        ones; ``(False, None)`` otherwise.

    Examples:

    >>> from sklearn.linear_model import LogisticRegression
    >>> from nltk.cluster import GAAClusterer
    >>> is_clusterer(GAAClusterer)
    (True, (MatrixContinuousDense(), CategoricalVector()))
    >>> is_clusterer(LogisticRegression)
    (False, None)

    """
    if not _is_clusterer(cls, verbose=verbose):
        return False, None

    inputs = []

    for input_type in (kb.MatrixContinuousDense(), kb.MatrixContinuousSparse()):
        try:
            X = DATA_TYPE_EXAMPLES[input_type]

            clusterer = cls()
            clusterer.cluster(X)
            labels = [clusterer.classify(x) for x in X]

            accepted = is_categorical(labels)
        except Exception as e:
            if verbose:
                warnings.warn(str(e))
            continue

        # Explicit check instead of `assert`: assertions are removed under
        # `python -O`, which would accept any output.
        if accepted:
            inputs.append(input_type)
        elif verbose:
            warnings.warn(
                "%r: labels produced for %r are not categorical" % (cls, input_type)
            )

    inputs = combine_types(*inputs)

    if inputs:
        return True, (inputs, kb.CategoricalVector())
    return False, None
예제 #4
0
def is_clusterer(cls, verbose=False):
    """Determine if `cls` corresponds to something that resembles an sklearn clustering algorithm.

    For each candidate matrix type, ``cls().fit_predict`` is run on the
    matching sample from ``DATA_TYPE_EXAMPLES``; the input type is accepted
    when the prediction passes ``is_discrete``.

    Args:
        cls: Class to probe. It is instantiated with no arguments.
        verbose: When true, failures while probing are reported through
            ``warnings.warn``; otherwise they are silently ignored.

    Returns:
        ``(True, (input_type, kb.DiscreteVector()))`` when at least one input
        type was accepted and ``combine_types`` can merge the accepted ones;
        ``(False, None)`` otherwise.

    Examples:

    >>> from sklearn.linear_model import LogisticRegression, LinearRegression
    >>> is_clusterer(LogisticRegression)
    (False, None)
    >>> is_clusterer(LinearRegression)
    (False, None)
    >>> from sklearn.cluster import KMeans
    >>> is_clusterer(KMeans)
    (True, (MatrixContinuous(), DiscreteVector()))

    """
    if not is_algorithm(cls, verbose=verbose):
        return False, None

    inputs = []

    for input_type in (kb.MatrixContinuousDense(), kb.MatrixContinuousSparse()):
        try:
            X = DATA_TYPE_EXAMPLES[input_type]
            labels = cls().fit_predict(X)
            accepted = is_discrete(labels)
        except Exception as e:
            if verbose:
                warnings.warn(str(e))
            continue

        # Explicit check instead of `assert`: assertions are removed under
        # `python -O`, which would accept any output.
        if accepted:
            inputs.append(input_type)
        elif verbose:
            warnings.warn(
                "%r: fit_predict output for %r is not a discrete vector"
                % (cls, input_type)
            )

    inputs = combine_types(*inputs)

    if inputs:
        return True, (inputs, kb.DiscreteVector())
    return False, None
예제 #5
0
        assert len(obj.shape) == 1

        original_length = len(obj)
        obj = set(obj)

        return len(obj) > 0.1 * original_length and all(
            isinstance(x, str) for x in obj)
    except:
        return False


from autogoal import kb

# Lookup table from a `kb` data-type instance to the `is_*` checker function
# associated with that type.
# NOTE(review): presumably consulted by `is_data_type` to test a value against
# a type -- confirm against its definition.
# NOTE(review): there is no entry for `kb.DiscreteVector()`, although
# DATA_TYPE_EXAMPLES has a sample for it -- confirm whether that is intended.
DATA_RESOLVERS = {
    kb.MatrixContinuousDense(): is_matrix_continuous_dense,
    kb.MatrixContinuousSparse(): is_matrix_continuous_sparse,
    kb.CategoricalVector(): is_categorical,
    kb.ContinuousVector(): is_continuous,
    kb.List(kb.Sentence()): is_string_list,
}

# One small sample value per `kb` data type, keyed by the type instance.
# The numeric samples are drawn from random generators when this statement is
# executed; no seed is set here, so unless one is set elsewhere the values
# differ from run to run.
DATA_TYPE_EXAMPLES = {
    kb.MatrixContinuousDense(): np.random.rand(10, 10),
    # NOTE(review): `sp` is presumably `scipy.sparse`; if so, `rand` is called
    # with its default density (0.01), giving an almost empty 10x10 matrix --
    # confirm that this is intended.
    kb.MatrixContinuousSparse(): sp.rand(10, 10),
    # Two labels, five occurrences each.
    kb.CategoricalVector(): np.asarray(["A"] * 5 + ["B"] * 5),
    kb.ContinuousVector(): np.random.rand(10),
    # Ten integers in the half-open range [0, 10).
    kb.DiscreteVector(): np.random.randint(0, 10, (10, ), dtype=int),
    # The same ten-word sentence repeated ten times.
    kb.List(kb.Sentence()): ["abc bcd def feg geh hij jkl lmn nop pqr"] * 10,
}

예제 #6
0
    kb.List(kb.Chunktag()): [(("lorem", "ipsum"), "ipsum")] *
    10,  # [((str, str), str), ((str, str), str)] List of IOB Tagged token
    kb.List(kb.List(kb.Chunktag())): [
        [(("lorem", "ipsum"), "ipsum")] * 2
    ],  # [[((str, str), str), ((str, str), str)], [((str, str), str), ((str, str), str)]] List of IOB Tagged Sentences
    kb.Stem():
    "ips",
    kb.Word():
    "ipsum",
    kb.Sentence():
    "It is the best of all movies.",
    kb.Document():
    "It is the best of all movies. I actually love that action scene.",
    kb.MatrixContinuousDense():
    np.random.rand(10, 10),
    kb.MatrixContinuousSparse():
    sp.rand(10, 10),
    kb.CategoricalVector():
    np.asarray(["A"] * 5 + ["B"] * 5),
    kb.ContinuousVector():
    np.random.rand(10),
    kb.DiscreteVector():
    np.random.randint(0, 10, (10, ), dtype=int),
    kb.List(kb.Word()): ["ipsu", "lorem"],
    kb.List(kb.Document()):
    ["abc ipsu lorem say hello", "ipsum lorem", "abc"] * 2,
    kb.List(kb.List(kb.Stem())): [["abc", "ipsu", "lorem"] * 10],
    kb.List(kb.List(kb.Word())): [["abc", "ipsu", "lorem"] * 10],
    kb.List(kb.List(kb.Sentence())): [["abc a sentence lorem"],
                                      ["ipsum lorem"], ["abc"]]
}