Example #1
0
class CorexText_Hyperparams(hyperparams.Hyperparams):
    # Tunable settings declared here: the number of CorEx latent factors and
    # the TF-IDF options (column threshold, n-grams, document-frequency
    # cut-offs). Every entry is tagged as a TuningParameter.

    # Number of CorEx latent factors (described to the tuner as "topics").
    n_hidden = UniformInt(
        description='number of topics',
        default=30,
        lower=1,
        upper=301,
        semantic_types=[
            'http://schema.org/Integer',
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    # Column-count cut-off on the tf-idf matrix; per the description, CorEx is
    # not called when the matrix has fewer columns than this.
    # NOTE(review): declared as Uniform with q=1 although tagged Integer.
    threshold = Uniform(
        description='threshold for number of columns in the tfidf matrix below which we don`t call CorEx',
        default=0,
        lower=0,
        upper=10000,
        q=1,
        semantic_types=[
            'http://schema.org/Integer',
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    # n-gram setting applied to the text before it reaches TfidfVectorizer.
    n_grams = UniformInt(
        description='n_grams parameter to use before feeding in text to TfidfVectorizer',
        default=1,
        lower=1,
        upper=10,
        semantic_types=[
            'http://schema.org/Integer',
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    # Upper document-frequency bound; see max_df in
    # http://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html
    max_df = Uniform(
        description='max percent document frequency of analysed terms',
        default=0.9,
        lower=0.0,
        upper=1.0,
        q=0.05,
        semantic_types=[
            'http://schema.org/Float',
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    # Lower document-frequency bound; see min_df in
    # http://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html
    min_df = Uniform(
        description='min percent document frequency of analysed terms',
        default=0.02,
        lower=0.0,
        upper=1.0,
        q=0.01,
        semantic_types=[
            'http://schema.org/Float',
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )
Example #2
0
class CorexSAE_Hyperparams(hyperparams.Hyperparams):
    # Two settings are declared: the label/compression trade-off weight and
    # the number of training epochs. Neither carries semantic types.

    # Trade-off weight between label relevance and compression, sampled in
    # steps of 0.01 over [0, 1000).
    label_beta = Uniform(
        description='Lagrange multiplier for beta : 1 tradeoff btwn label relevance : compression.',
        default=1,
        lower=0,
        upper=1000,
        q=0.01,
    )

    # Number of training epochs.
    # NOTE(review): no quantisation (q) is given, so sampled values are
    # presumably not restricted to whole numbers -- confirm this is intended.
    epochs = Uniform(
        description='number of epochs to train',
        default=100,
        lower=1,
        upper=1000,
    )
Example #3
0
class SM_Hyperparams(hyperparams.Hyperparams):
    # Single tunable setting: the strength of the l2 regularization penalty.
    # NOTE(review): the default (1e-4) is not a multiple of the step q (1e-3);
    # confirm that is intended.
    reg_val = Uniform(
        description='l2 regularization penalty',
        default=1e-4,
        lower=0,
        upper=1e-2,
        q=1e-3,
        semantic_types=[
            'http://schema.org/Float',
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )
Example #4
0
class CorexText_Hyperparams(hyperparams.Hyperparams):
    # Settings declared here: topic count, document-frequency cut-offs,
    # optional chunking of documents and an optional cap on vocabulary size.
    # None of the entries carries semantic types.

    # Number of topics, sampled in whole steps (q=1).
    n_hidden = Uniform(
        description='number of topics',
        default=10,
        lower=0,
        upper=100,
        q=1,
    )

    # Upper document-frequency bound, expressed as a fraction.
    max_df = Uniform(
        description='max percent document frequency of analysed terms',
        default=0.9,
        lower=0.10,
        upper=1.01,
        q=0.05,
    )

    # Lower document-frequency bound: either an absolute count ('int df') or
    # a fraction ('pct df'); the fraction form is the default choice.
    min_df = Union(
        OrderedDict([
            (
                'int df',
                Uniform(
                    description='min integer document frequency of analysed terms',
                    default=2,
                    lower=1,
                    upper=20,
                    q=1,
                ),
            ),
            (
                'pct df',
                Uniform(
                    description='min percent document frequency of analysed terms',
                    default=0.02,
                    lower=0,
                    upper=0.10,
                    q=0.01,
                ),
            ),
        ]),
        default='pct df',
    )

    # Chunk size in tfidf-filtered terms, in steps of 100; 0 disables chunking.
    chunking = Uniform(
        description='number of tfidf-filtered terms to include as a document, 0 => no chunking.  last chunk may be > param value to avoid small documents',
        default=0,
        lower=0,
        upper=2000,
        q=100,
    )

    # Vocabulary cap: either no cap ('none', the default choice) or a term
    # count in steps of 1000 ('int mf').
    max_features = Union(
        OrderedDict([
            ('none', Enumeration([None], default=None)),
            (
                'int mf',
                Uniform(
                    description='max number of terms to use',
                    default=50000,
                    lower=1000,
                    upper=50001,
                    q=1000,
                ),
            ),
        ]),
        default='none',
    )
Example #5
0
class EchoRegressor_Hyperparams(hyperparams.Hyperparams):
    # Two tunable settings are declared: the regularization strength and a
    # flag selecting the diagonal-covariance assumption.

    # Regularization strength, sampled in steps of 0.1 over [0, 10).
    alpha = Uniform(
        lower=0,
        upper=10,
        default=1,
        q=.1,
        description='regularization strength',
        semantic_types=[
            "http://schema.org/Float",
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ])

    # Boolean switch for the diagonal-covariance assumption. The value type
    # is tagged as Boolean (it was previously mislabelled as Integer) so the
    # metadata agrees with the UniformBool declaration.
    diagonal = UniformBool(
        default=False,
        description=
        'assume diagonal covariance, leading to sparsity in data basis (instead of covariance eigenbasis)',
        semantic_types=[
            "http://schema.org/Boolean",
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ])
Example #6
0
class EchoIB_Hyperparams(hyperparams.Hyperparams):
    # Settings fall into three groups by semantic type: TuningParameter
    # (sizes, rates, training length), ControlParameter (architecture and
    # task switches) and ResourcesUseParameter (GPU count).

    # Number of hidden factors to learn.
    n_hidden = UniformInt(
        description='number of hidden factors learned',
        default=200,
        lower=1,
        upper=401,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    # Weight on the I(X:Z) regularizer, sampled in steps of 0.01.
    beta = Uniform(
        description='Lagrange multiplier for beta (applied to regularizer I(X:Z)): defining tradeoff btwn label relevance : compression.',
        default=0.1,
        lower=0,
        upper=1000,
        q=0.01,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    # Number of training epochs.
    epochs = UniformInt(
        description='number of epochs to train',
        default=100,
        lower=1,
        upper=10000,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    # Mini-batch size.
    batch = UniformInt(
        description='batch_size',
        default=50,
        lower=10,
        upper=1000,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    # Learning rate for Adam, drawn on a log scale.
    lr = LogUniform(
        description='learning rate for Adam optimization',
        default=0.001,
        lower=0.00001,
        upper=0.101,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    # Activation function for the intermediate layers.
    activation = Enumeration(
        description='activation to use for intermediate activations',
        default='tanh',
        values=['relu', 'tanh', 'elu'],
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )

    # Selects a convolutional architecture when True.
    convolutional = UniformBool(
        description='whether to use a convolutional architecture',
        default=False,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )

    # Kind of supervised task.
    task = Enumeration(
        description='task type',
        default='CLASSIFICATION',
        values=['CLASSIFICATION', 'REGRESSION'],
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )

    # Output-mode switch (features plus predictions versus predictions only).
    # NOTE(review): the description's wording leaves it unclear which value
    # selects which mode -- confirm against the primitive implementation.
    use_as_modeling = UniformBool(
        description='whether to return constructed features AND predictions (else, used for modeling i.e. only predictions',
        default=False,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )

    # Width of the fully connected intermediate layers.
    units = UniformInt(
        description='# neurons in FC intermediate layers',
        default=200,
        lower=10,
        upper=401,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    # Number of layers.
    layers = UniformInt(
        description='# of layers',
        default=2,
        lower=1,
        upper=8,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    # When True, a missing input column raises instead of being tolerated.
    error_on_no_input = hyperparams.UniformBool(
        description='Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.',
        default=True,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )

    # Number of GPUs to use, sampled in whole steps (q=1).
    gpus = Uniform(
        description='GPUs to Use',
        default=1,
        lower=0,
        upper=5,
        q=1,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter',
        ],
    )
Example #7
0
class SDNE_Hyperparams(hyperparams.Hyperparams):
    # Settings declared here: embedding size, training length, loss weights,
    # learning rate, network depth and a testing switch.
    # The float-valued entries (alpha, lr) are tagged as Float; they were
    # previously mislabelled as Integer.

    # Size of the latent embedding.
    dimension = UniformInt(
        lower=10,
        upper=200,
        default=10,
        description='dimension of latent embedding',
        semantic_types=[
            "http://schema.org/Integer",
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ])

    # Number of training epochs.
    epochs = UniformInt(
        lower=1,
        upper=500,
        default=50,
        description='number of epochs to train',
        semantic_types=[
            "http://schema.org/Integer",
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ])

    # Reconstruction weight applied to seen edges (see description).
    beta = UniformInt(
        lower=1,
        upper=20,
        default=5,
        description=
        'seen edge reconstruction weight (to account for sparsity in links for reconstructing adjacency.  matrix B in Wang et al 2016',
        semantic_types=[
            "http://schema.org/Integer",
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ])

    # First-order proximity weight; a continuous value, hence Float.
    alpha = Uniform(
        lower=1e-8,
        upper=1,
        default=1e-5,
        description='first order proximity weight',
        semantic_types=[
            "http://schema.org/Float",
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ])

    # Constant learning rate; a continuous value, hence Float.
    lr = Uniform(
        lower=1e-5,
        upper=1e-2,
        default=5e-4,
        description='learning rate (constant across training)',
        semantic_types=[
            "http://schema.org/Float",
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ])

    # Number of hidden layers; tagged as a control (not tuning) parameter.
    depth = UniformInt(
        lower=1,
        upper=10,
        default=3,
        description='number of hidden layers',
        semantic_types=[
            "http://schema.org/Integer",
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])

    # Switch described only as 'for testing'.
    return_list = UniformBool(
        default=False,
        description='for testing',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ])