class CorexText_Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters: n_hidden, threshold, n_grams, max_df and min_df.

    Every entry carries the ``TuningParameter`` semantic type. The range,
    default and purpose of each entry are given in its declaration.
    """

    # Number of Corex latent factors.
    n_hidden = UniformInt(
        description="number of topics",
        default=30,
        lower=1,
        upper=301,
        semantic_types=[
            "http://schema.org/Integer",
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )

    # Column-count cut-off on the tfidf matrix (see description).
    threshold = Uniform(
        description=(
            "threshold for number of columns in the tfidf matrix below which "
            "we don`t call CorEx"
        ),
        default=0,
        lower=0,
        upper=10000,
        q=1,
        semantic_types=[
            "http://schema.org/Integer",
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )

    # n-gram setting applied before the text reaches TfidfVectorizer.
    n_grams = UniformInt(
        description=(
            "n_grams parameter to use before feeding in text to "
            "TfidfVectorizer"
        ),
        default=1,
        lower=1,
        upper=10,
        semantic_types=[
            "http://schema.org/Integer",
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )

    # max_df @ http://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html
    max_df = Uniform(
        description="max percent document frequency of analysed terms",
        default=0.9,
        lower=0.0,
        upper=1.0,
        q=0.05,
        semantic_types=[
            "http://schema.org/Float",
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )

    # min_df @ http://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html
    min_df = Uniform(
        description="min percent document frequency of analysed terms",
        default=0.02,
        lower=0.0,
        upper=1.0,
        q=0.01,
        semantic_types=[
            "http://schema.org/Float",
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )
class CorexSAE_Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters: label_beta and epochs.

    Both entries are ``Uniform`` ranges; neither declares semantic types.
    """

    # Lagrange multiplier weighting label relevance against compression.
    label_beta = Uniform(
        description=(
            "Lagrange multiplier for beta : 1 tradeoff btwn label relevance "
            ": compression."
        ),
        default=1,
        lower=0,
        upper=1000,
        q=0.01,
    )

    # Training length, in epochs.
    epochs = Uniform(
        description="number of epochs to train",
        default=100,
        lower=1,
        upper=1000,
    )
class SM_Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters: a single tunable entry, ``reg_val``."""

    # L2 regularization penalty, declared with a quantisation step ``q``.
    reg_val = Uniform(
        description="l2 regularization penalty",
        default=1e-4,
        lower=0,
        upper=1e-2,
        q=1e-3,
        semantic_types=[
            "http://schema.org/Float",
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )
class CorexText_Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters: n_hidden, max_df, min_df, chunking, max_features.

    ``min_df`` and ``max_features`` are ``Union`` entries, each offering two
    alternative sub-spaces selected by key; the remaining entries are plain
    ``Uniform`` ranges. No semantic types are declared on any entry.

    NOTE(review): a class with this same name is also defined earlier in this
    file. If both live in one module, this later definition is the one the
    name resolves to - confirm that is intended.
    """

    n_hidden = Uniform(
        description="number of topics",
        default=10,
        lower=0,
        upper=100,
        q=1,
    )

    max_df = Uniform(
        description="max percent document frequency of analysed terms",
        default=0.9,
        lower=0.10,
        upper=1.01,
        q=0.05,
    )

    # Either an absolute document count ('int df') or a fraction ('pct df').
    min_df = Union(
        OrderedDict([
            (
                "int df",
                Uniform(
                    description="min integer document frequency of analysed terms",
                    default=2,
                    lower=1,
                    upper=20,
                    q=1,
                ),
            ),
            (
                "pct df",
                Uniform(
                    description="min percent document frequency of analysed terms",
                    default=0.02,
                    lower=0,
                    upper=0.10,
                    q=0.01,
                ),
            ),
        ]),
        default="pct df",
    )

    chunking = Uniform(
        description=(
            "number of tfidf-filtered terms to include as a document, "
            "0 => no chunking. last chunk may be > param value to avoid "
            "small documents"
        ),
        default=0,
        lower=0,
        upper=2000,
        q=100,
    )

    # Either no cap ('none', the only value being None) or a term cap ('int mf').
    max_features = Union(
        OrderedDict([
            ("none", Enumeration([None], default=None)),
            (
                "int mf",
                Uniform(
                    description="max number of terms to use",
                    default=50000,
                    lower=1000,
                    upper=50001,
                    q=1000,
                ),
            ),
        ]),
        default="none",
    )
class EchoRegressor_Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters: alpha and diagonal.

    Both entries carry the ``TuningParameter`` semantic type.
    """

    # Regularization strength.
    alpha = Uniform(
        lower=0,
        upper=10,
        default=1,
        q=.1,
        description='regularization strength',
        semantic_types=[
            "http://schema.org/Float",
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    # Boolean switch, so it is tagged schema.org/Boolean. The previous tag
    # was schema.org/Integer, which mislabels a UniformBool hyperparameter.
    diagonal = UniformBool(
        default=False,
        description=(
            'assume diagonal covariance, leading to sparsity in data basis '
            '(instead of covariance eigenbasis)'
        ),
        semantic_types=[
            "http://schema.org/Boolean",
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )
class EchoIB_Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for n_hidden, beta, epochs, batch, lr, activation,
    convolutional, task, use_as_modeling, units, layers, error_on_no_input
    and gpus.

    Entries are tagged ``TuningParameter`` (searchable), ``ControlParameter``
    (behavioural switches) or ``ResourcesUseParameter`` (``gpus``), exactly
    as declared on each entry below.
    """

    # --- tuning parameters -------------------------------------------------
    n_hidden = UniformInt(
        lower=1,
        upper=401,
        default=200,
        description='number of hidden factors learned',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    beta = Uniform(
        lower=0,
        upper=1000,
        default=.1,
        q=.01,
        description=(
            'Lagrange multiplier for beta (applied to regularizer I(X:Z)): '
            'defining tradeoff btwn label relevance : compression.'
        ),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    epochs = UniformInt(
        lower=1,
        upper=10000,
        default=100,
        description='number of epochs to train',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    batch = UniformInt(
        lower=10,
        upper=1000,
        default=50,
        description='batch_size',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    lr = LogUniform(
        lower=0.00001,
        upper=0.101,
        default=0.001,
        description='learning rate for Adam optimization',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    # --- control parameters ------------------------------------------------
    activation = Enumeration(
        values=['relu', 'tanh', 'elu'],
        default='tanh',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
        description="activation to use for intermediate activations",
    )

    convolutional = UniformBool(
        default=False,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
        description="whether to use a convolutional architecture",
    )

    task = Enumeration(
        values=['CLASSIFICATION', 'REGRESSION'],
        default='CLASSIFICATION',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
        description='task type',
    )

    # The description used to be one string literal broken across two
    # physical lines, which is not valid Python for a plain quoted string.
    # It is rejoined here with its text unchanged.
    # NOTE(review): the wording never closes its parenthesis and may read
    # inverted relative to the parameter name - confirm with the author.
    use_as_modeling = UniformBool(
        default=False,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
        description=(
            "whether to return constructed features AND predictions "
            "(else, used for modeling i.e. only predictions"
        ),
    )

    # --- architecture size (tuning) ----------------------------------------
    units = UniformInt(
        lower=10,
        upper=401,
        default=200,
        description='# neurons in FC intermediate layers',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    layers = UniformInt(
        lower=1,
        upper=8,
        default=2,
        description='# of layers',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    # --- input handling and resources --------------------------------------
    error_on_no_input = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
        description=(
            "Throw an exception if no input column is selected/provided. "
            "Defaults to true to behave like sklearn. "
            "To prevent pipelines from breaking set this to False."
        ),
    )

    gpus = Uniform(
        lower=0,
        upper=5,
        q=1,
        default=1,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter',
        ],
        description='GPUs to Use',
    )
class SDNE_Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters: dimension, epochs, beta, alpha, lr, depth, return_list.

    ``depth`` is a ``ControlParameter``; every other entry is a
    ``TuningParameter``. Integer-valued entries use ``UniformInt`` and are
    tagged schema.org/Integer. The continuous ``alpha`` and ``lr`` ranges
    are tagged schema.org/Float.
    """

    dimension = UniformInt(
        lower=10,
        upper=200,
        default=10,
        # q = 5,
        description='dimension of latent embedding',
        semantic_types=[
            "http://schema.org/Integer",
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    epochs = UniformInt(
        lower=1,
        upper=500,
        default=50,
        # q = 5e-8,
        description='number of epochs to train',
        semantic_types=[
            "http://schema.org/Integer",
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    beta = UniformInt(
        lower=1,
        upper=20,
        default=5,
        # q = 1,
        description=(
            'seen edge reconstruction weight (to account for sparsity in links '
            'for reconstructing adjacency. matrix B in Wang et al 2016'
        ),
        semantic_types=[
            "http://schema.org/Integer",
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    # Continuous range (1e-8 .. 1), so it is tagged Float. The previous
    # schema.org/Integer tag mislabelled it.
    alpha = Uniform(
        lower=1e-8,
        upper=1,
        default=1e-5,
        # q = 5e-8,
        description='first order proximity weight',
        semantic_types=[
            "http://schema.org/Float",
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    # Continuous range (1e-5 .. 1e-2), so it is tagged Float. The previous
    # schema.org/Integer tag mislabelled it.
    lr = Uniform(
        lower=1e-5,
        upper=1e-2,
        default=5e-4,
        # q = 5e-8,
        description='learning rate (constant across training)',
        semantic_types=[
            "http://schema.org/Float",
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    depth = UniformInt(
        lower=1,
        upper=10,
        default=3,
        # q = 5,
        description='number of hidden layers',
        semantic_types=[
            "http://schema.org/Integer",
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )

    # Described only as 'for testing' in the declaration itself.
    return_list = UniformBool(
        default=False,
        description='for testing',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )