class TLSCMDConfig:
    """Paths of the TLS private key and certificate files."""

    # Private key location; defaults to "server.key".
    key: str = field("Path to key file", default="server.key")
    # Certificate location; defaults to "server.pem".
    cert: str = field("Path to cert file", default="server.pem")
class DNNClassifierModelConfig:
    """Configuration fields for a DNN classifier model.

    After initialisation every entry of ``classifications`` is converted
    with ``clstype``.
    """

    predict: Feature = field("Feature name holding predict value")
    classifications: List[str] = field("Options for value of classification")
    features: Features = field("Features to train on")
    clstype: Type = field("Data type of classifications values", default=str)
    batchsize: int = field(
        "Number repos to pass through in an epoch", default=20
    )
    shuffle: bool = field("Randomise order of repos in a batch", default=True)
    steps: int = field("Number of steps to train the model", default=3000)
    epochs: int = field(
        "Number of iterations to pass over all repos in a source", default=30
    )
    directory: str = field(
        "Directory where state should be saved",
        default=os.path.join(
            os.path.expanduser("~"), ".cache", "dffml", "tensorflow"
        ),
    )
    hidden: List[int] = field(
        "List length is the number of hidden layers in the network. Each entry in the list is the number of nodes in that hidden layer",
        default_factory=lambda: [12, 40, 15],
    )

    def __post_init__(self):
        """Convert each classification option to ``clstype``."""
        convert = self.clstype
        self.classifications = [
            convert(option) for option in self.classifications
        ]
class SLRConfig:
    """Configuration fields: target feature, training features, state directory."""

    predict: Feature = field("Label or the value to be predicted")
    features: Features = field("Features to train on")
    # Defaults to ~/.cache/dffml/scratch (home directory expanded at import).
    directory: str = field(
        "Directory where state should be saved",
        default=os.path.join(
            os.path.expanduser("~"), ".cache", "dffml", "scratch"
        ),
    )
class MultiCommCMDConfig:
    """MultiComm options: config directory and the atomic-mode flag."""

    mc_config: str = field("MultiComm config directory", default=None)
    # Flag-style option: absent means False.
    mc_atomic: bool = field(
        "No routes other than dataflows registered at startup",
        default=False,
        action="store_true",
    )
class DataFlowSourceConfig:
    """Configuration for a source that runs records through a dataflow.

    Holds the wrapped source, the dataflow used for preprocessing, and the
    options controlling which inputs are added to each record's context.
    """

    source: BaseSource = field("Source to wrap")
    dataflow: DataFlow = field("DataFlow to use for preprocessing")
    features: Features = field(
        "Features to pass as definitions to each context from each "
        "record to be preprocessed",
        default=Features(),
    )
    inputs: List[str] = field(
        "Other inputs to add under each ctx (record's key will "
        "be used as the context)",
        action=ParseInputsAction,
        default_factory=list,
    )
    # The help text previously rendered as "record.key.If set" because the
    # adjacent literals were joined without a separating space.
    record_def: str = field(
        "Definition to be used for record.key. "
        "If set, record.key will be added to the set of inputs "
        "under each context (which is also the record's key)",
        default=None,
    )
    length: str = field(
        "Definition name to add as source length", default=None
    )
    all_for_single: bool = field(
        "Run all records through dataflow before grabing "
        "results of desired record on a call to record()",
        default=False,
    )
    no_strict: bool = field(
        "Do not exit on operation exceptions, just log errors",
        default=False,
    )
    # NOTE(review): this default orchestrator instance is created once at
    # class-definition time — confirm sharing it between configs is intended.
    orchestrator: BaseOrchestrator = field(
        "Orchestrator", default=MemoryOrchestrator.withconfig({}),
    )
class DNNClassifierModelConfig(TensorflowBaseConfig):
    """Classifier-specific options layered on ``TensorflowBaseConfig``.

    After initialisation every entry of ``classifications`` (when provided)
    is converted with ``clstype``.
    """

    classifications: List[str] = field(
        "Options for value of classification", default=None
    )
    clstype: Type = field("Data type of classifications values", default=str)
    batchsize: int = field(
        "Number records to pass through in an epoch", default=20
    )
    shuffle: bool = field(
        "Randomise order of records in a batch", default=True
    )

    def __post_init__(self):
        """Convert classification options to ``clstype`` when they are set."""
        # ``classifications`` defaults to None; mapping over None would raise
        # TypeError, so leave the default untouched.
        if self.classifications is not None:
            self.classifications = list(
                map(self.clstype, self.classifications)
            )
class PyTorchPreTrainedModelConfig(PyTorchModelConfig):
    """Options for pre-trained models, extending ``PyTorchModelConfig``."""

    pretrained: bool = field("Load Pre-trained model weights", default=True)
    trainable: bool = field(
        "Tweak pretrained model by training again", default=False
    )
    add_layers: bool = field(
        "Add layers on top of pretrained model", default=False
    )
    layers: dict = field(
        "Extra layers to be added on top of pretrained model", default=None
    )
class PyTorchPreTrainedModelConfig(PyTorchModelConfig):
    """Options for pre-trained models, extending ``PyTorchModelConfig``."""

    pretrained: bool = field("Load Pre-trained model weights", default=True)
    trainable: bool = field(
        "Tweak pretrained model by training again",
        default=False,
    )
    add_layers: bool = field(
        "Replace the last layer of the pretrained model", default=False
    )
    layers: dict = field(
        "Extra layers to replace the last layer of the pretrained model",
        default=None,
    )
class ServerConfig(TLSCMDConfig, MultiCommCMDConfig):
    """Server options, combined with the TLS and MultiComm option sets."""

    port: int = field("Port to bind to", default=8080)
    addr: str = field("Address to bind to", default="127.0.0.1")
    upload_dir: str = field(
        "Directory to store uploaded files in", default=None
    )
    static: str = field(
        "Directory to serve static content from", default=None
    )
    js: bool = field(
        "Serve JavaScript API file at /api.js",
        action="store_true",
        default=False,
    )
    insecure: bool = field(
        "Start without TLS encryption",
        default=False,
        action="store_true",
    )
    cors_domains: List[str] = field(
        "Domains to allow CORS for (see keys in defaults dict for aiohttp_cors.setup)",
        default_factory=list,
    )
    # Each instance gets its own empty container from the factory.
    models: Model = field(
        "Models configured on start",
        default_factory=AsyncContextManagerList,
        action=list_action(AsyncContextManagerList),
        labeled=True,
    )
    sources: Sources = field(
        "Sources configured on start",
        default_factory=Sources,
        action=list_action(Sources),
        labeled=True,
    )
    redirect: List[str] = field(
        "list of METHOD SOURCE_PATH DESTINATION_PATH pairs, number of elements must be divisible by 3",
        default_factory=list,
        action=ParseRedirectsAction,
    )
class FakeTestingConfig2:
    """Test configuration mixing ``field`` declarations and plain defaults."""

    name: str = field("Name of FakeTesting2")
    # No default: must be supplied.
    num: float
    features: Features = Features(
        DefFeature("default", int, 1),
        DefFeature("features", int, 10),
    )
    label: str = "unlabeled"
def scikit_doc_to_field(type_str, param):
    """Derive a ``(type, field)`` pair from a documented parameter type.

    Args:
        type_str: Type description text for the parameter; also used as the
            description of the returned field.
        param: An ``inspect.Parameter``; its ``default`` is used when present,
            otherwise ``scikit_get_default(type_str)`` supplies one.

    Returns:
        Tuple of the chosen Python type and ``field(type_str, default=...)``.
    """
    default = param.default
    if default is inspect.Parameter.empty:
        default = scikit_get_default(type_str)

    type_cls = Any
    if "{'" in type_str and "'}" in type_str:
        # Set of quoted choices, e.g. {'a', 'b'}
        type_cls = str
    elif "{" in type_str and "}" in type_str:
        # Set of unquoted choices: float if a "." appears, int otherwise.
        type_cls = float if "." in type_str else int
    else:
        words = {
            word.lower() for word in type_str.replace(",", "").split()
        }
        # Later matches in SCIKIT_DOCS_TYPE_MAP override earlier ones.
        for scikit_type_name, python_type in SCIKIT_DOCS_TYPE_MAP.items():
            if scikit_type_name in words:
                type_cls = python_type

    # Identity checks: a default with an overloaded ``__ne__`` (for example an
    # array) must not be asked to compare itself against None.
    if type_cls is Any and default is not None:
        type_cls = type(default)

    return type_cls, field(type_str, default=default)
class FakeTestingConfig:
    """Test configuration with required and defaulted attributes."""

    # Required attributes (no defaults).
    num: float
    files: List[str]
    features: Features

    # Attributes with a field description or a plain default.
    name: str = field("Name of FakeTesting")
    label: str = "unlabeled"
    readonly: bool = False
    source: BaseSource = JSONSource
class CreateTLSClientConfig:
    """Key size and file paths used when creating TLS client credentials."""

    bits: int = field("Number of bits to use for key", default=4096)

    # Client-side files.
    key: str = field("Path to client key file", default="client.key")
    cert: str = field("Path to client cert file", default="client.pem")
    csr: str = field("Path to client csr file", default="client.csr")

    # Server-side files.
    server_key: str = field("Path to server key file", default="server.key")
    server_cert: str = field(
        "Path to server cert file", default="server.pem"
    )
def inspect_pytorch_params(cls: Callable):
    """Map each parameter of ``cls`` to an ``(annotation, field)`` pair.

    The field is described by the parameter name and uses the parameter's
    default, or ``None`` when the parameter has no default.
    """
    no_default = inspect.Parameter.empty
    return {
        name: (
            parameter.annotation,
            field(
                name,
                default=None
                if parameter.default is no_default
                else parameter.default,
            ),
        )
        for name, parameter in inspect.signature(cls).parameters.items()
    }
class TensorflowBaseConfig:
    """Base configuration fields: target, features, state directory, training sizes."""

    predict: Feature = field("Feature name holding target values")
    features: Features = field("Features to train on")
    directory: pathlib.Path = field("Directory where state should be saved")
    steps: int = field("Number of steps to train the model", default=3000)
    epochs: int = field(
        "Number of iterations to pass over all records in a source",
        default=30,
    )
    # A fresh list is produced for every instance.
    hidden: List[int] = field(
        "List length is the number of hidden layers in the network. Each entry in the list is the number of nodes in that hidden layer",
        default_factory=lambda: [12, 40, 15],
    )
class DNNRegressionModelConfig:
    """Configuration fields for a DNN regression model."""

    predict: str = field("Feature name holding target values")
    features: Features = field("Features to train on")
    steps: int = field("Number of steps to train the model", default=3000)
    epochs: int = field(
        "Number of iterations to pass over all repos in a source",
        default=30,
    )
    # Defaults to ~/.cache/dffml/tensorflow (home directory expanded at import).
    directory: str = field(
        "Directory where state should be saved",
        default=os.path.join(
            os.path.expanduser("~"), ".cache", "dffml", "tensorflow"
        ),
    )
    hidden: List[int] = field(
        "List length is the number of hidden layers in the network. Each entry in the list is the number of nodes in that hidden layer",
        default_factory=lambda: [12, 40, 15],
    )
class HFClassificationModelConfig:
    """Configuration for a text-classification model.

    ``__post_init__`` imports tensorflow, converts ``label_list`` entries
    with ``clstype``, validates that only one feature is given, and selects
    a distribution strategy from ``gpus`` / ``no_cuda``.
    """

    features: Features = field("Feature to train on")
    predict: Feature = field("Feature holding target values")
    label_list: List[str] = field("List of target labels")
    cache_dir: str = field(
        "Directory to store the pre-trained models downloaded from s3"
    )
    model_name_or_path: str = field(
        "Path to pretrained model or model identifier from huggingface.co/models",
    )
    directory: str = field(
        "The output directory where the model predictions and checkpoints will be written.",
    )
    logging_dir: str = field("Tensorboard log dir.")
    from_pt: bool = field(
        "Whether to load model from pytorch checkpoint or .bin file",
        default=False,
    )
    clstype: Type = field("Data type of classifications values", default=str)
    max_seq_length: int = field(
        "The maximum total input sequence length after tokenization. Sequences longer than this will be truncated, sequences shorter will be padded.",
        default=128,
    )
    overwrite_cache: bool = field(
        "Overwrite the cached training and evaluation sets", default=False,
    )
    config_name: str = field(
        "Pretrained config name or path if not the same as model_name",
        default=None,
    )
    tokenizer_name: str = field(
        "Pretrained tokenizer name or path if not the same as model_name",
        default=None,
    )
    use_fast: bool = field(
        "Set this flag to use fast tokenization.", default=False
    )
    doc_stride: int = field(
        "When splitting up a long document into chunks, how much stride to take between chunks.",
        default=128,
    )
    optimizer_name: str = field(
        'Name of a Tensorflow optimizer among "adadelta, adagrad, adam, adamax, ftrl, nadam, rmsprop, sgd, adamw"',
        default="adam",
    )
    loss_name: str = field(
        "Name of a Tensorflow loss. For the list see: https://www.tensorflow.org/api_docs/python/tf/keras/losses",
        default="SparseCategoricalCrossentropy",
    )
    gpus: str = field(
        "List of gpu devices. If only one, switch to single gpu strategy, if None takes all availabel gpus",
        default="0",
    )
    # ``no_cuda`` used to be declared twice in this class. The second
    # declaration silently replaced the first while keeping this position, so
    # the single declaration here carries the second one's help text.
    no_cuda: bool = field(
        "Do not use CUDA even when it is available", default=False,
    )
    end_lr: float = field("End learning rate for optimizer", default=0)
    debug: bool = field(
        "Activate the trace to record computation graphs and profiling information",
        default=False,
    )
    overwrite_directory: bool = field(
        "Overwrite the content of the output directory.Use this to continue training if directory points to a checkpoint directory.",
        default=False,
    )
    evaluate_during_training: bool = field(
        "Run evaluation during training at each logging step.",
        default=False,
    )
    per_device_train_batch_size: int = field(
        "Batch size per GPU/TPU core/CPU for training.", default=8,
    )
    per_device_eval_batch_size: int = field(
        "Batch size per GPU/TPU core/CPU for evaluation.", default=8,
    )
    gradient_accumulation_steps: int = field(
        "Number of updates steps to accumulate before performing a backward/update pass.",
        default=1,
    )
    learning_rate: float = field(
        "The initial learning rate for Adam.", default=5e-5,
    )
    weight_decay: float = field(
        "Weight decay if we apply some.", default=0.0,
    )
    adam_epsilon: float = field("Epsilon for Adam optimizer.", default=1e-8,)
    max_grad_norm: float = field("Max gradient norm.", default=1.0,)
    num_train_epochs: float = field(
        "Total number of training epochs to perform.", default=1,
    )
    max_steps: int = field(
        "If > 0: set total number of training steps to perform. Override num_train_epochs.",
        default=-1,
    )
    warmup_steps: int = field(
        "Linear warmup over warmup_steps.", default=0,
    )
    logging_first_step: bool = field(
        "Log and eval the first global_step", default=False,
    )
    logging_steps: int = field("Log every X updates steps.", default=500,)
    save_steps: int = field(
        "Save checkpoint every X updates steps.", default=500,
    )
    save_total_limit: int = field(
        "Limit the total amount of checkpoints.Deletes the older checkpoints in the directory. Default is unlimited checkpoints",
        default=None,
    )
    seed: int = field("random seed for initialization", default=42,)
    fp16: bool = field(
        "Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit",
        default=False,
    )
    # A separating space was missing between the two literals, which rendered
    # the help text as "...'O3'].See details...".
    fp16_opt_level: str = field(
        "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']. "
        "See details at https://nvidia.github.io/apex/amp.html",
        default="O1",
    )
    local_rank: int = field(
        "For distributed training: local_rank", default=-1,
    )
    dataloader_drop_last: bool = field(
        "Drop the last incomplete batch if the length of the dataset is not divisible by the batch size",
        default=False,
    )
    past_index: int = field(
        "Some models can make use of the past hidden states for their predictions. If this argument is set to a positive int, the `Trainer` will use the corresponding output (usually index 2) as the past state and feed it to the model at the next training step under the keyword argument `mems` ",
        default=-1,
    )

    def to_json_string(self):
        """Return the exported config as JSON, without features/predict/clstype.

        Raises:
            KeyError: If one of the removed keys is absent from the export.
        """
        config_dict = export(self)
        # Plain loop: the pops are executed for their side effect only.
        for key in ("features", "predict", "clstype"):
            config_dict.pop(key)
        return json.dumps(config_dict, indent=2)

    def __post_init__(self):
        """Derive runtime attributes (strategy, batch sizes) from the fields.

        Raises:
            ValueError: If more than one feature is configured.
        """
        self.output_dir = self.directory
        self.tf = importlib.import_module("tensorflow")
        self.label_list = list(map(self.clstype, self.label_list))
        self.task_name = "sst-2"
        self.mode = "text-classification"
        if len(self.features) > 1:
            raise ValueError("Found more than one feature to train on")
        if self.fp16:
            self.tf.config.optimizer.set_experimental_options(
                {"auto_mixed_precision": True}
            )

        gpu_ids = self.gpus.split(",")
        if len(gpu_ids) > 1:
            # Several GPUs requested: mirror across all of them.
            devices = [f"/gpu:{gpu}" for gpu in gpu_ids]
            self.n_replicas = len(devices)
            self.strategy = self.tf.distribute.MirroredStrategy(
                devices=devices
            )
        elif self.no_cuda:
            self.n_replicas = 1
            self.strategy = self.tf.distribute.OneDeviceStrategy(
                device="/cpu:0"
            )
        else:
            self.n_replicas = len(gpu_ids)
            self.strategy = self.tf.distribute.OneDeviceStrategy(
                device="/gpu:" + gpu_ids[0]
            )

        replicas = max(1, self.n_replicas)
        self.train_batch_size = self.per_device_train_batch_size * replicas
        self.eval_batch_size = self.per_device_eval_batch_size * replicas
class NERModelConfig:
    """Configuration for a named-entity-recognition model.

    ``__post_init__`` imports tensorflow and selects a distribution strategy
    from ``tpu``, ``gpus`` and ``no_cuda``, storing it on ``self.strategy``
    together with the device count in ``self.n_device``.
    """

    sid: Feature = field(
        "Unique Id to identify words of each sentence (Sentence ID)"
    )
    words: Feature = field("Tokens to train NER model")
    predict: Feature = field("NER Tags (B-MISC, I-PER, O etc.) for tokens")
    model_architecture_type: str = field(
        "Model architecture selected in the : "
        + ", ".join(ORIGINAL_NER_MODELS.keys())
    )
    model_name_or_path: str = field(
        "Path to pre-trained model or shortcut name selected in the list: "
        + ", ".join(ALL_MODELS)
    )
    output_dir: str = field(
        "The output directory where the model checkpoints will be written",
        default=str(
            pathlib.Path("~", ".cache", "dffml", "transformers", "checkpoints")
        ),
    )
    config_name: str = field(
        "Pretrained config name or path if not the same as model_name",
        default=None,
    )
    tokenizer_name: str = field(
        "Pretrained tokenizer name or path if not the same as model_name",
        default=None,
    )
    cache_dir: str = field(
        "Directory to store the pre-trained models downloaded from s3",
        default=str(pathlib.Path("~", ".cache", "dffml", "transformers")),
    )
    max_seq_length: int = field(
        "The maximum total input sentence length after tokenization.Sequences longer than this will be truncated, sequences shorter will be padded",
        default=128,
    )
    max_steps: int = field(
        "If greater than zero then sets total number of training steps to perform. Overrides `epochs`",
        default=0,
    )
    use_fp16: bool = field(
        "Whether to use 16-bit (mixed) precision instead of 32-bit",
        default=False,
    )
    ner_tags: List[str] = field(
        "List of all distinct NER Tags",
        default_factory=lambda: [
            "O",
            "B-MISC",
            "I-MISC",
            "B-PER",
            "I-PER",
            "B-ORG",
            "I-ORG",
            "B-LOC",
            "I-LOC",
        ],
    )
    do_lower_case: bool = field(
        "Set this flag if using uncased model.", default=False
    )
    gradient_accumulation_steps: int = field(
        "Number of updates steps to accumulate before performing a backward pass.",
        default=1,
    )
    learning_rate: float = field(
        "The initial learning rate for Adam", default=5e-5
    )
    weight_decay: float = field("Weight decay", default=0.0)
    adam_epsilon: float = field("Epsilon for Adam optimizer", default=1e-8)
    max_grad_norm: float = field("Max gradient norm.", default=1.0)
    epochs: int = field(
        "Total number of training epochs to perform.", default=1
    )
    warmup_steps: int = field("Linear warmup over warmup_steps.", default=0)
    save_steps: int = field(
        "Save checkpoint every X update steps.", default=10
    )
    seed: int = field("Random seed for initialization", default=2020)
    gpus: str = field(
        "List of gpu devices. If only one, switch to single gpu strategy, if None takes all availabel gpus",
        default="0",
    )
    tpu: str = field(
        "The Cloud TPU to use for training. This should be either the name used when creating the Cloud TPU, or a grpc://ip.address.of.tpu:8470 url",
        default=None,
    )
    num_tpu_cores: int = field("Total number of TPU cores to use.", default=8)
    per_device_train_batch_size: int = field(
        "Batch size per GPU/CPU/TPU for training", default=8
    )
    per_device_eval_batch_size: int = field(
        "Batch size per GPU/CPU/TPU for assessing accuracy", default=8
    )
    no_cuda: bool = field("Avoid using CUDA when available", default=False)
    eval_all_checkpoints: bool = field(
        "Evaluate all checkpoints starting with the same prefix as model_name ending and ending with step number",
        default=False,
    )

    def __post_init__(self):
        """Import tensorflow and choose the distribution strategy."""
        self.tf = importlib.import_module("tensorflow")
        if self.use_fp16:
            self.tf.config.optimizer.set_experimental_options(
                {"auto_mixed_precision": True}
            )

        gpu_ids = self.gpus.split(",")
        if self.tpu:
            # Use this config's own ``tpu`` value. The previous
            # ``self.parent.config.tpu`` referenced an attribute that this
            # class never defines, although the branch is guarded by
            # ``self.tpu``.
            resolver = self.tf.distribute.cluster_resolver.TPUClusterResolver(
                tpu=self.tpu
            )
            self.tf.config.experimental_connect_to_cluster(resolver)
            self.tf.tpu.experimental.initialize_tpu_system(resolver)
            self.strategy = self.tf.distribute.experimental.TPUStrategy(
                resolver
            )
            self.n_device = self.num_tpu_cores
        elif len(gpu_ids) > 1:
            # Several GPUs requested: mirror across all of them.
            devices = [f"/gpu:{gpu}" for gpu in gpu_ids]
            self.n_device = len(devices)
            self.strategy = self.tf.distribute.MirroredStrategy(
                devices=devices
            )
        elif self.no_cuda:
            self.n_device = 1
            self.strategy = self.tf.distribute.OneDeviceStrategy(
                device="/cpu:0"
            )
        else:
            self.n_device = len(gpu_ids)
            self.strategy = self.tf.distribute.OneDeviceStrategy(
                device="/gpu:" + gpu_ids[0]
            )
class XGBRegressorModelConfig:
    """Configuration fields for an XGBoost regressor model."""

    directory: pathlib.Path = field("Directory where model should be saved")
    features: Features = field("Features on which we train the model")
    predict: Feature = field("Value to be predicted")

    # Hyperparameters.
    learning_rate: float = field("Learning rate to train with", default=0.05)
    n_estimators: int = field(
        "Number of gradient boosted trees. Equivalent to the number of boosting rounds",
        default=1000,
    )
    max_depth: int = field("Maximium tree depth for base learners", default=6)
    subsample: float = field(
        "Subsample ratio of the training instance", default=1
    )
    gamma: float = field(
        "Minimium loss reduction required to make a furthre partition on a leaf node",
        default=0,
    )
    n_jobs: int = field(
        "Number of parallel threads used to run xgboost", default=-1
    )
    colsample_bytree: float = field(
        "Subsample ratio of columns when constructing each tree",
        default=1,
    )
    booster: str = field(
        "Specify which booster to use: gbtree, gblinear or dart",
        default="gbtree",
    )
    min_child_weight: float = field(
        "Minimum sum of instance weight(hessian) needed in a child",
        default=0,
    )
    reg_lambda: float = field(
        "L2 regularization term on weights. Increasing this value will make model more conservative",
        default=1,
    )
    reg_alpha: float = field(
        "L1 regularization term on weights. Increasing this value will make model more conservative",
        default=0,
    )
class TextClassifierConfig:
    """Configuration fields for a text classifier.

    After initialisation ``classifications`` entries are converted with
    ``clstype`` and, when ``add_layers`` is set, ``layers`` is passed through
    ``parse_layers``.
    """

    predict: Feature = field("Feature name holding classification value")
    classifications: List[str] = field("Options for value of classification")
    features: Features = field("Features to train on")
    # NOTE(review): annotated as str while the default is the bool True —
    # confirm the intended type.
    trainable: str = field(
        "Tweak pretrained model by training again", default=True
    )
    batch_size: int = field("Batch size", default=120)
    max_seq_length: int = field(
        "Length of sentence, used in preprocessing of input for bert embedding",
        default=256,
    )
    add_layers: bool = field(
        "Add layers on the top of pretrianed model/layer", default=False
    )
    embedType: str = field(
        "Type of pretrained embedding model, required to be set to `bert` to use bert pretrained embedding",
        default=None,
    )
    layers: List[str] = field(
        "Extra layers to be added on top of pretrained model", default=None
    )
    model_path: str = field(
        "Pretrained model path/url",
        default="https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim-with-oov/1",
    )
    optimizer: str = field("Optimizer used by model", default="adam")
    metrics: str = field("Metric used to evaluate model", default="accuracy")
    clstype: Type = field("Data type of classifications values", default=str)
    epochs: int = field(
        "Number of iterations to pass over all records in a source",
        default=10,
    )
    directory: str = field(
        "Directory where state should be saved",
        default=os.path.join(
            os.path.expanduser("~"), ".cache", "dffml", "tensorflow_hub"
        ),
    )

    def __post_init__(self):
        """Convert classifications and, if requested, parse extra layers."""
        convert = self.clstype
        self.classifications = [
            convert(option) for option in self.classifications
        ]
        if not self.add_layers:
            return
        # Temporary solution
        self.layers = parse_layers(self.layers)
class QAModelConfig:
    """Configuration for a question-answering model.

    ``__post_init__`` warns about a large ``doc_stride``, selects the torch
    device, seeds via ``set_seed`` and lower-cases ``model_type``.
    """

    model_type: str = field(
        "Model type in the list: " + ", ".join(MODEL_TYPES)
    )
    model_name_or_path: str = field(
        "Path to pretrained model or model identifier from huggingface.co/models",
    )
    output_dir: str = field(
        "The output directory where the model checkpoints and predictions will be written.",
    )
    cache_dir: str = field(
        "Where do you want to store the pre-trained models downloaded from s3",
    )
    log_dir: str = field("Directory used by SummaryWriter for logging")
    tokenizer_name: str = field(
        "Pretrained tokenizer name or path if not the same as model_name",
        default=None,
    )
    from_tf: bool = field(
        "Whether to load model from tensorflow checkpoint or .h5 file",
        default=False,
    )
    config_name: str = field(
        "Pretrained config name or path if not the same as model_name",
        default=None,
    )
    # NOTE(review): annotated as str while the default is the float 0.0 —
    # confirm the intended type.
    null_score_diff_threshold: str = field(
        "If null_score - best_non_null is greater than the threshold predict null.",
        default=0.0,
    )
    max_seq_length: int = field(
        "The maximum total input sequence length after WordPiece tokenization. Sequences longer than this will be truncated, and sequences shorter than this will be padded.",
        default=384,
    )
    doc_stride: int = field(
        "When splitting up a long document into chunks, how much stride to take between chunks.",
        default=128,
    )
    max_query_length: int = field(
        "The maximum number of tokens for the question. Questions longer than this will be truncated to this length",
        default=64,
    )
    do_lower_case: bool = field(
        "Set this flag while using uncased model", default=False
    )
    per_gpu_train_batch_size: int = field(
        "Batch size per GPU/CPU for training", default=8
    )
    per_gpu_eval_batch_size: int = field(
        "Batch size per GPU/CPU for evaluation", default=8
    )
    learning_rate: float = field(
        "The initial learning rate for Adam", default=5e-5
    )
    gradient_accumulation_steps: int = field(
        "Number of updates steps to accumulate before performing a backward/update pass",
        default=1,
    )
    weight_decay: float = field("Weight decay if we apply some.", default=0.0)
    adam_epsilon: float = field("Epsilon for Adam optimizer", default=1e-8)
    max_grad_norm: float = field("Max gradient norm.", default=1.0)
    num_train_epochs: float = field(
        "Total number of training epoches to perform", default=1.0
    )
    max_steps: int = field(
        "If > 0: set total number of training steps to perform. Override num_train_epochs.",
        default=-1,
    )
    warmup_steps: int = field("Linear warmup over warmup_steps.", default=0)
    n_best_size: int = field(
        "The total number of n-best predictions to generate", default=20
    )
    max_answer_length: int = field(
        "The maximum length of an answer that can be generated. This is needed because the start and end predictions are not conditioned on one another.",
        default=30,
    )
    lang_id: int = field(
        "language id of input for language-specific xlm models (see tokenization_xlm.PRETRAINED_INIT_CONFIGURATION)",
        default=0,
    )
    logging_steps: int = field("Log every X updates steps.", default=500)
    save_steps: int = field(
        "Save checkpoint every X update steps", default=500
    )
    no_cuda: bool = field(
        "Whether not to use CUDA when available", default=False
    )
    overwrite_output_dir: bool = field(
        "Overwrite the content of the output directory", default=False
    )
    seed: int = field("random seed for initialization", default=2020)
    local_rank: int = field(
        "local_rank for distributed training on gpus", default=-1
    )
    # NOTE(review): annotated as int while the default is the bool False —
    # confirm the intended type.
    fp16: int = field(
        "Whether to use 16-bit (mixed) precision (through NVIDIA apex) insted of 32-bit",
        default=False,
    )
    fp16_opt_level: str = field(
        "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']. See details at https://nvidia.github.io/apex/amp.html",
        default="O1",
    )
    threads: int = field(
        "Multiple threads for converting example to features", default=1
    )

    def __post_init__(self):
        """Validate the stride, pick the torch device, seed, normalise type."""
        stride_limit = self.max_seq_length - self.max_query_length
        if self.doc_stride >= stride_limit:
            logger.warning(
                "WARNING - You've set a doc stride which may be superior to the document length in some "
                "examples. This could result in errors when building features from the examples. Please reduce the doc "
                "stride or increase the maximum length to ensure the features are correctly built."
            )

        distributed = self.local_rank != -1 and not self.no_cuda
        if distributed:
            # One process per GPU: bind to the local rank and join the group.
            torch.cuda.set_device(self.local_rank)
            device = torch.device("cuda", self.local_rank)
            torch.distributed.init_process_group(backend="nccl")
            self.n_gpu = 1
        else:
            cuda_usable = torch.cuda.is_available() and not self.no_cuda
            device = torch.device("cuda" if cuda_usable else "cpu")
            if self.no_cuda:
                self.n_gpu = 0
            else:
                self.n_gpu = torch.cuda.device_count()
        self.device = device

        set_seed(self.seed, self.n_gpu)
        if self.local_rank not in [-1, 0]:
            torch.distributed.barrier()
        self.model_type = self.model_type.lower()
class CMDTestConfig(CMDConfig):
    """``CMDConfig`` extended with one boolean test field."""

    nope: bool = field(
        "test field",
        default=False,
    )
class PyTorchModelConfig:
    """Configuration fields shared by PyTorch models.

    After initialisation, ``classifications`` entries (when provided) are
    converted with ``clstype``.
    """

    predict: Feature = field("Feature name holding classification value")
    features: Features = field("Features to train on")
    directory: pathlib.Path = field("Directory where state should be saved")
    classifications: List[str] = field(
        "Options for value of classification",
        default=None,
    )
    clstype: Type = field("Data type of classifications values", default=str)
    imageSize: int = field(
        "Common size for all images to resize and crop to",
        default=None,
    )
    enableGPU: bool = field("Utilize GPUs for processing", default=False)
    epochs: int = field(
        "Number of iterations to pass over all records in a source",
        default=20,
    )
    batch_size: int = field("Batch size", default=32)
    validation_split: float = field(
        "Split training data for Validation",
        default=0.0,
    )
    patience: int = field(
        "Early stops the training if validation loss doesn't improve after a given patience",
        default=5,
    )
    loss: PyTorchLoss = field(
        "Loss Functions available in PyTorch",
        default=CrossEntropyLossFunction,
    )
    optimizer: str = field(
        "Optimizer Algorithms available in PyTorch",
        default="SGD",
    )
    normalize_mean: List[float] = field(
        "Mean values for normalizing Tensor image",
        default=None,
    )
    normalize_std: List[float] = field(
        "Standard Deviation values for normalizing Tensor image",
        default=None,
    )

    def __post_init__(self):
        """Convert classification options to ``clstype`` when they are set."""
        if self.classifications is None:
            return
        convert = self.clstype
        self.classifications = [
            convert(option) for option in self.classifications
        ]
( "scikitac", "AgglomerativeClustering", AgglomerativeClustering, applicable_features, ), ("scikitoptics", "OPTICS", OPTICS, applicable_features), ]: estimator_type = cls._estimator_type config_fields = dict() if estimator_type in supervised_estimators: parentContext = ScikitContext parentModel = Scikit config_fields["predict"] = ( Feature, field("Label or the value to be predicted"), ) elif estimator_type in unsupervised_estimators: parentContext = ScikitContextUnsprvised parentModel = ScikitUnsprvised config_fields["tcluster"] = ( Feature, field( "True cluster label for evaluating clustering models", default=None, ), ) dffml_config_properties = { **{ "directory": ( pathlib.Path,
("scikitbirch", "Birch", Birch), ("scikitmbkmeans", "MiniBatchKMeans", MiniBatchKMeans), ("scikitap", "AffinityPropagation", AffinityPropagation), ("scikims", "MeanShift", MeanShift), ("scikitsc", "SpectralClustering", SpectralClustering), ("scikitac", "AgglomerativeClustering", AgglomerativeClustering), ("scikitoptics", "OPTICS", OPTICS), ]: estimator_type = cls._estimator_type config_fields = dict() if estimator_type in supervised_estimators: parentContext = ScikitContext parentModel = Scikit config_fields["predict"] = ( Feature, field("Label or the value to be predicted"), ) elif estimator_type in unsupervised_estimators: parentContext = ScikitContextUnsprvised parentModel = ScikitUnsprvised config_fields["tcluster"] = ( Feature, field( "True cluster label for evaluating clustering models", default=None, ), ) dffml_config_properties = { **{ "directory": ( pathlib.Path,
class PyTorchNeuralNetworkConfig(PyTorchModelConfig):
    """``PyTorchModelConfig`` extended with the network to use."""

    network: Network = field(
        "Model",
        default=None,
    )
), ("scikitridge", "Ridge", Ridge, applicable_features), ("scikitlars", "Lars", Lars, applicable_features), ]: dffml_config = mkscikit_config_cls( name + "ModelConfig", cls, properties={ "directory": ( str, field( "Directory where state should be saved", default=os.path.join( os.path.expanduser("~"), ".cache", "dffml", f"scikit-{entry_point_name}", ), ), ), "predict": (str, field("Label or the value to be predicted")), "features": (Features, field("Features to train on")), }, ) dffml_cls_ctx = type( name + "ModelContext", (ScikitContext, ), {"applicable_features": applicable_features_function}, )
class FakeSubCMDConfig:
    """Configuration with a single string test field and no default."""

    test: str = field(
        "test field",
    )
class CreateTLSServerConfig(TLSCMDConfig):
    """``TLSCMDConfig`` extended with the key size in bits."""

    bits: int = field("Number of bits to use for key", default=4096)
class MiscServicesConfig:
    """Configuration with a single required integer field."""

    # The description was an f-string without placeholders; a plain literal
    # yields the identical text.
    integer: int = field(
        "Port to do nothing with",
        default=0,
        required=True,
    )