def fit_cnn(train_data: InputData, model, epochs: int = 10,
            batch_size: int = 128, optimizer_params: dict = None,
            logger: Log = None):
    """Fit a Keras CNN classifier on the given training data.

    :param train_data: InputData with image features and class targets
    :param model: Keras model to compile and train
    :param epochs: number of training epochs; falls back to 10 when None
    :param batch_size: mini-batch size for training
    :param optimizer_params: kwargs for ``model.compile``; defaults used when None
    :param logger: project Log object; a default one is created when None
    :return: the fitted model
    """
    x_train, y_train = train_data.features, train_data.target
    transformed_x_train, transform_flag = check_input_array(x_train)

    if logger is None:
        logger = default_log(__name__)

    if transform_flag:
        logger.warn('Train data set was not scaled. The data was divided by 255.')

    if len(x_train.shape) < 4:
        # Add the trailing channel axis to the *scaled* array.
        # Fix: the original expanded the raw x_train, silently discarding
        # the division by 255 performed inside check_input_array.
        transformed_x_train = np.expand_dims(transformed_x_train, -1)

    # One-hot encode the integer class labels for categorical crossentropy
    le = preprocessing.OneHotEncoder()
    y_train = le.fit_transform(y_train.reshape(-1, 1)).toarray()

    if optimizer_params is None:
        optimizer_params = {'loss': "categorical_crossentropy",
                            'optimizer': "adam",
                            'metrics': ["accuracy"]}
    model.compile(**optimizer_params)

    # Keep Keras quiet unless the logger is verbose enough
    verbose = 0 if logger.verbosity_level < 4 else 2

    if epochs is None:
        logger.warn('The number of training epochs was not set. The selected number of epochs is 10.')
        # Fix: the original only warned but left epochs as None,
        # which would make model.fit fail; apply the promised default.
        epochs = 10

    model.fit(transformed_x_train, y_train, batch_size=batch_size,
              epochs=epochs, validation_split=0.1, verbose=verbose)
    return model
def predict_cnn(trained_model, predict_data: InputData,
                output_mode: str = 'labels', logger=None) -> OutputData:
    """Predict with a fitted Keras CNN classifier.

    :param trained_model: fitted Keras model
    :param predict_data: InputData with image features to predict on
    :param output_mode: 'labels' for class labels, or one of
        'probs' / 'full_probs' / 'default' for class probabilities
    :param logger: project Log object; a default one is created when None
    :raises NotImplementedError: if the data contains only one target class
    :raises ValueError: if output_mode is not supported
    :return: predictions (labels or probabilities)
    """
    x_test, y_test = predict_data.features, predict_data.target
    transformed_x_test, transform_flag = check_input_array(x_test)

    if logger is None:
        logger = default_log(__name__)

    if np.max(transformed_x_test) > 1:
        logger.warn('Test data set was not scaled. The data was divided by 255.')

    if len(x_test.shape) < 4:
        # Fix: expand the *scaled* array (the original expanded raw x_test,
        # discarding the scaling), and guard by rank as fit_cnn does so an
        # already-4D batch is not pushed to 5 dimensions.
        transformed_x_test = np.expand_dims(transformed_x_test, -1)

    if output_mode == 'labels':
        prediction = trained_model.predict(transformed_x_test)
    elif output_mode in ['probs', 'full_probs', 'default']:
        prediction = trained_model.predict_proba(transformed_x_test)
        if predict_data.num_classes < 2:
            logger.error('Data set contain only 1 target class. Please reformat your data.')
            raise NotImplementedError()
        elif predict_data.num_classes == 2 and output_mode != 'full_probs':
            # Binary case: keep only the positive-class probability column
            prediction = prediction[:, 1]
    else:
        raise ValueError(f'Output model {output_mode} is not supported')
    return prediction
def test_classification_models_fit_correct(data_fixture, request):
    """Every suitable ml classification model should fit and score well."""
    data = request.getfixturevalue(data_fixture)
    train_data, test_data = train_test_data_setup(data=data)
    roc_threshold = 0.95
    logger = default_log('default_test_logger')

    with OperationTypesRepository() as repo:
        model_names, _ = repo.suitable_operation(
            task_type=TaskTypesEnum.classification,
            data_type=data.data_type, tags=['ml'])

    for model_name in model_names:
        logger.info(f"Test classification model: {model_name}.")
        model = Model(operation_type=model_name)

        fitted_operation, train_predicted = model.fit(data=train_data)
        test_pred = model.predict(fitted_operation=fitted_operation,
                                  data=test_data,
                                  is_fit_pipeline_stage=False)
        roc_on_test = get_roc_auc(valid_data=test_data,
                                  predicted_data=test_pred)
        # Naive Bayes variants are allowed a weaker (better-than-random) bar
        if model_name in ['bernb', 'multinb']:
            assert roc_on_test >= 0.5
        else:
            assert roc_on_test >= roc_threshold
def __init__(self, nodes_from: Optional[List['Node']],
             operation_type: Optional[Union[str, 'Operation']] = None,
             log: Log = None, **kwargs):
    """Create a node, resolving its operation from a name or a ready object.

    :param nodes_from: parent nodes, or None for a primary node
    :param operation_type: operation name or an Operation instance;
        may alternatively be passed via the 'content' keyword argument
    :param log: project Log object; a default one is created when falsy
    :raises ValueError: if no operation is supplied at all
    """
    passed_content = kwargs.get('content')
    if passed_content:
        operation_type = passed_content

    if not operation_type:
        raise ValueError('Operation is not defined in the node')

    if isinstance(operation_type, str):
        # Resolve the name into an operation via the factory
        operation = OperationFactory(operation_name=operation_type).get_operation()
    else:
        # Already an operation object (e.g. AtomizedModel)
        operation = operation_type

    super().__init__(content=operation, nodes_from=nodes_from)

    self.log = log if log else default_log(__name__)
    self._fitted_operation = None
    self.rating = None
def test_edge_mutation_for_graph():
    """ Tests edge mutation can add edge between nodes """
    graph_without_edge = OptGraph(
        OptNode('logit', [OptNode('one_hot_encoding', [OptNode('scaling')])]))

    primary = OptNode('scaling')
    graph_with_edge = OptGraph(
        OptNode('logit', [OptNode('one_hot_encoding', [primary]), primary]))

    composer_requirements = GPComposerRequirements(
        primary=['scaling', 'one_hot_encoding'],
        secondary=['logit', 'scaling'],
        mutation_prob=1)
    graph_params = GraphGenerationParams(adapter=DirectAdapter(),
                                         rules_for_constraint=DEFAULT_DAG_RULES)

    successful_mutation_edge = False
    # Mutation is stochastic: retry until the expected graph appears
    for _ in range(100):
        mutated = mutation(types=[MutationTypesEnum.single_edge],
                           params=graph_params,
                           ind=Individual(graph_without_edge),
                           requirements=composer_requirements,
                           log=default_log(__name__),
                           max_depth=graph_with_edge.depth).graph
        if successful_mutation_edge:
            break
        successful_mutation_edge = (mutated.root_node.descriptive_id ==
                                    graph_with_edge.root_node.descriptive_id)
    assert successful_mutation_edge
def test_intermediate_add_mutation_for_linear_graph():
    """ Tests single_add mutation can add node between two existing nodes """
    linear_two_nodes = OptGraph(OptNode('logit', [OptNode('scaling')]))
    linear_three_nodes_inner = OptGraph(
        OptNode('logit', [OptNode('one_hot_encoding', [OptNode('scaling')])]))

    composer_requirements = GPComposerRequirements(primary=['scaling'],
                                                  secondary=['one_hot_encoding'],
                                                  mutation_prob=1)
    graph_params = GraphGenerationParams(adapter=DirectAdapter(),
                                         rules_for_constraint=DEFAULT_DAG_RULES)

    successful_mutation_inner = False
    # Mutation is stochastic: retry until the expected graph appears
    for _ in range(100):
        mutated = mutation(types=[MutationTypesEnum.single_add],
                           params=graph_params,
                           ind=Individual(linear_two_nodes),
                           requirements=composer_requirements,
                           log=default_log(__name__),
                           max_depth=3).graph
        if successful_mutation_inner:
            break
        successful_mutation_inner = (mutated.root_node.descriptive_id ==
                                     linear_three_nodes_inner.root_node.descriptive_id)
    assert successful_mutation_inner
def test_drop_mutation_for_linear_graph():
    """ Tests single_drop mutation can remove node """
    linear_two_nodes = OptGraph(OptNode('logit', [OptNode('scaling')]))
    linear_one_node = OptGraph(OptNode('logit'))

    composer_requirements = GPComposerRequirements(primary=['scaling'],
                                                  secondary=['logit'],
                                                  mutation_prob=1)
    graph_params = GraphGenerationParams(adapter=DirectAdapter(),
                                         rules_for_constraint=DEFAULT_DAG_RULES)

    successful_mutation_drop = False
    # Mutation is stochastic: retry until the expected graph appears
    for _ in range(100):
        mutated = mutation(types=[MutationTypesEnum.single_drop],
                           params=graph_params,
                           ind=Individual(linear_two_nodes),
                           requirements=composer_requirements,
                           log=default_log(__name__),
                           max_depth=2).graph
        if successful_mutation_drop:
            break
        successful_mutation_drop = (mutated.root_node.descriptive_id ==
                                    linear_one_node.root_node.descriptive_id)
    assert successful_mutation_drop
def __init__(self, timeout: datetime.timedelta = None, log: Log = None):
    """Store the timeout and set up logging.

    :param timeout: maximal allowed duration, or None for no limit
    :param log: project Log object; a default one is created when falsy
    """
    self.process_terminated = False
    self.log = log if log else default_log(__name__)
    self.timeout = timeout
def __init__(self, log: Log = None):
    """Initialise empty model metadata and set up logging.

    :param log: project Log object; a default one is created when falsy
    """
    # Filled in later by the caller
    self.model_id = None
    self.model_type = None
    self.nodes_from = None
    self.log = log if log else default_log(__name__)
def __init__(self, max_lead_time: datetime.timedelta = None, log: Log = None):
    """Store the lead-time limit and set up logging.

    :param max_lead_time: maximal allowed duration, or None for no limit
    :param log: project Log object; a default one is created when falsy
    """
    self.process_terminated = False
    self.log = log if log else default_log(__name__)
    self.max_lead_time = max_lead_time
def __init__(self, log: Log = None):
    """Prepare the composing-history directory and image buffers.

    :param log: project Log object; a default one is created when None.
        Fix: the original default was ``default_log(__name__)`` *called in
        the signature*, i.e. evaluated once at import time and shared by
        all instances; a None sentinel gives the same logger per call.
    """
    if log is None:
        log = default_log(__name__)

    default_data_dir = default_fedot_data_dir()
    self.temp_path = os.path.join(default_data_dir, 'composing_history')
    # Fix: listdir-then-mkdir was racy (TOCTOU); makedirs is idempotent
    os.makedirs(self.temp_path, exist_ok=True)

    self.log = log
    self.chains_imgs = []
    self.convergence_imgs = []
    self.best_chains_imgs = []
    self.merged_imgs = []
def __init__(self, model_type: str, log: Log = None):
    """Initialise the model wrapper.

    :param model_type: name of the model type from the repository
    :param log: project Log object; a default one is created when falsy
    """
    self.model_type = model_type
    self._eval_strategy, self._data_preprocessing = None, None
    self.params = DEFAULT_PARAMS_STUB
    # Fix: the original assigned self.log = log and then immediately
    # overwrote it in an if/else — the dead store is removed.
    self.log = log if log else default_log(__name__)
def __init__(self, log: Log = None, **params: Optional[dict]):
    """Store implementation parameters and set up logging.

    :param log: project Log object; a default one is created when falsy
    :param params: arbitrary implementation parameters, kept as-is
    """
    super().__init__()
    self.params = params
    self.model = None
    self.log = log if log else default_log(__name__)
def __init__(self, log: Log = None):
    """Prepare the composing-history directory, image buffers and visualiser.

    :param log: project Log object; a default one is created when None.
        Fix: the original default was ``default_log(__name__)`` *called in
        the signature*, i.e. evaluated once at import time and shared by
        all instances; a None sentinel gives the same logger per call.
    """
    if log is None:
        log = default_log(__name__)

    default_data_dir = default_fedot_data_dir()
    self.temp_path = os.path.join(default_data_dir, 'composing_history')
    # Fix: listdir-then-mkdir was racy (TOCTOU); makedirs is idempotent
    os.makedirs(self.temp_path, exist_ok=True)

    self.log = log
    self.pipelines_imgs = []
    self.convergence_imgs = []
    self.best_pipelines_imgs = []
    self.merged_imgs = []
    self.graph_visualizer = GraphVisualiser(log=log)
def __init__(self, chain=None, log: Log = None):
    """Build a serialisable template from a chain.

    :param chain: chain to convert into a template
        NOTE(review): despite the None default, ``chain.depth`` below will
        raise AttributeError when chain is None — confirm callers always
        pass a chain.
    :param log: project Log object; a default one is created when falsy
    """
    self.total_chain_models = Counter()
    self.depth = chain.depth
    self.model_templates = []
    # Random identifier so distinct templates never collide
    self.unique_chain_id = str(uuid4())

    self.log = log if log else default_log(__name__)

    self._chain_to_template(chain)
def configure_experiment():
    """ Generates a time series of 100 elements. The prediction is performed
    for five elements ahead """
    forecast_len = 5
    # Default number of validation blocks
    validation_blocks = 3

    time_series, _ = get_ts_data(n_steps=105, forecast_length=forecast_len)
    log = default_log(__name__)

    return log, forecast_len, validation_blocks, time_series
def __init__(self, operation_type: str, log: Log = None):
    """Initialise the operation wrapper.

    :param operation_type: name of the operation from the repository
    :param log: project Log object; a default one is created when falsy
    """
    self.operation_type = operation_type
    self._eval_strategy = None
    self.operations_repo = None
    self.params = DEFAULT_PARAMS_STUB
    # Fix: the original assigned self.log = log and then immediately
    # overwrote it in an if/else — the dead store is removed.
    self.log = log if log else default_log(__name__)
def __init__(self, initial_chain, requirements, chain_generation_params,
             metrics: List[MetricsEnum],
             parameters: Optional[GPChainOptimiserParameters] = None,
             log: Log = None, archive_type=None):
    """Set up the GP optimiser: population, depth limits and history.

    :param initial_chain: a chain (cloned pop_size times) or a ready list
        of chains used as the initial population, or a falsy value
    :param requirements: composer requirements (depths, pop_size, ...)
    :param chain_generation_params: factory params (node funcs, chain class)
    :param metrics: quality metrics to track in the composing history
    :param parameters: optimiser parameters; defaults created when None
    :param log: project Log object; a default one is created when falsy
    :param archive_type: archive for elite individuals
    :raises AttributeError: if chain_class lacks the required graph API
    """
    self.chain_generation_params = chain_generation_params
    self.primary_node_func = self.chain_generation_params.primary_node_func
    self.secondary_node_func = self.chain_generation_params.secondary_node_func
    self.chain_class = self.chain_generation_params.chain_class
    self.requirements = requirements
    self.archive = archive_type
    self.parameters = GPChainOptimiserParameters() if parameters is None else parameters

    # With auto depth configuration the search starts shallow and grows
    self.max_depth = self.requirements.start_depth \
        if self.parameters.with_auto_depth_configuration and self.requirements.start_depth \
        else self.requirements.max_depth
    self.generation_num = 0
    self.num_of_gens_without_improvements = 0

    self.log = log if log else default_log(__name__)

    generation_depth = self.max_depth if self.requirements.start_depth is None \
        else self.requirements.start_depth
    self.chain_generation_function = partial(
        random_chain,
        chain_generation_params=self.chain_generation_params,
        requirements=self.requirements,
        max_depth=generation_depth)

    # The chain class must expose the minimal graph-editing API
    necessary_attrs = ['add_node', 'root_node', 'update_node']
    if not all(hasattr(self.chain_class, attr) for attr in necessary_attrs):
        # Fix: dropped the pointless f-prefix from a placeholder-free string
        ex = 'Object chain_class has no required attributes for gp_optimizer'
        self.log.error(ex)
        raise AttributeError(ex)

    if not self.requirements.pop_size:
        self.requirements.pop_size = 10

    # Fix: isinstance instead of type(...) != list
    if initial_chain and not isinstance(initial_chain, list):
        self.population = [deepcopy(initial_chain)
                           for _ in range(self.requirements.pop_size)]
    else:
        self.population = initial_chain

    self.history = ComposingHistory(metrics)
def test_node_analysis_init_defined_approaches_and_log():
    """NodeAnalysis should keep both the approaches and the supplied log."""
    # given
    approaches = [NodeDeletionAnalyze, NodeReplaceOperationAnalyze]
    test_log_object = default_log('test_log_node_sa')

    # when
    node_analyzer = NodeAnalysis(approaches=approaches, log=test_log_object)

    # then
    assert isinstance(node_analyzer, NodeAnalysis)
    assert len(node_analyzer.approaches) == 2
    assert node_analyzer.log is test_log_object
def __init__(self, log: Log = None, **params: Optional[dict]):
    """Initialise transform settings (filled in later) and logging.

    :param log: project Log object; a default one is created when falsy
    :param params: implementation parameters (accepted but not stored here)
    """
    super().__init__()
    # Configured later, before the transform is applied
    self.window_size = None
    self.n_components = None
    self.gain_tolerance = None
    self.sparse_transform = False
    self.log = log if log else default_log(__name__)
def __init__(self, optimiser=None,
             composer_requirements: Optional[ComposerRequirements] = None,
             metrics: Union[List[MetricsEnum], MetricsEnum] = None,
             initial_pipeline: Optional[Pipeline] = None,
             logger: Log = None):
    """Store composer configuration and set up logging.

    :param optimiser: optimiser instance (accepted but not stored here)
    :param composer_requirements: composing constraints and limits
    :param metrics: one metric or a list of metrics to optimise
    :param initial_pipeline: pipeline to start composing from, if any
    :param logger: project Log object; a default one is created when falsy
    """
    self.metrics = metrics
    self.composer_requirements = composer_requirements
    self.initial_pipeline = initial_pipeline
    self.log = logger if logger else default_log(__name__)
def __init__(self, chain=None, log: Log = None):
    """Build a serialisable template from a chain.

    :param chain: chain to convert into a template
        NOTE(review): despite the None default, ``chain.depth`` below will
        raise AttributeError when chain is None — confirm callers always
        pass a chain.
    :param log: project Log object; a default one is created when falsy
    """
    self.total_chain_operations = Counter()
    self.depth = chain.depth
    self.operation_templates = []
    # Random identifier so distinct templates never collide
    self.unique_chain_id = str(uuid4())
    self.computation_time = chain.computation_time

    self.log = log if log else default_log(__name__)

    self._chain_to_template(chain)
def __init__(self, operation_type: str, params: Optional[dict] = None, log=None):
    """Store the operation description and set up logging.

    :param operation_type: name of the operation from the repository
    :param params: parameters used when fitting the operation, if any
    :param log: project Log object; a default one is created when falsy
    """
    self.params_for_fit = params
    self.operation_type = operation_type
    self.output_mode = False
    self.log: Log = log if log else default_log(__name__)
def __init__(self, content: str = '',
             nodes_from: Optional[List['OptNode']] = None,
             log: Optional[Log] = None):
    """Create an optimisation-graph node.

    :param content: the node's payload (e.g. operation name)
    :param nodes_from: parent nodes; normalised to an empty list when None
    :param log: project Log object; a default one is created when falsy
    """
    # Fix: the original assigned self.log = log and then immediately
    # overwrote it in an if/else — the dead store is removed.
    self.log = log if log else default_log(__name__)
    self.nodes_from = nodes_from if nodes_from is not None else []
    self.content = content
    self._operator = NodeOperator(self)
def test_pipeline_sensitivity_facade_init():
    """The sensitivity facade should construct from prepared fixtures."""
    # given
    pipeline, train_data, test_data, node_to_analyze, result_dir = given_data()
    test_log_object = default_log('test_log_pipeline_sa')

    # when
    sensitivity_facade = PipelineSensitivityAnalysis(
        pipeline=pipeline,
        train_data=train_data,
        test_data=test_data,
        nodes_to_analyze=[node_to_analyze],
        path_to_save=result_dir,
        log=test_log_object)

    # then
    assert type(sensitivity_facade) is PipelineSensitivityAnalysis
def __init__(self, log: Log = None, **params: Optional[dict]):
    """Initialise the windowed transform.

    :param log: project Log object; a default one is created when falsy
    :param params: implementation parameters; only 'window_size' is read
    """
    super().__init__()
    if not params:
        # Default window size when no parameters are supplied
        self.window_size = 10
    else:
        # Fix: params.get('window_size') returned None when the key was
        # absent, making round(None) raise TypeError; fall back to the
        # same default of 10 instead.
        self.window_size = int(round(params.get('window_size', 10)))

    self.log = log if log else default_log(__name__)
def __init__(self, nodes: Optional[Union[Node, List[Node]]] = None,
             log: Log = None):
    """Create a chain from the given node(s).

    :param nodes: a single node or list of nodes forming the chain
    :param log: project Log object; a default one is created when falsy
    """
    self.computation_time = None
    self.template = None
    # Maps data identifiers to fit results for cache checks
    self.fitted_on_data = {}
    # Fix: the original assigned self.log = log and then immediately
    # overwrote it in an if/else — the dead store is removed.
    self.log = log if log else default_log(__name__)
    super().__init__(nodes)
def test_chain_structure_analyze_init_log_defined():
    """ChainStructureAnalyze should accept an explicitly supplied log."""
    # given
    chain, train_data, test_data, node_ids, _ = given_data()
    approaches = [NodeDeletionAnalyze]
    test_log_object = default_log('test_log_chain_sa')

    # when
    chain_analyzer = ChainStructureAnalyze(
        chain=chain,
        train_data=train_data,
        test_data=test_data,
        approaches=approaches,
        nodes_ids_to_analyze=[node_ids],
        log=test_log_object)

    # then
    assert isinstance(chain_analyzer, ChainStructureAnalyze)
def run_log_example(log_file_name):
    """Demonstrate chain fitting with a custom file logger.

    :param log_file_name: name of the log file created next to this script
    """
    train_file_path, test_file_path = get_scoring_case_data_paths()
    # Fix: os.path.dirname(__name__) took the dirname of the module *name*
    # string (usually '' for '__main__'), not of a path; __file__ gives
    # the script's actual location.
    current_path = os.path.dirname(__file__)
    train_data, test_data = get_case_train_test_data()

    # Use default_log if you do not have json config file for log
    log = default_log('chain_log',
                      log_file=os.path.join(current_path, log_file_name))

    log.info('start creating chain')
    chain = get_simple_chain(log=log)

    log.info('start fitting chain')
    chain.fit(train_data, use_cache=False)
def __init__(self, metrics: Union[List[MetricsEnum], MetricsEnum],
             composer_requirements: ComposerRequirements,
             optimiser_parameters: Any = None,
             initial_chain: Optional[Chain] = None,
             log: Log = None):
    """Store composer configuration and set up logging.

    :param metrics: one metric or a list of metrics to optimise
    :param composer_requirements: composing constraints and limits
    :param optimiser_parameters: parameters forwarded to the optimiser
    :param initial_chain: chain to start composing from, if any
    :param log: project Log object; a default one is created when falsy
    """
    self.metrics = metrics
    self.composer_requirements = composer_requirements
    self.optimiser_parameters = optimiser_parameters
    self.initial_chain = initial_chain
    self.log = log if log else default_log(__name__)