def __init__(self, parameters, named_inputs, named_outputs):
    """Keras ActivityRegularization layer wrapper (code generation).

    Requires both the L1 and L2 regularization factors; wires the layer
    into its parents via remove_python_code_parent()/treatment(), so the
    statement order below matters.

    Raises:
        ValueError: if either the L1 or the L2 parameter is absent.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    # Name of the variable that will hold this task's output in the
    # generated code.
    self.output = named_outputs.get('output data',
                                    'out_task_{}'.format(self.order))
    # Both factors are mandatory, even though defaults are used below.
    if self.L1_PARAM not in parameters or self.L2_PARAM not in parameters:
        raise ValueError(
            gettext('Parameters {l1} and {l2} are required.').format(
                l1=self.L1_PARAM, l2=self.L2_PARAM))

    self.l1 = float(parameters.get(self.L1_PARAM, 0.0))
    self.l2 = float(parameters.get(self.L2_PARAM, 0.0))

    self.task_name = self.parameters.get('task').get('name')
    self.parent = ""
    self.var_name = ""
    self.has_code = True

    # Parent/port bookkeeping is set before treatment() runs; treatment()
    # presumably consumes it — confirm before reordering.
    self.parents_by_port = parameters.get('my_ports', [])
    self.python_code_to_remove = self.remove_python_code_parent()
    self.treatment()

    # Import table consumed by the code generator.
    self.import_code = {'layer': 'ActivityRegularization',
                        'callbacks': [],
                        'model': None,
                        'preprocessing_image': None,
                        'others': None}
def __init__(self, parameters, named_inputs, named_outputs):
    """Frequent-itemset mining operation setup.

    Validates the minimum support and the transaction attribute, and
    resolves the variable names used by the generated code.

    Raises:
        ValueError: if min support is missing or out of range, or the
            attribute parameter is absent/empty.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    if self.MIN_SUPPORT_PARAM not in parameters:
        raise ValueError(_(
            'Support must be informed for classifier {}').format(
            self.__class__))

    self.min_support = float(parameters.get(self.MIN_SUPPORT_PARAM))
    if self.min_support < .0001 or self.min_support > 1.0:
        # FIX: the accepted range is [0.0001, 1.0] inclusive (only values
        # above 1.0 are rejected); the old message wrongly claimed the
        # value had to be smaller than 1.0.
        raise ValueError('Support must be greater or equal '
                         'to 0.0001 and smaller than or equal to 1.0')

    self.output = self.named_outputs.get(
        'output data', 'freq_items_{}'.format(self.order))
    self.rules = self.named_outputs.get(
        'rules output', 'rules_{}'.format(self.order))

    # Attribute arrives as a single-element list.
    self.attribute = parameters.get(self.ATTRIBUTE_PARAM)
    if not self.attribute:
        raise ValueError(
            'Missing parameter {}'.format(self.ATTRIBUTE_PARAM))
    self.attribute = self.attribute[0]

    self.has_code = len(self.named_inputs) == 1
    self.confidence = float(parameters.get(self.CONFIDENCE_PARAM, 0.9))
def __init__(self, parameters, named_inputs, named_outputs):
    """Association-rules generator operation setup.

    Raises:
        ValueError: when the confidence is outside [0.0001, 1.0].
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.has_code = all([
        len(named_inputs) == 1,
        self.contains_results() or len(named_outputs) > 0
    ])
    if self.has_code:
        self.output = named_outputs.get(
            'output data', 'output_data_{}'.format(self.order))
        self.confidence = float(parameters.get(self.CONFIDENCE_PARAM, 0.5))
        if self.confidence < .0001 or self.confidence > 1.0:
            # FIX: 1.0 is accepted (only values above 1.0 are rejected);
            # the old message wrongly said "smaller than 1.0".
            raise ValueError('Confidence must be greater or equal '
                             'to 0.0001 and smaller than or equal to 1.0')
        self.has_import = \
            "from juicer.scikit_learn.library.rules_generator " \
            "import RulesGenerator\n"
        # Column names come as single-element lists.
        self.support_col = \
            parameters.get(self.SUPPORT_ATTR_PARAM,
                           [self.SUPPORT_ATTR_PARAM_VALUE])[0]
        self.items_col = parameters.get(self.ITEMSET_ATTR_PARAM, [''])[0]
        # -1 means "no limit"; empty/zero also maps to -1 via `or`.
        self.max_rules = parameters.get(self.MAX_COUNT_PARAM, -1) or -1
def __init__(self, parameters, named_inputs, named_outputs):
    """Configure a StandardScaler transformation from task parameters.

    The mandatory attribute list names the columns to scale; optional
    flags select mean-centering and unit-variance scaling.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.has_code = len(named_inputs) > 0 and any(
        [len(self.named_outputs) > 0, self.contains_results()])
    if self.has_code:
        # Flags arrive from the UI as '1'/1/True; anything else is False.
        truthy = ['1', 1, True]
        self.with_mean = parameters.get(self.WITH_MEAN_PARAM,
                                        False) in truthy
        self.with_std = parameters.get(self.WITH_STD_PARAM,
                                       True) in truthy

        self.output = self.named_outputs.get(
            'output data', 'output_data_{}'.format(self.order))
        self.model = named_outputs.get('transformation model',
                                       'model_{}'.format(self.order))

        if self.ATTRIBUTE_PARAM not in self.parameters:
            raise ValueError(
                _("Parameters '{}' must be informed for task {}").format(
                    self.ATTRIBUTE_PARAM, self.__class__.__name__))
        self.attribute = parameters[self.ATTRIBUTE_PARAM]
        self.alias = parameters.get(self.ALIAS_PARAM,
                                    'scaled_{}'.format(self.order))
        self.has_import = \
            "from sklearn.preprocessing import StandardScaler\n"
def __init__(self, parameters, named_inputs, named_outputs):
    """Approximate nearest-neighbors (LSH) operation setup.

    NOTE(review): the trailing `has_import` triple-quoted string is
    truncated in this view of the file; it is left untouched here.

    Raises:
        ValueError: when the label attribute parameter is absent.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.has_code = True

    # Hyper-parameters, all with defaults when absent.
    self.number_neighbors = int(
        parameters.get(self.NUMBER_NEIGHBORS_ATTRIBUTE_PARAM, 5))
    self.n_estimators = int(
        parameters.get(self.N_ESTIMATORS_ATTRIBUTE_PARAM, 10))
    self.min_hash_match = int(
        parameters.get(self.MIN_HASH_MATCH_ATTRIBUTE_PARAM, 4))
    self.n_candidates = int(parameters.get(self.N_CANDIDATES, 10))
    self.random_state = int(
        parameters.get(self.RANDOM_STATE_ATTRIBUTE_PARAM, 0))
    self.radius = float(parameters.get(self.RADIUS_ATTRIBUTE_PARAM, 1.0))
    self.radius_cutoff_ratio = float(
        parameters.get(self.RADIUS_CUTOFF_RATIO_ATTRIBUTE_PARAM, 0.9))

    # The label attribute is mandatory.
    if not all([self.LABEL_PARAM in parameters]):
        msg = _("Parameters '{}' must be informed for task {}")
        raise ValueError(
            msg.format(self.LABEL_PARAM, self.__class__.__name__))
    self.label = parameters[self.LABEL_PARAM]

    self.model = self.named_outputs.get('model',
                                        'model_{}'.format(self.order))
    self.output = self.named_outputs.get('output data',
                                         'out_task_{}'.format(self.order))

    # Derives/normalizes additional attributes from the ones set above —
    # keep this call after the assignments.
    self.input_treatment()

    self.has_import = \
        """
def __init__(self, parameters, named_inputs, named_outputs):
    """Apply-model operation: binds a trained model to input data.

    Raises:
        ValueError: when label/features are missing, or when the two
            required inputs (train data and algorithm) are not connected.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    # FIX: both parameters are mandatory, so the absence of EITHER one is
    # an error. The original used `and`, which only raised when both were
    # missing and crashed below with a bare KeyError when just one was.
    if 'label' not in parameters or 'features' not in parameters:
        raise ValueError(
            _("Parameters '{}' and '{}' must be informed for task {}").
            format('label', 'features', self.__class__))
    # Both arrive as single-element lists.
    self.label = parameters['label'][0]
    self.features = parameters['features'][0]
    self.predCol = parameters.get('prediction', 'prediction')

    self.has_code = len(self.named_inputs) == 2
    self.has_import = ""
    if not self.has_code:
        raise ValueError(
            _("Parameters '{}' and '{}' must be informed for task {}").
            format('train input data', 'algorithm', self.__class__))

    self.model = self.named_outputs.get('model',
                                        'model_tmp{}'.format(self.order))

    # A transformed dataset is only emitted when an output port is used.
    self.perform_transformation = 'output data' in self.named_outputs
    if not self.perform_transformation:
        self.output = 'task_{}'.format(self.order)
    else:
        self.output = self.named_outputs['output data']
    self.prediction = self.parameters.get('prediction', 'prediction')
def __init__(self, parameters, named_inputs, named_outputs):
    """Local Outlier Factor operation setup.

    NOTE(review): the trailing `has_import` triple-quoted string is
    truncated in this view of the file; it is left untouched here.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.has_code = True
    self.output = self.named_outputs.get(
        'output data', 'output_data_{}'.format(self.order))

    # Hyper-parameters, all with defaults when absent.
    self.number_neighbors = int(
        parameters.get(self.NUMBER_NEIGHBORS_ATTRIBUTE_PARAM, 20))
    self.algorithm = parameters.get(self.ALGORITHM_ATTRIBUTE_PARAM, "auto")
    self.leaf_size = int(parameters.get(self.LEAF_SIZE_ATTRIBUTE_PARAM,
                                        30))
    self.metric = parameters.get(self.METRIC_ATTRIBUTE_PARAM, "minkowski")
    self.contamination = float(
        parameters.get(self.CONTAMINATION_ATTRIBUTE_PARAM, 0.22))
    self.p = int(parameters.get(self.P_ATTRIBUTE_PARAM, 2))
    self.metric_params = parameters.get(self.METRIC_PARAMS_ATTRIBUTE_PARAM,
                                        None)
    self.novelty = int(parameters.get(self.NOVELTY_ATTRIBUTE_PARAM, 0))
    self.n_jobs = int(parameters.get(self.N_JOBS_ATTRIBUTE_PARAM, 0))

    # 'features' is required here (raises KeyError if absent).
    self.features = parameters['features']
    self.outlier = self.parameters.get(self.OUTLIER_PARAM, 'outlier')

    # Derives/normalizes additional attributes from the ones set above —
    # keep this call after the assignments.
    self.input_treatment()

    self.has_import = \
        """
def __init__(self, parameters, named_inputs, named_outputs):
    """Configure an ST-DBSCAN (spatio-temporal clustering) operation.

    Validates the latitude, longitude and datetime columns and
    normalizes the clustering thresholds (always non-negative).
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.has_code = len(named_inputs) == 1 and any(
        [self.contains_results(), len(named_outputs) > 0])
    if self.has_code:
        required = (self.LAT_PARAM, self.LON_PARAM, self.DATETIME_PARAM)
        if not all(key in parameters for key in required):
            raise ValueError(
                _('Parameters {}, {} and {} must be informed for task {}.')
                .format('Latitude', 'Longitude', 'Datetime',
                        self.__class__))

        self.output = self.named_outputs.get(
            'output data', 'output_data_{}'.format(self.order))

        # Column names come as single-element lists.
        self.lat_col = parameters.get(self.LAT_PARAM)[0]
        self.lon_col = parameters.get(self.LON_PARAM)[0]
        self.datetime_col = parameters.get(self.DATETIME_PARAM)[0]
        self.alias = parameters.get(self.ALIAS_PARAM, 'cluster')

        # Thresholds fall back to defaults when missing/empty and are
        # forced non-negative.
        self.min_pts = abs(int(
            parameters.get(self.MIN_SAMPLE_PARAM, 15) or 15))
        self.spatial_thr = abs(float(
            parameters.get(self.SPA_THRESHOLD_PARAM, 500) or 500))
        self.temporal_thr = abs(int(
            parameters.get(self.TMP_THRESHOLD_PARAM, 60) or 60))

        self.has_import = "from juicer.scikit_learn.library.stdbscan " \
                          "import STDBSCAN\n"
def __init__(self, parameters, named_inputs, named_outputs):
    """Configure a lat/lon re-projection between two CRS definitions."""
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.has_code = len(named_inputs) == 1 and any(
        [self.contains_results(), len(named_outputs) > 0])
    if self.has_code:
        # All four parameters are mandatory.
        for key in (self.SRC_PROJ_PARAM, self.DST_PROJ_PARAM,
                    self.LAT_PARAM, self.LON_PARAM):
            if key not in self.parameters:
                raise ValueError(
                    _('Parameters {} must be informed for task {}.')
                    .format(key, self.__class__))

        self.output = self.named_outputs.get(
            'output data', 'output_data_{}'.format(self.order))

        # Column names come as single-element lists.
        self.lat_col = parameters.get(self.LAT_PARAM)[0]
        self.lon_col = parameters.get(self.LON_PARAM)[0]
        # Aliases default to overwriting the original columns.
        self.lat_alias = parameters.get(self.LAT_ALIAS_PARAM, self.lat_col)
        self.lon_alias = parameters.get(self.LON_ALIAS_PARAM, self.lon_col)
        self.src_prj = parameters.get(self.SRC_PROJ_PARAM)
        self.dst_prj = parameters.get(self.DST_PROJ_PARAM)
def __init__(self, parameters, named_inputs, named_outputs):
    """Attribute-changer operation: renames attributes and/or changes
    their data type.

    Raises:
        ValueError: when the 'attributes' parameter is missing.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    if 'attributes' not in self.parameters:
        self.has_code = False
        raise ValueError(
            _("Parameter '{}' must be informed for task {}").format(
                'attributes', self.__class__))

    self.output = named_outputs.get('output data',
                                    'output_data_{}'.format(self.order))
    self.attributes = parameters['attributes']
    self.name = [
        s.strip() for s in self.parameters.get('new_name', '').split(',')
    ]
    # Adjust alias in order to have the same number of aliases as
    # attributes by filling missing alias with the attribute name
    # suffixed by '_new'.
    # NOTE: ''.split(',') yields [''], so this branch always runs; the
    # empty name then falls back to the '_new' default below.
    if len(self.name) > 0:
        size = len(self.attributes)
        self.name = [
            x[1] or '{}_new'.format(x[0])
            for x in zip_longest(self.attributes, self.name[:size])
        ]
    self.new_type = parameters.get('new_data_type', 'keep')
    self.has_code = len(named_inputs) == 1
    if self.has_code:
        self.has_import = "from functions.data.attributes_changer " \
                          "import AttributesChangerOperation\n"
def __init__(self, parameters, named_inputs, named_outputs):
    """Model-evaluation operation: validates prediction/label/metric
    parameters and resolves the evaluator variable names.

    Raises:
        ValueError: on missing prediction/metric/label parameters, on an
            unknown metric, or when an evaluated-model output is
            requested without the required inputs.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    # Parameters arrive as single-element lists; fall back to ''.
    self.prediction_attribute = (parameters.get(
        self.PREDICTION_ATTRIBUTE_PARAM) or [''])[0]
    self.label_attribute = (parameters.get(
        self.LABEL_ATTRIBUTE_PARAM) or [''])[0]
    self.type_model = parameters.get(self.METRIC_PARAM) or ''

    if any([self.prediction_attribute == '', self.type_model == '']):
        msg = \
            _("Parameters '{}' and '{}' must be informed for task {}")
        raise ValueError(msg.format(
            self.PREDICTION_ATTRIBUTE_PARAM, self.METRIC_PARAM,
            self.__class__))
    # The metric must map to a known evaluator.
    if self.type_model not in self.METRIC_TO_EVALUATOR:
        raise ValueError(_('Invalid metric value {}').format(
            self.type_model))
    if self.label_attribute == '':
        msg = \
            _("Parameters '{}' must be informed for task {}")
        raise ValueError(msg.format(
            self.LABEL_ATTRIBUTE_PARAM, self.__class__))

    # Code is generated when connected to input+output, when the
    # evaluator output is used, when 'input data' is present, or when
    # intermediate results are requested.
    self.has_code = any([(
        (len(self.named_inputs) > 0 and len(self.named_outputs) > 0) or
        (self.named_outputs.get('evaluator') is not None) or
        ('input data' in self.named_inputs)
    ), self.contains_results()])

    self.model = self.named_inputs.get(
        'model', 'model_{}'.format(self.order))
    self.model_out = self.named_outputs.get(
        'evaluated model', 'model_task_{}'.format(self.order))
    self.evaluator_out = self.named_outputs.get(
        'evaluator', 'evaluator_task_{}'.format(self.order))

    if not self.has_code and self.named_outputs.get(
            'evaluated model') is not None:
        raise ValueError(
            _('Model is being used, but at least one input is missing'))

    # Metrics depend on runtime data; never reuse cached results.
    self.supports_cache = False
    self.has_import = "from sklearn.metrics import * \n"
def __init__(self, parameters, named_inputs, named_outputs):
    """Configure a read-from-HDFS operation from task parameters."""
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    if self.FILE not in parameters:
        raise ValueError(
            _("Parameter '{}' must be informed for task {}").format(
                self.FILE, self.__class__))

    self.name_file = "/" + parameters[self.FILE]
    self.separator = parameters.get(self.SEPARATOR, ',')
    # Header flag comes from the UI as 1/'1'/True.
    self.header = parameters.get('header', False) in (1, '1', True)
    self.schema = parameters.get(self.SCHEMA, "FROM_VALUES")
    self.mode = parameters.get(self.MODE_PARAM, 'FAILFAST')
    self.format = parameters.get('format', 'csv')

    # Null markers: comma-separated, trimmed and de-duplicated.
    raw_nulls = parameters.get(self.NULL_VALUES_PARAM, '')
    if raw_nulls == '':
        self.null_values = []
    else:
        self.null_values = list({v.strip() for v in raw_nulls.split(",")})

    self.has_code = len(named_outputs) > 0
    self.has_import = "from functions.data.read_data import "\
                      "ReadOperationHDFS\n"
def __init__(self, parameters, named_inputs, named_outputs):
    """Save-data operation: validates name/format/path and builds the
    absolute target filename.

    Raises:
        ValueError: when name, format or path is missing.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    for att in [self.NAME_PARAM, self.FORMAT_PARAM, self.PATH_PARAM]:
        if att not in parameters:
            raise ValueError(
                _("Parameter '{}' must be informed for task {}").format(
                    att, self.__class__))

    self.name = parameters.get(self.NAME_PARAM)
    self.format = parameters.get(self.FORMAT_PARAM, self.FORMAT_CSV)
    self.path = parameters.get(self.PATH_PARAM, '.')
    self.mode = parameters.get(self.OVERWRITE_MODE_PARAM, self.MODE_ERROR)
    self.storage = parameters.get(self.STORAGE_ID_PARAM, 'hdfs')
    # Header flag comes from the UI as 1/'1'/True.
    self.header = parameters.get('header', False) in (1, '1', True)

    # FIX: has_code was computed twice with the identical expression
    # (once here, once more after the if-block); the redundant trailing
    # assignment was removed.
    self.has_code = len(named_inputs) == 1
    if self.has_code:
        self.has_import = \
            "from functions.data.save_data import SaveOperation\n"
        tmp = 'output_data_{}'.format(self.order)
        self.output = self.named_outputs.get('output data', tmp)
        # Absolute target: '/<path>/<name>', or '/<name>' if path is ''.
        if len(self.path) > 0:
            self.filename = '/' + self.path + '/' + self.name
        else:
            self.filename = '/' + self.name
def __init__(self, parameters, named_inputs, named_outputs):
    """Keras Reshape layer wrapper (code generation).

    Raises:
        ValueError: when the target-shape parameter is missing or None.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.output = named_outputs.get('output data',
                                    'out_task_{}'.format(self.order))
    # FIX: the original tested `self.TARGET_SHAPE_PARAM is None` — the
    # class constant, which is never None — instead of the parameter
    # VALUE, so a present-but-None value slipped through validation.
    if parameters.get(self.TARGET_SHAPE_PARAM) is None:
        raise ValueError(
            gettext('Parameter {} is required.').format(
                self.TARGET_SHAPE_PARAM))

    # Raw value; the normalized shape is filled in later (treatment()).
    self._target_shape = parameters.get(self.TARGET_SHAPE_PARAM, None)
    self.target_shape = None

    self.task_name = self.parameters.get('task').get('name')
    self.task_workflow_order = self.parameters.get('task').get('order')
    self.has_code = True
    self.parent = ""
    self.var_name = ""

    # Parent/port bookkeeping used during code generation.
    self.parents_by_port = parameters.get('my_ports', [])
    self.python_code_to_remove = self.remove_python_code_parent()
    self.treatment()

    # Import table consumed by the code generator.
    self.import_code = {'layer': 'Reshape',
                        'callbacks': [],
                        'model': None,
                        'preprocessing_image': None,
                        'others': None}
def __init__(self, parameters, named_inputs, named_outputs):
    """Model-evaluation setup: picks the regression or classification
    evaluator according to the requested metric."""
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    for required in ('label_attribute', 'prediction_attribute', 'metric'):
        if required not in parameters:
            raise ValueError(
                _("Parameter '{}' must be informed for task {}").format(
                    required, self.__class__))

    # Label/prediction arrive as single-element lists.
    self.true_col = self.parameters['label_attribute'][0]
    self.pred_col = self.parameters['prediction_attribute'][0]
    self.metric = self.parameters['metric']

    self.has_code = len(self.named_inputs) == 2
    if not self.has_code:
        raise ValueError(
            _("Parameters '{}' and '{}' must be informed for task {}").
            format('input data', 'model', self.__class__))

    # Regression metrics use the regression evaluator; every other
    # metric is treated as classification.
    if self.metric in ['rmse', 'mse', 'mae']:
        self.modeltype = 'RegressionModelEvaluation'
        self.has_import = "from functions.ml.metrics." \
                          "RegressionModelEvaluation import *\n"
    else:
        self.modeltype = 'ClassificationModelEvaluation'
        self.has_import = \
            "from functions.ml.metrics.ClassificationModelEvaluation" \
            " import *\n"

    self.evaluated_out = self.named_outputs.get(
        'evaluated model', 'evaluated_model{}'.format(self.order))
    self.evaluator = self.named_outputs.get(
        "evaluator", 'evaluator{}'.format(self.order))
def __init__(self, parameters, named_inputs, named_outputs):
    """Configure a geo-within (point-in-polygon) join operation.

    Raises:
        ValueError: when a required lat/lon/polygon column is missing.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    if len(named_inputs) == 2:
        self.has_code = True

        for att in [self.TARGET_LAT_COLUMN_PARAM,
                    self.TARGET_LON_COLUMN_PARAM,
                    self.POLYGON_POINTS_COLUMN_PARAM]:
            if att not in parameters:
                raise ValueError(
                    _("Parameter '{}' must be informed for task {}")
                    .format(att, self.__class__))

        self.lat_column = parameters[self.TARGET_LAT_COLUMN_PARAM]
        self.lon_column = parameters[self.TARGET_LON_COLUMN_PARAM]
        self.polygon_column = parameters.get(
            self.POLYGON_POINTS_COLUMN_PARAM, 'points')
        # FIX: the original fetched with default [] and then re-assigned
        # [] when len(...) == 0; `or []` achieves the same normalization
        # and also covers a stored falsy value (e.g. None) that would
        # have crashed len().
        self.attributes = parameters.get(
            self.POLYGON_ATTR_COLUMN_PARAM, []) or []
        self.alias = parameters.get('alias', '_shp')

        self.output = self.named_outputs.get(
            'output data', 'output_data_{}'.format(self.order))
        self.has_import = \
            "from juicer.scikit_learn.library.geo_within " \
            "import GeoWithinOperation\n"
def __init__(self, parameters, named_inputs, named_outputs):
    """Keras `subtract` merge-layer wrapper (code generation).

    Wires the layer into its parents via remove_python_code_parent()
    and treatment(), so the statement order below matters.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    # Name of the variable that will hold this task's output in the
    # generated code.
    self.output = named_outputs.get('output data',
                                    'out_task_{}'.format(self.order))

    # Raw parameter values; the normalized copies below start as None
    # and are presumably filled by treatment() — confirm.
    self._inputs = parameters.get(self.INPUTS_PARAM, None)
    self._kwargs = parameters.get(self.KWARGS_PARAM, None)
    self._advanced_options = parameters.get(self.ADVANCED_OPTIONS_PARAM, 0)
    self.inputs = None
    self.kwargs = None
    self.advanced_options = None

    self.task_name = self.parameters.get('task').get('name')
    self.parents = ""
    self.var_name = ""
    self.has_code = True
    self.add_functions_required = ""

    # Parent/port bookkeeping used during code generation.
    self.parents_by_port = parameters.get('my_ports', [])
    self.parents_slug = parameters.get('parents_slug', [])
    self.python_code_to_remove = self.remove_python_code_parent()
    self.treatment()

    # Import table consumed by the code generator.
    self.import_code = {'layer': 'subtract',
                        'callbacks': [],
                        'model': None,
                        'preprocessing_image': None,
                        'others': None}
def __init__(self, parameters, named_inputs, named_outputs):
    """Minimal presentation operation: records a title and an icon."""
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.icon = 'fa-question'
    # Output depends on runtime data; never reuse cached results.
    self.supports_cache = False
    self.has_code = len(self.named_inputs) == 1
    # Optional display title.
    self.title = parameters.get(self.TITLE_PARAM, '')
def __init__(self, parameters, named_inputs, named_outputs):
    """Configure stop-word removal over tokenized attributes."""
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    # The attributes parameter is mandatory.
    if self.ATTRIBUTES_PARAM not in parameters:
        raise ValueError(
            _("Parameter '{}' must be informed for task {}").format(
                self.ATTRIBUTES_PARAM, self.__class__))
    self.attributes = parameters.get(self.ATTRIBUTES_PARAM)

    self.stop_word_attribute = self.parameters.get(
        self.STOP_WORD_ATTRIBUTE_PARAM, "")
    # The user-supplied list arrives as a comma-separated string.
    raw_list = self.parameters.get(self.STOP_WORD_LIST_PARAM, '')
    self.stop_word_list = [word.strip() for word in raw_list.split(',')]

    self.alias = parameters.get(self.ALIAS_PARAM, 'tokenized_rm')
    self.sw_case_sensitive = self.parameters.get(
        self.STOP_WORD_CASE_SENSITIVE_PARAM, 'False')

    self.stopwords_input = self.named_inputs.get('stop words', [])
    self.output = self.named_outputs.get(
        'output data', 'output_data_{}'.format(self.order))

    self.has_code = 'input data' in self.named_inputs
    if self.has_code:
        self.has_import = "from functions.text.remove_stopwords "\
                          "import RemoveStopWordsOperation\n"
def __init__(self, parameters, named_inputs, named_outputs):
    """Configure a save-data operation (name, format, path, mode...)."""
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    # Name, format and path are mandatory.
    for required in (self.NAME_PARAM, self.FORMAT_PARAM, self.PATH_PARAM):
        if required not in parameters:
            raise ValueError(
                _("Parameter '{}' must be informed for task {}").format(
                    required, self.__class__))

    self.name = parameters.get(self.NAME_PARAM)
    self.tags = parameters.get(self.TAGS_PARAM)
    self.format = parameters.get(self.FORMAT_PARAM, self.FORMAT_CSV)
    self.path = parameters.get(self.PATH_PARAM, '.')
    self.mode = parameters.get(self.OVERWRITE_MODE_PARAM, self.MODE_ERROR)
    self.storage_id = parameters.get(self.STORAGE_ID_PARAM)
    self.user = parameters.get(self.USER_PARAM)
    self.workflow_id = parameters.get(self.WORKFLOW_ID_PARAM)
    # Header flag comes from the UI as 1/'1'/True.
    self.header = parameters.get(self.HEADER_PARAM,
                                 False) in (1, '1', True)

    self.output = self.named_outputs.get(
        'output data', 'output_data_{}'.format(self.order))
    self.filename = self.name
    self.has_code = len(self.named_inputs) == 1
def __init__(self, parameters, named_inputs, named_outputs):
    """External Python-code operation wrapper (code generation).

    NOTE(review): `self.code` uses `... or None`, so a falsy value such
    as '' is deliberately coerced to None — do not simplify. The
    CODE_PARAM presence check happens after the `get` call, which is
    safe because `get` never raises. `out_code` starts False and is
    presumably updated by treatment(); `has_code` is derived from it
    afterwards — confirm before reordering.

    Raises:
        ValueError: when CODE_PARAM is absent.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.output = named_outputs.get('output data',
                                    'out_task_{}'.format(self.order))
    self.code = parameters.get(self.CODE_PARAM, None) or None
    self._out_code = int(parameters.get(self.OUT_CODE_PARAM, 0))
    self.task_name = self.parameters.get('task').get('name')

    if self.CODE_PARAM not in parameters:
        raise ValueError(
            gettext('Parameter {} is required').format(self.CODE_PARAM))

    # Parent/port bookkeeping used during code generation.
    self.parents_by_port = parameters.get('my_ports', [])
    self.python_code_to_remove = self.remove_python_code_parent()
    self.out_code = False
    self.treatment()

    # Import table consumed by the code generator.
    self.import_code = {'layer': None,
                        'callbacks': [],
                        'model': None,
                        'preprocessing_image': None,
                        'others': None}

    self.has_code = not self.out_code
    self.has_external_python_code_operation = self.out_code
def __init__(self, parameters, named_inputs, named_outputs):
    """Data-reader operation: resolves data-source metadata and decides
    whether cached results may be reused.

    Raises:
        ValueError: when the data-source id parameter is absent.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.has_code = any(
        [len(self.named_outputs) > 0, self.contains_results()])
    self.header = False
    if self.has_code:
        if self.DATA_SOURCE_ID_PARAM not in parameters:
            raise ValueError(
                _("Parameter '{}' must be informed for task {}").format(
                    self.DATA_SOURCE_ID_PARAM, self.__class__))
        # FIX: _set_data_source_parameters() was invoked twice (once in
        # the original if/else and once more unconditionally right
        # after); a single call is sufficient.
        self._set_data_source_parameters(parameters)

        # Test if data source was changed since last execution and
        # invalidate cache if so.
        data_source_updated = self.metadata.get('updated')
        if data_source_updated:
            data_source_updated = datetime.datetime.strptime(
                data_source_updated[0:19], '%Y-%m-%dT%H:%M:%S')
            self.supports_cache = (
                parameters.get('execution_date') is not None and
                data_source_updated < parameters['execution_date'])

    self.output = named_outputs.get('output data',
                                    'out_task_{}'.format(self.order))
def __init__(self, parameters, named_inputs, named_outputs):
    """Configure a quantile discretization (QuantileTransformer)."""
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.has_code = len(self.named_inputs) == 1
    if self.has_code:
        if self.ATTRIBUTE_PARAM not in parameters:
            raise ValueError(
                _("Parameters '{}' must be informed for task {}").format(
                    'attributes', self.__class__))
        self.output = self.named_outputs.get(
            'output data', 'output_data_{}'.format(self.order))
        self.model = self.named_outputs.get(
            'model', 'model_{}'.format(self.order))
        self.attribute = parameters[self.ATTRIBUTE_PARAM]
        self.alias = parameters.get(self.ALIAS_PARAM,
                                    'quantiledisc_{}'.format(self.order))

        # Empty values fall back to defaults via `or`.
        self.n_quantiles = parameters.get(self.N_QUANTILES_PARAM,
                                          1000) or 1000
        self.output_distribution = parameters.get(
            self.DISTRIBUITION_PARAM,
            self.DISTRIBUITION_PARAM_UNIFORM) \
            or self.DISTRIBUITION_PARAM_UNIFORM
        # 'None' is a literal emitted into the generated code.
        self.seed = parameters.get(self.SEED_PARAM, 'None') or 'None'

        if int(self.n_quantiles) <= 0:
            raise ValueError(
                _("Parameter '{}' must be x>0 for task {}").format(
                    self.N_QUANTILES_PARAM, self.__class__))
def __init__(self, parameters, named_inputs, named_outputs):
    """Configure NLTK-based stop-word removal."""
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.has_code = 'input data' in self.named_inputs \
        and any([self.contains_results(), len(named_outputs) > 0])
    if self.has_code:
        self.output = self.named_outputs.get(
            'output data', 'output_data_{}'.format(self.order))

        # The attributes parameter is mandatory (single-element list).
        if self.ATTRIBUTES_PARAM not in parameters:
            raise ValueError(
                _("Parameter '{}' must be informed for task {}").format(
                    self.ATTRIBUTES_PARAM, self.__class__))
        self.attributes = parameters.get(self.ATTRIBUTES_PARAM)[0]

        self.sw_case_sensitive = self.parameters.get(
            self.STOP_WORD_CASE_SENSITIVE_PARAM, False)
        # The user-supplied list arrives as a comma-separated string.
        raw_words = self.parameters.get(self.STOP_WORD_LIST_PARAM, '')
        self.stop_word_list = [w.strip() for w in raw_words.split(',')]

        self.alias = parameters.get(self.ALIAS_PARAM, 'tokenized_rm')
        self.stopwords_input = self.named_inputs.get('stop words', None)
        self.stop_word_attribute = self.parameters.get(
            self.STOP_WORD_ATTRIBUTE_PARAM, [''])[0]
        self.lang = self.parameters.get(self.LANG_PARAM, '') or ''

        self.has_import = "import nltk\n" \
                          "nltk.download('stopwords')\n" \
                          "from nltk.corpus import stopwords\n"
def __init__(self, parameters, named_inputs, named_outputs):
    """Configure a MinMaxScaler transformation.

    Raises:
        ValueError: when the attribute parameter is absent.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.has_code = len(self.named_inputs) == 1
    if self.has_code:
        # FIX: ATTRIBUTE_PARAM was validated twice with two different
        # error messages; a single check is kept.
        if self.ATTRIBUTE_PARAM not in parameters:
            raise ValueError(
                _("Parameters '{}' must be informed for task {}").format(
                    self.ATTRIBUTE_PARAM, self.__class__))
        self.output = self.named_outputs.get(
            'output data', 'output_data_{}'.format(self.order))
        self.model = named_outputs.get('transformation model',
                                       'model_{}'.format(self.order))
        self.attribute = parameters[self.ATTRIBUTE_PARAM]
        self.alias = parameters.get(self.ALIAS_PARAM,
                                    'scaled_{}'.format(self.order))
        # Target range defaults to [0, 1]; '' / None fall back via `or`.
        self.min = parameters.get(self.MIN_PARAM, 0) or 0
        self.max = parameters.get(self.MAX_PARAM, 1) or 1
        self.has_import = \
            "from sklearn.preprocessing import MinMaxScaler\n"
def __init__(self, parameters, named_inputs, named_outputs):
    """Configure n-gram generation over a tokenized attribute."""
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.has_code = 'input data' in self.named_inputs \
        and any([self.contains_results(), len(named_outputs) > 0])
    if self.has_code:
        self.output = self.named_outputs.get(
            'output data', 'out_{}'.format(self.order))

        # n is mandatory and forced positive.
        if self.N_PARAM not in parameters:
            raise ValueError(
                _("Parameter '{}' must be informed for task {}").format(
                    self.N_PARAM, self.__class__))
        self.n = abs(int(self.parameters.get(self.N_PARAM, 2)))

        # The target attribute is mandatory (single-element list).
        if self.ATTRIBUTES_PARAM not in parameters:
            raise ValueError(
                _("Parameter '{}' must be informed for task {}").format(
                    self.ATTRIBUTES_PARAM, self.__class__))
        self.attributes = parameters.get(self.ATTRIBUTES_PARAM)[0]

        self.alias = parameters.get(self.ALIAS_PARAM,
                                    '{}_ngram'.format(self.attributes))
        self.has_import = "from nltk.util import ngrams\n"
def __init__(self, parameters, named_inputs, named_outputs):
    """FP-growth association-rules operation setup.

    Raises:
        ValueError: when min support is missing or outside [0.0001, 1.0].
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.has_code = all([
        len(named_inputs) == 1,
        self.contains_results() or len(named_outputs) > 0
    ])
    if self.has_code:
        if self.MIN_SUPPORT_PARAM not in parameters:
            raise ValueError(
                _("Parameter '{}' must be informed for task {}").format(
                    self.MIN_SUPPORT_PARAM, self.__class__))
        # Attribute arrives as a single-element list.
        self.column = parameters.get(self.ATTRIBUTE_PARAM, [''])[0]
        self.confidence = float(parameters.get(self.CONFIDENCE_PARAM, 0.9))

        self.min_support = float(parameters.get(self.MIN_SUPPORT_PARAM))
        if self.min_support < .0001 or self.min_support > 1.0:
            # FIX: the upper bound is inclusive (only values above 1.0
            # are rejected); the old message claimed "smaller than 1.0".
            raise ValueError('Support must be greater or equal '
                             'to 0.0001 and smaller than or equal to 1.0')

        self.output = named_outputs.get(
            'output data', 'output_data_{}'.format(self.order))
        self.rules_output = named_outputs.get(
            'rules output', 'rules_{}'.format(self.order))
        self.has_import = "import pyfpgrowth\n"
def __init__(self, parameters, named_inputs, named_outputs):
    """Bucketizer operation: splits continuous attributes into bins.

    Raises:
        ValueError: on a missing attributes parameter or an invalid
            handle-invalid policy.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    if self.ATTRIBUTES_PARAM not in parameters:
        raise ValueError(
            _("Parameter '{}' must be informed for task {}").format(
                self.ATTRIBUTES_PARAM, self.__class__))
    self.attributes = parameters.get(self.ATTRIBUTES_PARAM)

    # Optional policy for out-of-range/invalid values.
    self.handle_invalid = self.parameters.get(self.HANDLE_INVALID_PARAM)
    if self.handle_invalid is not None:
        if self.handle_invalid not in ['skip', 'keep', 'error']:
            raise ValueError(
                _('Parameter {} must be one of these: {}').format(
                    _('type'),
                    ','.join([_('keep'), _('skip'), _('error')])
                )
            )

    self.aliases = self._get_aliases(
        self.attributes, parameters.get(self.ALIASES_PARAM, '').split(','),
        'bucketed')
    self.splits = self._get_splits(parameters)

    self.model = self.named_outputs.get(
        'model', 'model_task_{}'.format(self.order))
    self.output = self.named_outputs.get('output data',
                                         'out_task_{}'.format(self.order))
    # FIX: has_code was assigned a two-element LIST (always truthy), so
    # code generation could never be disabled; the conditions are now
    # combined with any(), matching the sibling operations.
    self.has_code = any([len(self.named_inputs) > 0,
                         self.contains_results()])
def __init__(self, parameters, named_inputs, named_outputs):
    """PrefixSpan sequential-pattern mining operation setup.

    Raises:
        ValueError: when min support is missing or outside [0.0001, 1.0].
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.has_code = all([
        len(named_inputs) == 1,
        self.contains_results() or len(named_outputs) > 0
    ])
    if self.has_code:
        if self.MIN_SUPPORT_PARAM not in parameters:
            raise ValueError(
                _("Parameter '{}' must be informed for task {}").format(
                    self.MIN_SUPPORT_PARAM, self.__class__))
        # Attribute arrives as a single-element list.
        self.column = parameters.get(self.ATTRIBUTE_PARAM, [''])[0]
        self.output = self.named_outputs.get(
            'output data', 'output_data_{}'.format(self.order))

        self.min_support = float(parameters.get(self.MIN_SUPPORT_PARAM))
        if self.min_support < .0001 or self.min_support > 1.0:
            # FIX: the upper bound is inclusive (only values above 1.0
            # are rejected); the old message claimed "smaller than 1.0".
            raise ValueError('Support must be greater or equal '
                             'to 0.0001 and smaller than or equal to 1.0')

        # Non-positive/empty lengths fall back to the default of 10.
        self.max_length = abs(
            int(parameters.get(self.MAX_LENGTH_PARAM, 10))) or 10
        self.has_import = \
            "from juicer.scikit_learn.library." \
            "prefix_span import PrefixSpan\n"
def __init__(self, parameters, named_inputs, named_outputs):
    """Keras Masking layer wrapper (code generation).

    Raises:
        ValueError: when the mask-value parameter is missing or None.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.output = named_outputs.get('output data',
                                    'out_task_{}'.format(self.order))
    # FIX: the original tested `self.MASK_VALUE_PARAM is None` — the
    # class constant, which is never None — instead of the parameter
    # VALUE; the message also had a grammar slip ("are required").
    if parameters.get(self.MASK_VALUE_PARAM) is None:
        raise ValueError(
            gettext('Parameter {} is required.').format(
                self.MASK_VALUE_PARAM))

    # Falsy values ('' / 0) fall back to 0.0 via `or`.
    self.mask_value = parameters.get(self.MASK_VALUE_PARAM, 0.0) or 0.0

    self.task_name = self.parameters.get('task').get('name')
    self.parent = ""
    self.var_name = ""
    self.has_code = True

    # Parent/port bookkeeping used during code generation.
    self.parents_by_port = parameters.get('my_ports', [])
    self.python_code_to_remove = self.remove_python_code_parent()
    self.treatment()

    # Import table consumed by the code generator.
    self.import_code = {'layer': 'Masking',
                        'callbacks': [],
                        'model': None,
                        'preprocessing_image': None,
                        'others': None}