def __init__(self, parameters, named_inputs, named_outputs):
    """Set up an n-gram text operation (nltk-based).

    Requires the N and attributes parameters when code is generated;
    raises ValueError when either is missing.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    # Generate code only when 'input data' is connected and either
    # interactive results or at least one output is wanted.
    self.has_code = 'input data' in self.named_inputs \
        and any([self.contains_results(), len(named_outputs) > 0])
    if self.has_code:
        self.output = self.named_outputs.get('output data',
                                             'out_{}'.format(self.order))
        if self.N_PARAM in parameters:
            # abs(int(...)) normalizes user input; the default 2 is dead
            # here because presence was just checked.
            self.n = abs(int(self.parameters.get(self.N_PARAM, 2)))
        else:
            raise ValueError(
                _("Parameter '{}' must be informed for task {}").format(
                    self.N_PARAM, self.__class__))
        if self.ATTRIBUTES_PARAM in parameters:
            # Only the first selected attribute is used.
            self.attributes = parameters.get(self.ATTRIBUTES_PARAM)[0]
        else:
            raise ValueError(
                _("Parameter '{}' must be informed for task {}").format(
                    self.ATTRIBUTES_PARAM, self.__class__))
        # Default alias derives from the (single) attribute name.
        self.alias = parameters.get(self.ALIAS_PARAM,
                                    '{}_ngram'.format(self.attributes))
        self.has_import = "from nltk.util import ngrams\n"
def __init__(self, parameters, named_inputs, named_outputs):
    """Initialize the operation.

    Code is emitted only when both input ports are connected and some
    output (or an interactive result) is requested.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    both_inputs_connected = len(self.named_inputs) == 2
    output_wanted = self.contains_results() or len(self.named_outputs) >= 1
    self.has_code = both_inputs_connected and output_wanted
    # Fall back to a generated variable name when no output port is wired.
    self.output = self.named_outputs.get(
        'output data', 'output_data_{}'.format(self.order))
def __init__(self, parameters, named_inputs, named_outputs):
    """Set up a join operation.

    Validates that both left and right join attributes are informed and
    normalizes the join type (outer variants are reduced, see below).
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    # `not X in Y` parses as `not (X in Y)`: flag is True unless the
    # parameter is one of the accepted truthy spellings.
    self.not_keep_right_keys = not \
        parameters.get(self.KEEP_RIGHT_KEYS_PARAM, False) in (1, '1', True)
    self.match_case = parameters.get(self.MATCH_CASE_PARAM,
                                     False) in (1, '1', True)
    self.join_type = parameters.get(self.JOIN_TYPE_PARAM, 'inner')
    # outer should not be allowed?
    # Strips the '_outer' suffix (e.g. 'left_outer' -> 'left').
    self.join_type = self.join_type.replace("_outer", "")
    if not all([
        self.LEFT_ATTRIBUTES_PARAM in parameters,
        self.RIGHT_ATTRIBUTES_PARAM in parameters
    ]):
        raise ValueError(
            _("Parameters '{}' and '{}' must be informed for task {}").
            format(self.LEFT_ATTRIBUTES_PARAM,
                   self.RIGHT_ATTRIBUTES_PARAM,
                   self.__class__))
    # Needs both inputs and a requested output/result to generate code.
    self.has_code = len(self.named_inputs) == 2 and any(
        [len(self.named_outputs) >= 1, self.contains_results()])
    self.left_attributes = parameters.get(self.LEFT_ATTRIBUTES_PARAM)
    self.right_attributes = parameters.get(self.RIGHT_ATTRIBUTES_PARAM)
    # Column-name suffixes for overlapping columns, comma-separated.
    self.suffixes = parameters.get('aliases', '_l,_r')
    self.suffixes = [s for s in self.suffixes.split(',')]
    self.output = self.named_outputs.get(
        'output data', 'output_data_{}'.format(self.order))
def __init__(self, parameters, named_inputs, named_outputs):
    """Presentation-style operation: carries a title and an icon,
    never participates in caching, and generates code only when
    exactly one input is connected."""
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.icon = 'fa-question'
    self.supports_cache = False
    self.has_code = (len(self.named_inputs) == 1)
    self.title = parameters.get(self.TITLE_PARAM, '')
def __init__(self, parameters, named_inputs, named_outputs):
    """Set up a quantile discretization operation.

    Raises ValueError when the attribute parameter is missing or when
    the number of quantiles is not positive.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.has_code = len(self.named_inputs) == 1
    if self.has_code:
        if self.ATTRIBUTE_PARAM not in parameters:
            # NOTE(review): the message hard-codes 'attributes' instead of
            # using self.ATTRIBUTE_PARAM — confirm this is intentional.
            raise ValueError(
                _("Parameters '{}' must be informed for task {}").format(
                    'attributes', self.__class__))
        self.output = self.named_outputs.get(
            'output data', 'output_data_{}'.format(self.order))
        self.model = self.named_outputs.get('model',
                                            'model_{}'.format(self.order))
        self.attribute = parameters[self.ATTRIBUTE_PARAM]
        self.alias = parameters.get(self.ALIAS_PARAM,
                                    'quantiledisc_{}'.format(self.order))
        # `or 1000` also replaces falsy values (0, '', None) with the default.
        self.n_quantiles = parameters.get(self.N_QUANTILES_PARAM,
                                          1000) or 1000
        self.output_distribution = parameters.get(
            self.DISTRIBUITION_PARAM, self.DISTRIBUITION_PARAM_QUANTIS)\
            or self.DISTRIBUITION_PARAM_QUANTIS
        if int(self.n_quantiles) <= 0:
            raise ValueError(
                _("Parameter '{}' must be x>0 for task {}").format(
                    self.N_QUANTILES_PARAM, self.__class__))
def __init__(self, parameters, named_inputs, named_outputs):
    """Set up an ST-DBSCAN spatio-temporal clustering operation.

    Requires latitude, longitude and datetime column parameters; numeric
    thresholds are normalized to non-negative values.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.has_code = len(named_inputs) == 1 and any(
        [self.contains_results(), len(named_outputs) > 0])
    if self.has_code:
        if any([self.LAT_PARAM not in parameters,
                self.LON_PARAM not in parameters,
                self.DATETIME_PARAM not in parameters]):
            raise ValueError(
                _('Parameters {}, {} and {} must be informed for task {}.')
                .format('Latitude', 'Longitude', 'Datetime',
                        self.__class__))
        self.output = self.named_outputs.get(
            'output data', 'output_data_{}'.format(self.order))
        # Each parameter is a list of columns; only the first is used.
        self.lat_col = parameters.get(self.LAT_PARAM)[0]
        self.lon_col = parameters.get(self.LON_PARAM)[0]
        self.datetime_col = parameters.get(self.DATETIME_PARAM)[0]
        self.alias = parameters.get(self.ALIAS_PARAM, 'cluster')
        # `or default` also replaces falsy values (0, '', None).
        self.min_pts = parameters.get(self.MIN_SAMPLE_PARAM, 15) or 15
        self.spatial_thr = parameters.get(self.SPA_THRESHOLD_PARAM,
                                          500) or 500
        self.temporal_thr = parameters.get(self.TMP_THRESHOLD_PARAM,
                                           60) or 60
        # Coerce and clamp to non-negative values.
        self.min_pts = abs(int(self.min_pts))
        self.spatial_thr = abs(float(self.spatial_thr))
        self.temporal_thr = abs(int(self.temporal_thr))
        self.has_import = "from juicer.scikit_learn.library.stdbscan " \
                          "import STDBSCAN\n"
def __init__(self, parameters, named_inputs, named_outputs):
    """Set up a geo-within (point-in-polygon) operation.

    Requires target latitude/longitude columns and the polygon points
    column when both inputs are connected; raises ValueError otherwise.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    if len(named_inputs) == 2:
        self.has_code = True
        attributes = [self.TARGET_LAT_COLUMN_PARAM,
                      self.TARGET_LON_COLUMN_PARAM,
                      self.POLYGON_POINTS_COLUMN_PARAM]
        for att in attributes:
            if att not in parameters:
                raise ValueError(
                    _("Parameter '{}' must be informed for task {}")
                    .format(att, self.__class__))
        self.lat_column = parameters[self.TARGET_LAT_COLUMN_PARAM]
        self.lon_column = parameters[self.TARGET_LON_COLUMN_PARAM]
        self.polygon_column = parameters.get(
            self.POLYGON_POINTS_COLUMN_PARAM, 'points')
        # FIX: `or []` also covers the case where the key is present but
        # its value is None/empty — the original `len(self.attributes)`
        # would raise TypeError on None, and the empty-list reset was a
        # no-op.
        self.attributes = parameters.get(
            self.POLYGON_ATTR_COLUMN_PARAM, []) or []
        self.alias = parameters.get('alias', '_shp')
        self.output = self.named_outputs.get(
            'output data', 'output_data_{}'.format(self.order))
        self.has_import = \
            "from juicer.scikit_learn.library.geo_within " \
            "import GeoWithinOperation\n"
def __init__(self, parameters, named_inputs, named_outputs):
    """Set up a min-max scaling operation (sklearn MinMaxScaler).

    Raises ValueError when the attribute parameter is missing.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.has_code = len(self.named_inputs) == 1
    if self.has_code:
        # Single validation of the attribute parameter. FIX: the
        # original repeated this exact check a second time further
        # down (after `self.model`), where it could never fire.
        if self.ATTRIBUTE_PARAM not in parameters:
            raise ValueError(
                _("Parameters '{}' must be informed for task {}").format(
                    self.ATTRIBUTE_PARAM, self.__class__))
        self.output = self.named_outputs.get(
            'output data', 'output_data_{}'.format(self.order))
        self.model = named_outputs.get('transformation model',
                                       'model_{}'.format(self.order))
        self.attribute = parameters[self.ATTRIBUTE_PARAM]
        self.alias = parameters.get(self.ALIAS_PARAM,
                                    'scaled_{}'.format(self.order))
        # `or default` also replaces falsy values (0, '', None).
        self.min = parameters.get(self.MIN_PARAM, 0) or 0
        self.max = parameters.get(self.MAX_PARAM, 1) or 1
        self.has_import = \
            "from sklearn.preprocessing import MinMaxScaler\n"
def __init__(self, parameters, named_inputs, named_outputs):
    """Set up a frequent-itemset / association-rules operation.

    Requires minimum support in [0.0001, 1.0] and a transactions
    attribute; raises ValueError otherwise.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    if self.MIN_SUPPORT_PARAM not in parameters:
        raise ValueError(_(
            'Support must be informed for classifier {}').format(
            self.__class__))
    self.min_support = float(parameters.get(self.MIN_SUPPORT_PARAM))
    # NOTE(review): the check allows exactly 1.0, while the message says
    # "smaller than 1.0"; message is also not wrapped in _() like the
    # others — confirm intended.
    if self.min_support < .0001 or self.min_support > 1.0:
        raise ValueError('Support must be greater or equal '
                         'to 0.0001 and smaller than 1.0')
    self.output = self.named_outputs.get(
        'output data', 'freq_items_{}'.format(self.order))
    self.rules = self.named_outputs.get(
        'rules output', 'rules_{}'.format(self.order))
    self.attribute = parameters.get(self.ATTRIBUTE_PARAM)
    if not self.attribute:
        raise ValueError(
            'Missing parameter {}'.format(self.ATTRIBUTE_PARAM))
    # The parameter is a list of columns; only the first is used.
    self.attribute = self.attribute[0]
    self.has_code = len(self.named_inputs) == 1
    self.confidence = float(parameters.get(self.CONFIDENCE_PARAM, 0.9))
def __init__(self, parameters, named_inputs, named_outputs):
    """Set up a Local-Outlier-Factor-style anomaly detection operation.

    Reads the usual sklearn LOF hyper-parameters from `parameters`;
    'features' is mandatory (KeyError if absent).
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.has_code = True
    self.output = self.named_outputs.get(
        'output data', 'output_data_{}'.format(self.order))
    self.number_neighbors = int(
        parameters.get(self.NUMBER_NEIGHBORS_ATTRIBUTE_PARAM, 20))
    self.algorithm = parameters.get(self.ALGORITHM_ATTRIBUTE_PARAM, "auto")
    self.leaf_size = int(parameters.get(self.LEAF_SIZE_ATTRIBUTE_PARAM, 30))
    self.metric = parameters.get(self.METRIC_ATTRIBUTE_PARAM, "minkowski")
    self.contamination = float(
        parameters.get(self.CONTAMINATION_ATTRIBUTE_PARAM, 0.22))
    self.p = int(parameters.get(self.P_ATTRIBUTE_PARAM, 2))
    self.metric_params = parameters.get(self.METRIC_PARAMS_ATTRIBUTE_PARAM,
                                        None)
    # Stored as int (0/1), not bool — presumably normalized later by
    # input_treatment(); TODO confirm.
    self.novelty = int(parameters.get(self.NOVELTY_ATTRIBUTE_PARAM, 0))
    self.n_jobs = int(parameters.get(self.N_JOBS_ATTRIBUTE_PARAM, 0))
    # Mandatory: direct indexing raises KeyError if missing.
    self.features = parameters['features']
    self.outlier = self.parameters.get(self.OUTLIER_PARAM, 'outlier')
    self.input_treatment()
    # NOTE(review): the triple-quoted import string's content is not
    # visible in this chunk of the file; it continues beyond this line.
    self.has_import = \
        """
def __init__(self, parameters, named_inputs, named_outputs):
    """Set up a user-supplied Python code operation (Keras workflow).

    Requires the code parameter; raises ValueError when missing.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.output = named_outputs.get('output data',
                                    'out_task_{}'.format(self.order))
    # `or None` maps falsy values (e.g. empty string) to None as well.
    self.code = parameters.get(self.CODE_PARAM, None) or None
    self._out_code = int(parameters.get(self.OUT_CODE_PARAM, 0))
    self.task_name = self.parameters.get('task').get('name')
    # NOTE(review): self.code is read before this presence check; the
    # check still fires before any use of the value, but validating
    # first would be cleaner.
    if self.CODE_PARAM not in parameters:
        raise ValueError(
            gettext('Parameter {} is required').format(self.CODE_PARAM))
    self.parents_by_port = parameters.get('my_ports', [])
    self.python_code_to_remove = self.remove_python_code_parent()
    self.out_code = False
    # treatment() runs before has_code is derived — presumably it may
    # flip self.out_code; TODO confirm.
    self.treatment()
    self.import_code = {'layer': None,
                        'callbacks': [],
                        'model': None,
                        'preprocessing_image': None,
                        'others': None}
    self.has_code = not self.out_code
    self.has_external_python_code_operation = self.out_code
def __init__(self, parameters, named_inputs, named_outputs):
    """Set up a stop-word removal operation (functions.text backend).

    Requires the attributes parameter; raises ValueError when missing.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    if self.ATTRIBUTES_PARAM in parameters:
        self.attributes = parameters.get(self.ATTRIBUTES_PARAM)
    else:
        raise ValueError(
            _("Parameter '{}' must be informed for task {}").format(
                self.ATTRIBUTES_PARAM, self.__class__))
    self.stop_word_attribute = self.parameters.get(
        self.STOP_WORD_ATTRIBUTE_PARAM, "")
    # Comma-separated user list; note ''.split(',') yields [''] when
    # the parameter is absent.
    self.stop_word_list = [
        s.strip() for s in
        self.parameters.get(self.STOP_WORD_LIST_PARAM, '').split(',')
    ]
    self.alias = parameters.get(self.ALIAS_PARAM, 'tokenized_rm')
    # Kept as the string 'False' by default — presumably parsed
    # downstream; TODO confirm.
    self.sw_case_sensitive = self.parameters.get(
        self.STOP_WORD_CASE_SENSITIVE_PARAM, 'False')
    # Optional side input carrying a stop-words dataset.
    self.stopwords_input = self.named_inputs.get('stop words', [])
    self.output = self.named_outputs.get(
        'output data', 'output_data_{}'.format(self.order))
    self.has_code = 'input data' in self.named_inputs
    if self.has_code:
        self.has_import = "from functions.text.remove_stopwords "\
                          "import RemoveStopWordsOperation\n"
def __init__(self, parameters, named_inputs, named_outputs):
    """Set up a model-evaluation operation.

    Requires label/prediction attributes and a metric; picks the
    regression or classification evaluator from the metric name.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    for att in ['label_attribute', 'prediction_attribute', 'metric']:
        if att not in parameters:
            raise ValueError(
                _("Parameter '{}' must be informed for task {}").format(
                    att, self.__class__))
    # Attribute parameters are lists of columns; only the first is used.
    self.true_col = self.parameters['label_attribute'][0]
    self.pred_col = self.parameters['prediction_attribute'][0]
    self.metric = self.parameters['metric']
    # Both 'input data' and 'model' ports must be connected.
    self.has_code = len(self.named_inputs) == 2
    if not self.has_code:
        raise ValueError(
            _("Parameters '{}' and '{}' must be informed for task {}").
            format('input data', 'model', self.__class__))
    # Regression metrics select the regression evaluator; anything else
    # falls through to classification.
    if self.metric in ['rmse', 'mse', 'mae']:
        self.modeltype = 'RegressionModelEvaluation'
        self.has_import = "from functions.ml.metrics." \
                          "RegressionModelEvaluation import *\n"
    else:
        self.modeltype = 'ClassificationModelEvaluation'
        self.has_import = \
            "from functions.ml.metrics.ClassificationModelEvaluation" \
            " import *\n"
    self.evaluated_out = \
        self.named_outputs.get('evaluated model',
                               'evaluated_model{}'.format(self.order))
    tmp = 'evaluator{}'.format(self.order)
    self.evaluator = self.named_outputs.get("evaluator", tmp)
def __init__(self, parameters, named_inputs, named_outputs):
    """Set up a bucketizer operation.

    Requires the attributes parameter; validates the optional
    handle-invalid policy ('skip' | 'keep' | 'error').
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    if self.ATTRIBUTES_PARAM in parameters:
        self.attributes = parameters.get(self.ATTRIBUTES_PARAM)
    else:
        raise ValueError(
            _("Parameter '{}' must be informed for task {}").format(
                self.ATTRIBUTES_PARAM, self.__class__))
    self.handle_invalid = self.parameters.get(self.HANDLE_INVALID_PARAM)
    if self.handle_invalid is not None:
        if self.handle_invalid not in ['skip', 'keep', 'error']:
            raise ValueError(
                _('Parameter {} must be one of these: {}').format(
                    _('type'),
                    ','.join([_('keep'), _('skip'), _('error')])
                )
            )
    # One alias per attribute, defaulting to '<attr>_bucketed'.
    self.aliases = self._get_aliases(
        self.attributes, parameters.get(self.ALIASES_PARAM, '').split(','),
        'bucketed')
    self.splits = self._get_splits(parameters)
    self.model = self.named_outputs.get(
        'model', 'model_task_{}'.format(self.order))
    self.output = self.named_outputs.get('output data',
                                         'out_task_{}'.format(self.order))
    # FIX: the original assigned a two-element LIST to has_code, which
    # is always truthy; sibling operations use any([...]) — restore that.
    self.has_code = any([len(self.named_inputs) > 0,
                         self.contains_results()])
def __init__(self, parameters, named_inputs, named_outputs):
    """Set up a data-reader operation.

    Requires a data-source id; disables the cache when the data source
    changed after the recorded execution date.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.has_code = any(
        [len(self.named_outputs) > 0, self.contains_results()])
    if self.has_code:
        if self.DATA_SOURCE_ID_PARAM in parameters:
            self._set_data_source_parameters(parameters)
        else:
            raise ValueError(
                _("Parameter '{}' must be informed for task {}").format(
                    self.DATA_SOURCE_ID_PARAM, self.__class__))
        # Test if data source was changed since last execution and
        # invalidate cache if so.
        # NOTE(review): this repeats the _set_data_source_parameters call
        # made just above — looks redundant; confirm it is idempotent
        # before removing.
        self._set_data_source_parameters(parameters)
        data_source_updated = self.metadata.get('updated')
        if data_source_updated:
            # Keep only 'YYYY-MM-DDTHH:MM:SS' (drops tz/microseconds).
            data_source_updated = datetime.datetime.strptime(
                data_source_updated[0:19], '%Y-%m-%dT%H:%M:%S')
        # NOTE(review): if 'updated' is absent while execution_date is
        # set, this compares None < datetime — TODO confirm that cannot
        # happen in practice.
        self.supports_cache = (
            parameters.get('execution_date') is not None and
            data_source_updated < parameters['execution_date'])
    self.output = named_outputs.get('output data',
                                    'out_task_{}'.format(self.order))
def __init__(self, parameters, named_inputs, named_outputs):
    """Set up an FP-growth frequent-itemset operation (pyfpgrowth).

    Requires minimum support in [0.0001, 1.0]; raises ValueError
    otherwise.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.has_code = all([
        len(named_inputs) == 1,
        self.contains_results() or len(named_outputs) > 0
    ])
    if self.has_code:
        if self.MIN_SUPPORT_PARAM not in parameters:
            raise ValueError(
                _("Parameter '{}' must be informed for task {}").format(
                    self.MIN_SUPPORT_PARAM, self.__class__))
        # The attribute parameter is a list; only the first is used.
        self.column = parameters.get(self.ATTRIBUTE_PARAM, [''])[0]
        self.confidence = float(parameters.get(self.CONFIDENCE_PARAM, 0.9))
        self.min_support = float(parameters.get(self.MIN_SUPPORT_PARAM))
        # NOTE(review): the check allows exactly 1.0 although the
        # message says "smaller than 1.0".
        if self.min_support < .0001 or self.min_support > 1.0:
            raise ValueError('Support must be greater or equal '
                             'to 0.0001 and smaller than 1.0')
        self.output = named_outputs.get(
            'output data', 'output_data_{}'.format(self.order))
        self.rules_output = named_outputs.get(
            'rules output', 'rules_{}'.format(self.order))
        self.has_import = "import pyfpgrowth\n"
def __init__(self, parameters, named_inputs, named_outputs):
    """Set up a COMPSs-backed code-template operation.

    Requires both 'code_0' and 'code_1' templates; picks the output
    variable from whichever output port is connected.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    # FIX: the original `not 'code_0' in p and 'code_1' in p` bound as
    # `(not ('code_0' in p)) and ('code_1' in p)` due to precedence, so
    # it only raised when code_0 was missing AND code_1 was present.
    # The message states both are required, so raise when either is
    # missing.
    if 'code_0' not in self.parameters or 'code_1' not in self.parameters:
        raise ValueError(
            _("Parameter {} and {} must be informed for task {}").format(
                'code_0', 'code_1', self.__class__))
    self.order = self.parameters['task']['order']
    self.code_0 = self.parameters.get('code_0', '')
    self.code_1 = self.parameters.get('code_1', '')
    self.has_code = True
    self.has_code_otm = True
    self.number_tasks = self.parameters.get('number_tasks', 0)
    # NOTE: attribute name 'fist_id' (sic) kept for compatibility with
    # external readers of this attribute.
    self.fist_id = self.parameters.get('fist_id', 0)
    # Prefer the regular output port, then the projected one, then a
    # generated name.
    if 'output data' in self.named_outputs:
        self.output = self.named_outputs['output data']
    elif 'output projected data' in self.named_outputs:
        self.output = self.named_outputs['output projected data']
    else:
        self.output = 'output_data_{}'.format(self.order)
    self.has_import = "from pycompss.api.task import task\n" \
                      "from pycompss.api.parameter import *\n"
def __init__(self, parameters, named_inputs, named_outputs):
    """Set up an association-rules generation operation.

    Code is generated only with exactly one input and a requested
    output/result; confidence must lie in [0.0001, 1.0].
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    single_input = len(named_inputs) == 1
    output_requested = self.contains_results() or len(named_outputs) > 0
    self.has_code = single_input and output_requested
    if self.has_code:
        self.output = named_outputs.get(
            'output data', 'output_data_{}'.format(self.order))
        self.confidence = float(parameters.get(self.CONFIDENCE_PARAM, 0.5))
        if self.confidence < .0001 or self.confidence > 1.0:
            raise ValueError('Confidence must be greater or equal '
                             'to 0.0001 and smaller than 1.0')
        self.has_import = \
            "from juicer.scikit_learn.library.rules_generator " \
            "import RulesGenerator\n"
        # List-valued parameters: only the first column is used.
        support_cols = parameters.get(self.SUPPORT_ATTR_PARAM,
                                      [self.SUPPORT_ATTR_PARAM_VALUE])
        self.support_col = support_cols[0]
        self.items_col = parameters.get(self.ITEMSET_ATTR_PARAM, [''])[0]
        # -1 (also substituted for any falsy value) means "no limit".
        self.max_rules = parameters.get(self.MAX_COUNT_PARAM, -1) or -1
def __init__(self, parameters, named_inputs, named_outputs):
    """Set up a coordinate-projection conversion operation.

    Requires source/destination projections and the latitude/longitude
    columns; raises ValueError when any is missing.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.has_code = len(named_inputs) == 1 and any(
        [self.contains_results(), len(named_outputs) > 0])
    if self.has_code:
        for att in [self.SRC_PROJ_PARAM, self.DST_PROJ_PARAM,
                    self.LAT_PARAM, self.LON_PARAM]:
            if att not in self.parameters:
                raise ValueError(
                    _('Parameters {} must be informed for task {}.')
                    .format(att, self.__class__))
        self.output = self.named_outputs.get(
            'output data', 'output_data_{}'.format(self.order))
        # Column parameters are lists; only the first column is used.
        self.lat_col = parameters.get(self.LAT_PARAM)[0]
        self.lon_col = parameters.get(self.LON_PARAM)[0]
        # Aliases default to overwriting the original columns.
        self.lat_alias = parameters.get(self.LAT_ALIAS_PARAM, self.lat_col)
        self.lon_alias = parameters.get(self.LON_ALIAS_PARAM, self.lon_col)
        self.src_prj = parameters.get(self.SRC_PROJ_PARAM)
        self.dst_prj = parameters.get(self.DST_PROJ_PARAM)
def __init__(self, parameters, named_inputs, named_outputs):
    """Set up a PrefixSpan sequential-pattern mining operation.

    Requires minimum support in [0.0001, 1.0]; raises ValueError
    otherwise.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.has_code = all([
        len(named_inputs) == 1,
        self.contains_results() or len(named_outputs) > 0
    ])
    if self.has_code:
        if self.MIN_SUPPORT_PARAM not in parameters:
            raise ValueError(
                _("Parameter '{}' must be informed for task {}").format(
                    self.MIN_SUPPORT_PARAM, self.__class__))
        # The attribute parameter is a list; only the first is used.
        self.column = parameters.get(self.ATTRIBUTE_PARAM, [''])[0]
        self.output = self.named_outputs.get(
            'output data', 'output_data_{}'.format(self.order))
        self.min_support = float(parameters.get(self.MIN_SUPPORT_PARAM))
        if self.min_support < .0001 or self.min_support > 1.0:
            raise ValueError('Support must be greater or equal '
                             'to 0.0001 and smaller than 1.0')
        # abs(int(...)) normalizes input; `or 10` replaces a 0 result
        # with the default.
        self.max_length = abs(
            int(parameters.get(self.MAX_LENGTH_PARAM, 10))) or 10
        self.has_import = \
            "from juicer.scikit_learn.library." \
            "prefix_span import PrefixSpan\n"
def __init__(self, parameters, named_inputs, named_outputs):
    """Set up a model-application operation.

    Requires 'label' and 'features' parameters and both input ports
    ('train input data' and 'algorithm'); raises ValueError otherwise.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    # FIX: the original used `and`, which only raised when BOTH keys
    # were missing; with a single missing key, parameters['label'][0]
    # below raised a bare KeyError instead of the intended message.
    if 'label' not in parameters or 'features' not in parameters:
        raise ValueError(
            _("Parameters '{}' and '{}' must be informed for task {}").
            format('label', 'features', self.__class__))
    # Both are lists of columns; only the first column is used.
    self.label = parameters['label'][0]
    self.features = parameters['features'][0]
    self.predCol = parameters.get('prediction', 'prediction')
    self.has_code = len(self.named_inputs) == 2
    if not self.has_code:
        raise ValueError(
            _("Parameters '{}' and '{}' must be informed for task {}").
            format('train input data', 'algorithm', self.__class__))
    self.model = self.named_outputs.get('model',
                                        'model_tmp{}'.format(self.order))
    # Only transform the data when an output port is actually connected.
    self.perform_transformation = 'output data' in self.named_outputs
    if not self.perform_transformation:
        self.output = 'task_{}'.format(self.order)
    else:
        self.output = self.named_outputs['output data']
    self.prediction = self.parameters.get('prediction', 'prediction')
def __init__(self, parameters, named_inputs, named_outputs):
    """Set up a tokenizer operation (simple or regex based).

    Requires the attributes parameter; aliases are padded so every
    attribute has one.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    if self.ATTRIBUTES_PARAM in parameters:
        self.attributes = parameters.get(self.ATTRIBUTES_PARAM)
    else:
        raise ValueError(
            _("Parameter '{}' must be informed for task {}").format(
                self.ATTRIBUTES_PARAM, self.__class__))
    self.type = self.parameters.get(self.TYPE_PARAM, self.TYPE_SIMPLE)
    if self.type not in [self.TYPE_REGEX, self.TYPE_SIMPLE]:
        raise ValueError(
            _('Invalid type for operation Tokenizer: {}').format(
                self.type))
    self.alias = [
        alias.strip() for alias in
        parameters.get(self.ALIAS_PARAM, '').split(',')
    ]
    # Adjust alias in order to have the same number of aliases as attributes
    # by filling missing alias with attribute name suffixed by tokenized.
    self.alias = [
        x[1] or '{}_tokenized'.format(x[0]) for x in zip_longest(
            self.attributes, self.alias[:len(self.attributes)])
    ]
    self.expression_param = parameters.get(self.EXPRESSION_PARAM, r'\s+')
    # NOTE: attribute name 'min_token_lenght' (sic) kept for
    # compatibility with external readers of this attribute.
    self.min_token_lenght = parameters.get(self.MINIMUM_SIZE, 3)
    self.has_code = any(
        [len(self.named_inputs) > 0, self.contains_results()])
    self.output = self.named_outputs.get('output data',
                                         'out_{}'.format(self.order))
def __init__(self, parameters, named_inputs, named_outputs):
    """Set up a standard-scaling operation (sklearn StandardScaler).

    Raises ValueError when the attribute parameter is missing.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.has_code = len(named_inputs) > 0 and any(
        [len(self.named_outputs) > 0, self.contains_results()])
    if self.has_code:
        # Flags accept '1', 1 or True as truthy spellings.
        self.with_mean = parameters.get(
            self.WITH_MEAN_PARAM, False) in ['1', 1, True]
        self.with_std = parameters.get(
            self.WITH_STD_PARAM, True) in ['1', 1, True]
        self.output = self.named_outputs.get(
            'output data', 'output_data_{}'.format(self.order))
        self.model = named_outputs.get('transformation model',
                                       'model_{}'.format(self.order))
        if self.ATTRIBUTE_PARAM not in self.parameters:
            msg = _("Parameters '{}' must be informed for task {}")
            raise ValueError(
                msg.format(self.ATTRIBUTE_PARAM, self.__class__.__name__))
        self.attribute = parameters[self.ATTRIBUTE_PARAM]
        self.alias = parameters.get(self.ALIAS_PARAM,
                                    'scaled_{}'.format(self.order))
        self.has_import = \
            "from sklearn.preprocessing import StandardScaler\n"
def __init__(self, parameters, named_inputs, named_outputs):
    """Set up an n-gram operation over multiple attributes.

    Requires the N and attributes parameters; aliases are padded so
    every attribute has one ('<attr>_ngram' by default).
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    # FIX: the original N_PARAM presence branch wrongly assigned
    # `self.attributes = parameters.get(self.ATTRIBUTES_PARAM)` instead
    # of only validating; the value was overwritten below anyway, so
    # this branch should just check presence.
    if self.N_PARAM not in parameters:
        raise ValueError(
            _("Parameter '{}' must be informed for task {}").format(
                self.N_PARAM, self.__class__))
    if self.ATTRIBUTES_PARAM in parameters:
        self.attributes = parameters.get(self.ATTRIBUTES_PARAM)
    else:
        raise ValueError(
            _("Parameter '{}' must be informed for task {}").format(
                self.ATTRIBUTES_PARAM, self.__class__))
    # Default 2 is dead here because presence was just checked.
    self.n = int(self.parameters.get(self.N_PARAM, 2))
    self.alias = [
        alias.strip() for alias in
        parameters.get(self.ALIAS_PARAM, '').split(',')
    ]
    # Adjust alias in order to have the same number of aliases as attributes
    # by filling missing alias with the attribute name sufixed by _ngram.
    self.alias = [
        x[1] or '{}_ngram'.format(x[0]) for x in zip_longest(
            self.attributes, self.alias[:len(self.attributes)])
    ]
    self.has_code = any(
        [len(self.named_inputs) > 0, self.contains_results()])
    self.output = self.named_outputs.get('output data',
                                         'out_{}'.format(self.order))
def __init__(self, parameters, named_inputs, named_outputs):
    """Set up an LSH-forest-style nearest-neighbors operation.

    Requires the label parameter; the remaining hyper-parameters take
    sklearn-like defaults.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.has_code = True
    self.number_neighbors = int(
        parameters.get(self.NUMBER_NEIGHBORS_ATTRIBUTE_PARAM, 5))
    self.n_estimators = int(
        parameters.get(self.N_ESTIMATORS_ATTRIBUTE_PARAM, 10))
    self.min_hash_match = int(
        parameters.get(self.MIN_HASH_MATCH_ATTRIBUTE_PARAM, 4))
    self.n_candidates = int(parameters.get(self.N_CANDIDATES, 10))
    self.random_state = int(
        parameters.get(self.RANDOM_STATE_ATTRIBUTE_PARAM, 0))
    self.radius = float(parameters.get(self.RADIUS_ATTRIBUTE_PARAM, 1.0))
    self.radius_cutoff_ratio = float(
        parameters.get(self.RADIUS_CUTOFF_RATIO_ATTRIBUTE_PARAM, 0.9))
    # all([...]) over a single condition == plain membership test.
    if not all([self.LABEL_PARAM in parameters]):
        msg = _("Parameters '{}' must be informed for task {}")
        raise ValueError(
            msg.format(self.LABEL_PARAM, self.__class__.__name__))
    self.label = parameters[self.LABEL_PARAM]
    self.model = self.named_outputs.get('model',
                                        'model_{}'.format(self.order))
    self.output = self.named_outputs.get('output data',
                                         'out_task_{}'.format(self.order))
    self.input_treatment()
    # NOTE(review): the triple-quoted import string's content is not
    # visible in this chunk of the file; it continues beyond this line.
    self.has_import = \
        """
def __init__(self, parameters, named_inputs, named_outputs):
    """Set up a Keras GlobalMaxPooling3D layer operation.

    Raw parameter values are kept in underscore-prefixed attributes;
    treatment() derives the public counterparts.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.has_code = True
    self.output = named_outputs.get('output data',
                                    'out_task_{}'.format(self.order))
    self.task_name = self.parameters.get('task').get('name')
    # Raw (unprocessed) parameter values.
    self._data_format = parameters.get(self.DATA_FORMAT_PARAM, None)
    self._kwargs = parameters.get(self.KWARG_PARAM, None)
    self._advanced_options = parameters.get(self.ADVANCED_OPTIONS_PARAM, 0)
    # Processed values, filled in by treatment().
    self.data_format = None
    self.kwargs = None
    self.advanced_options = None
    # Code-generation bookkeeping.
    self.parent = ""
    self.var_name = ""
    self.add_functions_required = ""
    self.parents_by_port = parameters.get('my_ports', [])
    self.python_code_to_remove = self.remove_python_code_parent()
    self.treatment()
    self.import_code = {
        'layer': 'GlobalMaxPooling3D',
        'callbacks': [],
        'model': None,
        'preprocessing_image': None,
        'others': None,
    }
def __init__(self, parameters, named_inputs, named_outputs):
    """Set up an execute-SQL operation (pandasql backend).

    Only SELECT statements are accepted; the query is flattened to a
    single line and escaped before use.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.has_code = len(self.named_inputs) >= 1 and any(
        [len(self.named_outputs) >= 1, self.contains_results()])
    # all([...]) over a single condition == plain membership test.
    if not all([self.QUERY_PARAM in parameters]):
        msg = _("Required parameter {} must be informed for task {}")
        raise ValueError(msg.format(self.QUERY_PARAM, self.__class__))
    # Newlines are collapsed to spaces before escaping.
    self.query = ExecuteSQLOperation._escape_string(
        parameters.get(self.QUERY_PARAM).strip().replace('\n', ' '))
    # Reject anything that is not a SELECT (case-insensitive prefix check).
    if self.query[:6].upper() != 'SELECT':
        raise ValueError(_('Invalid query. Only SELECT is allowed.'))
    if self.NAMES_PARAM in parameters:
        # Optional comma-separated output column names; blanks dropped.
        self.names = [
            n.strip() for n in parameters.get(self.NAMES_PARAM).split(',')
            if n.strip()
        ]
    else:
        self.names = None
    self.input1 = self.named_inputs.get('input data 1')
    self.input2 = self.named_inputs.get('input data 2')
    self.output = self.named_outputs.get('output data',
                                         'out_{}'.format(self.order))
    self.has_import = 'from pandasql import sqldf\n'
def __init__(self, parameters, named_inputs, named_outputs):
    """Set up a save-data-source operation.

    Validates name, format, storage and path; raises ValueError with a
    user-facing message for each missing piece.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.name = parameters.get(self.NAME_PARAM)
    if self.name is None or not self.name.strip():
        raise ValueError(_('You must specify a name for new data source.'))
    self.format = parameters.get(self.FORMAT_PARAM, '') or ''
    valid_formats = (self.FORMAT_PARQUET, self.FORMAT_CSV,
                     self.FORMAT_JSON)
    if not self.format.strip() or self.format not in valid_formats:
        raise ValueError(_('You must specify a valid format.'))
    # NOTE(review): both `url` and `path` read PATH_PARAM — confirm
    # whether `url` was meant to come from a different parameter.
    self.url = parameters.get(self.PATH_PARAM)
    self.storage_id = parameters.get(self.STORAGE_ID_PARAM)
    if not self.storage_id:
        raise ValueError(_('You must specify a storage for saving data.'))
    self.tags = parameters.get(self.TAGS_PARAM, [])
    self.path = parameters.get(self.PATH_PARAM)
    if self.path is None or not self.path.strip():
        raise ValueError(_('You must specify a path for saving data.'))
    self.workflow_json = parameters.get(self.WORKFLOW_JSON_PARAM, '')
    self.mode = parameters.get(self.OVERWRITE_MODE_PARAM, self.MODE_ERROR)
    # Header flag accepts 1, '1' or True as truthy spellings.
    self.header = parameters.get(self.HEADER_PARAM, True) in (1, '1', True)
    self.user = parameters.get(self.USER_PARAM)
    self.workflow_id = parameters.get(self.WORKFLOW_ID_PARAM)
    self.has_code = len(self.named_inputs) == 1
def __init__(self, parameters, named_inputs, named_outputs):
    """Set up a sampling operation (percent or value based).

    Validates 'value' for non-percent sampling and 'fraction' for
    percent sampling; clamps out-of-range integer seeds to 0.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.type = self.parameters.get('type', self.TYPE_PERCENT)
    self.value = int(self.parameters.get('value', -1))
    # Percentage input divided to a [0, 1] fraction; the -50 default
    # yields -0.5, which fails the percent-range check below when no
    # fraction was informed.
    self.fraction = float(self.parameters.get('fraction', -50)) / 100
    if self.value < 0 and self.type != self.TYPE_PERCENT:
        raise ValueError(
            _("Parameter 'value' must be [x>=0] if is using "
              "the current type of sampling in task {}.").format(
                self.__class__))
    if self.type == self.TYPE_PERCENT and any(
            [self.fraction > 1.0, self.fraction < 0]):
        raise ValueError(
            _("Parameter 'fraction' must be 0<=x<=1 if is using "
              "the current type of sampling in task {}.").format(
                self.__class__))
    # Default is the literal string 'None' (emitted into generated code).
    self.seed = self.parameters.get(self.SEED, 'None')
    # Exact-type check: bools and the 'None' string are left untouched;
    # integer seeds outside [0, 2**32) are reset to 0.
    if type(self.seed) == int:
        self.seed = 0 if self.seed >= 4294967296 or self.seed < 0 \
            else self.seed
    self.output = self.named_outputs.get(
        'sampled data', 'output_data_{}'.format(self.order))
    self.has_code = len(self.named_inputs) == 1 and any(
        [len(self.named_outputs) >= 1, self.contains_results()])
def __init__(self, parameters, named_inputs, named_outputs):
    """Set up a stop-word removal operation (nltk backend).

    Requires the attributes parameter; optionally takes a 'stop words'
    side input and a language for nltk's built-in stopword lists.
    """
    Operation.__init__(self, parameters, named_inputs, named_outputs)
    self.has_code = 'input data' in self.named_inputs \
        and any([self.contains_results(), len(named_outputs) > 0])
    if self.has_code:
        self.output = self.named_outputs.get(
            'output data', 'output_data_{}'.format(self.order))
        if self.ATTRIBUTES_PARAM in parameters:
            # Only the first selected attribute is used.
            self.attributes = parameters.get(self.ATTRIBUTES_PARAM)[0]
        else:
            raise ValueError(
                _("Parameter '{}' must be informed for task {}").format(
                    self.ATTRIBUTES_PARAM, self.__class__))
        self.sw_case_sensitive = self.parameters.get(
            self.STOP_WORD_CASE_SENSITIVE_PARAM, False)
        # Comma-separated user list; ''.split(',') yields [''] when the
        # parameter is absent.
        self.stop_word_list = [
            s.strip() for s in self.parameters.get(
                self.STOP_WORD_LIST_PARAM, '').split(',')
        ]
        self.alias = parameters.get(self.ALIAS_PARAM, 'tokenized_rm')
        # Optional side input carrying a stop-words dataset.
        self.stopwords_input = self.named_inputs.get('stop words', None)
        self.stop_word_attribute = self.parameters.get(
            self.STOP_WORD_ATTRIBUTE_PARAM, [''])[0]
        self.lang = self.parameters.get(self.LANG_PARAM, '') or ''
        # Downloads the nltk stopwords corpus at generated-code runtime.
        self.has_import = "import nltk\n" \
                          "nltk.download('stopwords')\n" \
                          "from nltk.corpus import stopwords\n"