Ejemplo n.º 1
0
    def __parse_param_callables(self, params, obj_dict):
        '''
        Walks a (possibly nested) parameter dictionary and records
        every callable it meets, whether it appears as a key or as
        a value. Each callable is stored in obj_dict under its
        str() representation, so the string form can be mapped back
        to the real object once the configuration has been converted
        to text. Nested dictionaries found as values are walked
        recursively.

        Anything that is not a dictionary is ignored.

        Args:
            params:                     Sub-dict of parameters, including numbers, strings, and callables
            obj_dict:                   Reference dictionary, updated in place, mapping str(callable) -> callable

        Returns:
            None. Results are accumulated in obj_dict.
        '''
        #Only dictionaries are traversed
        if not isinstance(params, dict):
            return

        for key, value in params.items():
            #A callable can sit on either side of the pair; key is recorded first
            for candidate in (key, value):
                if is_callable(candidate):
                    obj_dict[str(candidate)] = candidate

            #Descend into nested parameter dictionaries
            if isinstance(value, dict):
                self.__parse_param_callables(value, obj_dict)
Ejemplo n.º 2
0
	def validate_config(self):
		'''
		Validates configuration to ensure that it is of an 
		acceptable structure. The configuration MUST be
		represented in a python dictionary. Each operation 
		must also be either None, or a sub-dictionary.

		'''

		if self.config == {} or None:
			raise AttributeError("The DagLayer you provided is empty. Please check your inputs.")

		#Must be a dictionary
		if not isinstance(self.config, dict):
			raise AttributeError("Layer configuration object must be a dictionary. Please check your inputs.")

		#For each key, the value must be None or a dictionary
		for key in self.config:

			#Check information for operator families
			if not isinstance(key, str) and not isinstance(key, tuple):
				raise AttributeError('''All keys of a DagLayer configuration must be strings or tuples.
You must tag every operation or operator family that you intend to use.
\nPlease examine key: {} with value {}'''\
				.format(key, self.config[key]))
			if (self.config[key] is not None and
				not isinstance(self.config[key], dict)):
					raise AttributeError('''\nValues in layer configuration key-value pairs must be one of the following:
- None: No additional arguments, a pass-through
- Dict: Argument dictionary\n\nPlease check inputs for values associated with key =  {}.
A value of type {} was recieved instead of a dictionary'''.format(key, type(self.config[key])))

			if self.config[key] is not None:
				#Check information for operators and their parameter sets
				for op_key in self.config[key]:
					if (self.config[key][op_key] is not None and
						not isinstance(self.config[key][op_key], dict)):
							raise AttributeError('''Parameters for your operators must be represented in a dictionary format.
None can be also provided if there are no parameters.
\nPlease check your inputs for operator family {} with operator {} for further insight.
The parameter was of type {} instead of a dictionary'''.format(key, op_key, type(self.config[key][op_key])))

					if not is_callable(op_key):
							raise AttributeError("""All keys in operator family dictionary must be callables (function, object, class, etc.).
\nAn operator key within the {} operator_family with value(s) = {} is not a callable.
The value was found to be: {}"""\
.format(key, self.config[key][op_key], op_key))
Ejemplo n.º 3
0
	def __register_model(self, family, model):
		'''
		EXPERIMENTAL: Registers a model family on the DAG so that
		evaluation tasks for Machine Learning models can be
		generated from it later.

		When model is a callable (as judged by is_callable), the
		entry self.dag.models[family] is set to False, marking the
		family as not evaluated yet. Non-callable models are not
		registered.

		Args:
			family:					Model family, used as the key in self.dag.models
			model:					Model object

		Returns:
			model:					Model object, unchanged
		'''

		#Nothing to register for non-callables; hand the object straight back
		if not is_callable(model):
			return model

		#False until the model has been evaluated
		self.dag.models[family] = False
		return model
Ejemplo n.º 4
0
	def __route(self, parent = 'init', 
						family = 'init', 
						family_upstream_task = 'init',
						op = 'init', 
						params = 'init',
						inherits = False,
						conditional_mapping = None,
						split = None):
		'''
		Builds the operator routing table, self.op_router, from the
		arguments of this call.

		The table maps a routing key ('splitting', 'data_sources',
		'preprocessing', 'evaluation', 'modeling', 'feature_engineering',
		'merge_layer', 'merge_metrics', 'model_data_split') to a
		dictionary holding:
			'operator':					Operation function, or for 'modeling' a list of (name, function) pairs
			'args':						Arguments assembled from this call's parameters
			'task_tag':					List of components used to tag the task
			'holistic':					(some entries only) Dictionary under 'pre' or 'post' whose first-level
										key matches another entry of this table (marked #Parent in the code)
			'arg_xcom_update':			('modeling' only) List of argument names

		The whole table is rebuilt and re-assigned on every call, so
		self.__get_merge_ids is invoked for the merge entries no
		matter which entry is needed.

		NOTE(review): only the construction of self.op_router is
		visible here; how the table is consumed afterwards, and what
		the method returns, should be confirmed against the rest of
		the file.

		Args:
			parent:						Forwarded to self.__get_merge_ids for the merge_layer and merge_metrics entries
			family:						Used in task tags and as 'filepath' of the data_sources entry
			family_upstream_task:		Used as 'column_data_id' of the feature_engineering entry
			op:							Callable or string. Passed as 'func' (as 'model' for modeling)
			params:						Passed through as 'params' in every entry
			inherits:					Passed through as 'inherits' of the feature_engineering entry
			conditional_mapping:		Used as 'model_id' (evaluation) and 'model' (merge_metrics), in task tags,
										and forwarded to self.__get_merge_ids
			split:						Passed through as 'split', used in task tags, and forwarded to self.__get_merge_ids
		'''
		
		#Holistic or custom operators may come in as strings
		#A callable is tagged by its __name__; anything else is used as given
		op_name = op.__name__ if is_callable(op) else op

		#Operator router
		#TODO: Find a better place to put this
		self.op_router = \
			{'splitting': 
							{'operator': split_operation, 
							'args': {'func': op,
									'params': params},
							 'task_tag': [family, op_name]},
             'data_sources': 
             				{'operator':read_data_operation, 
             				'args': {'func': op, 
             						'params': params,
             						'filepath': family},
             				'task_tag': [family, 
             							op_name]},
             'preprocessing': 
             				{'operator':bulk_data_operation, 
             				'args': {'func': op,
             						'split': split,
             						'params': params},
             				'task_tag':[family, split, op_name]},
             'evaluation': 
             				{'operator':evaluation_operation, 
             				'args': {'func': op,
             						 'params': params,
             						 #Figure out model id generation for eval tasks
             						 'model_id': conditional_mapping},
             				'holistic': {
             							"post":
             								{'merge_metrics': #Parent
             								{'merge_metrics': {}}}},
             				'task_tag': [conditional_mapping, family]},
             # Will wait to do any EDA design patterns
             # 'eda': 
             # 				{'operator':bulk_data_operation, 
             # 				'args': {'func'},
             # 				'task_tag':[]},

             'modeling': 
             				{'operator':[('fit',fit_operation), 
             							 ('predict',predict_operation)], 

             				#Registers model for evaluation functions later
             				'args': {'model': op, 
             						'params': params},
             				'holistic': {'pre':
             								{'model_data_split':
             									{'model_data_split': {}}}},
             				'arg_xcom_update': ['model'],
             				'task_tag':[family]},

             'feature_engineering': 
             				#Airflows op_converter needs to be determined
             				{'operator': col_data_operation, 
             				'args': {'func': op,
             						 'params': params,
             						 'split': split,
             						 'inherits': inherits,
             						 'column_data_id': family_upstream_task},
             				'holistic': {"post":
             								{'merge_layer': #Parent
             								{'merge_cols': {}}}},
             				'task_tag': [family, split, op_name]},

             #HOLISTIC LAYER OPERATIONS START HERE
             'merge_layer': 
             				{'operator': merge_data_operation, 
             				'args': {'params': params,
             				'merge_ids': self.__get_merge_ids('head',parent, conditional_mapping, split),
             				'pass_through_cols': self.__get_merge_ids('pass_through_cols',
             															parent, conditional_mapping, split),
             				'split': split},
             				'task_tag': [self.tag, split, 'merge_layer']},

             'merge_metrics': 
             				{'operator': merge_metrics_operation, 
             				'args': {'params': params,
             				'merge_ids': self.__get_merge_ids('head', parent, conditional_mapping, split),
             				'model': conditional_mapping},
             				'task_tag': [self.tag, split, conditional_mapping, 'merge_metrics']},

             'model_data_split': 
             				{'operator': model_split_operation, 
             				'args': {'params': params},
             				'task_tag': ['model_data_split']}
		}