def get_fusion_resource(self, fusion):
    """Extracts the fusion resource info.

    The fusion argument can be
      - a path to a local JSON file
      - a fusion id
    Any non-string argument skips the file/id handling below.

    As a side effect, `self.resource_id` is set from `get_fusion_id`
    whenever a string is given.

    Returns the dict loaded from the JSON file, the argument itself if
    it already is a dict, or otherwise the result of `retrieve_resource`
    for `self.resource_id`.

    Raises:
        ValueError: the file could not be parsed as JSON, or its contents
            do not yield a fusion id.
        IOError: the string is neither a readable file nor a fusion id.
        Exception: the string contains 'fusion/' but is not accepted by
            `get_fusion_id`; the message is built by
            `self.api.error_message`.
    """
    # the string can be a path to a JSON file
    if isinstance(fusion, basestring):
        # keep the original string: `fusion` is rebound to the parsed
        # JSON below, but error messages must refer to the path / id
        source = fusion
        try:
            with open(source) as fusion_file:
                fusion = json.load(fusion_file)
        except IOError:
            # if it is not a path, it can be a fusion id
            self.resource_id = get_fusion_id(source)
            if self.resource_id is None:
                if 'fusion/' in source:
                    raise Exception(
                        self.api.error_message(source,
                                               resource_type='fusion',
                                               method='get'))
                raise IOError("Failed to open the expected JSON file"
                              " at %s" % source)
        except ValueError:
            raise ValueError("Failed to interpret %s."
                             " JSON file expected." % source)
        else:
            # validated outside the `try` body so that this more specific
            # error is not swallowed by the `except ValueError` above
            self.resource_id = get_fusion_id(fusion)
            if self.resource_id is None:
                raise ValueError("The JSON file does not seem"
                                 " to contain a valid BigML fusion"
                                 " representation.")

    if not isinstance(fusion, dict):
        fusion = retrieve_resource(self.api, self.resource_id,
                                   no_check_fields=False)
    return fusion
def __init__(self, fusion, api=None, max_models=None, cache_get=None):
    """Builds the local fusion object.

    Arguments:
        fusion: reference to the fusion; it is handed to
            `get_resource_dict` (or to `get_fusion_id` when the cache
            is used).
        api: connection information, handed to `get_api_connection`.
        max_models: when given, the component model ids are grouped in
            `self.models_splits` in chunks of at most this size;
            otherwise a single chunk holds all of them.
        cache_get: cache accessor; when `use_cache` accepts it, all the
            attributes are restored with `load` and nothing else is
            done.

    Raises:
        ValueError: some component model has a resource type that is
            not in LOCAL_SUPERVISED.
    """
    if use_cache(cache_get):
        # using a cache to store the model attributes
        self.__dict__ = load(get_fusion_id(fusion), cache_get)
        self.api = get_api_connection(api)
        return

    # default values for the attributes
    self.resource_id = None
    self.models_ids = None
    self.objective_id = None
    self.distribution = None
    self.models_splits = []
    self.cache_get = None
    self.regression = False
    self.fields = None
    self.class_names = None
    self.importance = {}
    self.api = get_api_connection(api)

    self.resource_id, fusion = get_resource_dict(
        fusion, "fusion", api=self.api)
    # the fusion attributes may be wrapped in an 'object' key
    fusion = fusion.get('object', {}) if 'object' in fusion else fusion

    self.model_ids, self.weights = get_models_weight(fusion['models'])
    # every component type is computed before any of them is checked
    component_types = [get_resource_type(component)
                       for component in self.model_ids]
    for component_type in component_types:
        if component_type not in LOCAL_SUPERVISED:
            raise ValueError("The resource %s has not an allowed"
                             " supervised model type." % component_type)

    self.importance = fusion.get('importance', [])
    self.missing_numerics = fusion.get('missing_numerics', True)
    if fusion.get('fusion'):
        self.fields = fusion.get('fusion', {}).get("fields")
        self.objective_id = fusion.get("objective_field")
        self.input_fields = fusion.get("input_fields")

    total_models = len(self.model_ids)

    # Instantiating the component models downloads their information,
    # so that it gets cached
    if self.api.storage is not None or cache_get is not None:
        for component_id in self.model_ids:
            if get_resource_type(component_id) == "fusion":
                Fusion(component_id, api=self.api, cache_get=cache_get)
            else:
                SupervisedModel(component_id, api=self.api,
                                cache_get=cache_get)

    if max_models is None:
        self.models_splits = [self.model_ids]
    else:
        self.models_splits = [
            self.model_ids[start:(start + max_models)]
            for start in range(0, total_models, max_models)]

    if self.fields:
        add_distribution(self)
        objective_summary = self.fields[self.objective_id]['summary']
        # the first key available, in this order, holds the distribution
        for summary_key in ('bins', 'counts', 'categories'):
            if summary_key in objective_summary:
                self.distribution = objective_summary[summary_key]
                break
        else:
            self.distribution = []

        objective_optype = self.fields[self.objective_id].get('optype')
        self.regression = objective_optype == NUMERIC
        if not self.regression:
            categories_info = \
                self.fields[self.objective_id]['summary']['categories']
            self.class_names = sorted(
                [info[0] for info in categories_info])
            self.objective_categories = [
                name for name, _ in categories_info]

    ModelFields.__init__(self, self.fields,
                         objective_id=self.objective_id)
def __init__(self, fusion, api=None, max_models=None):
    """Builds the local fusion object.

    Arguments:
        fusion: a path to a local JSON file or a fusion id, as accepted
            by `self.get_fusion_resource`.
        api: connection to use; when None, a `BigML` connection with
            `storage=STORAGE` is created.
        max_models: when given, the component models are grouped in
            `self.models_splits` in chunks of at most this size;
            otherwise a single chunk holds all of them.

    Raises:
        ValueError: some component model has a resource type that is
            not in LOCAL_SUPERVISED.
    """
    if api is None:
        self.api = BigML(storage=STORAGE)
    else:
        self.api = api

    self.resource_id = None
    # NOTE(review): `models_ids` is only initialized here; the rest of
    # this method uses `model_ids`. Kept in case external code reads it.
    self.models_ids = None
    self.objective_id = None
    self.distribution = None
    self.models_splits = []
    self.cache_get = None
    self.regression = False
    self.fields = None
    self.class_names = None
    self.importance = {}

    fusion = self.get_fusion_resource(fusion)
    self.resource_id = get_fusion_id(fusion)

    # the fusion attributes may be wrapped in an 'object' key
    if 'object' in fusion:
        fusion = fusion.get('object', {})
    models = fusion['models']
    self.model_ids = models

    model_types = [get_resource_type(model) for model in models]
    for model_type in model_types:
        if model_type not in LOCAL_SUPERVISED:
            # the offending type is interpolated in the message
            raise ValueError("The resource %s has not an allowed"
                             " supervised model type." % model_type)

    self.importance = fusion.get('importance', [])
    if fusion.get('fusion'):
        self.fields = fusion.get('fusion', {}).get("fields")
        self.objective_id = fusion.get("objective_field")

    number_of_models = len(models)
    if max_models is None:
        self.models_splits = [models]
    else:
        self.models_splits = [
            models[index:(index + max_models)]
            for index in range(0, number_of_models, max_models)]

    if self.fields:
        summary = self.fields[self.objective_id]['summary']
        # the first key available, in this order, holds the distribution
        if 'bins' in summary:
            distribution = summary['bins']
        elif 'counts' in summary:
            distribution = summary['counts']
        elif 'categories' in summary:
            distribution = summary['categories']
        else:
            distribution = []
        self.distribution = distribution

        self.regression = \
            self.fields[self.objective_id].get('optype') == 'numeric'
        if not self.regression:
            objective_field = self.fields[self.objective_id]
            categories = objective_field['summary']['categories']
            self.class_names = sorted(
                [category[0] for category in categories])
            self.objective_categories = [
                category for category, _ in categories]

    ModelFields.__init__(self, self.fields,
                         objective_id=self.objective_id)