Example #1
0
 def get_fusion_resource(self, fusion):
     """Return the fusion resource information.

     The fusion argument can be
        - a path to a local JSON file that stores the fusion
        - a fusion id
        - any non-string object (dictionaries are returned untouched)

     For string arguments ``self.resource_id`` is set to whatever
     ``get_fusion_id`` extracts. Whenever the value at hand is not a
     dictionary (for instance, the string was an id), the resource is
     fetched with ``retrieve_resource`` using ``self.api`` and
     ``self.resource_id``.

     Raises:
         ValueError: the file could be opened but its contents are not
             JSON, or ``get_fusion_id`` finds no fusion id in them.
         Exception: the file cannot be opened, ``get_fusion_id`` returns
             None and the string contains ``fusion/``.
         IOError: the file cannot be opened, ``get_fusion_id`` returns
             None and the string does not contain ``fusion/``.
     """
     # NOTE(review): `basestring` is a Python 2 builtin; presumably the
     # module provides an alias when running on Python 3 -- confirm.
     if isinstance(fusion, basestring):
         # Keep the original string: `fusion` is rebound to the parsed
         # JSON below, but error messages must mention the path/id.
         source = fusion
         try:
             # Only opening and parsing live inside the `try`, so that
             # the handlers below cannot swallow unrelated errors.
             with open(source) as fusion_file:
                 fusion = json.load(fusion_file)
         except IOError:
             # if it is not a path, it can be a fusion id
             self.resource_id = get_fusion_id(source)
             if self.resource_id is None:
                 if source.find('fusion/') > -1:
                     raise Exception(
                         self.api.error_message(source,
                                                resource_type='fusion',
                                                method='get'))
                 raise IOError("Failed to open the expected JSON file"
                               " at %s" % source)
         except ValueError:
             # json.load signals malformed content with ValueError
             raise ValueError("Failed to interpret %s."
                              " JSON file expected." % source)
         else:
             # Validated outside the `try` so this specific message is
             # not replaced by the generic "JSON file expected" one.
             self.resource_id = get_fusion_id(fusion)
             if self.resource_id is None:
                 raise ValueError("The JSON file does not seem"
                                  " to contain a valid BigML fusion"
                                  " representation.")
     if not isinstance(fusion, dict):
         fusion = retrieve_resource(self.api, self.resource_id,
                                    no_check_fields=False)
     return fusion
Example #2
0
    def __init__(self, fusion, api=None, max_models=None, cache_get=None):
        """Set up the local fusion, from a cache or from the resource.

        Args:
            fusion: fusion reference; handed to ``get_fusion_id`` on the
                cache path and to ``get_resource_dict`` otherwise.
            api: handed to ``get_api_connection`` to obtain ``self.api``.
            max_models: size of each chunk stored in
                ``self.models_splits``; ``None`` keeps all the component
                model ids in a single chunk.
            cache_get: handed to ``use_cache`` / ``load`` and forwarded
                to the constructors of the component models.

        Raises:
            ValueError: a component model type is not in
                ``LOCAL_SUPERVISED``.
        """
        if use_cache(cache_get):
            # The whole attribute dictionary comes from the cache; only
            # the API connection is set afterwards.
            self.__dict__ = load(get_fusion_id(fusion), cache_get)
            self.api = get_api_connection(api)
            return

        # Defaults for every attribute filled in further down.
        self.resource_id = None
        self.models_ids = None
        self.objective_id = None
        self.distribution = None
        self.models_splits = []
        self.cache_get = None
        self.regression = False
        self.fields = None
        self.class_names = None
        self.importance = {}
        self.api = get_api_connection(api)

        self.resource_id, fusion = get_resource_dict(
            fusion, "fusion", api=self.api)

        # Work on the inner "object" payload when the resource has one.
        if 'object' in fusion:
            fusion = fusion['object']
        self.model_ids, self.weights = get_models_weight(fusion['models'])

        # Every type is resolved before any of them is validated.
        model_types = [get_resource_type(model_id)
                       for model_id in self.model_ids]
        for model_type in model_types:
            if model_type in LOCAL_SUPERVISED:
                continue
            raise ValueError("The resource %s has not an allowed"
                             " supervised model type." % model_type)

        self.importance = fusion.get('importance', [])
        self.missing_numerics = fusion.get('missing_numerics', True)
        fusion_info = fusion.get('fusion')
        if fusion_info:
            self.fields = fusion_info.get("fields")
            self.objective_id = fusion.get("objective_field")
        self.input_fields = fusion.get("input_fields")

        model_count = len(self.model_ids)

        # Downloading the model information to cache it: the component
        # objects are built and discarded.
        if self.api.storage is not None or cache_get is not None:
            for model_id in self.model_ids:
                builder = Fusion if get_resource_type(model_id) == "fusion" \
                    else SupervisedModel
                builder(model_id, api=self.api, cache_get=cache_get)

        if max_models is None:
            self.models_splits = [self.model_ids]
        else:
            self.models_splits = [
                self.model_ids[start:start + max_models]
                for start in range(0, model_count, max_models)]

        if self.fields:
            add_distribution(self)
            summary = self.fields[self.objective_id]['summary']
            # The first summary key found, in this order, wins.
            for key in ('bins', 'counts', 'categories'):
                if key in summary:
                    self.distribution = summary[key]
                    break
            else:
                self.distribution = []

        # NOTE(review): from here on self.fields is subscripted even when
        # it is still None -- confirm that case cannot happen.
        objective_field = self.fields[self.objective_id]
        self.regression = objective_field.get('optype') == NUMERIC

        if not self.regression:
            categories = objective_field['summary']['categories']
            self.class_names = sorted(entry[0] for entry in categories)
            self.objective_categories = [name for name, _ in categories]

        ModelFields.__init__(self, self.fields,
                             objective_id=self.objective_id)
Example #3
0
    def __init__(self, fusion, api=None, max_models=None):
        """Set up the local fusion from a fusion resource.

        Args:
            fusion: fusion reference, resolved through
                ``self.get_fusion_resource``.
            api: connection object stored in ``self.api``; when ``None``
                a ``BigML(storage=STORAGE)`` connection is created.
            max_models: size of each chunk stored in
                ``self.models_splits``; ``None`` keeps all the component
                models in a single chunk.

        Raises:
            ValueError: a component model type is not in
                ``LOCAL_SUPERVISED``.
        """
        if api is None:
            self.api = BigML(storage=STORAGE)
        else:
            self.api = api
        self.resource_id = None
        # NOTE(review): `models_ids` looks like a misspelling of
        # `model_ids` (the attribute set below); kept because external
        # code may read it.
        self.models_ids = None
        self.objective_id = None
        self.distribution = None
        self.models_splits = []
        self.cache_get = None
        self.regression = False
        self.fields = None
        self.class_names = None
        self.importance = {}
        fusion = self.get_fusion_resource(fusion)
        self.resource_id = get_fusion_id(fusion)

        # Work on the inner "object" payload when the resource has one.
        if 'object' in fusion:
            fusion = fusion.get('object', {})
        models = fusion['models']
        self.model_ids = models
        model_types = [get_resource_type(model) for model in models]

        for model_type in model_types:
            if model_type not in LOCAL_SUPERVISED:
                # The offending type is interpolated into the message.
                raise ValueError("The resource %s has not an allowed"
                                 " supervised model type." % model_type)
        self.importance = fusion.get('importance', [])
        if fusion.get('fusion'):
            self.fields = fusion.get('fusion', {}).get("fields")
            self.objective_id = fusion.get("objective_field")

        number_of_models = len(models)
        if max_models is None:
            self.models_splits = [models]
        else:
            self.models_splits = [
                models[index:(index + max_models)]
                for index in range(0, number_of_models, max_models)
            ]

        if self.fields:
            summary = self.fields[self.objective_id]['summary']
            # The first summary key found, in this order, wins.
            if 'bins' in summary:
                distribution = summary['bins']
            elif 'counts' in summary:
                distribution = summary['counts']
            elif 'categories' in summary:
                distribution = summary['categories']
            else:
                distribution = []
            self.distribution = distribution

        # NOTE(review): from here on self.fields is subscripted even when
        # it is still None -- confirm that case cannot happen.
        objective_field = self.fields[self.objective_id]
        self.regression = objective_field.get('optype') == 'numeric'

        if not self.regression:
            categories = objective_field['summary']['categories']
            classes = [category[0] for category in categories]
            self.class_names = sorted(classes)
            self.objective_categories = [
                category for category, _ in categories]

        ModelFields.__init__(
            self, self.fields,
            objective_id=self.objective_id)