def __init__(self, cluster, api=None):
    """Build the local cluster from a cluster resource or a cluster id.

    Args:
        cluster: either a dict with a non-None 'resource' key, which is
            used as given, or any other value, from which
            `get_cluster_id` extracts the id used to retrieve the
            resource through `api`.
        api: connection used for the retrieval. When it is None and a
            retrieval is needed, `BigML(storage=STORAGE)` is created.

    Raises:
        Exception: if no cluster id can be extracted, if the resource
            has no 'clusters' dict, if its status code is not FINISHED,
            or if some field id in 'scales' is not in `self.fields`.
    """
    if not (isinstance(cluster, dict) and 'resource' in cluster and
            cluster['resource'] is not None):
        if api is None:
            api = BigML(storage=STORAGE)
        self.resource_id = get_cluster_id(cluster)
        if self.resource_id is None:
            raise Exception(
                api.error_message(cluster, resource_type='cluster',
                                  method='get'))
        cluster = retrieve_resource(api, self.resource_id,
                                    query_string=ONLY_MODEL)
    else:
        # The resource was handed in directly: record its id as well so
        # that `resource_id` exists whatever the construction path.
        self.resource_id = get_cluster_id(cluster)

    # Unwrap the resource when it comes inside an 'object' envelope.
    if 'object' in cluster and isinstance(cluster['object'], dict):
        cluster = cluster['object']

    if 'clusters' in cluster and isinstance(cluster['clusters'], dict):
        status = get_status(cluster)
        if 'code' in status and status['code'] == FINISHED:
            clusters = cluster['clusters']['clusters']
            self.centroids = [Centroid(centroid) for centroid in clusters]
            self.scales = {}
            self.scales.update(cluster['scales'])
            self.term_forms = {}
            self.tag_clouds = {}
            self.term_analysis = {}
            fields = cluster['clusters']['fields']
            # Text fields carry extra per-field information that is
            # copied into dicts keyed by field id.
            for field_id, field in fields.items():
                if field['optype'] == 'text':
                    self.term_forms[field_id] = {}
                    self.term_forms[field_id].update(
                        field['summary']['term_forms'])
                    self.tag_clouds[field_id] = {}
                    self.tag_clouds[field_id].update(
                        field['summary']['tag_cloud'])
                    self.term_analysis[field_id] = {}
                    self.term_analysis[field_id].update(
                        field['term_analysis'])
            ModelFields.__init__(self, fields)
            # Every scaled field must be available in `self.fields`
            # (presumably populated by ModelFields.__init__).
            if not all(field_id in self.fields
                       for field_id in self.scales):
                raise Exception("Some fields are missing"
                                " to generate a local cluster."
                                " Please, provide a cluster with"
                                " the complete list of fields.")
        else:
            raise Exception("The cluster isn't finished yet")
    else:
        raise Exception("Cannot create the Cluster instance. Could not"
                        " find the 'clusters' key in the resource:\n\n%s" %
                        cluster)
def __init__(self, cluster, api=None):
    """Create a local cluster out of a resource dict or a cluster id.

    Args:
        cluster: a dict holding a non-None 'resource' key is taken as
            the resource itself; anything else is handed to
            `get_cluster_id` and the resource is retrieved with `api`.
        api: connection used to retrieve the resource; defaults to
            `BigML(storage=STORAGE)` when a retrieval is needed.

    Raises:
        Exception: when the cluster id cannot be obtained, when the
            resource has no 'clusters' dict, when the status code is
            not FINISHED, or when a field id in 'scales' is not in
            `self.fields`.
    """
    has_resource = (isinstance(cluster, dict) and
                    'resource' in cluster and
                    cluster['resource'] is not None)
    if has_resource:
        # Keep the id even when no retrieval happens, so the attribute
        # is defined for instances built from a resource dict too.
        self.resource_id = get_cluster_id(cluster)
    else:
        if api is None:
            api = BigML(storage=STORAGE)
        self.resource_id = get_cluster_id(cluster)
        if self.resource_id is None:
            raise Exception(api.error_message(cluster,
                                              resource_type='cluster',
                                              method='get'))
        query_string = ONLY_MODEL
        cluster = retrieve_resource(api, self.resource_id,
                                    query_string=query_string)

    # The resource contents may be nested under the 'object' key.
    if 'object' in cluster and isinstance(cluster['object'], dict):
        cluster = cluster['object']

    if not ('clusters' in cluster and
            isinstance(cluster['clusters'], dict)):
        raise Exception("Cannot create the Cluster instance. Could not"
                        " find the 'clusters' key in the resource:\n\n%s" %
                        cluster)

    status = get_status(cluster)
    if not ('code' in status and status['code'] == FINISHED):
        raise Exception("The cluster isn't finished yet")

    model_info = cluster['clusters']
    self.centroids = [Centroid(centroid)
                      for centroid in model_info['clusters']]
    self.scales = {}
    self.scales.update(cluster['scales'])
    self.term_forms = {}
    self.tag_clouds = {}
    self.term_analysis = {}

    fields = model_info['fields']
    # Only text fields contribute term forms, tag clouds and term
    # analysis settings; each is copied under its field id.
    for field_id, field in fields.items():
        if field['optype'] != 'text':
            continue
        summary = field['summary']
        self.term_forms[field_id] = {}
        self.term_forms[field_id].update(summary['term_forms'])
        self.tag_clouds[field_id] = {}
        self.tag_clouds[field_id].update(summary['tag_cloud'])
        self.term_analysis[field_id] = {}
        self.term_analysis[field_id].update(field['term_analysis'])

    ModelFields.__init__(self, fields)
    # All the scaled fields have to be known to the local cluster.
    if not all(field_id in self.fields for field_id in self.scales):
        raise Exception("Some fields are missing"
                        " to generate a local cluster."
                        " Please, provide a cluster with"
                        " the complete list of fields.")
def __init__(self, cluster, api=None, cache_get=None):
    """Build the local cluster from a cache, a resource dict or an id.

    Args:
        cluster: value handed to `get_resource_dict` (or to
            `get_cluster_id` when the cache is used) to obtain the
            cluster resource.
        api: value passed to `get_api_connection`; the result is kept
            in `self.api` and used to obtain the resource.
        cache_get: passed to `use_cache`; when that call is truthy the
            instance attributes are restored with
            `load(get_cluster_id(cluster), cache_get)` and nothing
            else is retrieved.

    Raises:
        Exception: if the resource has no 'clusters' dict, if its
            status code is not FINISHED, or if a field id in 'scales'
            is not in `self.fields`.
    """
    self.api = get_api_connection(api)
    self.centroids = None
    if use_cache(cache_get):
        # using a cache to store the cluster attributes
        # (this replaces every instance attribute, `api` included)
        self.__dict__ = load(get_cluster_id(cluster), cache_get)
        # The loaded centroids are wrapped again in `Centroid`.
        for index, centroid in enumerate(self.centroids):
            self.centroids[index] = Centroid(centroid)
        # NOTE(review): the non-cached path leaves `cluster_global` as
        # None when the resource has no 'global' entry -- confirm that
        # `Centroid` copes with whatever was stored in that case.
        self.cluster_global = Centroid(self.cluster_global)
        return

    self.resource_id = None
    self.cluster_global = None
    self.total_ss = None
    self.within_ss = None
    self.between_ss = None
    self.ratio_ss = None
    self.critical_value = None
    self.input_fields = []
    self.summary_fields = []
    self.default_numeric_value = None
    self.k = None
    self.scales = {}
    self.term_forms = {}
    self.tag_clouds = {}
    self.term_analysis = {}
    self.item_analysis = {}
    self.items = {}
    self.datasets = {}
    self.resource_id, cluster = get_resource_dict(
        cluster, "cluster", api=self.api)

    # Unwrap the resource when it comes inside an 'object' envelope.
    if 'object' in cluster and isinstance(cluster['object'], dict):
        cluster = cluster['object']

    if 'clusters' in cluster and isinstance(cluster['clusters'], dict):
        status = get_status(cluster)
        if 'code' in status and status['code'] == FINISHED:
            self.default_numeric_value = cluster.get(
                "default_numeric_value")
            self.summary_fields = cluster.get("summary_fields", [])
            self.input_fields = cluster.get("input_fields", [])
            self.datasets = cluster.get("cluster_datasets", {})
            the_clusters = cluster['clusters']
            cluster_global = the_clusters.get('global')
            clusters = the_clusters['clusters']
            self.centroids = [Centroid(centroid) for centroid in clusters]
            self.cluster_global = cluster_global
            if cluster_global:
                self.cluster_global = Centroid(cluster_global)
                # "global" has no "name" and "count" then we set them
                self.cluster_global.name = GLOBAL_CLUSTER_LABEL
                self.cluster_global.count = \
                    self.cluster_global.distance['population']
            self.total_ss = the_clusters.get('total_ss')
            self.within_ss = the_clusters.get('within_ss')
            if not self.within_ss:
                # Fall back to adding up the centroids' sum of squares.
                self.within_ss = sum(centroid.distance['sum_squares']
                                     for centroid in self.centroids)
            self.between_ss = the_clusters.get('between_ss')
            self.ratio_ss = the_clusters.get('ratio_ss')
            self.critical_value = cluster.get('critical_value', None)
            self.k = cluster.get('k')
            self.scales.update(cluster['scales'])
            self.term_forms = {}
            self.tag_clouds = {}
            self.term_analysis = {}
            fields = cluster['clusters']['fields']
            # Reuse the list read with a default above, so a resource
            # without 'summary_fields' does not raise KeyError here.
            for field_id in self.summary_fields:
                # clusters retrieved from API will only contain
                # model fields, so the id may be absent already
                fields.pop(field_id, None)
            for field_id, field in fields.items():
                if field['optype'] == 'text':
                    self.term_forms[field_id] = {}
                    self.term_forms[field_id].update(
                        field['summary']['term_forms'])
                    self.tag_clouds[field_id] = {}
                    self.tag_clouds[field_id].update(
                        field['summary']['tag_cloud'])
                    self.term_analysis[field_id] = {}
                    self.term_analysis[field_id].update(
                        field['term_analysis'])
                if field['optype'] == 'items':
                    self.items[field_id] = {}
                    self.items[field_id].update(
                        dict(field['summary']['items']))
                    self.item_analysis[field_id] = {}
                    self.item_analysis[field_id].update(
                        field['item_analysis'])
            missing_tokens = cluster['clusters'].get('missing_tokens')
            ModelFields.__init__(self, fields,
                                 missing_tokens=missing_tokens)
            # Every scaled field must be available in `self.fields`
            # (presumably populated by ModelFields.__init__).
            if not all(field_id in self.fields
                       for field_id in self.scales):
                raise Exception("Some fields are missing"
                                " to generate a local cluster."
                                " Please, provide a cluster with"
                                " the complete list of fields.")
        else:
            raise Exception("The cluster isn't finished yet")
    else:
        raise Exception("Cannot create the Cluster instance. Could not"
                        " find the 'clusters' key in the resource:\n\n%s" %
                        cluster)
def __init__(self, cluster, api=None):
    """Build the local cluster from a cluster resource or a cluster id.

    Args:
        cluster: either a dict with a non-None 'resource' key, which is
            used as given, or any other value, from which
            `get_cluster_id` extracts the id used to retrieve the
            resource through `api`.
        api: connection used for the retrieval. When it is None and a
            retrieval is needed, `BigML(storage=STORAGE)` is created.

    Raises:
        Exception: if no cluster id can be extracted, if the resource
            has no 'clusters' dict, if its status code is not FINISHED,
            or if some field id in 'scales' is not in `self.fields`.
    """
    self.resource_id = None
    self.centroids = None
    self.cluster_global = None
    self.total_ss = None
    self.within_ss = None
    self.between_ss = None
    self.ratio_ss = None
    self.critical_value = None
    self.k = None
    self.scales = {}
    self.term_forms = {}
    self.tag_clouds = {}
    self.term_analysis = {}
    self.item_analysis = {}
    self.items = {}

    if not (isinstance(cluster, dict) and 'resource' in cluster and
            cluster['resource'] is not None):
        if api is None:
            api = BigML(storage=STORAGE)
        self.resource_id = get_cluster_id(cluster)
        if self.resource_id is None:
            raise Exception(api.error_message(cluster,
                                              resource_type='cluster',
                                              method='get'))
        query_string = ONLY_MODEL
        cluster = retrieve_resource(api, self.resource_id,
                                    query_string=query_string)
    else:
        self.resource_id = get_cluster_id(cluster)

    # Unwrap the resource when it comes inside an 'object' envelope.
    if 'object' in cluster and isinstance(cluster['object'], dict):
        cluster = cluster['object']

    if 'clusters' in cluster and isinstance(cluster['clusters'], dict):
        status = get_status(cluster)
        if 'code' in status and status['code'] == FINISHED:
            the_clusters = cluster['clusters']
            cluster_global = the_clusters.get('global')
            clusters = the_clusters['clusters']
            self.centroids = [Centroid(centroid) for centroid in clusters]
            self.cluster_global = cluster_global
            if cluster_global:
                self.cluster_global = Centroid(cluster_global)
                # "global" has no "name" and "count" then we set them
                self.cluster_global.name = GLOBAL_CLUSTER_LABEL
                self.cluster_global.count = \
                    self.cluster_global.distance['population']
            self.total_ss = the_clusters.get('total_ss')
            self.within_ss = the_clusters.get('within_ss')
            if not self.within_ss:
                # Fall back to adding up the centroids' sum of squares.
                self.within_ss = sum(centroid.distance['sum_squares']
                                     for centroid in self.centroids)
            self.between_ss = the_clusters.get('between_ss')
            self.ratio_ss = the_clusters.get('ratio_ss')
            self.critical_value = cluster.get('critical_value', None)
            self.k = cluster.get('k')
            self.scales.update(cluster['scales'])
            fields = the_clusters['fields']
            # Summary fields are dropped from the fields structure.
            # A resource may list ids that are not in 'fields' (or
            # carry no 'summary_fields' at all), so a missing key is
            # ignored instead of raising KeyError.
            for field_id in cluster.get('summary_fields', []):
                fields.pop(field_id, None)
            for field_id, field in fields.items():
                if field['optype'] == 'text':
                    self.term_forms[field_id] = {}
                    self.term_forms[field_id].update(
                        field['summary']['term_forms'])
                    self.tag_clouds[field_id] = {}
                    self.tag_clouds[field_id].update(
                        field['summary']['tag_cloud'])
                    self.term_analysis[field_id] = {}
                    self.term_analysis[field_id].update(
                        field['term_analysis'])
                if field['optype'] == 'items':
                    self.items[field_id] = {}
                    self.items[field_id].update(
                        dict(field['summary']['items']))
                    self.item_analysis[field_id] = {}
                    self.item_analysis[field_id].update(
                        field['item_analysis'])
            ModelFields.__init__(self, fields)
            # Every scaled field must be available in `self.fields`
            # (presumably populated by ModelFields.__init__).
            if not all(field_id in self.fields
                       for field_id in self.scales):
                raise Exception("Some fields are missing"
                                " to generate a local cluster."
                                " Please, provide a cluster with"
                                " the complete list of fields.")
        else:
            raise Exception("The cluster isn't finished yet")
    else:
        raise Exception("Cannot create the Cluster instance. Could not"
                        " find the 'clusters' key in the resource:\n\n%s" %
                        cluster)
def __init__(self, cluster, api=None):
    """Build the local cluster from a cluster resource or a cluster id.

    Args:
        cluster: a dict that passes `check_model_fields` and holds a
            non-None 'resource' key is used as given. A dict failing
            that check is reduced to its id, and any non-resource
            value is resolved with `get_cluster_id` and retrieved
            through `api`.
        api: connection kept in `self.api` and used for the retrieval.
            When it is None and a retrieval is needed,
            `BigML(storage=STORAGE)` is created.

    Raises:
        Exception: if no cluster id can be extracted, if the resource
            has no 'clusters' dict, if its status code is not FINISHED,
            or if some field id in 'scales' is not in `self.fields`.
    """
    self.resource_id = None
    self.centroids = None
    self.cluster_global = None
    self.total_ss = None
    self.within_ss = None
    self.between_ss = None
    self.ratio_ss = None
    self.critical_value = None
    self.default_numeric_value = None
    self.k = None
    self.summary_fields = []
    self.scales = {}
    self.term_forms = {}
    self.tag_clouds = {}
    self.term_analysis = {}
    self.item_analysis = {}
    self.items = {}
    self.datasets = {}
    self.api = api

    # checks whether the information needed for local predictions is in
    # the first argument
    if isinstance(cluster, dict) and not check_model_fields(cluster):
        # if the fields used by the cluster are not
        # available, use only ID to retrieve it again
        cluster = get_cluster_id(cluster)
        self.resource_id = cluster

    if not (isinstance(cluster, dict) and 'resource' in cluster and
            cluster['resource'] is not None):
        if api is None:
            api = BigML(storage=STORAGE)
            self.api = api
        self.resource_id = get_cluster_id(cluster)
        if self.resource_id is None:
            raise Exception(
                api.error_message(cluster, resource_type='cluster',
                                  method='get'))
        query_string = ONLY_MODEL
        cluster = retrieve_resource(api, self.resource_id,
                                    query_string=query_string)
    else:
        self.resource_id = get_cluster_id(cluster)

    # Unwrap the resource when it comes inside an 'object' envelope.
    if 'object' in cluster and isinstance(cluster['object'], dict):
        cluster = cluster['object']

    if 'clusters' in cluster and isinstance(cluster['clusters'], dict):
        status = get_status(cluster)
        if 'code' in status and status['code'] == FINISHED:
            self.default_numeric_value = cluster.get(
                "default_numeric_value")
            self.summary_fields = cluster.get("summary_fields", [])
            self.datasets = cluster.get("cluster_datasets", {})
            the_clusters = cluster['clusters']
            cluster_global = the_clusters.get('global')
            clusters = the_clusters['clusters']
            self.centroids = [Centroid(centroid) for centroid in clusters]
            self.cluster_global = cluster_global
            if cluster_global:
                self.cluster_global = Centroid(cluster_global)
                # "global" has no "name" and "count" then we set them
                self.cluster_global.name = GLOBAL_CLUSTER_LABEL
                self.cluster_global.count = \
                    self.cluster_global.distance['population']
            self.total_ss = the_clusters.get('total_ss')
            self.within_ss = the_clusters.get('within_ss')
            if not self.within_ss:
                # Fall back to adding up the centroids' sum of squares.
                self.within_ss = sum(centroid.distance['sum_squares']
                                     for centroid in self.centroids)
            self.between_ss = the_clusters.get('between_ss')
            self.ratio_ss = the_clusters.get('ratio_ss')
            self.critical_value = cluster.get('critical_value', None)
            self.k = cluster.get('k')
            self.scales.update(cluster['scales'])
            self.term_forms = {}
            self.tag_clouds = {}
            self.term_analysis = {}
            fields = the_clusters['fields']
            # Reuse the list read with a default above, so a resource
            # without 'summary_fields' does not raise KeyError here.
            for field_id in self.summary_fields:
                # clusters retrieved from API will only contain
                # model fields, so the id may be absent already
                fields.pop(field_id, None)
            for field_id, field in fields.items():
                if field['optype'] == 'text':
                    self.term_forms[field_id] = {}
                    self.term_forms[field_id].update(
                        field['summary']['term_forms'])
                    self.tag_clouds[field_id] = {}
                    self.tag_clouds[field_id].update(
                        field['summary']['tag_cloud'])
                    self.term_analysis[field_id] = {}
                    self.term_analysis[field_id].update(
                        field['term_analysis'])
                if field['optype'] == 'items':
                    self.items[field_id] = {}
                    self.items[field_id].update(
                        dict(field['summary']['items']))
                    self.item_analysis[field_id] = {}
                    self.item_analysis[field_id].update(
                        field['item_analysis'])
            ModelFields.__init__(self, fields)
            # Every scaled field must be available in `self.fields`
            # (presumably populated by ModelFields.__init__).
            if not all(field_id in self.fields
                       for field_id in self.scales):
                raise Exception("Some fields are missing"
                                " to generate a local cluster."
                                " Please, provide a cluster with"
                                " the complete list of fields.")
        else:
            raise Exception("The cluster isn't finished yet")
    else:
        raise Exception("Cannot create the Cluster instance. Could not"
                        " find the 'clusters' key in the resource:\n\n%s" %
                        cluster)