def reify_batchprediction(self, resource_id):
    """Rebuilds the REST API call arguments for a batch prediction:
    the model/ensemble and dataset it was created from, plus its args.
    """
    resource = self.get_resource(resource_id)
    # batch predictions have two different origins as arguments
    [(_, origin_id1), (_, origin_id2)] = u.get_origin_info(resource)
    origin1 = self.get_resource(origin_id1)
    origin2 = self.get_resource(origin_id2)

    opts = {"create": {}, "update": {}}

    # create options shared by every batch resource
    u.common_batch_options(resource, origin1, origin2, opts)

    if resource.get('header', True):
        for attribute in ('prediction_name', 'centroid_name'):
            opts['create'].update(
                u.default_setting(resource, attribute, [None, '']))

    # name, excluding the automatic naming alternatives
    u.non_automatic_name(resource, opts)

    calls = u.build_calls(
        resource_id, [origin1['resource'], origin2['resource']], opts)
    self.add(resource_id, calls)
def reify_batchprediction(self, resource_id):
    """Recovers the arguments of the REST API request that built this
    batch prediction: model/ensemble, dataset and creation args.
    """
    batch = self.get_resource(resource_id)
    # two origin resources are used as creation arguments
    origin_info = u.get_origin_info(batch)
    parents = [self.get_resource(origin) for _, origin in origin_info]
    first_parent, second_parent = parents

    opts = {"create": {}, "update": {}}

    # options shared by all batch resources
    u.common_batch_options(batch, first_parent, second_parent, opts)

    if batch.get('header', True):
        opts['create'].update(
            u.default_setting(batch, 'prediction_name', [None, '']))
        opts['create'].update(
            u.default_setting(batch, 'centroid_name', [None, '']))

    # exclude automatically generated names
    u.non_automatic_name(batch, opts)

    parent_ids = [first_parent['resource'], second_parent['resource']]
    calls = u.build_calls(resource_id, parent_ids, opts)
    self.add(resource_id, calls)
def reify_batchcentroid(self, resource_id):
    """Rebuilds the REST API arguments that created this batch
    centroid: its cluster, its dataset and the remaining args.
    """
    batch = self.get_resource(resource_id)
    # batch resources have two different origins as arguments
    [(_, cluster_id), (_, dataset_id)] = u.get_origin_info(batch)
    cluster = self.get_resource(cluster_id)
    dataset = self.get_resource(dataset_id)

    opts = {"create": {}, "update": {}}

    # create options shared by every batch resource
    u.common_batch_options(batch, cluster, dataset, opts)

    if batch.get('header', True):
        opts['create'].update(
            u.default_setting(batch, 'distance_name', [None, '']))

    # skip the name when it is the automatically generated one
    default_name = u'Batch Centroid of %s with %s' % \
        (cluster.get('name', ''), dataset.get('name', ''))
    u.non_automatic_name(batch, opts, autoname=default_name)

    calls = u.build_calls(
        resource_id, [cluster['resource'], dataset['resource']], opts)
    self.add(resource_id, calls)
def _inspect_model(self, resource_id):
    """Auxiliary function that uses the model JSON structure to collect
    the REST API arguments shared by model and ensemble reification.

    Returns a tuple ``(parent_id, opts)`` where ``opts`` holds the
    "create" and "update" arguments that differ from their defaults.
    """
    child = self.get_resource(resource_id)
    origin, parent_id = u.get_origin_info(child)
    parent = self.get_resource(parent_id)

    opts = {"create": {}, "update": {}}

    if origin == 'cluster':
        # as two-steps result from a cluster: the model was built from
        # the dataset generated for one of its centroids
        opts['create'].update({"centroid": child['centroid']})
        _, grandparent = u.get_origin_info(parent)
        grandparent = self.get_resource(grandparent)
    else:
        # the 'datasets' origin and any other origin were handled with
        # identical duplicated branches; merged into one
        grandparent = parent
        if child.get('objective_field') != \
                grandparent.get('objective_field').get('id'):
            opts['create'].update(
                {"objective_field": child.get('objective_field')})

    # the objective field name is automatically added to tags; guard
    # against a missing/None tags list before membership testing
    objective_field_name = child.get('objective_field_name', '')
    if objective_field_name in (child.get('tags') or []):
        child['tags'].remove(objective_field_name)

    # options common to all model types
    u.common_model_opts(child, grandparent, opts)

    # name, exclude automatic naming alternatives
    grandparent_name = grandparent.get('name', '')
    autonames = [u'',
                 u"%s\'s model" % grandparent_name,
                 u"%s\' model" % grandparent_name,
                 u"%s model" % grandparent_name,
                 u"Cluster %s - %s" % (
                     int(child.get('centroid', "0"), base=16),
                     parent['name'])]
    u.non_automatic_name(child, opts, autonames=autonames)

    if child.get('randomize'):
        # random forests: default candidates are floor(sqrt(#fields))
        default_random_candidates = int(
            math.floor(math.sqrt(len(child['input_fields']))))
        opts['create'].update(
            u.default_setting( \
                child, 'random_candidates', [default_random_candidates]))
    return parent_id, opts
def _inspect_model(self, resource_id):
    """Helper that reads a model JSON structure and collects the REST
    arguments shared by the model and ensemble reification code.
    """
    model = self.get_resource(resource_id)
    origin, parent_id = u.get_origin_info(model)
    parent = self.get_resource(parent_id)
    opts = {"create": {}, "update": {}}
    # as two-steps result from a cluster
    if origin == 'cluster':
        opts['create'].update({"centroid": model['centroid']})
        _, grandparent_id = u.get_origin_info(parent)
        grandparent = self.get_resource(grandparent_id)
    else:
        # the 'datasets' origin and the fallback used the very same
        # statements, so a single branch covers both
        grandparent = parent
        objective = model.get('objective_field')
        if objective != grandparent.get('objective_field').get('id'):
            opts['create'].update({"objective_field": objective})
    # the objective field name is automatically added to tags
    objective_name = model.get('objective_field_name', '')
    if objective_name in model.get('tags'):
        model['tags'].remove(objective_name)
    # options common to all model types
    u.common_model_opts(model, grandparent, opts)
    # name, exclude automatic naming alternatives
    base_name = grandparent.get('name', '')
    autonames = [u'']
    autonames.extend([u"%s\'s model" % base_name,
                      u"%s\' model" % base_name,
                      u"%s model" % base_name])
    autonames.append(
        u"Cluster %s - %s" % (int(model.get('centroid', "0"), base=16),
                              parent['name']))
    u.non_automatic_name(model, opts, autonames=autonames)
    if model.get('randomize') == True:
        # default number of random candidates
        candidates = int(
            math.floor(math.sqrt(len(model['input_fields']))))
        opts['create'].update(
            u.default_setting(model, 'random_candidates', [candidates]))
    return parent_id, opts
def reify_source(self, resource_id):
    """Extracts the REST API arguments from the source JSON structure.

    Registers the create/update calls needed to rebuild the source.
    """
    resource_type = get_resource_type(resource_id)
    child = self.get_resource(resource_id)
    opts = {"create": {}, "update": {}}

    # create options. Work on a shallow copy: DEFAULTS[...] returns the
    # shared module-level dict, and mutating it (the update() below and
    # the "name" reassignment) would pollute the defaults for every
    # later call, with the "name" list growing each time.
    source_defaults = dict(DEFAULTS[resource_type].get("create", {}))
    source_defaults.update(COMMON_DEFAULTS.get("create", {}))
    # special case, sources can be named like uploaded files
    name_as_file = [child.get('file_name')]
    name_as_file.extend(source_defaults["name"])
    source_defaults["name"] = name_as_file

    for attribute, default_value in source_defaults.items():
        opts["create"].update(
            u.default_setting(child, attribute, *default_value))

    # data: a remote URL, an uploaded file, or unknown inline data
    if child.get('remote') is not None:
        data = child['remote']
    elif child.get('file_name') is not None:
        data = child['file_name']
    else:
        data = "UNKNOWN-INLINE-DATA"

    # update options (read-only iteration; no copy needed here)
    update_defaults = DEFAULTS[resource_type].get("update", {})
    for attribute, default_value in update_defaults.items():
        opts["update"].update(
            u.default_setting(child, attribute, *default_value))

    # We add the information for the updatable fields only when
    # requested.
    if self.add_fields:
        opts["update"].update({"fields": u.get_fields_changes(child)})

    calls = u.build_calls(resource_id, [data], opts)
    self.add(resource_id, calls)
def reify_evaluation(self, resource_id):
    """Extracts the REST API arguments from the evaluation JSON
    structure: model/ensemble, dataset and args.
    """
    child = self.get_resource(resource_id)
    # evaluations have 2 different origins as arguments
    [(_, parent1), (_, parent2)] = u.get_origin_info(child)
    parent1 = self.get_resource(parent1)
    parent2 = self.get_resource(parent2)

    opts = {"create": {}, "update": {}}

    # non-inherited create options
    u.non_inherited_opts(child, parent1, opts)

    # non-default create options
    u.non_default_opts(child, opts)

    # model/ensemble to dataset mapping: the identity map is the
    # default, so it is only stored when it differs
    fields = parent2['fields'].keys()
    default_map = dict(zip(fields, fields))
    opts['create'].update(
        u.default_setting(child, 'fields_map', default_map))

    # name, exclude automatic naming alternatives
    u.non_automatic_name(
        child, opts,
        autoname=u'Evaluation of %s with %s' % \
        (parent1.get('name', ''), parent2.get('name', '')))

    # range in dataset: skip when empty or covering the full rows
    if child.get('range', []) not in [[], [1, parent2.get('rows', None)]]:
        opts['create'].update({"range": child['range']})

    calls = u.build_calls(
        resource_id, [parent1['resource'], parent2['resource']], opts)
    self.add(resource_id, calls)
def _inspect_model(self, resource_id):
    """Shared helper: derives the REST creation arguments for models
    and ensembles from the model JSON structure.
    """
    child = self.get_resource(resource_id)
    origin, parent_id = u.get_origin_info(child)
    parent = self.get_resource(parent_id)
    opts = {"create": {}, "update": {}}
    if origin == "cluster":
        # as two-steps result from a cluster
        opts["create"].update({"centroid": child["centroid"]})
        _, grandparent = u.get_origin_info(parent)
        grandparent = self.get_resource(grandparent)
    else:
        # the "datasets" origin and the fallback branch were identical
        grandparent = parent
        parent_objective = grandparent.get("objective_field").get("id")
        if child.get("objective_field") != parent_objective:
            opts["create"].update(
                {"objective_field": child.get("objective_field")})
    # drop the tag that mirrors the objective field name, since it is
    # added automatically
    objective_field_name = child.get("objective_field_name", "")
    if objective_field_name in child.get("tags"):
        child["tags"].remove(objective_field_name)
    # options common to all model types
    u.common_model_opts(child, grandparent, opts)
    # name, exclude automatic naming alternatives
    autonames = [
        u"",
        u"%s model" % grandparent.get("name", ""),
        u"Cluster %s - %s" % (int(child.get("centroid", "0"), base=16),
                              parent["name"]),
    ]
    u.non_automatic_name(child, opts, autonames=autonames)
    if child.get("randomize") == True:
        # default candidates: floor(sqrt(number of input fields))
        default_random_candidates = int(
            math.floor(math.sqrt(len(child["input_fields"]))))
        opts["create"].update(
            u.default_setting(
                child, "random_candidates", [default_random_candidates]))
    return parent_id, opts
def reify_dataset(self, resource_id):
    """Extracts the REST API arguments from the dataset JSON structure

    """
    child = self.get_resource(resource_id)
    origin, parent_id = u.get_origin_info(child)
    parent = self.get_resource(parent_id)

    opts = {"create": {}, "update": {}, "get": {}}

    # as two-steps result from a cluster or batch prediction, centroid
    # or anomaly score
    grandparent = parent
    if origin in ['origin_batch_resource', 'cluster']:
        if origin == "cluster":
            opts['create'].update({"centroid": child['centroid']})
        grandparents = u.get_origin_info(parent)
        # batch resources have two parents, choose the dataset
        # (note: the loop's rebinding of `grandparent` is kept after
        # the break)
        if origin == "origin_batch_resource" and \
                isinstance(grandparents, list):
            for gp_origin, grandparent in grandparents:
                if gp_origin == "dataset":
                    break
        else:
            _, grandparent = grandparents
        grandparent = self.get_resource(grandparent)

    # options common to all model types; batch-generated datasets can
    # only have their attributes set via update (see the end of this
    # method)
    call = "update" if origin == "origin_batch_resource" else "create"
    u.common_dataset_opts(child, grandparent, opts, call=call)

    # update options
    dataset_defaults = DEFAULTS["dataset"].get("update", {})
    for attribute, default_value in dataset_defaults.items():
        opts["update"].update(
            u.default_setting(child, attribute, *default_value))

    # name, exclude automatic naming alternatives
    autonames = [u'']
    u.non_automatic_name(child, opts, autonames=autonames)

    # objective field
    resource_fields = Fields(
        {'resource': child['resource'], 'object': child})
    objective_id = child['objective_field']['id']
    preferred_fields = resource_fields.preferred_fields()
    # if there's no preferred fields, use the fields structure
    if len(preferred_fields.keys()) == 0:
        preferred_fields = resource_fields.fields
    # NOTE(review): the implicit default objective appears to be the
    # highest-numbered non-text column; the explicit objective is only
    # stored when it differs from that — confirm against the API docs
    max_column = sorted([field['column_number']
                         for _, field in preferred_fields.items()
                         if field['optype'] != "text"],
                        reverse=True)[0]
    objective_column = resource_fields.fields[objective_id][ \
        'column_number']
    if objective_column != max_column:
        opts['create'].update({"objective_field": {"id": objective_id}})

    if origin != "origin_batch_resource":
        # resize
        if (child['size'] != grandparent['size'] and
                get_resource_type(parent) == 'source'):
            opts['create'].update({"size": child['size']})
        # generated fields: the API takes them back as "field" entries
        if child.get('new_fields', None):
            new_fields = child['new_fields']
            for new_field in new_fields:
                new_field['field'] = new_field['generator']
                del new_field['generator']
            opts['create'].update({"new_fields": new_fields})
        u.range_opts(child, grandparent, opts)

    # for batch_predictions, batch_clusters, batch_anomalies generated
    # datasets, attributes cannot be set at creation time, so we
    # must update the resource instead
    suffix = None
    if origin == "origin_batch_resource":
        opts["update"].update(opts["create"])
        opts["create"] = {}
        suffix = "['object']['output_dataset_resource']"
    calls = u.build_calls(resource_id, [parent_id], opts, suffix=suffix)
    self.add(resource_id, calls)
def reify_dataset(self, resource_id):
    """Extracts the REST API arguments from the dataset JSON structure

    """
    child = self.get_resource(resource_id)
    origin, parent_id = u.get_origin_info(child)
    parent = self.get_resource(parent_id)

    opts = {"create": {}, "update": {}, "get": {}}

    # as two-steps result from a cluster or batch prediction, centroid
    # or anomaly score
    grandparent = parent
    if origin in ['origin_batch_resource', 'cluster']:
        if origin == "cluster":
            opts['create'].update({"centroid": child['centroid']})
        grandparents = u.get_origin_info(parent)
        # batch resources have two parents, choose the dataset: the
        # loop leaves `grandparent` bound to the matching origin
        if origin == "origin_batch_resource" and \
                isinstance(grandparents, list):
            for gp_origin, grandparent in grandparents:
                if gp_origin == "dataset":
                    break
        else:
            _, grandparent = grandparents
        grandparent = self.get_resource(grandparent)

    # options common to all model types; batch-origin datasets only
    # accept attribute changes through an update call
    call = "update" if origin == "origin_batch_resource" else "create"
    u.common_dataset_opts(child, grandparent, opts, call=call)

    # update options
    dataset_defaults = DEFAULTS["dataset"].get("update", {})
    for attribute, default_value in dataset_defaults.items():
        opts["update"].update(
            u.default_setting(child, attribute, *default_value))

    # name, exclude automatic naming alternatives
    autonames = [u'']
    u.non_automatic_name(child, opts, autonames=autonames)

    # objective field
    resource_fields = Fields(
        {'resource': child['resource'], 'object': child})
    objective_id = child['objective_field']['id']
    preferred_fields = resource_fields.preferred_fields()
    # if there's no preferred fields, use the fields structure
    if len(preferred_fields.keys()) == 0:
        preferred_fields = resource_fields.fields
    # NOTE(review): only stores the objective when it is not the
    # highest-numbered non-text column (presumably the API default) —
    # confirm
    max_column = sorted([field['column_number']
                         for _, field in preferred_fields.items()
                         if field['optype'] != "text"],
                        reverse=True)[0]
    objective_column = resource_fields.fields[objective_id][ \
        'column_number']
    if objective_column != max_column:
        opts['create'].update({"objective_field": {"id": objective_id}})

    if origin != "origin_batch_resource":
        # resize
        if (child['size'] != grandparent['size'] and
                get_resource_type(parent) == 'source'):
            opts['create'].update({"size": child['size']})
        # generated fields: rename "generator" back to "field" for the
        # creation call
        if child.get('new_fields', None):
            new_fields = child['new_fields']
            for new_field in new_fields:
                new_field['field'] = new_field['generator']
                del new_field['generator']
            opts['create'].update({"new_fields": new_fields})
        u.range_opts(child, grandparent, opts)

    # for batch_predictions, batch_clusters, batch_anomalies generated
    # datasets, attributes cannot be set at creation time, so we
    # must update the resource instead
    suffix = None
    if origin == "origin_batch_resource":
        opts["update"].update(opts["create"])
        opts["create"] = {}
        suffix = "['object']['output_dataset_resource']"
    calls = u.build_calls(resource_id, [parent_id], opts, suffix=suffix)
    self.add(resource_id, calls)
def reify_dataset(self, resource_id):
    """Extracts the REST API arguments from the dataset JSON structure.

    Registers the create/update calls needed to rebuild the dataset
    from its parent resource.
    """
    child = self.get_resource(resource_id)
    origin, parent_id = u.get_origin_info(child)
    parent = self.get_resource(parent_id)

    opts = {"create": {}, "update": {}}

    # as two-steps result from a cluster or batch prediction, centroid
    # or anomaly score
    if origin in ["origin_batch_resource", "cluster"]:
        if origin == "cluster":
            opts["create"].update({"centroid": child["centroid"]})
        _, grandparent = u.get_origin_info(parent)
        grandparent = self.get_resource(grandparent)
    else:
        grandparent = parent

    # options common to all model types
    u.common_dataset_opts(child, grandparent, opts)

    # update options. Copy before merging COMMON_DEFAULTS: updating the
    # dict returned by DEFAULTS in place would mutate the shared
    # module-level defaults and leak entries into every later call.
    dataset_defaults = dict(DEFAULTS["dataset"].get("update", {}))
    dataset_defaults.update(COMMON_DEFAULTS.get("update", {}))
    for attribute, default_value in dataset_defaults.items():
        opts["update"].update(
            u.default_setting(child, attribute, *default_value))

    # name, exclude automatic naming alternatives
    autonames = [u""]
    suffixes = [u"filtered", u"sampled", u"dataset", u"extended",
                u"- batchprediction", u"- batchanomalyscore",
                u"- batchcentroid", u"- merged"]
    autonames.extend([u"%s %s" % (grandparent.get("name", ""), suffix)
                      for suffix in suffixes])
    # parent name with its last dot-separated chunk (extension) removed
    parent_stem = ".".join(parent["name"].split(".")[0:-1])
    autonames.append(u"%s's dataset" % parent_stem)
    autonames.append(u"%s' dataset" % parent_stem)
    autonames.append(u"Cluster %s - %s" % (
        int(child.get("centroid", "0"), base=16), parent["name"]))
    autonames.append(u"Dataset from %s model - segment" % parent["name"])
    u.non_automatic_name(child, opts, autonames=autonames)

    # objective field: only stored when it differs from the
    # highest-numbered preferred column
    resource_fields = Fields(
        {"resource": child["resource"], "object": child})
    objective_id = child["objective_field"]["id"]
    preferred_fields = resource_fields.preferred_fields()
    # if there are no preferred fields, fall back to the full fields
    # structure (avoids an IndexError on the sorted() result below)
    if len(preferred_fields.keys()) == 0:
        preferred_fields = resource_fields.fields
    max_column = sorted([field["column_number"]
                         for _, field in preferred_fields.items()],
                        reverse=True)[0]
    objective_column = \
        resource_fields.fields[objective_id]["column_number"]
    if objective_column != max_column:
        opts["create"].update({"objective_field": {"id": objective_id}})

    # resize
    if (child["size"] != grandparent["size"] and
            get_resource_type(parent) == "source"):
        opts["create"].update({"size": child["size"]})

    # generated fields: rename "generator" back to "field" for creation
    if child.get("new_fields", None):
        new_fields = child["new_fields"]
        for new_field in new_fields:
            new_field["field"] = new_field["generator"]
            del new_field["generator"]
        opts["create"].update({"new_fields": new_fields})

    u.range_opts(child, grandparent, opts)

    calls = u.build_calls(resource_id, [parent_id], opts)
    self.add(resource_id, calls)
def reify_dataset(self, resource_id):
    """Extracts the REST API arguments from the dataset JSON structure.

    Adds the REST calls that recreate the dataset from its parent.
    """
    child = self.get_resource(resource_id)
    origin, parent_id = u.get_origin_info(child)
    parent = self.get_resource(parent_id)

    opts = {"create": {}, "update": {}}

    # as two-steps result from a cluster or batch prediction, centroid
    # or anomaly score
    if origin in ['origin_batch_resource', 'cluster']:
        if origin == "cluster":
            opts['create'].update({"centroid": child['centroid']})
        _, grandparent = u.get_origin_info(parent)
        grandparent = self.get_resource(grandparent)
    else:
        grandparent = parent

    # options common to all model types
    u.common_dataset_opts(child, grandparent, opts)

    # update options. A shallow copy is required: DEFAULTS["dataset"]
    # hands back its own dict, so calling update() on it directly would
    # permanently merge COMMON_DEFAULTS into the shared defaults.
    dataset_defaults = dict(DEFAULTS["dataset"].get("update", {}))
    dataset_defaults.update(COMMON_DEFAULTS.get("update", {}))
    for attribute, default_value in dataset_defaults.items():
        opts["update"].update(
            u.default_setting(child, attribute, *default_value))

    # name, exclude automatic naming alternatives
    autonames = [u'']
    suffixes = [u"filtered", u"sampled", u"dataset", u"extended",
                u"- batchprediction", u"- batchanomalyscore",
                u"- batchcentroid", u"- merged"]
    autonames.extend([u'%s %s' % (grandparent.get('name', ''), suffix)
                      for suffix in suffixes])
    # parent name without its last dot-separated chunk
    trimmed_parent_name = '.'.join(parent['name'].split('.')[0:-1])
    autonames.append(u"%s's dataset" % trimmed_parent_name)
    autonames.append(u"%s' dataset" % trimmed_parent_name)
    autonames.append(
        u"Cluster %s - %s" % (int(child.get('centroid', "0"), base=16),
                              parent['name']))
    autonames.append(u"Dataset from %s model - segment" % parent['name'])
    u.non_automatic_name(child, opts, autonames=autonames)

    # objective field: kept only when it is not the highest-numbered
    # preferred column
    resource_fields = Fields(
        {'resource': child['resource'], 'object': child})
    objective_id = child['objective_field']['id']
    preferred_fields = resource_fields.preferred_fields()
    # fall back to the full fields structure when no field is marked
    # preferred, so the sorted() below never indexes an empty list
    if len(preferred_fields.keys()) == 0:
        preferred_fields = resource_fields.fields
    max_column = sorted(
        [field['column_number']
         for _, field in preferred_fields.items()],
        reverse=True)[0]
    objective_column = resource_fields.fields[objective_id][ \
        'column_number']
    if objective_column != max_column:
        opts['create'].update({"objective_field": {"id": objective_id}})

    # resize
    if (child['size'] != grandparent['size'] and
            get_resource_type(parent) == 'source'):
        opts['create'].update({"size": child['size']})

    # generated fields: the creation call expects "field", not
    # "generator"
    if child.get('new_fields', None):
        new_fields = child['new_fields']
        for new_field in new_fields:
            new_field['field'] = new_field['generator']
            del new_field['generator']
        opts['create'].update({"new_fields": new_fields})

    u.range_opts(child, grandparent, opts)

    calls = u.build_calls(resource_id, [parent_id], opts)
    self.add(resource_id, calls)