def reify_centroid(self, resource_id):
    """ Extracts the REST API arguments from the centroid JSON structure:

    """
    centroid = self.get_resource(resource_id)
    _, origin_id = u.get_origin_info(centroid)
    origin = self.get_resource(origin_id)

    opts = {"create": {}, "update": {}}
    # create options not inherited from the origin resource
    u.non_inherited_opts(centroid, origin, opts)
    # create options that differ from their defaults
    u.non_default_opts(centroid, opts)
    opts['create'].update({'input_data': centroid['input_data']})
    # user-set name (automatic naming alternatives are excluded)
    u.non_automatic_name(centroid, opts)
    # update options that differ from their defaults
    u.non_default_opts(centroid, opts, call="update")

    calls = u.build_calls(resource_id, [origin['resource']], opts)
    self.add(resource_id, calls)
def reify_batchcentroid(self, resource_id):
    """ Extracts the REST API arguments from the batch centroid JSON
        structure: cluster, dataset and args

    """
    batch = self.get_resource(resource_id)
    # batch resources are built from two origin resources
    [(_, cluster_id), (_, dataset_id)] = u.get_origin_info(batch)
    cluster = self.get_resource(cluster_id)
    dataset = self.get_resource(dataset_id)

    opts = {"create": {}, "update": {}}
    # create options shared by every batch resource
    u.common_batch_options(batch, cluster, dataset, opts)
    if batch.get('header', True):
        opts['create'].update(
            u.default_setting(batch, 'distance_name', [None, '']))
    # user-set name (automatic naming alternatives are excluded)
    u.non_automatic_name(batch, opts)

    calls = u.build_calls(
        resource_id, [cluster['resource'], dataset['resource']], opts)
    self.add(resource_id, calls)
def reify_prediction(self, resource_id):
    """ Extracts the REST API arguments from the prediction JSON structure:

    """
    prediction = self.get_resource(resource_id)
    origin_type, origin = u.get_origin_info(prediction)
    if origin_type == 'models':
        # a list of models as origin means the prediction came from
        # an ensemble: recover the ensemble resource instead
        model = self.get_resource(origin[0])
        origin = self.get_resource('ensemble/%s' % model['ensemble_id'])
    else:
        origin = self.get_resource(origin)

    opts = {"create": {}, "update": {}}
    # create options not inherited from the origin resource
    u.non_inherited_opts(prediction, origin, opts)
    # create options that differ from their defaults
    u.non_default_opts(prediction, opts)
    opts['create'].update({'input_data': prediction['input_data']})
    # user-set name (automatic naming alternatives are excluded)
    u.non_automatic_name(prediction, opts)

    calls = u.build_calls(resource_id, [origin['resource']], opts)
    self.add(resource_id, calls)
def reify_cluster(self, resource_id):
    """Extracts the REST API arguments from the cluster JSON structure

    """
    cluster = self.get_resource(resource_id)
    _, dataset_id = u.get_origin_info(cluster)
    dataset = self.get_resource(dataset_id)

    opts = {"create": {}, "update": {}}
    # options shared by every model-like resource
    u.common_model_opts(cluster, dataset, opts)
    # k is only sent explicitly when no critical_value is used
    if cluster.get('critical_value') is None and 'k' in cluster:
        opts['create'].update({"k": cluster['k']})
    # user-set name: every automatically generated alternative is excluded
    autonames = [u'',
                 u'%s\'s cluster' % dataset.get('name', ''),
                 u'%s\' cluster' % dataset.get('name', ''),
                 u'%s\ cluster' % dataset.get('name', '')]
    u.non_automatic_name(cluster, opts, autonames=autonames)

    calls = u.build_calls(resource_id, [dataset_id], opts)
    self.add(resource_id, calls)
def reify_batchanomalyscore(self, resource_id):
    """ Extracts the REST API arguments from the batch anomaly score
        JSON structure: anomaly detector, dataset and args

    """
    batch = self.get_resource(resource_id)
    # batch resources are built from two origin resources
    [(_, anomaly_id), (_, dataset_id)] = u.get_origin_info(batch)
    anomaly = self.get_resource(anomaly_id)
    dataset = self.get_resource(dataset_id)

    opts = {"create": {}, "update": {}}
    # create options shared by every batch resource
    u.common_batch_options(batch, anomaly, dataset, opts)
    # user-set name (automatic naming alternatives are excluded)
    u.non_automatic_name(batch, opts)

    calls = u.build_calls(
        resource_id, [anomaly['resource'], dataset['resource']], opts)
    self.add(resource_id, calls)
def reify_batchprediction(self, resource_id):
    """ Extracts the REST API arguments from the batch prediction JSON
        structure: model/ensemble, dataset and args

    """
    batch = self.get_resource(resource_id)
    # batch predictions are built from two origin resources
    [(_, model_id), (_, dataset_id)] = u.get_origin_info(batch)
    model = self.get_resource(model_id)
    dataset = self.get_resource(dataset_id)

    opts = {"create": {}, "update": {}}
    # create options shared by every batch resource
    u.common_batch_options(batch, model, dataset, opts)
    if batch.get('header', True):
        opts['create'].update(
            u.default_setting(batch, 'prediction_name', [None, '']))
        opts['create'].update(
            u.default_setting(batch, 'centroid_name', [None, '']))
    # user-set name (the automatic naming alternative is excluded)
    u.non_automatic_name(
        batch, opts,
        autoname=u'Batch Prediction of %s with %s' %
        (model.get('name', ''), dataset.get('name', '')))

    calls = u.build_calls(
        resource_id, [model['resource'], dataset['resource']], opts)
    self.add(resource_id, calls)
def reify_anomaly(self, resource_id):
    """Extracts the REST API arguments from the anomaly JSON structure

    """
    anomaly = self.get_resource(resource_id)
    _, dataset_id = u.get_origin_info(anomaly)
    dataset = self.get_resource(dataset_id)

    opts = {"create": {}, "update": {}}
    # options shared by every model-like resource
    u.common_model_opts(anomaly, dataset, opts)
    # user-set name: every automatically generated alternative is excluded
    autonames = [u'',
                 u'%s\'s anomaly detector' % dataset.get('name', ''),
                 u'%s\' anomaly detector' % dataset.get('name', ''),
                 u'%s\ anomaly detector' % dataset.get('name', '')]
    u.non_automatic_name(anomaly, opts, autonames=autonames)

    calls = u.build_calls(resource_id, [dataset_id], opts)
    self.add(resource_id, calls)
def reify_prediction(self, resource_id):
    """ Extracts the REST API arguments from the prediction JSON structure:

    """
    child = self.get_resource(resource_id)
    origin_kind, source = u.get_origin_info(child)
    # predictions listing models as origin come from an ensemble
    if origin_kind == 'models':
        first_model = self.get_resource(source[0])
        source = self.get_resource(
            'ensemble/%s' % first_model['ensemble_id'])
    else:
        source = self.get_resource(source)

    opts = {"create": {}, "update": {}}
    # create options not inherited from the origin resource
    u.non_inherited_opts(child, source, opts)
    # create options that differ from their defaults
    u.non_default_opts(child, opts)
    opts['create'].update({'input_data': child['input_data']})
    # user-set name (automatic naming alternatives are excluded)
    u.non_automatic_name(child, opts)

    calls = u.build_calls(resource_id, [source['resource']], opts)
    self.add(resource_id, calls)
def reify_centroid(self, resource_id):
    """ Extracts the REST API arguments from the centroid JSON structure:

    """
    child = self.get_resource(resource_id)
    _, origin_id = u.get_origin_info(child)
    origin = self.get_resource(origin_id)

    opts = {"create": {}, "update": {}}
    # create options not inherited from the origin resource
    u.non_inherited_opts(child, origin, opts)
    # create options that differ from their defaults
    u.non_default_opts(child, opts)
    opts['create'].update({'input_data': child['input_data']})
    # user-set name (automatic naming alternatives are excluded)
    u.non_automatic_name(child, opts)
    # update options that differ from their defaults
    u.non_default_opts(child, opts, call="update")

    calls = u.build_calls(resource_id, [origin['resource']], opts)
    self.add(resource_id, calls)
def reify_prediction(self, resource_id):
    """ Extracts the REST API arguments from the prediction JSON structure:

    """
    prediction = self.get_resource(resource_id)
    origin_type, origin = u.get_origin_info(prediction)
    if origin_type == "models":
        # a list of models means the prediction came from an ensemble
        model = self.get_resource(origin[0])
        origin = self.get_resource("ensemble/%s" % model["ensemble_id"])
    else:
        origin = self.get_resource(origin)

    opts = {"create": {}, "update": {}}
    # create options not inherited from the origin resource
    u.non_inherited_opts(prediction, origin, opts)
    # create options that differ from their defaults
    u.non_default_opts(prediction, opts)
    opts["create"].update({"input_data": prediction["input_data"]})
    # user-set name (the automatic naming alternative is excluded)
    u.non_automatic_name(
        prediction, opts,
        autoname=u"Prediction for %s" %
        prediction["objective_field_name"])

    calls = u.build_calls(resource_id, [origin["resource"]], opts)
    self.add(resource_id, calls)
def reify_batchcentroid(self, resource_id):
    """ Extracts the REST API arguments from the batch centroid JSON
        structure: cluster, dataset and args

    """
    child = self.get_resource(resource_id)
    # batch resources are built from two origin resources
    [(_, cluster_id), (_, dataset_id)] = u.get_origin_info(child)
    cluster = self.get_resource(cluster_id)
    dataset = self.get_resource(dataset_id)

    opts = {"create": {}, "update": {}}
    # create options shared by every batch resource
    u.common_batch_options(child, cluster, dataset, opts)
    if child.get('header', True):
        opts['create'].update(
            u.default_setting(child, 'distance_name', [None, '']))
    # user-set name (automatic naming alternatives are excluded)
    u.non_automatic_name(child, opts)

    calls = u.build_calls(
        resource_id, [cluster['resource'], dataset['resource']], opts)
    self.add(resource_id, calls)
def _inspect_model(self, resource_id):
    """Auxiliary function to use model JSON structure to define ensembles
       and models.

       Returns the (parent_id, opts) pair used to rebuild the create and
       update REST calls for the model.
    """
    child = self.get_resource(resource_id)
    origin, parent_id = u.get_origin_info(child)
    parent = self.get_resource(parent_id)

    opts = {"create": {}, "update": {}}
    if origin == 'cluster':
        # as two-steps result from a cluster
        opts['create'].update({"centroid": child['centroid']})
        _, grandparent = u.get_origin_info(parent)
        grandparent = self.get_resource(grandparent)
    else:
        # dataset-based models: the parent dataset holds the defaults.
        # (The original duplicated this branch for 'datasets' and the
        # generic case; both bodies were byte-identical, so they are
        # merged here.)
        grandparent = parent
        if child.get('objective_field') != \
                grandparent.get('objective_field').get('id'):
            opts['create'].update(
                {"objective_field": child.get('objective_field')})

    # the objective field name is automatically added to tags; remove it
    # (guard with a default list so a missing "tags" key cannot raise)
    objective_field_name = child.get('objective_field_name', '')
    if objective_field_name in child.get('tags', []):
        child['tags'].remove(objective_field_name)

    # options common to all model types
    u.common_model_opts(child, grandparent, opts)

    # name, exclude automatic naming alternatives
    autonames = [u'']
    autonames.append(u"%s\'s model" % grandparent.get('name', ''))
    autonames.append(u"%s\' model" % grandparent.get('name', ''))
    autonames.append(u"%s model" % grandparent.get('name', ''))
    autonames.append(
        u"Cluster %s - %s" % (int(child.get('centroid', "0"), base=16),
                              parent['name']))
    u.non_automatic_name(child, opts, autonames=autonames)

    if child.get('randomize'):
        # random forests: drop random_candidates when it matches the API
        # default (floor of the sqrt of the number of input fields)
        default_random_candidates = int(
            math.floor(math.sqrt(len(child['input_fields']))))
        opts['create'].update(
            u.default_setting(
                child, 'random_candidates', [default_random_candidates]))

    return parent_id, opts
def _inspect_model(self, resource_id):
    """Auxiliary function to use model JSON structure to define ensembles
       and models.

       Returns the (parent_id, opts) pair used to rebuild the create and
       update REST calls for the model.
    """
    child = self.get_resource(resource_id)
    origin, parent_id = u.get_origin_info(child)
    parent = self.get_resource(parent_id)

    opts = {"create": {}, "update": {}}
    if origin == 'cluster':
        # as two-steps result from a cluster
        opts['create'].update({"centroid": child['centroid']})
        _, grandparent = u.get_origin_info(parent)
        grandparent = self.get_resource(grandparent)
    else:
        # dataset-based models: the parent dataset holds the defaults.
        # (The original duplicated this branch for 'datasets' and the
        # generic case; both bodies were byte-identical, so they are
        # merged here.)
        grandparent = parent
        if child.get('objective_field') != \
                grandparent.get('objective_field').get('id'):
            opts['create'].update(
                {"objective_field": child.get('objective_field')})

    # the objective field name is automatically added to tags; remove it
    # (guard with a default list so a missing "tags" key cannot raise)
    objective_field_name = child.get('objective_field_name', '')
    if objective_field_name in child.get('tags', []):
        child['tags'].remove(objective_field_name)

    # options common to all model types
    u.common_model_opts(child, grandparent, opts)

    # name, exclude automatic naming alternatives
    autonames = [u'']
    autonames.append(u"%s\'s model" % grandparent.get('name', ''))
    autonames.append(u"%s\' model" % grandparent.get('name', ''))
    autonames.append(u"%s model" % grandparent.get('name', ''))
    autonames.append(
        u"Cluster %s - %s" % (int(child.get('centroid', "0"), base=16),
                              parent['name']))
    u.non_automatic_name(child, opts, autonames=autonames)

    if child.get('randomize'):
        # random forests: drop random_candidates when it matches the API
        # default (floor of the sqrt of the number of input fields)
        default_random_candidates = int(
            math.floor(math.sqrt(len(child['input_fields']))))
        opts['create'].update(
            u.default_setting(
                child, 'random_candidates', [default_random_candidates]))

    return parent_id, opts
def reify_evaluation(self, resource_id):
    """ Extracts the REST API arguments from the evaluation JSON structure:
        model/ensemble, dataset and args

    """
    evaluation = self.get_resource(resource_id)
    # evaluations are built from two origin resources
    [(_, model_id), (_, dataset_id)] = u.get_origin_info(evaluation)
    model = self.get_resource(model_id)
    dataset = self.get_resource(dataset_id)

    opts = {"create": {}, "update": {}}
    # create options not inherited from the model
    u.non_inherited_opts(evaluation, model, opts)
    # create options that differ from their defaults
    u.non_default_opts(evaluation, opts)
    # the identity model/ensemble-to-dataset fields map is the default
    fields = dataset['fields'].keys()
    default_map = dict(zip(fields, fields))
    opts['create'].update(
        u.default_setting(evaluation, 'fields_map', default_map))
    # user-set name (the automatic naming alternative is excluded)
    u.non_automatic_name(
        evaluation, opts,
        autoname=u'Evaluation of %s with %s' %
        (model.get('name', ''), dataset.get('name', '')))
    # only non-default dataset ranges are kept
    if evaluation.get('range', []) not in \
            [[], [1, dataset.get('rows', None)]]:
        opts['create'].update({"range": evaluation['range']})

    calls = u.build_calls(
        resource_id, [model['resource'], dataset['resource']], opts)
    self.add(resource_id, calls)
def reify_anomaly(self, resource_id):
    """Extracts the REST API arguments from the anomaly JSON structure

    """
    anomaly = self.get_resource(resource_id)
    _, dataset_id = u.get_origin_info(anomaly)
    dataset = self.get_resource(dataset_id)

    opts = {"create": {}, "update": {}}
    # options shared by every model-like resource
    u.common_model_opts(anomaly, dataset, opts)
    # user-set name (the empty automatic alternative is excluded)
    u.non_automatic_name(anomaly, opts, autonames=[u''])

    calls = u.build_calls(resource_id, [dataset_id], opts)
    self.add(resource_id, calls)
def _inspect_model(self, resource_id):
    """Auxiliary function to use model JSON structure to define ensembles
       and models.

       Returns the (parent_id, opts) pair used to rebuild the create and
       update REST calls for the model.
    """
    child = self.get_resource(resource_id)
    origin, parent_id = u.get_origin_info(child)
    parent = self.get_resource(parent_id)

    opts = {"create": {}, "update": {}}
    if origin == "cluster":
        # as two-steps result from a cluster
        opts["create"].update({"centroid": child["centroid"]})
        _, grandparent = u.get_origin_info(parent)
        grandparent = self.get_resource(grandparent)
    else:
        # dataset-based models: the parent dataset holds the defaults.
        # (The original duplicated this branch for "datasets" and the
        # generic case; both bodies were byte-identical, so they are
        # merged here.)
        grandparent = parent
        if child.get("objective_field") != \
                grandparent.get("objective_field").get("id"):
            opts["create"].update(
                {"objective_field": child.get("objective_field")})

    # the objective field name is automatically added to tags; remove it
    # (guard with a default list so a missing "tags" key cannot raise)
    objective_field_name = child.get("objective_field_name", "")
    if objective_field_name in child.get("tags", []):
        child["tags"].remove(objective_field_name)

    # options common to all model types
    u.common_model_opts(child, grandparent, opts)

    # name, exclude automatic naming alternatives
    autonames = [u""]
    autonames.append(u"%s model" % grandparent.get("name", ""))
    autonames.append(
        u"Cluster %s - %s" % (int(child.get("centroid", "0"), base=16),
                              parent["name"]))
    u.non_automatic_name(child, opts, autonames=autonames)

    if child.get("randomize"):
        # random forests: drop random_candidates when it matches the API
        # default (floor of the sqrt of the number of input fields)
        default_random_candidates = int(
            math.floor(math.sqrt(len(child["input_fields"]))))
        opts["create"].update(
            u.default_setting(
                child, "random_candidates", [default_random_candidates]))

    return parent_id, opts
def reify_ensemble(self, resource_id):
    """Extracts the REST API arguments from the ensemble JSON structure

    """
    ensemble = self.get_resource(resource_id)
    _, dataset_id = u.get_origin_info(ensemble)
    # reuse the options defined at the level of the first ensemble model
    _, opts = self._inspect_model(ensemble['models'][0])
    # user-set name (the empty automatic alternative is excluded)
    u.non_automatic_name(ensemble, opts, autonames=[u''])
    # the default value for replacement in models is the opposite, so
    # it will be added afterwards
    if 'replacement' in opts['create']:
        del opts['create']['replacement']
    # create options
    u.non_default_opts(ensemble, opts)

    calls = u.build_calls(resource_id, [dataset_id], opts)
    self.add(resource_id, calls)
def reify_cluster(self, resource_id):
    """Extracts the REST API arguments from the cluster JSON structure

    """
    cluster = self.get_resource(resource_id)
    _, dataset_id = u.get_origin_info(cluster)
    dataset = self.get_resource(dataset_id)

    opts = {"create": {}, "update": {}}
    # options shared by every model-like resource
    u.common_model_opts(cluster, dataset, opts)
    # k is only sent explicitly when no critical_value is used
    if cluster.get("critical_value") is None and "k" in cluster:
        opts["create"].update({"k": cluster["k"]})
    # user-set name (the automatic naming alternative is excluded)
    u.non_automatic_name(
        cluster, opts, autoname=u"%s cluster" % dataset.get("name", ""))

    calls = u.build_calls(resource_id, [dataset_id], opts)
    self.add(resource_id, calls)
def reify_anomalyscore(self, resource_id):
    """ Extracts the REST API arguments from the anomaly score JSON
        structure:

    """
    score = self.get_resource(resource_id)
    _, anomaly_id = u.get_origin_info(score)
    anomaly = self.get_resource(anomaly_id)

    opts = {"create": {}, "update": {}}
    # create options not inherited from the anomaly detector
    u.non_inherited_opts(score, anomaly, opts)
    opts["create"].update({"input_data": score["input_data"]})
    # user-set name (the automatic naming alternative is excluded)
    u.non_automatic_name(
        score, opts, autoname=u"Score for %s" % anomaly["name"])

    calls = u.build_calls(resource_id, [anomaly["resource"]], opts)
    self.add(resource_id, calls)
def reify_cluster(self, resource_id):
    """Extracts the REST API arguments from the cluster JSON structure

    """
    cluster = self.get_resource(resource_id)
    _, dataset_id = u.get_origin_info(cluster)
    dataset = self.get_resource(dataset_id)

    opts = {"create": {}, "update": {}}
    # options shared by every model-like resource
    u.common_model_opts(cluster, dataset, opts)
    # k is only sent explicitly when no critical_value is used
    if cluster.get('critical_value') is None and 'k' in cluster:
        opts['create'].update({"k": cluster['k']})
    # user-set name (the empty automatic alternative is excluded)
    u.non_automatic_name(cluster, opts, autonames=[u''])

    calls = u.build_calls(resource_id, [dataset_id], opts)
    self.add(resource_id, calls)
def reify_evaluation(self, resource_id):
    """ Extracts the REST API arguments from the evaluation JSON structure:
        model/ensemble, dataset and args

    """
    evaluation = self.get_resource(resource_id)
    # evaluations are built from two origin resources
    [(_, model_id), (_, dataset_id)] = u.get_origin_info(evaluation)
    model = self.get_resource(model_id)
    dataset = self.get_resource(dataset_id)

    opts = {"create": {}, "update": {}}
    # create options not inherited from the model
    u.non_inherited_opts(evaluation, model, opts)
    # create options that differ from their defaults
    u.non_default_opts(evaluation, opts)
    u.fields_map_options(evaluation, model, dataset, opts, call="create")
    # user-set name (automatic naming alternatives are excluded)
    u.non_automatic_name(evaluation, opts)
    # only non-default dataset ranges are kept
    if evaluation.get('range', []) not in \
            [[], None, [1, dataset.get('rows', None)]]:
        opts['create'].update({"range": evaluation['range']})

    calls = u.build_calls(
        resource_id, [model['resource'], dataset['resource']], opts)
    self.add(resource_id, calls)
def reify_dataset(self, resource_id):
    """Extracts the REST API arguments from the dataset JSON structure

    """
    child = self.get_resource(resource_id)
    origin, parent_id = u.get_origin_info(child)
    parent = self.get_resource(parent_id)

    opts = {"create": {}, "update": {}}
    # as two-steps result from a cluster or batch prediction, centroid
    # or anomaly score
    if origin in ['origin_batch_resource', 'cluster']:
        if origin == "cluster":
            opts['create'].update({"centroid": child['centroid']})
        _, grandparent = u.get_origin_info(parent)
        grandparent = self.get_resource(grandparent)
    else:
        grandparent = parent

    # options common to all model types
    u.common_dataset_opts(child, grandparent, opts)

    # update options: copy the defaults dict before merging, otherwise the
    # .update() call below would mutate the shared module-level DEFAULTS
    # structure and corrupt later calls
    dataset_defaults = dict(DEFAULTS["dataset"].get("update", {}))
    dataset_defaults.update(COMMON_DEFAULTS.get("update", {}))
    for attribute, default_value in dataset_defaults.items():
        opts["update"].update(
            u.default_setting(child, attribute, *default_value))

    # name, exclude automatic naming alternatives
    autonames = [u'']
    suffixes = [u"filtered", u"sampled", u"dataset", u"extended",
                u"- batchprediction", u"- batchanomalyscore",
                u"- batchcentroid", u"- merged"]
    autonames.extend([u'%s %s' % (grandparent.get('name', ''), suffix)
                      for suffix in suffixes])
    autonames.append(
        u"%s's dataset" % '.'.join(parent['name'].split('.')[0:-1]))
    autonames.append(
        u"%s' dataset" % '.'.join(parent['name'].split('.')[0:-1]))
    autonames.append(
        u"Cluster %s - %s" % (int(child.get('centroid', "0"), base=16),
                              parent['name']))
    autonames.append(u"Dataset from %s model - segment" % parent['name'])
    u.non_automatic_name(child, opts, autonames=autonames)

    # objective field: only sent when it is not the rightmost preferred
    # field (the API default)
    resource_fields = Fields(
        {'resource': child['resource'], 'object': child})
    objective_id = child['objective_field']['id']
    preferred_fields = resource_fields.preferred_fields()
    max_column = sorted(
        [field['column_number']
         for _, field in preferred_fields.items()],
        reverse=True)[0]
    objective_column = resource_fields.fields[objective_id][ \
        'column_number']
    if objective_column != max_column:
        opts['create'].update({"objective_field": {"id": objective_id}})

    # resize
    if (child['size'] != grandparent['size'] and
            get_resource_type(parent) == 'source'):
        opts['create'].update({"size": child['size']})

    # generated fields
    if child.get('new_fields', None):
        new_fields = child['new_fields']
        for new_field in new_fields:
            new_field['field'] = new_field['generator']
            del new_field['generator']
        opts['create'].update({"new_fields": new_fields})

    u.range_opts(child, grandparent, opts)

    calls = u.build_calls(resource_id, [parent_id], opts)
    self.add(resource_id, calls)
def reify_dataset(self, resource_id):
    """Extracts the REST API arguments from the dataset JSON structure

    """
    child = self.get_resource(resource_id)
    origin, parent_id = u.get_origin_info(child)
    parent = self.get_resource(parent_id)

    opts = {"create": {}, "update": {}, "get": {}}
    # as two-steps result from a cluster or batch prediction, centroid
    # or anomaly score
    grandparent = parent
    if origin in ['origin_batch_resource', 'cluster']:
        if origin == "cluster":
            opts['create'].update({"centroid": child['centroid']})
        grandparents = u.get_origin_info(parent)
        # batch resources have two parents, choose the dataset
        if origin == "origin_batch_resource" and \
                isinstance(grandparents, list):
            for gp_origin, grandparent in grandparents:
                if gp_origin == "dataset":
                    break
        else:
            _, grandparent = grandparents
        grandparent = self.get_resource(grandparent)
    # options common to all model types; batch-origin datasets can only
    # receive these options via update, not create
    call = "update" if origin == "origin_batch_resource" else "create"
    u.common_dataset_opts(child, grandparent, opts, call=call)
    # update options: keep only the settings that differ from the defaults
    dataset_defaults = DEFAULTS["dataset"].get("update", {})
    for attribute, default_value in dataset_defaults.items():
        opts["update"].update(
            u.default_setting(child, attribute, *default_value))
    # name, exclude automatic naming alternatives
    autonames = [u'']
    u.non_automatic_name(child, opts, autonames=autonames)
    # objective field
    resource_fields = Fields({
        'resource': child['resource'],
        'object': child
    })
    objective_id = child['objective_field']['id']
    preferred_fields = resource_fields.preferred_fields()
    # if there's no preferred fields, use the fields structure
    if len(preferred_fields.keys()) == 0:
        preferred_fields = resource_fields.fields
    # the objective is only sent when it differs from the rightmost
    # non-text preferred field (the API's implicit default)
    max_column = sorted([
        field['column_number']
        for _, field in preferred_fields.items()
        if field['optype'] != "text"
    ], reverse=True)[0]
    objective_column = resource_fields.fields[objective_id][ \
        'column_number']
    if objective_column != max_column:
        opts['create'].update({"objective_field": {"id": objective_id}})
    if origin != "origin_batch_resource":
        # resize
        if (child['size'] != grandparent['size'] and
                get_resource_type(parent) == 'source'):
            opts['create'].update({"size": child['size']})
        # generated fields
        if child.get('new_fields', None):
            new_fields = child['new_fields']
            for new_field in new_fields:
                new_field['field'] = new_field['generator']
                del new_field['generator']
            opts['create'].update({"new_fields": new_fields})
        u.range_opts(child, grandparent, opts)
    # for batch_predictions, batch_clusters, batch_anomalies generated
    # datasets, attributes cannot be set at creation time, so we
    # must update the resource instead
    suffix = None
    if origin == "origin_batch_resource":
        opts["update"].update(opts["create"])
        opts["create"] = {}
        suffix = "['object']['output_dataset_resource']"
    calls = u.build_calls(resource_id, [parent_id], opts, suffix=suffix)
    self.add(resource_id, calls)
def reify_dataset(self, resource_id):
    """Extracts the REST API arguments from the dataset JSON structure

    """
    child = self.get_resource(resource_id)
    origin, parent_id = u.get_origin_info(child)
    parent = self.get_resource(parent_id)

    opts = {"create": {}, "update": {}}
    # as two-steps result from a cluster or batch prediction, centroid
    # or anomaly score
    if origin in ["origin_batch_resource", "cluster"]:
        if origin == "cluster":
            opts["create"].update({"centroid": child["centroid"]})
        _, grandparent = u.get_origin_info(parent)
        grandparent = self.get_resource(grandparent)
    else:
        grandparent = parent

    # options common to all model types
    u.common_dataset_opts(child, grandparent, opts)

    # update options: copy the defaults dict before merging, otherwise the
    # .update() call below would mutate the shared module-level DEFAULTS
    # structure and corrupt later calls
    dataset_defaults = dict(DEFAULTS["dataset"].get("update", {}))
    dataset_defaults.update(COMMON_DEFAULTS.get("update", {}))
    for attribute, default_value in dataset_defaults.items():
        opts["update"].update(
            u.default_setting(child, attribute, *default_value))

    # name, exclude automatic naming alternatives
    autonames = [u""]
    suffixes = [u"filtered", u"sampled", u"dataset", u"extended",
                u"- batchprediction", u"- batchanomalyscore",
                u"- batchcentroid", u"- merged"]
    autonames.extend([u"%s %s" % (grandparent.get("name", ""), suffix)
                      for suffix in suffixes])
    autonames.append(
        u"%s's dataset" % ".".join(parent["name"].split(".")[0:-1]))
    autonames.append(
        u"%s' dataset" % ".".join(parent["name"].split(".")[0:-1]))
    autonames.append(
        u"Cluster %s - %s" % (int(child.get("centroid", "0"), base=16),
                              parent["name"]))
    autonames.append(u"Dataset from %s model - segment" % parent["name"])
    u.non_automatic_name(child, opts, autonames=autonames)

    # objective field: only sent when it is not the rightmost preferred
    # field (the API default)
    resource_fields = Fields(
        {"resource": child["resource"], "object": child})
    objective_id = child["objective_field"]["id"]
    preferred_fields = resource_fields.preferred_fields()
    max_column = sorted(
        [field["column_number"]
         for _, field in preferred_fields.items()],
        reverse=True)[0]
    objective_column = resource_fields.fields[objective_id]["column_number"]
    if objective_column != max_column:
        opts["create"].update({"objective_field": {"id": objective_id}})

    # resize
    if (child["size"] != grandparent["size"] and
            get_resource_type(parent) == "source"):
        opts["create"].update({"size": child["size"]})

    # generated fields
    if child.get("new_fields", None):
        new_fields = child["new_fields"]
        for new_field in new_fields:
            new_field["field"] = new_field["generator"]
            del new_field["generator"]
        opts["create"].update({"new_fields": new_fields})

    u.range_opts(child, grandparent, opts)

    calls = u.build_calls(resource_id, [parent_id], opts)
    self.add(resource_id, calls)
def reify_dataset(self, resource_id):
    """Extracts the REST API arguments from the dataset JSON structure

    """
    child = self.get_resource(resource_id)
    origin, parent_id = u.get_origin_info(child)
    parent = self.get_resource(parent_id)

    opts = {"create": {}, "update": {}, "get": {}}
    # as two-steps result from a cluster or batch prediction, centroid
    # or anomaly score
    grandparent = parent
    if origin in ['origin_batch_resource', 'cluster']:
        if origin == "cluster":
            opts['create'].update({"centroid": child['centroid']})
        grandparents = u.get_origin_info(parent)
        # batch resources have two parents, choose the dataset
        if origin == "origin_batch_resource" and \
                isinstance(grandparents, list):
            for gp_origin, grandparent in grandparents:
                if gp_origin == "dataset":
                    break
        else:
            _, grandparent = grandparents
        grandparent = self.get_resource(grandparent)
    # options common to all model types; batch-origin datasets can only
    # receive these options via update, not create
    call = "update" if origin == "origin_batch_resource" else "create"
    u.common_dataset_opts(child, grandparent, opts, call=call)
    # update options: keep only the settings that differ from the defaults
    dataset_defaults = DEFAULTS["dataset"].get("update", {})
    for attribute, default_value in dataset_defaults.items():
        opts["update"].update(
            u.default_setting(child, attribute, *default_value))
    # name, exclude automatic naming alternatives
    autonames = [u'']
    u.non_automatic_name(child, opts, autonames=autonames)
    # objective field
    resource_fields = Fields(
        {'resource': child['resource'], 'object': child})
    objective_id = child['objective_field']['id']
    preferred_fields = resource_fields.preferred_fields()
    # if there's no preferred fields, use the fields structure
    if len(preferred_fields.keys()) == 0:
        preferred_fields = resource_fields.fields
    # the objective is only sent when it differs from the rightmost
    # non-text preferred field (the API's implicit default)
    max_column = sorted([field['column_number']
                         for _, field in preferred_fields.items()
                         if field['optype'] != "text"],
                        reverse=True)[0]
    objective_column = resource_fields.fields[objective_id][ \
        'column_number']
    if objective_column != max_column:
        opts['create'].update({"objective_field": {"id": objective_id}})
    if origin != "origin_batch_resource":
        # resize
        if (child['size'] != grandparent['size'] and
                get_resource_type(parent) == 'source'):
            opts['create'].update({"size": child['size']})
        # generated fields
        if child.get('new_fields', None):
            new_fields = child['new_fields']
            for new_field in new_fields:
                new_field['field'] = new_field['generator']
                del new_field['generator']
            opts['create'].update({"new_fields": new_fields})
        u.range_opts(child, grandparent, opts)
    # for batch_predictions, batch_clusters, batch_anomalies generated
    # datasets, attributes cannot be set at creation time, so we
    # must update the resource instead
    suffix = None
    if origin == "origin_batch_resource":
        opts["update"].update(opts["create"])
        opts["create"] = {}
        suffix = "['object']['output_dataset_resource']"
    calls = u.build_calls(resource_id, [parent_id], opts, suffix=suffix)
    self.add(resource_id, calls)