def create_execution(self,
                         origin_resource,
                         args=None,
                         wait_time=3,
                         retries=10):
        """Creates an execution from a `script` or a list of `scripts`.

        `origin_resource` can be a single script (an id string or a
        resource dict) or a list of them. Every script is passed to
        `check_resource` (with `wait_time` and `retries`) before the
        creation request is built.

        The request uses the "script" key when a single script is given
        and the "scripts" key when there are several. Entries in `args`
        are sent along with the request.

        Raises an Exception when `origin_resource` is empty, cannot be
        iterated, or contains something that is not a script.

        """

        create_args = {}
        if args is not None:
            create_args.update(args)

        if isinstance(origin_resource, (str, dict)):
            # single script
            scripts = [origin_resource]
        else:
            scripts = origin_resource

        try:
            script_ids = [get_script_id(script) for script in scripts]
        except TypeError as exc:
            raise Exception("A script id or a list of them is needed to create"
                            " a script execution. %s found." %
                            get_resource_type(origin_resource)) from exc

        # An empty list would otherwise pass the type check below vacuously
        # and fail later with a bare IndexError on script_ids[0].
        if not script_ids:
            raise Exception("A script id or a list of them is needed to create"
                            " a script execution. Empty list found.")

        if not all(get_resource_type(script_id) == SCRIPT_PATH
                   for script_id in script_ids):
            raise Exception("A script id or a list of them is needed to create"
                            " a script execution. %s found." %
                            get_resource_type(origin_resource))

        for script in scripts:
            check_resource(script,
                           query_string=TINY_RESOURCE,
                           wait_time=wait_time,
                           retries=retries,
                           raise_on_error=True,
                           api=self)

        if len(script_ids) > 1:
            create_args.update({"scripts": script_ids})
        else:
            create_args.update({"script": script_ids[0]})

        body = json.dumps(create_args)
        return self._create(self.execution_url, body)
Example #2
0
    def create_anomaly_score(self,
                             anomaly,
                             input_data=None,
                             args=None,
                             wait_time=3,
                             retries=10):
        """Creates a new anomaly score for `input_data`.

        `anomaly` must resolve to the anomaly resource type; otherwise an
        Exception is raised. The anomaly detector is passed to
        `check_resource` (using `wait_time` and `retries`) before the
        request is sent. `input_data` defaults to an empty dict and the
        entries of `args` are included in the request body.

        """
        resource_type = get_resource_type(anomaly)
        if resource_type != ANOMALY_PATH:
            raise Exception("An anomaly detector id is needed to create an"
                            " anomaly score. %s found." % resource_type)

        anomaly_id = get_anomaly_id(anomaly)
        check_resource(anomaly_id,
                       query_string=TINY_RESOURCE,
                       wait_time=wait_time,
                       retries=retries,
                       raise_on_error=True,
                       api=self)

        payload = {}
        if args is not None:
            payload.update(args)
        # The mandatory keys are written last so they override `args`.
        payload["input_data"] = {} if input_data is None else input_data
        payload["anomaly"] = anomaly_id

        request_body = json.dumps(payload)
        return self._create(self.anomaly_score_url, request_body,
                            verify=self.verify)
Example #3
0
    def get_fields(self, resource):
        """Retrieve the fields used by a resource.

        `resource` can be a resource dict (with a 'resource' key) or the
        id string of a resource type listed in RESOURCES_WITH_FIELDS. In
        the second case the resource is retrieved first with the
        ALL_FIELDS query string.

        Returns what the module-level `get_fields` extracts from the
        resource, or None (after logging an error) when the argument is
        neither of the accepted forms.

        """
        if isinstance(resource, dict) and 'resource' in resource:
            resource_id = resource['resource']
        else:
            is_valid_id = (isinstance(resource, str) and
                           get_resource_type(resource) in RESOURCES_WITH_FIELDS)
            if not is_valid_id:
                LOGGER.error("Wrong resource id")
                return None
            resource_id = resource
            resource = self.retrieve_resource(resource_id,
                                              query_string=ALL_FIELDS)

        # First try the structure at hand; if the expected keys are missing,
        # fetch the resource remotely by id and try again.
        try:
            return get_fields(resource)
        except KeyError:
            resource = self._get("%s%s" % (self.url, resource_id))
            return get_fields(resource)
    def create_correlation(self, dataset, args=None, wait_time=3, retries=10):
        """Creates a correlation from a `dataset`.

        `dataset` must resolve to the dataset resource type; otherwise an
        Exception is raised. The dataset is passed to `check_resource`
        (using `wait_time` and `retries`) before the request is sent.
        The entries of `args` are included in the request body.

        """
        resource_type = get_resource_type(dataset)
        if resource_type != DATASET_PATH:
            raise Exception("A dataset id is needed to create a"
                            " correlation. %s found." % resource_type)

        dataset_id = get_dataset_id(dataset)
        check_resource(dataset_id,
                       query_string=TINY_RESOURCE,
                       wait_time=wait_time,
                       retries=retries,
                       raise_on_error=True,
                       api=self)

        payload = {}
        if args is not None:
            payload.update(args)
        payload["dataset"] = dataset_id

        request_body = json.dumps(payload)
        return self._create(self.correlation_url, request_body)
Example #5
0
    def create_projection(self, pca, input_data=None,
                          args=None, wait_time=3, retries=10):
        """Creates a new projection.

        The `pca` parameter can be a PCA resource or its id. Anything
        that does not resolve to the PCA resource type raises an
        Exception. When an id can be extracted, it is passed to
        `check_resource` (using `wait_time` and `retries`) and added to
        the request. `input_data` defaults to an empty dict.

        """
        resource_type = get_resource_type(pca)
        if resource_type != PCA_PATH:
            raise Exception("A PCA resource id is needed"
                            " to create a projection. %s found." %
                            resource_type)

        pca_id = get_resource_id(pca)
        has_id = pca_id is not None
        if has_id:
            check_resource(pca_id,
                           query_string=TINY_RESOURCE,
                           wait_time=wait_time, retries=retries,
                           raise_on_error=True, api=self)

        payload = {}
        if args is not None:
            payload.update(args)
        payload["input_data"] = {} if input_data is None else input_data
        if has_id:
            payload["pca"] = pca_id

        request_body = json.dumps(payload)
        return self._create(self.projection_url, request_body,
                            verify=self.verify)
Example #6
0
    def create_topic_distribution(self, topic_model, input_data=None,
                                  args=None, wait_time=3, retries=10):
        """Creates a new topic distribution.

        An Exception is raised when no topic model id can be extracted
        from `topic_model`. Otherwise the id is passed to
        `check_resource` (using `wait_time` and `retries`) before the
        request is sent. `input_data` defaults to an empty dict and the
        entries of `args` are included in the request body.

        """
        topic_model_id = get_topic_model_id(topic_model)
        if topic_model_id is None:
            raise Exception("A topic model id is needed to create a"
                            " topic distribution. %s found." %
                            get_resource_type(topic_model))

        check_resource(topic_model_id,
                       query_string=TINY_RESOURCE,
                       wait_time=wait_time, retries=retries,
                       raise_on_error=True, api=self)

        payload = {}
        if args is not None:
            payload.update(args)
        payload["input_data"] = {} if input_data is None else input_data
        payload["topicmodel"] = topic_model_id

        request_body = json.dumps(payload)
        return self._create(self.topic_distribution_url, request_body,
                            verify=self.verify_prediction)
Example #7
0
def check_model_fields(model):
    """Checks the model structure to see whether it contains the required
    fields information

    Returns False when `check_model_structure` rejects the structure,
    when `input_fields` is missing (except for the "fusion" inner key)
    or when `model_fields` are listed but some of them are absent from
    the available `fields`. When no `model_fields` are listed, the
    `fields_meta` counters decide: True if `count` equals `total`, and
    also True when those counters are not present.

    """
    inner_key = FIELDS_PARENT.get(get_resource_type(model), 'model')
    if not check_model_structure(model, inner_key):
        return False

    model = model.get('object', model)
    inner = model.get(inner_key, {})
    fields = model.get("fields", inner.get('fields'))
    input_fields = model.get("input_fields")
    # models only need model_fields to work. The rest of resources will
    # need all fields to work
    model_fields = inner.get('model_fields', {})

    # fusions don't have input fields
    if input_fields is None and inner_key != "fusion":
        return False

    if not model_fields:
        fields_meta = model.get('fields_meta', inner.get('fields_meta', {}))
        try:
            return fields_meta['count'] == fields_meta['total']
        except KeyError:
            # stored old models will not have the fields_meta info, so
            # we return True to avoid failing in this case
            return True

    if fields is None:
        return False
    # Direct membership on the mapping avoids rebuilding the key list for
    # every model field.
    return all(field_id in fields for field_id in model_fields)
Example #8
0
    def create_forecast(self, time_series, input_data=None,
                        args=None, wait_time=3, retries=10):
        """Creates a new forecast.

        An Exception is raised unless a time series id can be extracted
        from `time_series` and that id resolves to the time series
        resource type. The id is passed to `check_resource` (using
        `wait_time` and `retries`) before the request is sent.
        `input_data` defaults to an empty dict and the entries of `args`
        are included in the request body.

        """
        time_series_id = get_time_series_id(time_series)
        resource_type = get_resource_type(time_series_id)
        if resource_type != TIME_SERIES_PATH or time_series_id is None:
            raise Exception("A time series model id is needed to create a"
                            " forecast. %s found." % resource_type)

        check_resource(time_series_id,
                       query_string=TINY_RESOURCE,
                       wait_time=wait_time, retries=retries,
                       raise_on_error=True, api=self)

        payload = {}
        if args is not None:
            payload.update(args)
        payload["input_data"] = {} if input_data is None else input_data
        # The id is known to be set here: the guard above rejects None.
        payload["timeseries"] = time_series_id

        request_body = json.dumps(payload)
        return self._create(self.forecast_url, request_body,
                            verify=self.verify_prediction)
    def create_association_set(self, association, input_data=None,
                               args=None, wait_time=3, retries=10):
        """Creates a new association set.

        `association` must resolve to the association resource type;
        otherwise an Exception is raised. The association is passed to
        `check_resource` (using `wait_time` and `retries`) before the
        request is sent. `input_data` defaults to an empty dict and the
        entries of `args` are included in the request body.

        """
        resource_type = get_resource_type(association)
        if resource_type != ASSOCIATION_PATH:
            raise Exception("A association id is needed to create an"
                            " association set. %s found." % resource_type)

        association_id = get_association_id(association)
        check_resource(association_id,
                       query_string=TINY_RESOURCE,
                       wait_time=wait_time, retries=retries,
                       raise_on_error=True, api=self)

        payload = {}
        if args is not None:
            payload.update(args)
        payload["input_data"] = {} if input_data is None else input_data
        payload["association"] = association_id

        request_body = json.dumps(payload)
        return self._create(self.association_set_url, request_body,
                            verify=self.verify)
Example #10
0
    def create_centroid(self, cluster, input_data=None,
                        args=None, wait_time=3, retries=10):
        """Creates a new centroid.

        `cluster` must resolve to the cluster resource type; otherwise an
        Exception is raised. The cluster is passed to `check_resource`
        (using `wait_time` and `retries`) before the request is sent.
        `input_data` defaults to an empty dict and the entries of `args`
        are included in the request body.

        """
        resource_type = get_resource_type(cluster)
        if resource_type != CLUSTER_PATH:
            raise Exception("A cluster id is needed to create a"
                            " centroid. %s found." % resource_type)

        cluster_id = get_cluster_id(cluster)
        check_resource(cluster_id,
                       query_string=TINY_RESOURCE,
                       wait_time=wait_time, retries=retries,
                       raise_on_error=True, api=self)

        payload = {}
        if args is not None:
            payload.update(args)
        payload["input_data"] = {} if input_data is None else input_data
        payload["cluster"] = cluster_id

        request_body = json.dumps(payload)
        return self._create(self.centroid_url, request_body,
                            verify=self.verify)
Example #11
0
    def delete(self, resource, **kwargs):
        """Method to delete resources

        Looks up the deleter registered in `self.deleters` for the type
        of `resource` and calls it with `resource` and `kwargs`,
        returning its result.

        Raises ValueError when no deleter is registered for the resource
        type.

        """
        # Only the dispatch lookup is guarded: a KeyError raised inside the
        # deleter itself must not be reported as "not a resource".
        try:
            resource_type = get_resource_type(resource)
            deleter = self.deleters[resource_type]
        except KeyError:
            raise ValueError("%s is not a resource." % resource)
        return deleter(resource, **kwargs)
Example #12
0
    def create_model(self, origin_resource, args=None, wait_time=3, retries=10):
        """Creates a model from an origin_resource.

        Uses a remote resource to create a new model using the
        arguments in `args`.
        The allowed remote resources can be:
            - dataset
            - list of datasets
            - cluster
        In the case of using cluster id as origin_resource, a centroid must
        also be provided in the args argument. The first centroid is used
        otherwise.

        Raises a KeyError when a cluster is used, no centroid is given
        and none can be derived from the cluster information. Raises an
        Exception when `origin_resource` is none of the allowed types.

        """

        create_args = {}
        if args is not None:
            create_args.update(args)

        if isinstance(origin_resource, list):
            # multidatasets
            create_args = self._set_create_from_datasets_args(
                origin_resource, args=create_args, wait_time=wait_time,
                retries=retries)
        else:
            resource_type = get_resource_type(origin_resource)
            # model from cluster and centroid
            if resource_type == CLUSTER_PATH:
                cluster_id = get_cluster_id(origin_resource)
                cluster = check_resource(cluster_id,
                                         query_string=TINY_RESOURCE,
                                         wait_time=wait_time,
                                         retries=retries,
                                         raise_on_error=True, api=self)
                if 'centroid' not in create_args:
                    try:
                        centroid = list(cluster['object'][
                            'cluster_models'].keys())[0]
                    except (KeyError, IndexError) as exc:
                        # IndexError covers an empty `cluster_models`
                        # mapping, where there is no first centroid.
                        raise KeyError("Failed to generate the model. A "
                                       "centroid id is needed in the args "
                                       "argument to generate a model from "
                                       "a cluster.") from exc
                    create_args.update({'centroid': centroid})
                create_args.update({'cluster': cluster_id})
            elif resource_type == DATASET_PATH:
                create_args = self._set_create_from_datasets_args(
                    origin_resource, args=create_args, wait_time=wait_time,
                    retries=retries)
            else:
                raise Exception("A dataset, list of dataset ids"
                                " or cluster id plus centroid id are needed"
                                " to create a"
                                " model. %s found." % resource_type)

        body = json.dumps(create_args)
        return self._create(self.model_url, body)
Example #13
0
def check_model_structure(model, inner_key=None):
    """Checks whether `model` has the main keys expected in a resource.

    The structure is accepted when `model` is a dict with a non-None
    'resource' entry and `inner_key` is found either in `model['object']`
    or in `model` itself. When `inner_key` is not given, it is looked up
    in FIELDS_PARENT by resource type, defaulting to 'model'.

    """
    if inner_key is None:
        inner_key = FIELDS_PARENT.get(get_resource_type(model), 'model')

    if not isinstance(model, dict):
        return False
    if model.get('resource') is None:
        return False
    if 'object' in model and inner_key in model['object']:
        return True
    return inner_key in model
Example #14
0
    def create_script(self,
                      source_code=None,
                      args=None,
                      wait_time=3,
                      retries=10):
        """Creates a whizzml script.

        The `source_code` parameter can be a:
            {script ID}: the ID for an existing whizzml script
            {url}: a string accepted by `is_url`, whose script arguments
                   are fetched with `retrieve_script_args`
            {path}: the path to a file containing the source code
            {string} : the string containing the source code for the script

        Raises an Exception when `source_code` is None or is neither a
        script nor a string.

        """
        payload = {}
        if args is not None:
            payload.update(args)

        if source_code is None:
            raise Exception('A valid code string'
                            ' or a script id must be provided.')

        resource_type = get_resource_type(source_code)
        if resource_type == SCRIPT_PATH:
            script_id = get_script_id(source_code)
            if script_id:
                check_resource(script_id,
                               query_string=TINY_RESOURCE,
                               wait_time=wait_time,
                               retries=retries,
                               raise_on_error=True,
                               api=self)
                payload["origin"] = script_id
        elif not isinstance(source_code, str):
            raise Exception("A script id or a valid source code"
                            " is needed to create a"
                            " script. %s found." % resource_type)
        elif is_url(source_code):
            script_args = retrieve_script_args(source_code)
            remote_code = script_args.get("source_code")
            payload.update(json.loads(script_args.get("json")))
            payload["source_code"] = remote_code
        else:
            # A string naming an existing file is replaced by the file's
            # contents; any other string is used as the code itself.
            try:
                if os.path.exists(source_code):
                    with open(source_code) as code_file:
                        source_code = code_file.read()
            except IOError:
                raise IOError("Could not open the source code file %s." %
                              source_code)
            payload["source_code"] = source_code

        request_body = json.dumps(payload)
        return self._create(self.script_url, request_body)
Example #15
0
    def get(self, resource, **kwargs):
        """Method to get resources

        Looks up the getter registered in `self.getters` for the type of
        `resource` and calls it. Unless the `finished` keyword argument
        is falsy (it defaults to True), `self.ok` is then called on the
        retrieved information with `error_retries` set to 5.

        Raises ValueError when no getter is registered for the resource
        type.

        """
        finished = kwargs.get('finished', True)
        get_kwargs = filter_kwargs(kwargs, ['finished'])
        # Only the dispatch lookup is guarded: a KeyError raised inside the
        # getter itself must not be reported as "not a resource or ID".
        try:
            resource_type = get_resource_type(resource)
            getter = self.getters[resource_type]
        except KeyError:
            raise ValueError("%s is not a resource or ID." % resource)
        resource_info = getter(resource, **get_kwargs)
        if finished:
            ok_kwargs = filter_kwargs(kwargs, ['query_string'])
            ok_kwargs.update({"error_retries": 5})
            self.ok(resource_info, **ok_kwargs)
        return resource_info
Example #16
0
    def create_prediction(self,
                          model,
                          input_data=None,
                          args=None,
                          wait_time=3,
                          retries=10):
        """Creates a new prediction.

        The model parameter can be:
            - a simple tree model
            - a simple logistic regression model
            - an ensemble
            - a deepnet
            - a linear regression
            - a fusion
        Note that the old `by_name` argument has been deprecated.

        An Exception is raised when the resource type of `model` is not
        in SUPERVISED_PATHS. When an id can be extracted, it is passed to
        `check_resource` (using `wait_time` and `retries`) and added to
        the request. `input_data` defaults to an empty dict.

        """
        resource_type = get_resource_type(model)
        if resource_type not in SUPERVISED_PATHS:
            raise Exception("A supervised model resource id is needed"
                            " to create a prediction. %s found." %
                            resource_type)

        model_id = get_resource_id(model)
        has_id = model_id is not None
        if has_id:
            check_resource(model_id,
                           query_string=TINY_RESOURCE,
                           wait_time=wait_time,
                           retries=retries,
                           raise_on_error=True,
                           api=self)

        payload = {}
        if args is not None:
            payload.update(args)
        payload["input_data"] = {} if input_data is None else input_data
        if has_id:
            payload["model"] = model_id

        request_body = json.dumps(payload)
        return self._create(self.prediction_url,
                            request_body,
                            verify=self.verify_prediction)
Example #17
0
    def create_library(self,
                       source_code=None,
                       args=None,
                       wait_time=3,
                       retries=10):
        """Creates a whizzml library.

        The `source_code` parameter can be a:
            {library ID}: the ID for an existing whizzml library
            {path}: the path to a file containing the source code
            {string} : the string containing the source code for the library

        Raises an Exception when `source_code` is None or is neither a
        library nor a string.

        """
        payload = {}
        if args is not None:
            payload.update(args)

        if source_code is None:
            raise Exception('A valid code string'
                            ' or a library id must be provided.')

        resource_type = get_resource_type(source_code)
        if resource_type == LIBRARY_PATH:
            library_id = get_library_id(source_code)
            if library_id:
                check_resource(library_id,
                               query_string=TINY_RESOURCE,
                               wait_time=wait_time,
                               retries=retries,
                               raise_on_error=True,
                               api=self)
                payload["origin"] = library_id
        elif not isinstance(source_code, str):
            raise Exception("A library id or a valid source code"
                            " is needed to create a"
                            " library. %s found." % resource_type)
        else:
            # A string naming an existing file is replaced by the file's
            # contents; any other string is used as the code itself.
            try:
                if os.path.exists(source_code):
                    with open(source_code) as code_file:
                        source_code = code_file.read()
            except IOError:
                raise IOError("Could not open the source code file %s." %
                              source_code)
            payload["source_code"] = source_code

        request_body = json.dumps(payload)
        return self._create(self.library_url, request_body)
Example #18
0
def get_fields(resource):
    """Returns the field information in a resource dictionary structure

    The fields are read from the resource's 'object' entry when present
    (from the resource itself otherwise). For resource types listed in
    FIELDS_PARENT they are nested under the corresponding parent key.
    For samples, the fields are re-keyed by each field's 'id'.

    Returns None when the resource type is not one of
    RESOURCES_WITH_FIELDS. Raises ValueError when the resource type
    cannot be determined and KeyError when the expected parent key is
    missing.

    """
    try:
        resource_type = get_resource_type(resource)
    except ValueError:
        raise ValueError("Unknown resource structure. Failed to find"
                         " a valid resource dictionary as argument.")

    # Without this guard `fields` would be unbound at the return below.
    if resource_type not in RESOURCES_WITH_FIELDS:
        return None

    resource = resource.get('object', resource)
    # fields structure
    if resource_type in FIELDS_PARENT:
        fields = resource[FIELDS_PARENT[resource_type]].get('fields', {})
    else:
        fields = resource.get('fields', {})

    if resource_type == SAMPLE_PATH:
        fields = {field['id']: field for field in fields}
    return fields
Example #19
0
    def __init__(self,
                 fields,
                 objective_id=None,
                 data_locale=None,
                 missing_tokens=None,
                 terms=False,
                 categories=False,
                 numerics=False):
        """Stores the fields structure and the attributes derived from it.

        Nothing is set when `fields` is not a dict.

        fields: dict keyed by field id; each value is read with `.get`
                for "column_number" and "preferred"
        objective_id: id of the objective field, excluded from the
                      computed `input_fields`
        data_locale: stored in `self.data_locale`; DEFAULT_LOCALE is used
                     when None
        missing_tokens: stored in `self.missing_tokens`;
                        DEFAULT_MISSING_TOKENS is used when None
        terms: when truthy, the term and item related attributes are
               initialized as empty dicts
        categories: when truthy, `self.categories` is initialized as an
                    empty dict
        numerics: together with `terms` and `categories`, decides whether
                  `self.add_terms(categories, numerics)` is called

        Raises an Exception ("Wrong field structure.") when a KeyError is
        found while processing the fields.

        """
        if isinstance(fields, dict):
            try:
                self.objective_id = objective_id
                # NOTE(review): called before the fields are inverted and
                # copied; presumably it adjusts the names in `fields` in
                # place -- confirm against uniquify_varnames.
                self.uniquify_varnames(fields)
                self.inverted_fields = invert_dictionary(fields)
                self.fields = {}
                self.fields.update(fields)
                # An existing, non-empty `input_fields` attribute is kept.
                # Otherwise input fields are all the field ids sorted by
                # "column_number", leaving out the objective field (if any).
                if not (hasattr(self, "input_fields") and self.input_fields):
                    self.input_fields = [field_id for field_id, field in \
                        sorted(list(self.fields.items()),
                               key=lambda x: x[1].get("column_number")) \
                        if not self.objective_id or \
                        field_id != self.objective_id]
                self.model_fields = {}
                self.datetime_parents = []
                # model fields: input fields whose "preferred" attribute is
                # truthy (a missing attribute counts as True)
                self.model_fields.update(
                    dict([(field_id, field) for field_id, field in \
                    list(self.fields.items()) if field_id in self.input_fields and \
                    self.fields[field_id].get("preferred", True)]))
                # if any of the model fields is a generated datetime field
                # we need to add the parent datetime field
                self.model_fields = self.add_datetime_parents()
                self.data_locale = data_locale
                self.missing_tokens = missing_tokens
                if self.data_locale is None:
                    self.data_locale = DEFAULT_LOCALE
                if self.missing_tokens is None:
                    self.missing_tokens = DEFAULT_MISSING_TOKENS
                if terms:
                    # adding text and items information to handle terms
                    # expansion
                    self.term_forms = {}
                    self.tag_clouds = {}
                    self.term_analysis = {}
                    self.items = {}
                    self.item_analysis = {}
                if categories:
                    self.categories = {}
                if terms or categories or numerics:
                    self.add_terms(categories, numerics)

                # `regression` is only set when there is an objective field
                # and the instance has a truthy `resource_id` whose type is
                # not ENSEMBLE_PATH.
                if self.objective_id is not None and \
                        hasattr(self, "resource_id") and self.resource_id and \
                        get_resource_type(self.resource_id) != ENSEMBLE_PATH:
                    # Only for models. Ensembles need their own logic
                    # The expression reads as (A and B) or C:
                    #   A: there is no truthy `boosting` attribute
                    #   B: the objective field optype is NUMERIC
                    #   C: `boosting` is truthy and has no "objective_class"
                    self.regression = \
                        (not hasattr(self, "boosting") or not self.boosting) \
                        and self.fields[self.objective_id][ \
                        'optype'] == NUMERIC \
                        or (hasattr(self, "boosting") and self.boosting and \
                        self.boosting.get("objective_class") is None)

            except KeyError:
                raise Exception("Wrong field structure.")
Example #20
0
    def create_dataset(self,
                       origin_resource,
                       args=None,
                       wait_time=3,
                       retries=10):
        """Creates a remote dataset.

        Uses a remote resource to create a new dataset using the
        arguments in `args`.
        The allowed remote resources can be:
            - source
            - dataset
            - list of datasets
            - cluster
        In the case of using cluster id as origin_resources, a centroid must
        also be provided in the args argument. The first centroid is used
        otherwise.
        The origin resource is passed to `check_resource` (using
        `wait_time` and `retries`) before the creation request is sent.

        Raises a KeyError when a cluster is used, no centroid is given
        and none can be derived from the cluster information. Raises an
        Exception when `origin_resource` is none of the allowed types.

        """
        create_args = {}
        if args is not None:
            create_args.update(args)

        if isinstance(origin_resource, list):
            # multidatasets
            create_args = self._set_create_from_datasets_args(
                origin_resource,
                args=create_args,
                wait_time=wait_time,
                retries=retries,
                key="origin_datasets")
        else:
            # dataset from source
            resource_type = get_resource_type(origin_resource)
            if resource_type == SOURCE_PATH:
                source_id = get_source_id(origin_resource)
                if source_id:
                    check_resource(source_id,
                                   query_string=TINY_RESOURCE,
                                   wait_time=wait_time,
                                   retries=retries,
                                   raise_on_error=True,
                                   api=self)
                    create_args.update({"source": source_id})
            # dataset from dataset
            elif resource_type == DATASET_PATH:
                create_args = self._set_create_from_datasets_args(
                    origin_resource,
                    args=create_args,
                    wait_time=wait_time,
                    retries=retries,
                    key="origin_dataset")
            # dataset from cluster and centroid
            elif resource_type == CLUSTER_PATH:
                cluster_id = get_cluster_id(origin_resource)
                cluster = check_resource(cluster_id,
                                         query_string=TINY_RESOURCE,
                                         wait_time=wait_time,
                                         retries=retries,
                                         raise_on_error=True,
                                         api=self)
                if 'centroid' not in create_args:
                    try:
                        centroid = list(cluster['object']
                                        ['cluster_datasets_ids'].keys())[0]
                    except (KeyError, IndexError) as exc:
                        # IndexError covers an empty `cluster_datasets_ids`
                        # mapping, where there is no first centroid.
                        raise KeyError("Failed to generate the dataset. A "
                                       "centroid id is needed in the args "
                                       "argument to generate a dataset from "
                                       "a cluster.") from exc
                    create_args.update({'centroid': centroid})
                create_args.update({'cluster': cluster_id})
            else:
                raise Exception("A source, dataset, list of dataset ids"
                                " or cluster id plus centroid id are needed"
                                " to create a"
                                " dataset. %s found." % resource_type)

        body = json.dumps(create_args)
        return self._create(self.dataset_url, body)