    def _handle_ok(self, response, response_json):
        if not response_json:
            return FireflyResponse(headers=response.headers,
                                   status_code=response.status_code)

        if 'result' not in response_json:
            response_json = {'result': response_json}

        response_type = type(response_json['result'])
        if response_type == dict:
            result = FireflyResponse(data=response_json.get(
                'result', response_json),
                                     headers=response.headers,
                                     status_code=response.status_code)
        elif response_type == bool:
            result = FireflyResponse(data=response_json,
                                     headers=response.headers,
                                     status_code=response.status_code)
        elif response_type == int:
            result = FireflyResponse(data={'id': response_json['result']},
                                     headers=response.headers,
                                     status_code=response.status_code)
        else:
            result = FireflyResponse(data=response_json,
                                     headers=response.headers,
                                     status_code=response.status_code)
        return result
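
To make the branching above concrete, here is a minimal sketch of the mapping, with FireflyResponse stubbed as a plain dict subclass for illustration (the real class ships with the fireflyai SDK):

# Stub standing in for fireflyai's FireflyResponse; illustration only.
class FireflyResponse(dict):
    def __init__(self, data=None, headers=None, status_code=None):
        super().__init__(data or {})
        self.headers = headers
        self.status_code = status_code

# dict result -> the inner dict becomes the data:  {'result': {'a': 1}} maps to data={'a': 1}
# int result  -> wrapped under an 'id' key:        {'result': 7}        maps to data={'id': 7}
# bool result -> the whole json is kept:           {'result': True}     maps to data={'result': True}
print(FireflyResponse(data={'id': 7}, status_code=200))  # {'id': 7}
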
Example 2
def authenticate(username: str, password: str) -> FireflyResponse:
    """
    Authenticates user and stores temporary token in `fireflyai.token`.

    Other modules automatically detect if a token exists and use it, unless a user specifically provides a token
    for a specific request.
    The token is valid for a 24-hour period, after which this method needs to be called again in order to generate
    a new token.

    Args:
        username (str): Username.
        password (str): Password.

    Returns:
        FireflyResponse: Empty FireflyResponse if successful, raises FireflyError otherwise.
    """
    url = 'login'

    requestor = APIRequestor()
    response = requestor.post(url,
                              body={
                                  'username': username,
                                  'password': password,
                                  'tnc': None
                              },
                              api_key="")
    fireflyai.token = response['token']
    return FireflyResponse(status_code=response.status_code,
                           headers=response.headers)
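
A minimal usage sketch (placeholder credentials; assumes the fireflyai package is importable):

import fireflyai

# Placeholder credentials for illustration.
fireflyai.authenticate(username='user@example.com', password='********')
# Subsequent SDK calls detect and reuse fireflyai.token automatically.
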
Example 3
    def impersonate(cls,
                    user_id: str = None,
                    admin_token: str = None) -> FireflyResponse:
        """
       impersonate user and stores temporary token in `fireflyai.token`

       Args:
           user_id (str): user ID.
           email (str): User email.
           admin_token (str): Admin user token.

       Returns:
           FireflyResponse: Empty FireflyResponse if successful, raises FireflyError otherwise.
       """
        logging.info(' '.join(['impersonate to user_ID:', str(user_id)]))
        url = ''.join(['users/login_as/', str(user_id)])
        requester = APIRequestor()
        try:
            response = requester.post(url, api_key=admin_token)
            fireflyai.token = response['result']
            my_token = UserToken(fireflyai.token)
            logging.info(' '.join([
                'user ID:',
                str(user_id), '- Login successful with Account ID: ' +
                str(my_token.get_account_id())
            ]))
        except Exception as ex:
            logging.warning(' '.join(
                ['user ID:', str(user_id), '- Login Failed']))
            raise ex

        return FireflyResponse(status_code=response.status_code,
                               headers=response.headers)
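
A usage sketch with placeholder values; the class that owns `impersonate` is not shown above, so `Client` below is a hypothetical stand-in name:

import fireflyai

# `Client` is a stand-in for the class that defines `impersonate`;
# the user ID and admin token are placeholders.
Client.impersonate(user_id='1234', admin_token='<ADMIN_TOKEN>')
print(fireflyai.token)  # temporary token of the impersonated user
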
Example 4
    def _get_available_configuration_options(cls, id: int, inter_level: InterpretabilityLevel = None,
                                             api_key: str = None) -> FireflyResponse:
        inter_level = inter_level.value if inter_level is not None else None
        requestor = APIRequestor()
        url = "tasks/configuration/options"
        response = requestor.get(url=url, params={'dataset_id': id, 'interpretable': inter_level}, api_key=api_key)
        new_data = {
            'estimators': [Estimator(e) for e in response['estimators']],
            'target_metric': [TargetMetric(e) for e in response['target_metric']],
            'splitting_strategy': [SplittingStrategy(e) for e in response['splitting_strategy']],
            'pipeline': [Pipeline(e) for e in response['pipeline']],
        }
        return FireflyResponse(data=new_data)
Example 5
    def refit(cls,
              id: int,
              datasource_id: int,
              wait: bool = False,
              api_key: str = None) -> FireflyResponse:
        """
        Refits the chosen Ensemble of a Task on a specific Datasource.

        A refit trains the chosen Ensemble's models with the data of the given Datasource. The model training is done
        from scratch and uses all the given data. A new Ensemble is created that is made of all the refitted models of
        the chosen Ensemble and their original combination.

        Args:
            id (int): Task ID.
            datasource_id (int): Datasource ID.
            wait (Optional[bool]): Should the call be synchronous or not.
            api_key (Optional[str]): Explicit api_key, not required if `fireflyai.authenticate` was run prior.

        Returns:
            FireflyResponse: Ensemble ID, if successful and wait=False or Ensemble if successful and wait=True;
            raises FireflyError otherwise.
        """
        data = {
            "datasource_id": datasource_id,
        }

        ensemble_id = cls.get(id=id, api_key=api_key).get('ensemble_id', None)
        if not ensemble_id:
            raise InvalidRequestError(
                message="No ensemble exists for this Task.")

        requestor = APIRequestor()
        url = "ensembles/{ensemble_id}/refit".format(ensemble_id=ensemble_id)
        response = requestor.post(url=url, body=data, api_key=api_key)
        new_ens_id = response.get('ensemble_id')

        if wait:
            utils.wait_for_finite_state(fireflyai.Ensemble.get,
                                        new_ens_id,
                                        api_key=api_key)
            response = fireflyai.Ensemble.get(new_ens_id, api_key=api_key)
        else:
            response = FireflyResponse(data={'id': new_ens_id},
                                       headers=response.headers,
                                       status_code=response.status_code)

        return response
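
A usage sketch showing both wait modes (hypothetical IDs; assumes `refit` is exposed on fireflyai.Task, as its use of a Task ID suggests):

import fireflyai

# Asynchronous: returns at once with the new Ensemble's ID.
resp = fireflyai.Task.refit(id=12, datasource_id=34)
new_ensemble_id = resp['id']

# Synchronous: blocks until the refitted Ensemble reaches a finite state.
ensemble = fireflyai.Task.refit(id=12, datasource_id=34, wait=True)
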
Example 6
    def create_from_dataframe(cls,
                              df,
                              data_source_name: str,
                              na_values: List[str] = None,
                              wait: bool = False,
                              skip_if_exists: bool = False,
                              api_key: str = None) -> FireflyResponse:
        """
        Creates a Datasource from a pandas DataFrame.

        Args:
            df (pandas.DataFrame): DataFrame object to upload to server.
            data_source_name (str): Name of the Datasource.
            na_values (Optional[List[str]]): List of user specific Null values.
            wait (Optional[bool]): Should the call be synchronous or not.
            skip_if_exists (Optional[bool]): Check if a Datasource with same name exists and skip if true.
            api_key (Optional[str]): Explicit `api_key`, not required, if `fireflyai.authenticate()` was run prior.

        Returns:
            FireflyResponse: Datasource ID, if successful and wait=False or Datasource if successful and wait=True;
            raises FireflyError otherwise.
        """
        data_source_name = data_source_name if data_source_name.endswith(
            '.csv') else data_source_name + ".csv"
        existing_ds = cls.list(filter_={'name': [data_source_name]},
                               api_key=api_key)
        if existing_ds and existing_ds['total'] > 0:
            if skip_if_exists:
                return FireflyResponse(data=existing_ds['hits'][0])
            else:
                raise APIError("Datasource with that name exists")

        csv_buffer = io.StringIO()
        df.to_csv(csv_buffer, index=False)

        aws_credentials = cls.__get_upload_details(api_key=api_key)
        utils.s3_upload_stream(csv_buffer, data_source_name, aws_credentials)

        return cls._create(data_source_name,
                           na_values=na_values,
                           wait=wait,
                           api_key=api_key)
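
A usage sketch uploading a small pandas DataFrame (assumes a prior `fireflyai.authenticate` call and that the method is exposed on fireflyai.Datasource):

import pandas as pd
import fireflyai

df = pd.DataFrame({'age': [21, 35, 48], 'churned': [0, 1, 1]})
# '.csv' is appended to the Datasource name automatically if missing.
resp = fireflyai.Datasource.create_from_dataframe(
    df, data_source_name='demo', wait=True, skip_if_exists=True)
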
Example 7
    def login(cls, email: str, password: str) -> FireflyResponse:
        """
        Authenticates user and stores temporary token in `fireflyai.token`.

        Other modules automatically detect if a token exists and use it, unless a user specifically provides a token
        for a specific request.
        The token is valid for a 24-hour period, after which this method needs to be called again in order to generate
        a new token.

        Args:
            email (str): User email.
            password (str): Password.

        Returns:
            FireflyResponse: Empty FireflyResponse if successful, raises FireflyError otherwise.
        """
        logging.info(' '.join(['login with user:', str(email)]))
        url = 'login'
        requestor = APIRequestor()
        try:
            response = requestor.post(url,
                                      body={
                                          'username': email,
                                          'password': password,
                                          'tnc': None
                                      },
                                      api_key="")
            fireflyai.token = response['token']
            my_token = UserToken(fireflyai.token)
            logging.info(' '.join([
                'user mail:',
                str(email), '- Login successful, User ID: ',
                str(my_token.get_user_id()),
                ' Account ID: ' + str(my_token.get_account_id())
            ]))
        except Exception as ex:
            logging.warning(' '.join(
                ['user mail:', str(email), '- Login FAILED']))
            raise ex

        return FireflyResponse(status_code=response.status_code,
                               headers=response.headers)
Example 8
    def get_by_name(cls, name: str, api_key: str = None) -> FireflyResponse:
        """
        Gets information on a specific Dataset identified by its name.

        Information includes the state of the Dataset and other attributes.
        Similar to calling `fireflyai.Dataset.list(filter_={'name': [NAME]})`.

        Args:
            name (str): Dataset name.
            api_key (Optional[str]): Explicit api_key, not required if `fireflyai.authenticate` was run prior.

        Returns:
            FireflyResponse: Information about the Dataset.
        """
        resp = cls.list(filter_={'name': [name]}, api_key=api_key)
        if resp and 'total' in resp and resp['total'] > 0:
            ds = resp['hits'][0]
            return FireflyResponse(data=ds)
        else:
            raise APIError("Dataset with that name does not exist")
Example 9
    def get_feature_importance_report(cls,
                                      id: int,
                                      api_key: str = None) -> FireflyResponse:
        """
        Gets feature importance report for Ensemble.

        Args:
            id (int): Ensemble ID.
            api_key (Optional[str]): Explicit api_key, not required if `fireflyai.authenticate` was run prior.

        Returns:
            FireflyResponse: Contains mapping of feature importance for the ensemble_id.
        """
        requestor = APIRequestor()
        url = "reports/{prefix}/{id}/feature_importance".format(
            prefix=cls._CLASS_PREFIX, id=id)
        response = requestor.get(url=url, api_key=api_key)
        result = response.to_dict()
        cls.__cleanup_report(result)
        return FireflyResponse(data=result)
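
A usage sketch (hypothetical Ensemble ID; assumes the report method is exposed on fireflyai.Ensemble and that `to_dict()` returns the report mapping):

import fireflyai

report = fireflyai.Ensemble.get_feature_importance_report(id=56)
for feature, importance in report.to_dict().items():
    print(feature, importance)
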
Example 10
    def create(cls, ensemble_id: int, data_id: int = None, file_path: str = None, download_details: Dict = None,
               remove_header: bool = False,
               data_name: str = None, header: List = None, wait: bool = None, api_key: str = None) -> FireflyResponse:
        """
        Create a prediction from a given ensemble and prediction datasource.

        The prediction datasource should include all of the original features, without the target column (unless the
        ensemble belongs to a timeseries task).
        The prediction uses the ensemble to produce the prediction's results file.

        Args:
            ensemble_id (int): Ensemble to use for the prediction.
            data_id (Optional[int]): Datasource to run the prediction on.
            file_path (Optional[str]): Path of a file to run the prediction on, as an alternative to `data_id`.
            download_details (Optional[Dict]): Download details to pass along with the request, if given.
            remove_header (Optional[bool]): Whether to strip the header row from the given file.
            data_name (Optional[str]): Name for the prediction data; defaults to the file's basename when
                `file_path` is given.
            header (Optional[List]): Header to use for the prediction data.
            wait (Optional[bool]): Should the call be synchronous or not.
            api_key (Optional[str]): Explicit api_key, not required if `fireflyai.authenticate` was run prior.

        Returns:
            FireflyResponse: Prediction ID, if successful and wait=False or Prediction if successful and wait=True;
            raises FireflyError otherwise.
        """
        data_name = data_name or (os.path.basename(file_path) if file_path else None)
        data = {
            "ensemble_id": ensemble_id,
            "datasource_id": data_id,
            "header": header,
            "data_name": data_name,
            "file_path": file_path,
            "remove_header": remove_header,
        }
        if download_details:
            data['download_details'] = download_details
        requestor = APIRequestor()
        response = requestor.post(url=cls._CLASS_PREFIX, body=data, api_key=api_key)
        id = response['id']
        if wait:
            utils.wait_for_finite_state(cls.get, id, state_field='stage', api_key=api_key)
            response = cls.get(id, api_key=api_key)
        else:
            response = FireflyResponse(data={'id': id})

        return response
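
A usage sketch running a prediction against an existing Datasource (hypothetical IDs; assumes the method is exposed on fireflyai.Prediction):

import fireflyai

# Predict with ensemble 56 on datasource 78 and wait for the results file.
prediction = fireflyai.Prediction.create(ensemble_id=56, data_id=78, wait=True)
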
Example 11
    def get_model_sensitivity_report(cls,
                                     id: int,
                                     api_key: str = None) -> FireflyResponse:
        """
        Gets sensitivity report for Ensemble.

        Contains each feature's sensitivity score for missing values and feature values.

        Args:
            id (int): Ensemble ID.
            api_key (Optional[str]): Explicit api_key, not required if `fireflyai.authenticate` was run prior.

        Returns:
            FireflyResponse: Score for each feature in every sensitivity test.
        """
        requestor = APIRequestor()
        url = "reports/{prefix}/{id}/sensitivity".format(
            prefix=cls._CLASS_PREFIX, id=id)
        response = requestor.get(url=url, api_key=api_key)
        result = response.to_dict()
        cls.__cleanup_report(result)
        return FireflyResponse(data=result)
Example 12
    def create(cls,
               filename: str,
               na_values: List[str] = None,
               wait: bool = False,
               skip_if_exists: bool = False,
               api_key: str = None) -> FireflyResponse:
        """
        Uploads a file to the server to create a new Datasource.

        Args:
            filename (str): File to be uploaded.
            na_values (Optional[List[str]]): List of user specific Null values.
            wait (Optional[bool]): Should the call be synchronous or not.
            skip_if_exists (Optional[bool]): Check if a Datasource with same name exists and skip if true.
            api_key (Optional[str]): Explicit api_key, not required if `fireflyai.authenticate` was run prior.

        Returns:
            FireflyResponse: Datasource ID, if successful and wait=False or Datasource if successful and wait=True;
            raises FireflyError otherwise.
        """
        data_source_name = os.path.basename(filename)

        existing_ds = cls.list(filter_={'name': [data_source_name]},
                               api_key=api_key)
        if existing_ds and existing_ds['total'] > 0:
            if skip_if_exists:
                return FireflyResponse(data=existing_ds['hits'][0])
            else:
                raise InvalidRequestError(
                    "Datasource with that name already exists")

        aws_credentials = cls.__get_upload_details(api_key=api_key)
        utils.s3_upload(data_source_name, filename, aws_credentials.to_dict())

        return cls._create(data_source_name,
                           na_values=na_values,
                           wait=wait,
                           api_key=api_key)
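
A usage sketch uploading a local CSV file (placeholder path; assumes the method is exposed on fireflyai.Datasource):

import fireflyai

resp = fireflyai.Datasource.create(
    filename='/path/to/train.csv', na_values=['NA', '?'], wait=True)
datasource_id = resp['id']
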
Example 13
    def create(cls, datasource_id: int, dataset_name: str, target: str, problem_type: ProblemType, header: bool = True,
               na_values: List[str] = None, retype_columns: Dict[str, FeatureType] = None,
               rename_columns: List[str] = None, datetime_format: str = None, time_axis: str = None,
               block_id: List[str] = None, sample_id: List[str] = None, subdataset_id: List[str] = None,
               sample_weight: List[str] = None, not_used: List[str] = None, hidden: List[str] = False,
               wait: bool = False, skip_if_exists: bool = False, api_key: str = None) -> FireflyResponse:
        """
        Creates and prepares a Dataset.

        While creating a Dataset, the feature roles are labeled and the feature types can be set by the user.
        Data analysis is done in order to optimize the model training and search process.

        Args:
            datasource_id (int): Datasource ID.
            dataset_name (str): The name of the Dataset.
            target (str): The name of the target feature, or its column index if header=False.
            problem_type (ProblemType): The problem type.
            header (bool): Does the file include a header row or not.
            na_values (Optional[List[str]]): List of user specific Null values.
            retype_columns (Dict[str, FeatureType]): Change the types of certain columns.
            rename_columns (Optional[List[str]]): New names to assign to the data's columns.
            datetime_format (Optional[str]): The datetime format used in the data.
            time_axis (Optional[str]): In timeseries problems, the feature that is the time axis.
            block_id (Optional[List[str]]): To avoid data leakage, data can be split into blocks. Rows with the same
                `block_id` must all be in the train set or the test set. Requires at least 50 unique values in the data.
            sample_id (Optional[List[str]]): Row identifier.
            subdataset_id (Optional[List[str]]): Features which specify a subdataset ID in the data.
            sample_weight (Optional[List[str]]): Features which specify per-sample weights.
            not_used (Optional[List[str]]): List of features to ignore.
            hidden (Optional[List[str]]): List of features to mark as hidden.
            wait (Optional[bool]): Should the call be synchronous or not.
            skip_if_exists (Optional[bool]): Check if a Dataset with same name exists and skip if true.
            api_key (Optional[str]): Explicit `api_key`, not required, if `fireflyai.authenticate()` was run prior.

        Returns:
            FireflyResponse: Dataset ID, if successful and wait=False or Dataset if successful and wait=True;
            raises FireflyError otherwise.
        """
        existing_ds = cls.list(filter_={'name': [dataset_name]}, api_key=api_key)
        if existing_ds and existing_ds['total'] > 0:
            if skip_if_exists:
                return FireflyResponse(data=existing_ds['hits'][0])
            else:
                raise InvalidRequestError("Dataset with that name already exists")

        data = {
            "name": dataset_name,
            "data_id": datasource_id,
            "header": header,
            "problem_type": problem_type.value if problem_type is not None else None,
            "hidden": hidden,
            "na_values": na_values,
            "retype_columns": {key: retype_columns[key].value for key in
                               retype_columns} if retype_columns is not None else None,
            "datetime_format": datetime_format,
            "target": target,
            "time_axis": time_axis,
            "block_id": block_id,
            "sample_id": sample_id,
            "subdataset_id": subdataset_id,
            "sample_weight": sample_weight,
            "not_used": not_used,
            "rename_columns": rename_columns
        }

        requestor = APIRequestor()
        response = requestor.post(url=cls._CLASS_PREFIX, body=data, api_key=api_key)

        if wait:
            id = response['id']
            utils.wait_for_finite_state(cls.get, id, api_key=api_key)
            response = cls.get(id, api_key=api_key)

        return response
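
A usage sketch preparing a Dataset from an uploaded Datasource (hypothetical IDs, names, and enum members; the `fireflyai.enums` import path is an assumption):

import fireflyai
from fireflyai.enums import ProblemType, FeatureType  # assumed import path

resp = fireflyai.Dataset.create(
    datasource_id=12,
    dataset_name='churn_train',
    target='churned',
    problem_type=ProblemType.CLASSIFICATION,           # illustrative member
    retype_columns={'plan': FeatureType.CATEGORICAL},  # illustrative member
    wait=True,
)
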
Example 14
    def create(cls,
               name: str,
               dataset_id: int,
               estimators: List[Estimator] = None,
               target_metric: TargetMetric = None,
               splitting_strategy: SplittingStrategy = None,
               notes: str = None,
               ensemble_size: int = None,
               max_models_num: int = None,
               single_model_timeout: int = None,
               pipeline: List[Pipeline] = None,
               prediction_latency: int = None,
               interpretability_level: InterpretabilityLevel = None,
               timeout: int = 7200,
               cost_matrix_weights: List[List[str]] = None,
               train_size: float = None,
               test_size: float = None,
               validation_size: float = None,
               fold_size: int = None,
               n_folds: int = None,
               horizon: int = None,
               validation_strategy: ValidationStrategy = None,
               cv_strategy: CVStrategy = None,
               forecast_horizon: int = None,
               model_life_time: int = None,
               refit_on_all: bool = None,
               wait: bool = False,
               skip_if_exists: bool = False,
               leaky_features: List[str] = None,
               api_key: str = None) -> FireflyResponse:
        """
        Create and run a training task.

        A task is responsible for searching for hyper-parameters that maximize the model scores.
        The task constructs ensembles made of selected models; searching for ways to combine different models
        allows for smarter decision-making.

        Args:
            name (str): Task's name.
            dataset_id (int): Dataset ID of the training data.
            estimators (List[Estimator]): Estimators to use in the train task.
            target_metric (TargetMetric): The target metric is the metric the model hyperparameter search process
                attempts to optimize.
            splitting_strategy (SplittingStrategy): Splitting strategy of the data.
            notes (Optional[str]): Notes of the task.
            ensemble_size (Optional[int]): Maximum number for models in ensemble.
            max_models_num (Optional[int]): Maximum number of models to train.
            single_model_timeout (Optional[int]): Maximum time for training one model.
            pipeline (Optional[List[Pipeline]]): Possible pipeline steps.
            prediction_latency (Optional[int]): Maximum number of seconds ensemble prediction should take.
            interpretability_level (Optional[InterpretabilityLevel]): Determines how interpretable your ensemble is.
                A higher level of interpretability leads to more interpretable ensembles.
            timeout (Optional[int]): timeout in seconds for the search process (default: 2 hours).
            cost_matrix_weights (Optional[List[List[str]]]): For classification and anomaly detection problems, the weights allow
                determining a custom cost metric, which assigns different weights to the entries of the confusion matrix.
            train_size (Optional[float]): The ratio of data taken for the train set of the model.
            test_size (Optional[float]): The ratio of data taken for the test set of the model.
            validation_size (Optional[float]): The ratio of data taken for the validation set of the model.
            fold_size (Optional[int]): Fold size when performing cross-validation splitting.
            n_folds (Optional[int]): Number of folds when performing cross-validation splitting.
            validation_strategy (Optional[ValidationStrategy]): Validation strategy used for the train task.
            cv_strategy (Optional[CVStrategy]): Cross-validation strategy to use for the train task.
            horizon (Optional[int]): DEPRECATED. Please use `forecast_horizon` and `model_life_time`.
            forecast_horizon (Optional[int]): For time-series tasks, how many future samples the model should predict.
            model_life_time (Optional[int]): For time-series tasks, how long the trained model is expected to remain valid.
            refit_on_all (Optional[bool]): Determines if the final ensemble will be refit on all data after
                search process is done.
            leaky_features (Optional[List[str]]): Features to mark as leaky.
            wait (Optional[bool]): Should the call be synchronous or not.
            skip_if_exists (Optional[bool]): Check if a Task with the same name exists and skip if true.
            api_key (Optional[str]): Explicit api_key, not required if `fireflyai.authenticate` was run prior.

        Returns:
            FireflyResponse: Task ID, if successful and wait=False or Task if successful and wait=True;
            raises FireflyError otherwise.
        """
        if horizon is not None:
            fireflyai.logger.warning(
                "Parameter `horizon` is DEPRECATED. Please use `forecast_horizon` and `model_life_time`."
            )

        existing_ds = cls.list(filter_={'name': [name]}, api_key=api_key)
        if existing_ds and existing_ds['total'] > 0:
            if skip_if_exists:
                return FireflyResponse(data=existing_ds['hits'][0])
            else:
                raise InvalidRequestError("Task with that name already exists")

        dataset = fireflyai.Dataset.get(id=dataset_id, api_key=api_key)

        problem_type = ProblemType(dataset['problem_type'])

        task_config = cls._get_config_defaults(
            dataset_id=dataset_id,
            problem_type=problem_type,
            inter_level=interpretability_level)

        user_config = {
            'dataset_id': dataset_id,
            'name': name,
            'estimators': [e.value for e in estimators] if estimators is not None else None,
            'target_metric': target_metric.value if target_metric is not None else None,
            'splitting_strategy': splitting_strategy.value if splitting_strategy is not None else None,
            'ensemble_size': ensemble_size,
            'max_models_num': max_models_num,
            'single_model_timeout': single_model_timeout,
            'pipeline': [p.value for p in pipeline] if pipeline is not None else None,
            'prediction_latency': prediction_latency,
            'interpretability_level': interpretability_level.value if interpretability_level is not None else None,
            'timeout': timeout,
            'cost_matrix_weights': cost_matrix_weights,
            'train_size': train_size,
            'test_size': test_size,
            'validation_size': validation_size,
            'cv_strategy': cv_strategy.value if cv_strategy is not None else None,
            'n_folds': n_folds,
            'forecast_horizon': forecast_horizon,
            'model_life_time': model_life_time,
            'fold_size': fold_size,
            'validation_strategy': validation_strategy.value if validation_strategy is not None else None,
            'notes': notes,
            'leaky_features': leaky_features,
            'refit_on_all': refit_on_all,
        }
        task_config.update({k: v for k, v in user_config.items() if v is not None})

        requestor = APIRequestor()
        response = requestor.post(url=cls._CLASS_PREFIX,
                                  body=task_config,
                                  api_key=api_key)
        id = response['task_id']
        if wait:
            utils.wait_for_finite_state(cls.get,
                                        id,
                                        max_time=timeout,
                                        api_key=api_key)
            response = cls.get(id, api_key=api_key)
        else:
            response = FireflyResponse(data={'id': id})

        return response
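
Finally, an end-to-end sketch tying the steps above together, from authentication to a trained Task (credentials, IDs, and enum members are illustrative; the enum import path is an assumption):

import fireflyai
from fireflyai.enums import Estimator, TargetMetric  # assumed import path

fireflyai.authenticate(username='user@example.com', password='********')

task = fireflyai.Task.create(
    name='churn_task',
    dataset_id=34,                       # hypothetical Dataset ID
    estimators=[Estimator.XGBOOST],      # illustrative enum member
    target_metric=TargetMetric.AUC,      # illustrative enum member
    timeout=3600,
    wait=True,
)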