def reset_api(self):
    """Rebuild both API connections from the stored credentials.

    ``self.api`` is recreated as a regular connection and
    ``self.api_dev_mode`` as a ``dev_mode=True`` one; both receive the
    current ``self.debug`` flag.
    """
    credentials = (self.USERNAME, self.API_KEY)
    self.api = BigML(*credentials, debug=self.debug)
    self.api_dev_mode = BigML(*credentials, dev_mode=True, debug=self.debug)
def __init__(self, data, epsilon=0.01, rounding=None, black_box=False):
    """Load the BigML model registered for ``data`` and set up the extractor.

    In white-box mode the model id is read from ``models`` and the tree
    leaves are cached in ``self.leaves``.  In black-box mode the id is read
    from ``black_box_models``, kept in ``self.model_id``, and an extra
    ``self.connection`` is opened.  In both modes a ``ValueError`` raised
    while loading ``model/<id>`` triggers a second attempt with
    ``public/model/<id>``.
    """
    self.black_box = black_box

    if self.black_box:
        logging.info('Extracting a Black Box Model')
        self.model_id = black_box_models[data]

        # get the black-box model with the real credentials for sanity
        # checks
        try:
            private_ref = 'model/{}'.format(self.model_id)
            credentialed_api = BigML(username='******', api_key=BB_KEY)
            self.model = Model(private_ref, api=credentialed_api)
        except ValueError:
            public_ref = 'public/model/{}'.format(self.model_id)
            self.model = Model(public_ref, api=BigML(storage=STORAGE))

        self.connection = BigML()
    else:
        model_id = models[data]

        # retrieve a model from local storage or from bigml.io
        # (only works for public models)
        try:
            private_ref = 'model/{}'.format(model_id)
            self.model = Model(private_ref, api=BigML(storage=STORAGE))
        except ValueError:
            public_ref = 'public/model/{}'.format(model_id)
            self.model = Model(public_ref, api=BigML(storage=STORAGE))

        self.leaves = self.model.tree.get_leaves()

    TreeExtractor.__init__(self, epsilon, rounding)
def setup_resources(feature):
    """Prepare the shared ``world`` state before a feature runs.

    Reads the credentials from the environment, opens the regular and the
    development-mode connections, stores the initial resource counts via
    ``store_init_resources()`` and empties every resource registry kept in
    ``world``.  ``feature`` is not used by the body.
    """
    # os.environ[...] raises KeyError for an unset variable, so the asserts
    # below only act as a second line of defence.
    world.USERNAME = os.environ['BIGML_USERNAME']
    world.API_KEY = os.environ['BIGML_API_KEY']
    for credential in (world.USERNAME, world.API_KEY):
        assert credential is not None

    world.api = BigML(world.USERNAME, world.API_KEY)
    world.api_dev_mode = BigML(world.USERNAME, world.API_KEY, dev_mode=True)
    world.test_lines = 0
    store_init_resources()

    # one empty registry per resource type created during the tests
    registries = (
        'sources', 'datasets', 'models', 'predictions', 'evaluations',
        'ensembles', 'batch_predictions', 'clusters', 'centroids',
        'batch_centroids', 'anomalies', 'anomaly_scores',
        'batch_anomaly_scores', 'projects',
    )
    for registry in registries:
        setattr(world, registry, [])

    for reference in ('source_lower', 'source_upper', 'source_reference'):
        setattr(world, reference, None)
def reset_api(self):
    """Recreate the connections unless a non-dev connection already exists.

    Nothing happens when ``self.api`` is set and is not in development mode.
    When it is a development-mode connection, ``world.project_id`` is
    cleared first.  In that case, and when ``self.api`` is ``None``, both
    ``self.api`` and ``self.api_dev_mode`` are rebuilt from the stored
    credentials.
    """
    current = self.api
    if current is not None:
        if not current.dev_mode:
            # a regular connection is already in place: keep it
            return
        world.project_id = None

    self.api = BigML(self.USERNAME, self.API_KEY)
    self.api_dev_mode = BigML(self.USERNAME, self.API_KEY, dev_mode=True)
def points_in_cluster(self, centroid_id):
    """Return the rows of the dataset associated with one centroid.

    The dataset id is looked up in ``self.datasets``.  When none is stored
    (``None`` or ``""``) a dataset is created from ``self.resource_id`` with
    ``{"centroid": centroid_id}`` and waited for; otherwise the stored one
    is checked.  The dataset is then downloaded and parsed with
    ``csv.DictReader``; the result is the list of row dicts.
    """
    dataset_ref = self.datasets.get(centroid_id)
    if self.api is None:
        self.api = BigML(storage=STORAGE)
    connection = self.api

    if dataset_ref in [None, ""]:
        dataset_ref = connection.create_dataset(
            self.resource_id, {"centroid": centroid_id})
        connection.ok(dataset_ref)
    else:
        dataset_ref = connection.check_resource("dataset/%s" % dataset_ref)

    # download dataset to compute local predictions
    stream = connection.download_dataset(dataset_ref["resource"])
    if PY3:
        # wrap the downloaded stream in a UTF-8 decoding reader
        stream = codecs.getreader("utf-8")(stream)
    return list(csv.DictReader(stream))
def __init__(self, ensemble, api=None, max_models=None):
    """Build a local ensemble from an ensemble resource or a list of models.

    ``ensemble`` is either a list of model references (validated with
    ``get_model_id``) or an ensemble reference resolved through
    ``check_resource``.  ``api`` defaults to ``BigML(storage=STORAGE)``.
    ``max_models`` caps the size of each chunk in ``self.models_splits``;
    with ``None`` all models form a single chunk.  Only when there is
    exactly one chunk are the models retrieved and wrapped in
    ``self.multi_model``.

    Raises ``ValueError`` when an element of the list is not a valid model.
    """
    self.api = BigML(storage=STORAGE) if api is None else api
    self.ensemble_id = None

    if not isinstance(ensemble, list):
        self.ensemble_id = get_ensemble_id(ensemble)
        ensemble = check_resource(ensemble, self.api.get_ensemble)
        models = ensemble['object']['models']
        self.distributions = ensemble['object'].get('distributions', None)
    else:
        try:
            models = [get_model_id(model) for model in ensemble]
        except ValueError:
            raise ValueError('Failed to verify the list of models. Check '
                             'your model id values.')
        self.distributions = None

    self.model_ids = models
    self.fields = self.all_model_fields()

    number_of_models = len(models)
    if max_models is not None:
        self.models_splits = [
            models[start:(start + max_models)]
            for start in range(0, number_of_models, max_models)]
    else:
        self.models_splits = [models]

    if len(self.models_splits) == 1:
        only_split = self.models_splits[0]
        models = [retrieve_resource(self.api, model_id,
                                    query_string=ONLY_MODEL)
                  for model_id in only_split]
        self.multi_model = MultiModel(models, self.api)
def print_connection_info(self):
    """Read the connection settings, print them and clean old test output.

    Credentials come from ``BIGML_USERNAME`` / ``BIGML_API_KEY``; the debug
    switches come from ``BIGMLER_DEBUG`` (``self.debug``) and
    ``BIGML_DEBUG`` (``self.api_debug``).  After the connection is created
    and its info printed, every folder under ``./`` whose name starts with
    ``scenario`` is handed to ``bigmler_delete`` (in reverse order of
    discovery) and ``./last_run`` is removed if it exists.

    Raises:
        AssertionError: if either credential variable is unset.
    """
    def env_flag(name):
        """Interpret an environment variable as an on/off switch."""
        value = os.environ.get(name, "")
        try:
            # numeric values follow integer truthiness, so "0" means off
            return bool(int(value))
        except ValueError:
            # unset/empty is off; any other non-numeric text stays on
            return bool(value)

    self.USERNAME = os.environ.get('BIGML_USERNAME')
    self.API_KEY = os.environ.get('BIGML_API_KEY')
    self.debug = env_flag('BIGMLER_DEBUG')
    self.api_debug = env_flag('BIGML_DEBUG')

    if self.USERNAME is None or self.API_KEY is None:
        # raised explicitly so the check survives ``python -O``
        raise AssertionError(
            "Tests use the BIGML_USERNAME and BIGML_API_KEY"
            " environment variables to authenticate the"
            " connection, but they seem to be unset. Please,"
            " set them before testing.")

    self.api = BigML(self.USERNAME, self.API_KEY, debug=self.api_debug)
    print(self.api.connection_info())

    output_dir = "./last_run"
    # collect first, delete afterwards: the tree is not modified while it
    # is being walked
    scenario_dirs = []
    for _, sub_folders, _ in os.walk("./"):
        scenario_dirs.extend(
            folder for folder in sub_folders
            if folder.startswith("scenario"))
    for folder in reversed(scenario_dirs):
        bigmler_delete(folder, output_dir=output_dir)
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
def __init__(self, anomaly, api=None):
    """Build a local anomaly detector from a resource dict or an identifier.

    ``anomaly`` is either a dict carrying a non-None ``'resource'`` key, or
    any value ``get_anomaly_id`` can turn into an id; in the second case the
    resource is fetched with ``retrieve_resource``.  ``api`` is only
    defaulted to ``BigML(storage=STORAGE)`` when a fetch is needed.

    Raises ``Exception`` when the id cannot be determined, when
    ``mean_depth`` or ``sample_size`` is missing, when the resource status
    is not ``FINISHED``, or when the model has no ``'top_anomalies'`` list.
    """
    # defaults, so every attribute exists even if construction stops early
    self.resource_id = None
    self.sample_size = None
    self.input_fields = None
    self.mean_depth = None
    self.expected_mean_depth = None
    self.iforest = None
    self.top_anomalies = None
    self.id_fields = []
    if not (isinstance(anomaly, dict) and 'resource' in anomaly and
            anomaly['resource'] is not None):
        # only an identifier was given: fetch the resource
        if api is None:
            api = BigML(storage=STORAGE)
        self.resource_id = get_anomaly_id(anomaly)
        if self.resource_id is None:
            raise Exception(api.error_message(anomaly,
                                              resource_type='anomaly',
                                              method='get'))
        query_string = ONLY_MODEL
        anomaly = retrieve_resource(api, self.resource_id,
                                    query_string=query_string)
    else:
        self.resource_id = get_anomaly_id(anomaly)
    if 'object' in anomaly and isinstance(anomaly['object'], dict):
        # unwrap the payload; these attributes keep their defaults when the
        # dict has no 'object' wrapper
        anomaly = anomaly['object']
        self.sample_size = anomaly.get('sample_size')
        self.input_fields = anomaly.get('input_fields')
        self.id_fields = anomaly.get('id_fields', [])
    # NOTE(review): no exception is raised when the 'model' key is absent;
    # the instance is left with the defaults set above -- confirm intended.
    if 'model' in anomaly and isinstance(anomaly['model'], dict):
        ModelFields.__init__(self, anomaly['model'].get('fields'))
        if ('top_anomalies' in anomaly['model'] and
                isinstance(anomaly['model']['top_anomalies'], list)):
            self.mean_depth = anomaly['model'].get('mean_depth')
            status = get_status(anomaly)
            if 'code' in status and status['code'] == FINISHED:
                self.expected_mean_depth = None
                if self.mean_depth is None or self.sample_size is None:
                    raise Exception("The anomaly data is not complete. "
                                    "Score will"
                                    " not be available")
                else:
                    # NOTE(review): math.log raises ValueError when
                    # sample_size - 1 <= 0 -- confirm sample_size > 1 holds.
                    default_depth = (
                        2 * (DEPTH_FACTOR + \
                        math.log(self.sample_size - 1) - \
                        (float(self.sample_size - 1) / self.sample_size)))
                    # the expected depth never exceeds the computed default
                    self.expected_mean_depth = min(self.mean_depth,
                                                   default_depth)
                iforest = anomaly['model'].get('trees', [])
                if iforest:
                    # one AnomalyTree per stored tree, built from its root
                    self.iforest = [
                        AnomalyTree(anomaly_tree['root'], self.fields)
                        for anomaly_tree in iforest]
                self.top_anomalies = anomaly['model']['top_anomalies']
            else:
                raise Exception("The anomaly isn't finished yet")
        else:
            raise Exception("Cannot create the Anomaly instance. Could not"
                            " find the 'top_anomalies' key in the"
                            " resource:\n\n%s" % anomaly['model'].keys())
def teardown_module():
    """Operations to be performed after each module.

    Removes ``./tmp`` if present.  Unless ``world.debug`` is set, it then
    deletes the resources tracked by ``world``, reconnects at organization
    level, prints a warning for every resource type whose count in the
    project stats is not zero, and deletes the project.
    """
    if os.path.exists('./tmp'):
        shutil.rmtree('./tmp')

    if not world.debug:
        try:
            world.delete_resources()
        except Exception as exc:
            # best effort: report the failure and still delete the project
            print(exc)
        world.api = BigML(world.USERNAME, world.API_KEY, debug=world.debug,
                          organization=BIGML_ORGANIZATION)
        project_stats = world.api.get_project(
            world.project_id)['object']['stats']
        for resource_type, value in project_stats.items():
            if value['count'] != 0:
                # leftovers are reported but do not fail the run
                print("WARNING: Increment in %s: %s" % (resource_type, value))
        world.api.delete_project(world.project_id)
def __init__(self, model, api=None):
    """Build a local model from a resource dict or a model identifier.

    A dict with a non-None ``'resource'`` key is used as is; anything else
    is resolved with ``get_model_id`` and fetched through ``retrieve_model``
    (``api`` defaults to ``BigML(storage=STORAGE)`` for the fetch).

    Raises ``Exception`` when the id cannot be determined, when the
    resource lacks a ``'model'`` dict, when its status is not ``FINISHED``,
    or when ``'model_fields'`` names a field missing from ``'fields'``.
    """
    given_as_resource = (isinstance(model, dict) and 'resource' in model
                         and model['resource'] is not None)
    if given_as_resource:
        self.resource_id = model['resource']
    else:
        if api is None:
            api = BigML(storage=STORAGE)
        self.resource_id = get_model_id(model)
        if self.resource_id is None:
            raise Exception(error_message(model,
                                          resource_type='model',
                                          method='get'))
        model = retrieve_model(api, self.resource_id)

    if 'object' in model and isinstance(model['object'], dict):
        model = model['object']

    if not ('model' in model and isinstance(model['model'], dict)):
        raise Exception("Cannot create the Model instance. Could not"
                        " find the 'model' key in the resource:\n\n%s" %
                        model)

    status = get_status(model)
    if not ('code' in status and status['code'] == FINISHED):
        raise Exception("The model isn't finished yet")

    model_info = model['model']
    if 'model_fields' in model_info:
        fields = model_info['model_fields']
        # pagination or exclusion might cause a field not to
        # be in available fields dict
        if not all(key in model_info['fields'] for key in fields.keys()):
            raise Exception("Some fields are missing"
                            " to generate a local model."
                            " Please, provide a model with"
                            " the complete list of fields.")
        # copy summary and name from the full field description
        for field in fields:
            field_info = model_info['fields'][field]
            fields[field]['summary'] = field_info['summary']
            fields[field]['name'] = field_info['name']
    else:
        fields = model_info['fields']

    self.inverted_fields = invert_dictionary(fields)
    self.all_inverted_fields = invert_dictionary(model_info['fields'])
    self.tree = Tree(model_info['root'], fields, model['objective_fields'])
    self.description = model['description']

    importance = model_info.get('importance', None)
    if importance:
        # keep only the entries whose field is used by the model
        importance = [element for element in importance
                      if element[0] in fields]
    self.field_importance = importance
    self.locale = model.get('locale', DEFAULT_LOCALE)
def setup_module():
    """Operations to be performed before each module.

    Backs up the current connection and project id in ``world``, creates a
    project through an organization-level connection, then switches
    ``world.api`` to a connection bound to that project and clears
    ``world``.  Connection info is printed after each reconnection.
    """
    # Project or Organization IDs
    world.bck_api = world.api
    world.api = BigML(world.USERNAME, world.API_KEY, debug=world.debug,
                      organization=BIGML_ORGANIZATION)
    print(world.api.connection_info())

    world.bck_project_id = world.project_id
    world.project_id = world.api.create_project(
        {"name": world.test_project_name})['resource']

    world.api = BigML(world.USERNAME, world.API_KEY, debug=world.debug,
                      project=world.project_id)
    print(world.api.connection_info())
    world.clear()
def do_bigml(fname):
    """Upload ``fname`` as a BigML source and build two datasets from it.

    The first dataset is created with default settings and not waited for.
    The second adds a new field generated by a Flatline expression over the
    ``created_utc`` field and is waited for with ``api.ok``.  Nothing is
    returned.
    """
    api = BigML()
    src = api.create_source(fname)
    api.ok(src)
    api.create_dataset(src)

    # The expression must have balanced parentheses.
    # presumably created_utc is in seconds and epoch-fields expects
    # milliseconds, hence the * 1000 -- TODO confirm
    dategen = {'field': '(epoch-fields (* 1000 (f "created_utc")))'}
    ds = api.create_dataset(src, args={'new_fields': [dategen]})
    api.ok(ds)
def get_resource_dict(resource, resource_type, api=None):
    """Return ``(resource_id, resource)`` for a local object constructor.

    The first argument of the local object constructors can be:

    - the path to a file that contains the JSON
    - the ID of the resource
    - the resource dict itself

    ``resource_type`` selects the id getter from ``ID_GETTERS`` and is used
    in the error messages.  ``api`` defaults to ``BigML(storage=STORAGE)``
    and is used to fetch the resource when the dict is not usable as given.

    Raises ``ValueError`` when the file content cannot be interpreted,
    ``IOError`` when the string is neither a readable file nor a valid id,
    and ``Exception`` when the string contains ``"<resource_type>/"`` but
    no valid id is found in it.
    """
    if api is None:
        api = BigML(storage=STORAGE)

    get_id = ID_GETTERS[resource_type]
    resource_id = None
    # the string can be a path to a JSON file
    if isinstance(resource, basestring):
        try:
            with open(resource) as resource_file:
                resource = json.load(resource_file)
                resource_id = get_id(resource)
                # NOTE(review): this ValueError is raised inside the try
                # body, so the `except ValueError` below appears to replace
                # its message with "Failed to interpret ..." -- confirm.
                if resource_id is None:
                    raise ValueError("The JSON file does not seem"
                                     " to contain a valid BigML %s"
                                     " representation." % resource_type)
        except IOError:
            # if it is not a path, it can be a model id
            resource_id = get_id(resource)
            if resource_id is None:
                # a string containing "<type>/" is reported as a bad id,
                # anything else as a missing file
                if resource.find("%s/" % resource_type) > -1:
                    raise Exception(
                        api.error_message(resource,
                                          resource_type=resource_type,
                                          method="get"))
                else:
                    raise IOError("Failed to open the expected JSON file"
                                  " at %s." % resource)
        except ValueError:
            raise ValueError("Failed to interpret %s."
                             " JSON file expected." % resource)

    # checks whether the information needed for local predictions is in
    # the first argument
    if isinstance(resource, dict) and \
            not check_model_fields(resource):
        # if the fields used by the model are not
        # available, use only ID to retrieve it again
        resource = get_id(resource)
        resource_id = resource

    # anything that is not a dict with a non-None 'resource' key is fetched
    if not (isinstance(resource, dict) and 'resource' in resource and
            resource['resource'] is not None):
        query_string = ONLY_MODEL
        resource = retrieve_resource(api, resource_id,
                                     query_string=query_string)
    else:
        resource_id = get_id(resource)

    return resource_id, resource
def setup_resources(feature):
    """Prepare the shared ``world`` state before a feature runs.

    Reads the credentials from the environment, opens the regular and the
    development-mode connections, records the initial ``total_count`` of
    each listed resource type in ``world.init_<type>_count`` and empties the
    resource registries.  ``feature`` is not used by the body.
    """
    # os.environ[...] raises KeyError for an unset variable, so the asserts
    # below only act as a second line of defence.
    world.USERNAME = os.environ['BIGML_USERNAME']
    world.API_KEY = os.environ['BIGML_API_KEY']
    for credential in (world.USERNAME, world.API_KEY):
        assert credential is not None

    world.api = BigML(world.USERNAME, world.API_KEY)
    world.api_dev_mode = BigML(world.USERNAME, world.API_KEY, dev_mode=True)

    counted_types = ('sources', 'datasets', 'models', 'predictions',
                     'evaluations', 'ensembles', 'batch_predictions')
    for resource_type in counted_types:
        # calls e.g. world.api.list_sources()
        listing = getattr(world.api, 'list_%s' % resource_type)()
        assert listing['code'] == HTTP_OK
        setattr(world, 'init_%s_count' % resource_type,
                listing['meta']['total_count'])

    registries = ('sources', 'datasets', 'models', 'predictions', 'folders',
                  'evaluations', 'ensembles', 'batch_predictions')
    for registry in registries:
        setattr(world, registry, [])
def get_bigml_api(self, *args, **kwargs):
    """Create ``self.bigml`` and verify that it can authenticate.

    All arguments are forwarded to ``BigML``.

    Raises:
        NotConfigured: with ``errtype='Missing'`` when building the
            connection raises ``AttributeError``, or with
            ``errtype='Invalid'`` when ``check_bigml_auth()`` is falsy.
    """
    try:
        self.bigml = BigML(*args, **kwargs)
    except AttributeError:
        missing_msg = self.BIGML_AUTH_ERRMSG.format(errtype='Missing')
        raise NotConfigured(missing_msg)

    if self.check_bigml_auth():
        return
    invalid_msg = self.BIGML_AUTH_ERRMSG.format(errtype='Invalid')
    raise NotConfigured(invalid_msg)
def print_connection_info(self):
    """Read the credentials from the environment and print connection info.

    Stores ``BIGML_USERNAME`` / ``BIGML_API_KEY`` in ``self.USERNAME`` /
    ``self.API_KEY``, creates ``self.api`` with the current ``self.debug``
    flag and prints its connection info.

    Raises:
        AssertionError: if either environment variable is unset.
    """
    self.USERNAME = os.environ.get('BIGML_USERNAME')
    self.API_KEY = os.environ.get('BIGML_API_KEY')
    if self.USERNAME is None or self.API_KEY is None:
        # raised explicitly so the check survives ``python -O``
        raise AssertionError(
            "Tests use the BIGML_USERNAME and BIGML_API_KEY"
            " environment variables to authenticate the"
            " connection, but they seem to be unset. Please,"
            " set them before testing.")
    self.api = BigML(self.USERNAME, self.API_KEY, debug=self.debug)
    print(self.api.connection_info())
def __init__(self, cluster, api=None):
    """Build a local cluster from a resource dict or a cluster identifier.

    A dict with a non-None ``'resource'`` key is used as is and its id is
    stored in ``self.resource_id``; anything else is resolved with
    ``get_cluster_id`` and fetched through ``retrieve_resource`` (``api``
    defaults to ``BigML(storage=STORAGE)`` for the fetch).

    On success the instance holds the centroids, the field scales and, for
    every field whose ``optype`` is ``'text'``, its term forms, tag cloud
    and term analysis settings.

    Raises ``Exception`` when the id cannot be determined, when the
    resource lacks a ``'clusters'`` dict, when its status is not
    ``FINISHED``, or when a scaled field is missing from the fields.
    """
    if (isinstance(cluster, dict) and 'resource' in cluster and
            cluster['resource'] is not None):
        # set here so the attribute exists for dict input as well
        self.resource_id = cluster['resource']
    else:
        if api is None:
            api = BigML(storage=STORAGE)
        self.resource_id = get_cluster_id(cluster)
        if self.resource_id is None:
            raise Exception(api.error_message(cluster,
                                              resource_type='cluster',
                                              method='get'))
        query_string = ONLY_MODEL
        cluster = retrieve_resource(api, self.resource_id,
                                    query_string=query_string)

    if 'object' in cluster and isinstance(cluster['object'], dict):
        cluster = cluster['object']

    if not ('clusters' in cluster and isinstance(cluster['clusters'], dict)):
        raise Exception("Cannot create the Cluster instance. Could not"
                        " find the 'clusters' key in the resource:\n\n%s" %
                        cluster)

    status = get_status(cluster)
    if not ('code' in status and status['code'] == FINISHED):
        raise Exception("The cluster isn't finished yet")

    clusters = cluster['clusters']['clusters']
    self.centroids = [Centroid(centroid) for centroid in clusters]
    self.scales = {}
    self.scales.update(cluster['scales'])
    self.term_forms = {}
    self.tag_clouds = {}
    self.term_analysis = {}

    fields = cluster['clusters']['fields']
    for field_id, field in fields.items():
        if field['optype'] == 'text':
            # copies, so later changes do not touch the resource dict
            self.term_forms[field_id] = {}
            self.term_forms[field_id].update(field['summary']['term_forms'])
            self.tag_clouds[field_id] = {}
            self.tag_clouds[field_id].update(field['summary']['tag_cloud'])
            self.term_analysis[field_id] = {}
            self.term_analysis[field_id].update(field['term_analysis'])

    ModelFields.__init__(self, fields)
    if not all(field_id in self.fields for field_id in self.scales):
        raise Exception("Some fields are missing"
                        " to generate a local cluster."
                        " Please, provide a cluster with"
                        " the complete list of fields.")
def setup_module():
    """Setup for the module.

    Backs up the current connection and project id in ``world``, switches
    ``world.api`` to an organization-level connection, prints its
    connection info, clears ``world.project_id`` and calls
    ``world.clear()``.
    """
    world.bck_api = world.api
    world.api = BigML(world.USERNAME, world.API_KEY, debug=world.debug,
                      organization=BIGML_ORGANIZATION)
    print(world.api.connection_info())
    world.bck_project_id = world.project_id
    world.project_id = None
    world.clear()
def print_connection_info():
    """Read the credentials from the environment and print connection info.

    Stores ``BIGML_USERNAME`` / ``BIGML_API_KEY`` in ``world.USERNAME`` /
    ``world.API_KEY``, creates ``world.api`` and prints its connection info.

    Raises:
        AssertionError: if either environment variable is unset.
    """
    world.USERNAME = os.environ.get('BIGML_USERNAME')
    world.API_KEY = os.environ.get('BIGML_API_KEY')
    if world.USERNAME is None or world.API_KEY is None:
        # raised explicitly so the check survives ``python -O``
        raise AssertionError(
            "Tests use the BIGML_USERNAME and BIGML_API_KEY"
            " environment variables to authenticate the"
            " connection, but they seem to be unset. Please,"
            " set them before testing.")
    world.api = BigML(world.USERNAME, world.API_KEY)
    print(world.api.connection_info())
def setup_module():
    """Operations to be performed before each module.

    Runs ``general_setup_module()``, backs up the current connection in
    ``world.bck_api``, switches ``world.api`` to a connection bound to
    ``world.project_id``, prints its connection info and clears ``world``.
    """
    # Project or Organization IDs
    general_setup_module()
    world.bck_api = world.api
    world.api = BigML(world.USERNAME, world.API_KEY, debug=world.debug,
                      project=world.project_id)
    print(world.api.connection_info())
    world.clear()
def __init__(self, model, api=None):
    """Wrap whichever local model class matches the given resource.

    The resource id and content are obtained with ``extract_id``; the
    resource type picks a class from ``COMPONENT_CLASSES``, which is
    instantiated with the resource and the connection.  This instance then
    takes over the bases of that object's class and a copy of every
    instance attribute; the object itself is kept in ``self.local_model``.
    ``api`` defaults to ``BigML(storage=STORAGE)``.
    """
    if api is None:
        api = BigML(storage=STORAGE)

    resource_id, model = extract_id(model, api)
    component_class = COMPONENT_CLASSES[get_resource_type(resource_id)]
    local_model = component_class(model, api=api)

    # adopt the wrapped object's class bases and state
    self.__class__.__bases__ = local_model.__class__.__bases__
    for name, value in local_model.__dict__.items():
        setattr(self, name, value)
    self.local_model = local_model
def test_online_model(model_name):
    """Request a remote prediction for every test row and dump the results.

    Creates ``<PREDICT_STORAGE>/<model_name>/API_result`` if needed and uses
    it as the storage directory of the connection.  Each row of
    ``<DATASET_STORAGE>/<model_name>/<model_name>_test.csv`` is sent to
    ``api.create_prediction`` for ``models[model_name]``.  The files
    matching ``prediction*`` in the storage directory are then read back,
    and the input data of each one, extended with a ``"probability"`` dict,
    is collected and appended to ``probabilities.txt`` as ``str()`` of the
    whole list.
    """
    # Create local_model object
    print("Creating model from API .... ")
    predict_storage = os.path.join(PREDICT_STORAGE, model_name)
    if not os.path.exists(predict_storage):
        print("Creating predict directory .... ")
        os.makedirs(predict_storage)
    API_predict_storage = os.path.join(predict_storage, "API_result")
    if not os.path.exists(API_predict_storage):
        print("Creating predict directory .... ")
        os.makedirs(API_predict_storage)
    api = BigML(storage=API_predict_storage)
    print("Reading testing data .... ")
    # NOTE(review): the source and dataset below are created remotely but
    # not referenced again in this function -- confirm they are needed.
    test_source = api.create_source(
        os.path.join(DATASET_STORAGE, model_name, model_name + "_test.csv"))
    api.ok(test_source)
    test_dataset = api.create_dataset(test_source)
    api.ok(test_dataset)
    print("Start predicting .... ")
    print(" Opening testing data")
    # despite its name, this is the path of the *_test.csv file
    training_data_path = os.path.join(DATASET_STORAGE, model_name,
                                      model_name) + "_test.csv"
    with open(training_data_path) as test_handler:
        reader = csv.DictReader(test_handler)
        counter = 1
        for input_data in reader:
            print("=================================")
            print("===== Prediction ", counter, " ========")
            print("=================================")
            print("Input testing data : ", input_data)
            predict_result = api.create_prediction(
                'model/{}'.format(models[model_name]), input_data)
            print(">> Prediction : ", predict_result, "\n")
            # predict_pprint
            counter = counter + 1
    ## File conversion: Extract confidence
    path_API = os.path.join(PREDICT_STORAGE, model_name, "API_result")
    # presumably these are the prediction files the connection stored in
    # its storage directory -- verify against the bindings
    predictions = glob.glob(os.path.join(path_API, "prediction*"))
    big_array = []
    # opened in append mode: the file grows on every run
    with open(os.path.join(path_API, "probabilities.txt"), 'a') as fh:
        for prediction in predictions:
            with open(prediction, 'r') as pf:
                j = json.loads(pf.read())
                input_dictionary = j["object"]["input_data"]
                # map the first element of each entry to its second element
                dic = {}
                for each_answer in j["object"]["probabilities"]:
                    dic[each_answer[0]] = each_answer[1]
                input_dictionary["probability"] = dic
                big_array.append(input_dictionary)
                print("Wrting to file >> ", input_dictionary)
        # written as the str() of the list, not as JSON
        fh.write(str(big_array))
def model_creation(model_name, local_or_online):
    """Create a model from local training data or export an existing one.

    With ``local_or_online == "L"`` the file
    ``<DATASET_STORAGE>/<model_name>/<model_name>_train.csv`` is uploaded,
    turned into a dataset, and a model of the kind selected by the
    module-level ``which_model`` ("DT", "EN", "DN" or "AS") is created from
    it; any other ``which_model`` value prints a message and exits.  With
    ``"O"`` the model registered in ``models[model_name]`` is exported.
    Any other value of ``local_or_online`` only prepares the storage
    directory and the connection.
    """
    #### CREAT API
    ### All step that make request through api will be stored
    model_storage = os.path.join(MODEL_STORAGE, model_name)
    if not os.path.exists(model_storage):
        os.makedirs(model_storage)
    api = BigML(storage=model_storage)

    if local_or_online == "O":
        #### DOWNLOAD MODEL
        print("Download model from API .... ")
        print(">> model name : ", model_name, " model id : ", models[model_name])
        api = BigML(storage=model_storage)
        api.export('model/{}'.format(models[model_name]))
        return
    if local_or_online != "L":
        return

    #### CREAT MODEL
    ### api.ok() is to make sure each step is finished before running
    ### subsequent data.
    ### Create data source (from local)
    print("Creating model from Local data .... ")
    dataset_storage = os.path.join(DATASET_STORAGE, model_name,
                                   model_name+"_train.csv")
    print("Reading training data .... ")
    source = api.create_source(dataset_storage)
    api.ok(source)
    dataset = api.create_dataset(source)
    api.ok(dataset)

    print("Model creating .... ")
    # model kind -> name of the connection method that creates it
    creator_names = {
        "DT": "create_model",
        "EN": "create_ensemble",
        "DN": "create_deepnet",
        "AS": "create_association",
    }
    creator_name = creator_names.get(which_model)
    if creator_name is None:
        print("Your input model is invalid, byebye!!!")
        sys.exit()
    model = getattr(api, creator_name)(dataset)
    api.ok(model)
    print("Model is created ! DONE !!")
    print(">> model name : ", model_name, " model id : ", model["resource"])
def createRacingModel(dataset, type=util.ML_BIGML):
    """Train a deepnet on ``dataset`` and return a local ``Deepnet`` for it.

    Only ``type == util.ML_BIGML`` is handled.  The deepnet is created with
    the name "Racing Model" and "Movement" as objective field, its resource
    id is appended to ``saved_models.txt`` as ``racing-<id>``, and a local
    ``Deepnet`` built from that id is returned.

    Returns:
        The local ``Deepnet``, or ``None`` for any other ``type``.
    """
    if type != util.ML_BIGML:
        return None

    api = BigML(config.BIGML_USER, config.BIGML_API_KEY)
    print("Creating model...")
    args = {"name": "Racing Model", "objective_field": "Movement"}
    model = api.create_deepnet(dataset, args)
    api.ok(model)
    resource = model["resource"]

    # Saves model id to a file; the context manager closes the handle even
    # if the write fails
    with open("saved_models.txt", "a+") as saved_models:
        saved_models.write(f"\nracing-{resource}")

    # Creates LOCAL model
    return Deepnet(resource, api)
def createRacingDataset(type=util.ML_BIGML):
    """Upload ``train_racing.csv`` and return the dataset built from it.

    Only ``type == util.ML_BIGML`` is handled.  The source is named
    "Racing Data", its field ``000005`` is switched to the categorical
    optype, and a dataset with the same name is created and waited for.
    """
    if type != util.ML_BIGML:
        return None

    # Initialize BigML
    api = BigML(config.BIGML_USER, config.BIGML_API_KEY)

    print("Creating source...")
    source_args = {"name": "Racing Data"}
    source = api.create_source('train_racing.csv', args=source_args)
    api.ok(source)

    # Changes "movement" field type to categorical
    changes = {"fields": {"000005": {"optype": "categorical"}}}
    api.update_source(source, changes)
    api.ok(source)

    print("Creating dataset...")
    dataset_args = {"name": "Racing Data"}
    dataset = api.create_dataset(source, args=dataset_args)
    api.ok(dataset)
    return dataset
def print_connection_info(self):
    """Read the connection settings from the environment and print them.

    Stores ``BIGML_USERNAME`` / ``BIGML_API_KEY`` in ``self.USERNAME`` /
    ``self.API_KEY`` and the result of ``get_env_connection_info()`` in
    ``self.EXTERNAL_CONN``.  ``self.api`` is created with the current debug
    flags; it uses ``./debug_storage`` as storage when either flag is set
    and no storage otherwise.  Both the API and the external connection
    info are printed.

    Raises:
        AssertionError: if either credential variable is unset.
    """
    self.USERNAME = os.environ.get('BIGML_USERNAME')
    self.API_KEY = os.environ.get('BIGML_API_KEY')
    self.EXTERNAL_CONN = get_env_connection_info()

    if self.USERNAME is None or self.API_KEY is None:
        # raised explicitly so the check survives ``python -O``
        raise AssertionError(
            "Tests use the BIGML_USERNAME and BIGML_API_KEY"
            " environment variables to authenticate the"
            " connection, but they seem to be unset. Please,"
            " set them before testing.")

    # responses are only stored locally while debugging
    debugging = self.debug or self.short_debug
    self.api = BigML(self.USERNAME, self.API_KEY,
                     debug=self.debug,
                     short_debug=self.short_debug,
                     storage=("./debug_storage" if debugging else None))
    print(self.api.connection_info())
    print(self.external_connection_info())
def __init__(self, ensemble, api=None, max_models=None):
    """Resolve an ensemble and split its model ids into chunks.

    ``api`` defaults to ``BigML(storage=STORAGE)``.  The ensemble is
    resolved through ``check_resource`` and its model ids are stored in
    ``self.model_ids``.  ``self.models_splits`` holds those ids in chunks of
    at most ``max_models`` elements, or in a single chunk when
    ``max_models`` is ``None``.
    """
    self.api = BigML(storage=STORAGE) if api is None else api
    self.ensemble_id = get_ensemble_id(ensemble)

    ensemble = check_resource(ensemble, self.api.get_ensemble)
    models = ensemble['object']['models']
    self.model_ids = models

    number_of_models = len(models)
    if max_models is not None:
        self.models_splits = [
            models[start:(start + max_models)]
            for start in range(0, number_of_models, max_models)]
    else:
        self.models_splits = [models]
def teardown_module():
    """Teardown for the module.

    Unless ``world.debug`` is set, reconnects at organization level and
    deletes the project referenced by ``world.project``.  The backed-up
    project id and connection are then restored and the connection info is
    printed.
    """
    if not world.debug:
        world.api = BigML(world.USERNAME, world.API_KEY, debug=world.debug,
                          organization=BIGML_ORGANIZATION)
        world.project_id = world.project["resource"]
        # the stats are retrieved but not inspected in this teardown
        project_stats = world.api.get_project(
            world.project_id)['object']['stats']
        world.api.delete_project(world.project_id)
    world.project_id = world.bck_project_id
    world.api = world.bck_api
    print(world.api.connection_info())
def __init__(self, model, api=None):
    """Build a local model from a resource dict or a model identifier.

    A dict with a non-None ``'resource'`` key is used as is; anything else
    is resolved with ``get_model_id`` and fetched through ``retrieve_model``
    (``api`` defaults to ``BigML(storage=STORAGE)`` for the fetch).
    ``BaseModel.__init__`` is then run on the resource and the prediction
    tree is built from its root and training distribution.  For regression
    trees ``self.regression_ready`` records whether numpy and scipy can be
    imported.

    Raises ``Exception`` when the id cannot be determined, when the
    resource lacks a ``'model'`` dict, or when its status is not
    ``FINISHED``.
    """
    given_as_resource = (isinstance(model, dict) and 'resource' in model
                         and model['resource'] is not None)
    if not given_as_resource:
        if api is None:
            api = BigML(storage=STORAGE)
        self.resource_id = get_model_id(model)
        if self.resource_id is None:
            raise Exception(api.error_message(model,
                                              resource_type='model',
                                              method='get'))
        query_string = ONLY_MODEL
        model = retrieve_model(api, self.resource_id,
                               query_string=query_string)

    BaseModel.__init__(self, model, api=api)

    if 'object' in model and isinstance(model['object'], dict):
        model = model['object']

    if not ('model' in model and isinstance(model['model'], dict)):
        raise Exception("Cannot create the Model instance. Could not"
                        " find the 'model' key in the resource:\n\n%s" %
                        model)

    status = get_status(model)
    if not ('code' in status and status['code'] == FINISHED):
        raise Exception("The model isn't finished yet")

    distribution = model['model']['distribution']['training']
    self.ids_map = {}
    self.tree = Tree(
        model['model']['root'],
        self.fields,
        objective_field=self.objective_field,
        root_distribution=distribution,
        parent_id=None,
        ids_map=self.ids_map)
    self.terms = {}

    if self.tree.regression:
        # the imports only probe whether the libraries are available
        try:
            import numpy
            import scipy
        except ImportError:
            self.regression_ready = False
        else:
            self.regression_ready = True
def print_connection_info(self):
    """Read the credentials, print connection info and clean old output.

    Stores ``BIGML_USERNAME`` / ``BIGML_API_KEY`` in ``self.USERNAME`` /
    ``self.API_KEY``, creates ``self.api`` and prints its connection info.
    Every folder found under ``./`` whose name starts with ``scenario`` is
    then handed to ``bigmler_delete`` and ``./last_run`` is removed if it
    exists.

    Raises:
        AssertionError: if either environment variable is unset.
    """
    self.USERNAME = os.environ.get('BIGML_USERNAME')
    self.API_KEY = os.environ.get('BIGML_API_KEY')
    if self.USERNAME is None or self.API_KEY is None:
        # raised explicitly so the check survives ``python -O``
        raise AssertionError(
            "Tests use the BIGML_USERNAME and BIGML_API_KEY"
            " environment variables to authenticate the"
            " connection, but they seem to be unset. Please,"
            " set them before testing.")

    self.api = BigML(self.USERNAME, self.API_KEY)
    print(self.api.connection_info())

    output_dir = "./last_run"
    for _, sub_folders, _ in os.walk("./"):
        for folder in sub_folders:
            if folder.startswith("scenario"):
                bigmler_delete(folder, output_dir=output_dir)
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)