def __init__(self, anomaly, api=None):
    self.resource_id = None
    self.sample_size = None
    self.input_fields = None
    self.mean_depth = None
    self.expected_mean_depth = None
    self.iforest = None
    self.top_anomalies = None
    self.id_fields = []
    if not (isinstance(anomaly, dict) and 'resource' in anomaly and
            anomaly['resource'] is not None):
        if api is None:
            api = BigML(storage=STORAGE)
        self.resource_id = get_anomaly_id(anomaly)
        if self.resource_id is None:
            raise Exception(api.error_message(anomaly,
                                              resource_type='anomaly',
                                              method='get'))
        query_string = ONLY_MODEL
        anomaly = retrieve_resource(api, self.resource_id,
                                    query_string=query_string)
    else:
        self.resource_id = get_anomaly_id(anomaly)
    if 'object' in anomaly and isinstance(anomaly['object'], dict):
        anomaly = anomaly['object']
    self.sample_size = anomaly.get('sample_size')
    self.input_fields = anomaly.get('input_fields')
    self.id_fields = anomaly.get('id_fields', [])
    if 'model' in anomaly and isinstance(anomaly['model'], dict):
        ModelFields.__init__(self, anomaly['model'].get('fields'))
        if ('top_anomalies' in anomaly['model'] and
                isinstance(anomaly['model']['top_anomalies'], list)):
            self.mean_depth = anomaly['model'].get('mean_depth')
            status = get_status(anomaly)
            if 'code' in status and status['code'] == FINISHED:
                self.expected_mean_depth = None
                if self.mean_depth is None or self.sample_size is None:
                    raise Exception("The anomaly data is not complete."
                                    " Score will not be available.")
                else:
                    default_depth = (
                        2 * (DEPTH_FACTOR +
                             math.log(self.sample_size - 1) -
                             (float(self.sample_size - 1) /
                              self.sample_size)))
                    self.expected_mean_depth = min(self.mean_depth,
                                                   default_depth)
                iforest = anomaly['model'].get('trees', [])
                if iforest:
                    self.iforest = [
                        AnomalyTree(anomaly_tree['root'], self.fields)
                        for anomaly_tree in iforest]
                self.top_anomalies = anomaly['model']['top_anomalies']
            else:
                raise Exception("The anomaly isn't finished yet")
        else:
            raise Exception("Cannot create the Anomaly instance. Could not"
                            " find the 'top_anomalies' key in the"
                            " resource:\n\n%s" % anomaly['model'].keys())
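# Hedged usage sketch for the Anomaly constructor above: the detector ID is
# a placeholder and the input field name is hypothetical.
from bigml.anomaly import Anomaly

local_anomaly = Anomaly('anomaly/526fc344035d071ea3031d70')
# scores close to 1 flag the most anomalous rows
score = local_anomaly.anomaly_score({'src_bytes': 350})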
class BigMLAPIMixIn(object):

    BIGML_AUTH_ERRMSG = (
        "{errtype:s} BigML credentials. Please supply "
        "BIGML_USERNAME and BIGML_API_KEY as either Scrapy "
        "settings or environment variables."
    )

    # XXX: This should get a method to read BigML configuration from settings
    def get_bigml_api(self, *args, **kwargs):
        try:
            self.bigml = BigML(*args, **kwargs)
        except AttributeError:
            raise NotConfigured(
                self.BIGML_AUTH_ERRMSG.format(errtype="Missing"))
        if not self.check_bigml_auth():
            raise NotConfigured(
                self.BIGML_AUTH_ERRMSG.format(errtype="Invalid"))

    def check_bigml_auth(self):
        return self.bigml.list_projects("limit=1")["code"] == 200

    def export_to_bigml(self, path, name, as_dataset=False):
        # bug fix: the original passed the builtin `file` instead of the
        # `path` argument
        source = self.bigml.create_source(path, {"name": name})
        if not as_dataset:
            return source
        return self.bigml.create_dataset(source, {"name": name})
def reset_api(self):
    """Reset the api connection values

    """
    self.api = BigML(self.USERNAME, self.API_KEY, debug=self.debug)
    self.api_dev_mode = BigML(self.USERNAME, self.API_KEY, dev_mode=True,
                              debug=self.debug)
def print_connection_info(self):
    self.USERNAME = os.environ.get('BIGML_USERNAME')
    self.API_KEY = os.environ.get('BIGML_API_KEY')
    try:
        # int() makes the ValueError guard meaningful: bool() of any
        # non-empty string is always True
        self.debug = bool(int(os.environ.get('BIGMLER_DEBUG', 0)))
        self.api_debug = bool(int(os.environ.get('BIGML_DEBUG', 0)))
    except ValueError:
        pass
    if self.USERNAME is None or self.API_KEY is None:
        assert False, ("Tests use the BIGML_USERNAME and BIGML_API_KEY"
                       " environment variables to authenticate the"
                       " connection, but they seem to be unset. Please,"
                       " set them before testing.")
    self.api = BigML(self.USERNAME, self.API_KEY, debug=self.api_debug)
    print self.api.connection_info()
    output_dir = "./last_run"
    dirs = []
    for _, subFolders, _ in os.walk("./"):
        for folder in subFolders:
            if folder.startswith("scenario"):
                dirs.append(folder)
    dirs.reverse()
    for folder in dirs:
        bigmler_delete(folder, output_dir=output_dir)
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
def setup_resources(feature):
    world.USERNAME = os.environ['BIGML_USERNAME']
    world.API_KEY = os.environ['BIGML_API_KEY']
    assert world.USERNAME is not None
    assert world.API_KEY is not None
    world.api = BigML(world.USERNAME, world.API_KEY)
    world.api_dev_mode = BigML(world.USERNAME, world.API_KEY, dev_mode=True)
    world.test_lines = 0
    store_init_resources()
    world.sources = []
    world.datasets = []
    world.models = []
    world.predictions = []
    world.evaluations = []
    world.ensembles = []
    world.batch_predictions = []
    world.clusters = []
    world.centroids = []
    world.batch_centroids = []
    world.anomalies = []
    world.anomaly_scores = []
    world.batch_anomaly_scores = []
    world.projects = []
    world.source_lower = None
    world.source_upper = None
    world.source_reference = None
def __init__(self, data, epsilon=0.01, rounding=None, black_box=False):
    self.black_box = black_box

    if not self.black_box:
        model_id = models[data]
        # retrieve a model from local storage or from bigml.io
        # (only works for public models)
        try:
            self.model = Model('model/{}'.format(model_id),
                               api=BigML(storage=STORAGE))
        except ValueError:
            self.model = Model('public/model/{}'.format(model_id),
                               api=BigML(storage=STORAGE))
        self.leaves = self.model.tree.get_leaves()
    else:
        logging.info('Extracting a Black Box Model')
        self.model_id = black_box_models[data]
        # get the black-box model with the real credentials for sanity
        # checks
        try:
            self.model = Model('model/{}'.format(self.model_id),
                               api=BigML(username='******', api_key=BB_KEY))
        except ValueError:
            self.model = Model('public/model/{}'.format(self.model_id),
                               api=BigML(storage=STORAGE))
        self.connection = BigML()

    TreeExtractor.__init__(self, epsilon, rounding)
def points_in_cluster(self, centroid_id):
    """Returns the list of data points that fall in one cluster.

    """
    cluster_datasets = self.datasets
    centroid_dataset = cluster_datasets.get(centroid_id)
    if self.api is None:
        self.api = BigML(storage=STORAGE)
    if centroid_dataset in [None, ""]:
        centroid_dataset = self.api.create_dataset(
            self.resource_id, {"centroid": centroid_id})
        self.api.ok(centroid_dataset)
    else:
        centroid_dataset = self.api.check_resource(
            "dataset/%s" % centroid_dataset)
    # download the dataset to compute local predictions
    downloaded_data = self.api.download_dataset(
        centroid_dataset["resource"])
    if PY3:
        text_reader = codecs.getreader("utf-8")
        downloaded_data = text_reader(downloaded_data)
    reader = csv.DictReader(downloaded_data)
    points = []
    for row in reader:
        points.append(row)
    return points
def do_bigml(fname):
    api = BigML()
    src = api.create_source(fname)
    api.ok(src)
    # a plain dataset, plus one that adds date fields derived from the
    # created_utc epoch (epoch-fields expects milliseconds, hence * 1000);
    # the original Flatline expression was missing its closing parenthesis
    api.create_dataset(src)
    dategen = {'field': '(epoch-fields (* 1000 (f "created_utc")))'}
    ds = api.create_dataset(src, args={'new_fields': [dategen]})
    api.ok(ds)
def get_resource_dict(resource, resource_type, api=None):
    """Extracting the resource JSON info as a dict from the first argument of
       the local object constructors, that can be:

        - the path to a file that contains the JSON
        - the ID of the resource
        - the resource dict itself
    """
    if api is None:
        api = BigML(storage=STORAGE)
    get_id = ID_GETTERS[resource_type]
    resource_id = None
    # the string can be a path to a JSON file
    if isinstance(resource, basestring):
        try:
            with open(resource) as resource_file:
                resource = json.load(resource_file)
            resource_id = get_id(resource)
            if resource_id is None:
                raise ValueError("The JSON file does not seem"
                                 " to contain a valid BigML %s"
                                 " representation." % resource_type)
        except IOError:
            # if it is not a path, it can be a model id
            resource_id = get_id(resource)
            if resource_id is None:
                if resource.find("%s/" % resource_type) > -1:
                    raise Exception(
                        api.error_message(resource,
                                          resource_type=resource_type,
                                          method="get"))
                else:
                    raise IOError("Failed to open the expected JSON file"
                                  " at %s." % resource)
        except ValueError:
            raise ValueError("Failed to interpret %s."
                             " JSON file expected." % resource)
    # checks whether the information needed for local predictions is in
    # the first argument
    if isinstance(resource, dict) and not check_model_fields(resource):
        # if the fields used by the model are not available,
        # use only the ID to retrieve it again
        resource = get_id(resource)
        resource_id = resource
    if not (isinstance(resource, dict) and 'resource' in resource and
            resource['resource'] is not None):
        query_string = ONLY_MODEL
        resource = retrieve_resource(api, resource_id,
                                     query_string=query_string)
    else:
        resource_id = get_id(resource)
    return resource_id, resource
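# Hedged usage sketch for get_resource_dict: any of the three accepted forms
# resolves to the same (id, dict) pair; the ID below is a placeholder.
model_id, model = get_resource_dict('model/526fc344035d071ea3031d70', 'model')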
def get_bigml_api(self, *args, **kwargs):
    try:
        self.bigml = BigML(*args, **kwargs)
    except AttributeError:
        raise NotConfigured(
            self.BIGML_AUTH_ERRMSG.format(errtype='Missing'))
    if not self.check_bigml_auth():
        raise NotConfigured(
            self.BIGML_AUTH_ERRMSG.format(errtype='Invalid'))
def get_model():
    bigml_model = memcache.Client().get(model_id)
    if bigml_model is None:
        bigml_api = BigML(BIGML_USERNAME, BIGML_API_KEY, dev_mode=dev_mode)
        bigml_model = bigml_api.get_model(
            'model/%s' % model_id, query_string='only_model=true;limit=-1')
        memcache.Client().add(model_id, bigml_model, time=memcache_timeout)
    return bigml_model
def print_connection_info(self):
    self.USERNAME = os.environ.get('BIGML_USERNAME')
    self.API_KEY = os.environ.get('BIGML_API_KEY')
    if self.USERNAME is None or self.API_KEY is None:
        assert False, ("Tests use the BIGML_USERNAME and BIGML_API_KEY"
                       " environment variables to authenticate the"
                       " connection, but they seem to be unset. Please,"
                       " set them before testing.")
    self.api = BigML(self.USERNAME, self.API_KEY, debug=self.debug)
    print self.api.connection_info()
def __init__(self, model, api=None):
    if check_model_structure(model):
        self.resource_id = model["resource"]
    else:
        # If only the model id is provided, the short version of the model
        # resource is used to build a basic summary of the model
        if api is None:
            api = BigML()
        self.resource_id = get_model_id(model)
        if self.resource_id is None:
            raise Exception(api.error_message(model,
                                              resource_type="model",
                                              method="get"))
        query_string = ONLY_MODEL
        model = retrieve_resource(api, self.resource_id,
                                  query_string=query_string)
        # Stored copies of the model structure might lack some necessary
        # keys
        if not check_model_structure(model):
            model = api.get_model(self.resource_id,
                                  query_string=query_string)
    if "object" in model and isinstance(model["object"], dict):
        model = model["object"]
    if "model" in model and isinstance(model["model"], dict):
        status = get_status(model)
        if "code" in status and status["code"] == FINISHED:
            if "model_fields" in model["model"] or \
                    "fields" in model["model"]:
                fields = model["model"].get(
                    "model_fields", model["model"].get("fields", []))
                # pagination or exclusion might cause a field not to
                # be in the available fields dict
                if not all(key in model["model"]["fields"]
                           for key in fields.keys()):
                    raise Exception("Some fields are missing"
                                    " to generate a local model."
                                    " Please, provide a model with"
                                    " the complete list of fields.")
                for field in fields:
                    field_info = model["model"]["fields"][field]
                    if "summary" in field_info:
                        fields[field]["summary"] = field_info["summary"]
                    fields[field]["name"] = field_info["name"]
            objective_field = model["objective_fields"]
            ModelFields.__init__(
                self, fields,
                objective_id=extract_objective(objective_field))
            self.description = model["description"]
            self.field_importance = model["model"].get("importance", None)
            if self.field_importance:
                self.field_importance = [
                    element for element in self.field_importance
                    if element[0] in fields]
            self.locale = model.get("locale", DEFAULT_LOCALE)
        else:
            raise Exception("The model isn't finished yet")
    else:
        raise Exception("Cannot create the BaseModel instance. Could not"
                        " find the 'model' key in the resource:\n\n%s" %
                        model)
def __init__(self, cluster, api=None):
    if not (isinstance(cluster, dict) and 'resource' in cluster and
            cluster['resource'] is not None):
        if api is None:
            api = BigML(storage=STORAGE)
        self.resource_id = get_cluster_id(cluster)
        if self.resource_id is None:
            raise Exception(api.error_message(cluster,
                                              resource_type='cluster',
                                              method='get'))
        query_string = ONLY_MODEL
        cluster = retrieve_resource(api, self.resource_id,
                                    query_string=query_string)
    if 'object' in cluster and isinstance(cluster['object'], dict):
        cluster = cluster['object']
    if 'clusters' in cluster and isinstance(cluster['clusters'], dict):
        status = get_status(cluster)
        if 'code' in status and status['code'] == FINISHED:
            clusters = cluster['clusters']['clusters']
            self.centroids = [Centroid(centroid) for centroid in clusters]
            self.scales = {}
            self.scales.update(cluster['scales'])
            self.term_forms = {}
            self.tag_clouds = {}
            self.term_analysis = {}
            fields = cluster['clusters']['fields']
            for field_id, field in fields.items():
                if field['optype'] == 'text':
                    self.term_forms[field_id] = {}
                    self.term_forms[field_id].update(
                        field['summary']['term_forms'])
                    self.tag_clouds[field_id] = {}
                    self.tag_clouds[field_id].update(
                        field['summary']['tag_cloud'])
                    self.term_analysis[field_id] = {}
                    self.term_analysis[field_id].update(
                        field['term_analysis'])
            ModelFields.__init__(self, fields)
            if not all(
                    [field_id in self.fields for field_id in self.scales]):
                raise Exception("Some fields are missing"
                                " to generate a local cluster."
                                " Please, provide a cluster with"
                                " the complete list of fields.")
        else:
            raise Exception("The cluster isn't finished yet")
    else:
        raise Exception("Cannot create the Cluster instance. Could not"
                        " find the 'clusters' key in the resource:\n\n%s" %
                        cluster)
def reset_api(self):
    """Reset the api connection values

    """
    if self.api is not None and self.api.dev_mode:
        world.project_id = None
    if self.api is None or self.api.dev_mode:
        self.api = BigML(self.USERNAME, self.API_KEY)
        self.api_dev_mode = BigML(self.USERNAME, self.API_KEY,
                                  dev_mode=True)
def createRacingModel(dataset, type=util.ML_BIGML):
    if type == util.ML_BIGML:
        api = BigML(config.BIGML_USER, config.BIGML_API_KEY)
        print("Creating model...")
        args = {"name": "Racing Model", "objective_field": "Movement"}
        model = api.create_deepnet(dataset, args)
        api.ok(model)
        resource = model["resource"]
        # Save the model id to a file
        with open("saved_models.txt", "a+") as saved:
            saved.write(f"\nracing-{resource}")
        # Create a LOCAL model
        model = Deepnet(resource, api)
        return model
def test_online_model(model_name):
    # Create local_model object
    print("Creating model from API .... ")
    predict_storage = os.path.join(PREDICT_STORAGE, model_name)
    if not os.path.exists(predict_storage):
        print("Creating predict directory .... ")
        os.makedirs(predict_storage)
    API_predict_storage = os.path.join(predict_storage, "API_result")
    if not os.path.exists(API_predict_storage):
        print("Creating predict directory .... ")
        os.makedirs(API_predict_storage)
    api = BigML(storage=API_predict_storage)

    print("Reading testing data .... ")
    test_source = api.create_source(
        os.path.join(DATASET_STORAGE, model_name, model_name + "_test.csv"))
    api.ok(test_source)
    test_dataset = api.create_dataset(test_source)
    api.ok(test_dataset)

    print("Start predicting .... ")
    print("    Opening testing data")
    training_data_path = os.path.join(
        DATASET_STORAGE, model_name, model_name) + "_test.csv"
    with open(training_data_path) as test_handler:
        reader = csv.DictReader(test_handler)
        counter = 1
        for input_data in reader:
            print("=================================")
            print("===== Prediction ", counter, " ========")
            print("=================================")
            print("Input testing data : ", input_data)
            predict_result = api.create_prediction(
                'model/{}'.format(models[model_name]), input_data)
            print(">> Prediction : ", predict_result, "\n")
            counter = counter + 1

    # File conversion: extract the probabilities from the stored predictions
    path_API = os.path.join(PREDICT_STORAGE, model_name, "API_result")
    predictions = glob.glob(os.path.join(path_API, "prediction*"))
    big_array = []
    with open(os.path.join(path_API, "probabilities.txt"), 'a') as fh:
        for prediction in predictions:
            with open(prediction, 'r') as pf:
                j = json.loads(pf.read())
            input_dictionary = j["object"]["input_data"]
            dic = {}
            for each_answer in j["object"]["probabilities"]:
                dic[each_answer[0]] = each_answer[1]
            input_dictionary["probability"] = dic
            big_array.append(input_dictionary)
            print("Writing to file >> ", input_dictionary)
        fh.write(str(big_array))
def __init__(self, ensemble, api=None, max_models=None):
    if api is None:
        self.api = BigML(storage=STORAGE)
    else:
        self.api = api
    self.ensemble_id = None
    if isinstance(ensemble, list):
        try:
            models = [get_model_id(model) for model in ensemble]
        except ValueError:
            raise ValueError('Failed to verify the list of models. Check '
                             'your model id values.')
        self.distributions = None
    else:
        self.ensemble_id = get_ensemble_id(ensemble)
        ensemble = check_resource(ensemble, self.api.get_ensemble)
        models = ensemble['object']['models']
        self.distributions = ensemble['object'].get('distributions', None)
    self.model_ids = models
    self.fields = self.all_model_fields()
    number_of_models = len(models)
    if max_models is None:
        self.models_splits = [models]
    else:
        self.models_splits = [
            models[index:(index + max_models)]
            for index in range(0, number_of_models, max_models)]
    if len(self.models_splits) == 1:
        models = [retrieve_resource(self.api, model_id,
                                    query_string=ONLY_MODEL)
                  for model_id in self.models_splits[0]]
        self.multi_model = MultiModel(models, self.api)
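# Hedged usage sketch for the Ensemble constructor above; the ensemble ID
# and the input field name are placeholders.
from bigml.ensemble import Ensemble

local_ensemble = Ensemble('ensemble/5143a51a37203f2cf7000972')
prediction = local_ensemble.predict({'petal length': 3}, by_name=True)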
def teardown_module():
    """Operations to be performed after each module

    """
    if os.path.exists('./tmp'):
        shutil.rmtree('./tmp')
    if not world.debug:
        try:
            world.delete_resources()
        except Exception, exc:
            print exc
        world.api = BigML(world.USERNAME, world.API_KEY, debug=world.debug,
                          organization=BIGML_ORGANIZATION)
        project_stats = world.api.get_project(
            world.project_id)['object']['stats']
        for resource_type, value in project_stats.items():
            if value['count'] != 0:
                # assert False, ("Increment in %s: %s" % (resource_type,
                #                                         value))
                print "WARNING: Increment in %s: %s" % (resource_type, value)
        world.api.delete_project(world.project_id)
def print_connection_info(self):
    self.USERNAME = os.environ.get('BIGML_USERNAME')
    self.API_KEY = os.environ.get('BIGML_API_KEY')
    self.EXTERNAL_CONN = get_env_connection_info()
    if self.USERNAME is None or self.API_KEY is None:
        assert False, ("Tests use the BIGML_USERNAME and BIGML_API_KEY"
                       " environment variables to authenticate the"
                       " connection, but they seem to be unset. Please,"
                       " set them before testing.")
    self.api = BigML(self.USERNAME, self.API_KEY, debug=self.debug,
                     short_debug=self.short_debug,
                     storage=(None if not (self.debug or self.short_debug)
                              else "./debug_storage"))
    print(self.api.connection_info())
    print(self.external_connection_info())
def __init__(self, model, api=None):
    if (isinstance(model, dict) and 'resource' in model and
            model['resource'] is not None):
        self.resource_id = model['resource']
    else:
        if api is None:
            api = BigML(storage=STORAGE)
        self.resource_id = get_model_id(model)
        if self.resource_id is None:
            raise Exception(error_message(model,
                                          resource_type='model',
                                          method='get'))
        model = retrieve_model(api, self.resource_id)
    if 'object' in model and isinstance(model['object'], dict):
        model = model['object']
    if 'model' in model and isinstance(model['model'], dict):
        status = get_status(model)
        if 'code' in status and status['code'] == FINISHED:
            if 'model_fields' in model['model']:
                fields = model['model']['model_fields']
                # pagination or exclusion might cause a field not to
                # be in the available fields dict
                if not all(key in model['model']['fields']
                           for key in fields.keys()):
                    raise Exception("Some fields are missing"
                                    " to generate a local model."
                                    " Please, provide a model with"
                                    " the complete list of fields.")
                for field in fields:
                    field_info = model['model']['fields'][field]
                    fields[field]['summary'] = field_info['summary']
                    fields[field]['name'] = field_info['name']
            else:
                fields = model['model']['fields']
            self.inverted_fields = invert_dictionary(fields)
            self.all_inverted_fields = invert_dictionary(
                model['model']['fields'])
            self.tree = Tree(model['model']['root'], fields,
                             model['objective_fields'])
            self.description = model['description']
            self.field_importance = model['model'].get('importance', None)
            if self.field_importance:
                self.field_importance = [
                    element for element in self.field_importance
                    if element[0] in fields]
            self.locale = model.get('locale', DEFAULT_LOCALE)
        else:
            raise Exception("The model isn't finished yet")
    else:
        raise Exception("Cannot create the Model instance. Could not"
                        " find the 'model' key in the resource:\n\n%s" %
                        model)
def setup_module():
    """Operations to be performed before each module

    """
    # Project or Organization IDs
    world.bck_api = world.api
    world.api = BigML(world.USERNAME, world.API_KEY, debug=world.debug,
                      organization=BIGML_ORGANIZATION)
    print world.api.connection_info()
    world.bck_project_id = world.project_id
    world.project_id = world.api.create_project(
        {"name": world.test_project_name})['resource']
    world.api = BigML(world.USERNAME, world.API_KEY, debug=world.debug,
                      project=world.project_id)
    print world.api.connection_info()
    world.clear()
def setup_resources(feature):
    world.USERNAME = os.environ['BIGML_USERNAME']
    world.API_KEY = os.environ['BIGML_API_KEY']
    assert world.USERNAME is not None
    assert world.API_KEY is not None
    world.api = BigML(world.USERNAME, world.API_KEY)
    world.api_dev_mode = BigML(world.USERNAME, world.API_KEY, dev_mode=True)

    sources = world.api.list_sources()
    assert sources['code'] == HTTP_OK
    world.init_sources_count = sources['meta']['total_count']

    datasets = world.api.list_datasets()
    assert datasets['code'] == HTTP_OK
    world.init_datasets_count = datasets['meta']['total_count']

    models = world.api.list_models()
    assert models['code'] == HTTP_OK
    world.init_models_count = models['meta']['total_count']

    predictions = world.api.list_predictions()
    assert predictions['code'] == HTTP_OK
    world.init_predictions_count = predictions['meta']['total_count']

    evaluations = world.api.list_evaluations()
    assert evaluations['code'] == HTTP_OK
    world.init_evaluations_count = evaluations['meta']['total_count']

    ensembles = world.api.list_ensembles()
    assert ensembles['code'] == HTTP_OK
    world.init_ensembles_count = ensembles['meta']['total_count']

    batch_predictions = world.api.list_batch_predictions()
    assert batch_predictions['code'] == HTTP_OK
    world.init_batch_predictions_count = batch_predictions['meta'][
        'total_count']

    world.sources = []
    world.datasets = []
    world.models = []
    world.predictions = []
    world.folders = []
    world.evaluations = []
    world.ensembles = []
    world.batch_predictions = []
def __init__(self, model, api=None):
    if not (isinstance(model, dict) and 'resource' in model and
            model['resource'] is not None):
        if api is None:
            api = BigML(storage=STORAGE)
        self.resource_id = get_model_id(model)
        if self.resource_id is None:
            raise Exception(api.error_message(model,
                                              resource_type='model',
                                              method='get'))
        query_string = ONLY_MODEL
        model = retrieve_model(api, self.resource_id,
                               query_string=query_string)
    BaseModel.__init__(self, model, api=api)
    if 'object' in model and isinstance(model['object'], dict):
        model = model['object']
    if 'model' in model and isinstance(model['model'], dict):
        status = get_status(model)
        if 'code' in status and status['code'] == FINISHED:
            distribution = model['model']['distribution']['training']
            self.ids_map = {}
            self.tree = Tree(model['model']['root'], self.fields,
                             objective_field=self.objective_field,
                             root_distribution=distribution,
                             parent_id=None, ids_map=self.ids_map)
            self.terms = {}
        else:
            raise Exception("The model isn't finished yet")
    else:
        raise Exception("Cannot create the Model instance. Could not"
                        " find the 'model' key in the resource:\n\n%s" %
                        model)
    if self.tree.regression:
        try:
            import numpy
            import scipy
            self.regression_ready = True
        except ImportError:
            self.regression_ready = False
def print_connection_info(self):
    self.USERNAME = os.environ.get('BIGML_USERNAME')
    self.API_KEY = os.environ.get('BIGML_API_KEY')
    if self.USERNAME is None or self.API_KEY is None:
        assert False, ("Tests use the BIGML_USERNAME and BIGML_API_KEY"
                       " environment variables to authenticate the"
                       " connection, but they seem to be unset. Please,"
                       " set them before testing.")
    self.api = BigML(self.USERNAME, self.API_KEY)
    print self.api.connection_info()
    output_dir = "./last_run"
    for _, subFolders, _ in os.walk("./"):
        for folder in subFolders:
            if folder.startswith("scenario"):
                bigmler_delete(folder, output_dir=output_dir)
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
def setup_module():
    """Setup for the module

    """
    world.bck_api = world.api
    world.api = BigML(world.USERNAME, world.API_KEY, debug=world.debug,
                      organization=BIGML_ORGANIZATION)
    print world.api.connection_info()
    world.bck_project_id = world.project_id
    world.project_id = None
    world.clear()
def main(args=sys.argv[1:]):
    """Parses command-line parameters and calls the actual main function.

    """
    parser = argparse.ArgumentParser(description="JSON PML to DOT",
                                     epilog="BigML, Inc")
    # Model
    parser.add_argument('--model', type=str, required=True,
                        action='store', dest='model', default=None,
                        help="Model identifier")
    # Output file
    parser.add_argument('--output', type=str, action='store',
                        dest='output', default=None,
                        help="Output file")
    args = parser.parse_args(args)

    api = BigML()
    model = api.get_model(args.model)
    api.ok(model)

    if args.output:
        with open(args.output, 'w') as output:
            write_tree(model, output)
    else:
        write_tree(model)
def print_connection_info():
    world.USERNAME = os.environ.get('BIGML_USERNAME')
    world.API_KEY = os.environ.get('BIGML_API_KEY')
    if world.USERNAME is None or world.API_KEY is None:
        assert False, ("Tests use the BIGML_USERNAME and BIGML_API_KEY"
                       " environment variables to authenticate the"
                       " connection, but they seem to be unset. Please,"
                       " set them before testing.")
    world.api = BigML(world.USERNAME, world.API_KEY)
    print world.api.connection_info()
def authenticate(self, bigml_user, bigml_key):
    """Initialize the BigML API and run a short test to check
       authentication.
    """
    self.api = BigML(username=bigml_user, api_key=bigml_key)
    result = self.api.list_sources()
    self.authenticated = (result['code'] == 200)
def setup_module():
    """Operations to be performed before each module

    """
    # Project or Organization IDs
    general_setup_module()
    world.bck_api = world.api
    world.api = BigML(world.USERNAME, world.API_KEY, debug=world.debug,
                      project=world.project_id)
    print world.api.connection_info()
    world.clear()
def __init__(self, model, api=None):
    if api is None:
        api = BigML(storage=STORAGE)
    resource_id, model = extract_id(model, api)
    resource_type = get_resource_type(resource_id)
    kwargs = {"api": api}
    local_model = COMPONENT_CLASSES[resource_type](model, **kwargs)
    self.__class__.__bases__ = local_model.__class__.__bases__
    for attr, value in local_model.__dict__.items():
        setattr(self, attr, value)
    self.local_model = local_model
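# Hedged usage sketch for the dispatching constructor above, assuming it
# belongs to a wrapper class here called LocalResource (name hypothetical):
# any resource type registered in COMPONENT_CLASSES resolves to its local
# counterpart.
local = LocalResource('cluster/526fc344035d071ea3031d70')  # placeholder ID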
def get_fusion_resource(self, fusion):
    """Extracts the fusion resource info. The fusion argument can be
       - a path to a local file
       - a fusion id
    """
    # the string can be a path to a JSON file
    if isinstance(fusion, basestring):
        try:
            path = os.path.dirname(os.path.abspath(fusion))
            with open(fusion) as fusion_file:
                fusion = json.load(fusion_file)
            self.resource_id = get_fusion_id(fusion)
            if self.resource_id is None:
                raise ValueError("The JSON file does not seem"
                                 " to contain a valid BigML fusion"
                                 " representation.")
            else:
                self.api = BigML(storage=path)
        except IOError:
            # if it is not a path, it can be a fusion id
            self.resource_id = get_fusion_id(fusion)
            if self.resource_id is None:
                if fusion.find('fusion/') > -1:
                    raise Exception(
                        self.api.error_message(fusion,
                                               resource_type='fusion',
                                               method='get'))
                else:
                    raise IOError("Failed to open the expected JSON file"
                                  " at %s" % fusion)
        except ValueError:
            # bug fix: the original message lacked its % argument
            raise ValueError("Failed to interpret %s."
                             " JSON file expected." % fusion)
    if not isinstance(fusion, dict):
        fusion = retrieve_resource(self.api, self.resource_id,
                                   no_check_fields=False)
    return fusion
def __init__(self, ensemble, api=None, max_models=None):
    if api is None:
        self.api = BigML(storage=STORAGE)
    else:
        self.api = api
    self.ensemble_id = get_ensemble_id(ensemble)
    ensemble = check_resource(ensemble, self.api.get_ensemble)
    models = ensemble['object']['models']
    self.model_ids = models
    number_of_models = len(models)
    if max_models is None:
        self.models_splits = [models]
    else:
        self.models_splits = [
            models[index:(index + max_models)]
            for index in range(0, number_of_models, max_models)]
def teardown_module():
    """Teardown for the module

    """
    if not world.debug:
        world.api = BigML(world.USERNAME, world.API_KEY, debug=world.debug,
                          organization=BIGML_ORGANIZATION)
        world.project_id = world.project["resource"]
        project_stats = world.api.get_project(
            world.project_id)['object']['stats']
        world.api.delete_project(world.project_id)
        world.project_id = world.bck_project_id
    world.api = world.bck_api
    print world.api.connection_info()
# imports needed by this snippet; timer is assumed to be timeit's
# default_timer, the usual idiom for wall-clock timing
import time
from timeit import default_timer as timer


def bigml(train_csv, test_csv, result_csv):
    api = BigML(dev_mode=True)

    # train the model
    start_training = timer()
    source_train = api.create_source(train_csv)
    dataset_train = api.create_dataset(source_train)
    model = api.create_model(dataset_train)
    end_training = timer()
    print('Training model.')
    print('Training took %i Seconds.' % (end_training - start_training))

    # test the model
    start_test = timer()
    source_test = api.create_source(test_csv)
    dataset_test = api.create_dataset(source_test)
    batch_prediction = api.create_batch_prediction(
        model, dataset_test,
        {"name": "census prediction",
         "all_fields": True,
         "header": False,
         "confidence": False})
    # wait until batch processing is finished
    while api.get_batch_prediction(
            batch_prediction)['object']['status']['progress'] != 1:
        print(api.get_batch_prediction(
            batch_prediction)['object']['status']['progress'])
        time.sleep(1)
    end_test = timer()
    print('Testing took %i Seconds' % (end_test - start_test))

    api.download_batch_prediction(batch_prediction['resource'],
                                  filename=result_csv)

    # cleanup
    api.delete_source(source_train)
    api.delete_source(source_test)
    api.delete_dataset(dataset_train)
    api.delete_dataset(dataset_test)
    api.delete_model(model)
def __init__(self, time_series, api=None):
    self.resource_id = None
    self.input_fields = []
    self.objective_fields = []
    self.all_numeric_objectives = False
    self.period = 1
    self.ets_models = {}
    self.error = None
    self.damped_trend = None
    self.seasonality = None
    self.trend = None
    self.time_range = {}
    self.field_parameters = {}
    self._forecast = []
    # checks whether the information needed for local predictions is in
    # the first argument
    if isinstance(time_series, dict) and \
            not check_model_fields(time_series):
        # if the fields used by the time series are not
        # available, use only the ID to retrieve it again
        time_series = get_time_series_id(time_series)
        self.resource_id = time_series
    if not (isinstance(time_series, dict) and 'resource' in time_series and
            time_series['resource'] is not None):
        if api is None:
            api = BigML(storage=STORAGE)
        self.resource_id = get_time_series_id(time_series)
        if self.resource_id is None:
            raise Exception(
                api.error_message(time_series,
                                  resource_type='time_series',
                                  method='get'))
        query_string = ONLY_MODEL
        time_series = retrieve_resource(api, self.resource_id,
                                        query_string=query_string)
    else:
        self.resource_id = get_time_series_id(time_series)
    if 'object' in time_series and \
            isinstance(time_series['object'], dict):
        time_series = time_series['object']
    try:
        self.input_fields = time_series.get("input_fields", [])
        self._forecast = time_series.get("forecast")
        self.objective_fields = time_series.get("objective_fields", [])
        objective_field = time_series['objective_field'] if \
            time_series.get('objective_field') else \
            time_series['objective_fields']
    except KeyError:
        raise ValueError("Failed to find the time series expected "
                         "JSON structure. Check your arguments.")
    if 'time_series' in time_series and \
            isinstance(time_series['time_series'], dict):
        status = get_status(time_series)
        if 'code' in status and status['code'] == FINISHED:
            time_series_info = time_series['time_series']
            fields = time_series_info.get('fields', {})
            self.fields = fields
            if not self.input_fields:
                self.input_fields = [
                    field_id for field_id, _ in
                    sorted(self.fields.items(),
                           key=lambda x: x[1].get("column_number"))]
            self.all_numeric_objectives = time_series_info.get(
                'all_numeric_objectives')
            self.period = time_series_info.get('period', 1)
            self.ets_models = time_series_info.get('ets_models', {})
            self.error = time_series_info.get('error')
            self.damped_trend = time_series_info.get('damped_trend')
            self.seasonality = time_series_info.get('seasonality')
            self.trend = time_series_info.get('trend')
            self.time_range = time_series_info.get('time_range')
            self.field_parameters = time_series_info.get(
                'field_parameters', {})
            objective_id = extract_objective(objective_field)
            ModelFields.__init__(self, fields, objective_id=objective_id)
        else:
            raise Exception("The time series isn't finished yet")
    else:
        raise Exception("Cannot create the TimeSeries instance."
                        " Could not find the 'time_series' key"
                        " in the resource:\n\n%s" % time_series)
from bigml.api import BigML

api = BigML()
source1 = api.create_source("iris.csv")
api.ok(source1)
dataset1 = api.create_dataset(source1,
                              {'name': u'iris'})
api.ok(dataset1)
model1 = api.create_model(dataset1,
                          {'name': u'iris'})
api.ok(model1)
prediction1 = api.create_prediction(model1,
                                    {u'petal length': 0.5},
                                    {'name': u'my_prediction_name'})
api.ok(prediction1)
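# Hedged local counterpart to the remote prediction above: once the model is
# finished, the same input can be scored offline with bigml.model.Model.
from bigml.model import Model

local_model = Model(model1)
local_prediction = local_model.predict({u'petal length': 0.5})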
from bigml.api import BigML

api = BigML()
source1_file = "iris.csv"
args = {'fields': {'000000': {'name': 'sepal length', 'optype': 'numeric'},
                   '000001': {'name': 'sepal width', 'optype': 'numeric'},
                   '000002': {'name': 'petal length', 'optype': 'numeric'},
                   '000003': {'name': 'petal width', 'optype': 'numeric'},
                   '000004': {'name': 'species',
                              'optype': 'categorical',
                              'term_analysis': {'enabled': True}}}}
source2 = api.create_source(source1_file, args)
api.ok(source2)

args = {'objective_field': {'id': '000004'}}
dataset1 = api.create_dataset(source2, args)
api.ok(dataset1)

args = {'all_fields': False,
        'new_fields': [{'field': '(all-but "000001")',
                        'names': ['sepal length', 'petal length',
                                  'petal width', 'species']},
                       {'field': '2', 'names': ['new']}],
        'objective_field': {'id': '000004'}}
dataset2 = api.create_dataset(dataset1, args)
api.ok(dataset2)
from bigml.api import BigML

api = BigML()
source1_file = "iris_sp_chars.csv"
args = {'fields': {'000000': {'name': 'sépal.length', 'optype': 'numeric'},
                   '000001': {'name': 'sépal&width', 'optype': 'numeric'},
                   '000002': {'name': 'pétal.length', 'optype': 'numeric'},
                   '000003': {'name': 'pétal&width\x00',
                              'optype': 'numeric'},
                   '000004': {'name': 'spécies',
                              'optype': 'categorical',
                              'term_analysis': {'enabled': True}}}}
source2 = api.create_source(source1_file, args)
api.ok(source2)
from bigml.api import BigML

api = BigML()
source1 = api.create_source("iris.csv")
api.ok(source1)
dataset1 = api.create_dataset(source1)
api.ok(dataset1)
cluster1 = api.create_cluster(dataset1)
api.ok(cluster1)
batchcentroid1 = api.create_batch_centroid(
    cluster1, dataset1, {"name": u"my_batch_centroid_name"})
api.ok(batchcentroid1)
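# Hedged follow-up: the batch centroid output can be saved locally; the
# filename is a placeholder.
api.download_batch_centroid(batchcentroid1, filename='centroids.csv')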
from bigml.api import BigML

api = BigML()
source1 = api.create_source("iris.csv")
api.ok(source1)
dataset1 = api.create_dataset(source1)
api.ok(dataset1)
model1 = api.create_model(dataset1)
api.ok(model1)
batchprediction1 = api.create_batch_prediction(
    model1, dataset1, {"name": u"my_batch_prediction_name"})
api.ok(batchprediction1)
def __init__(self, logistic_regression, api=None):
    self.resource_id = None
    self.input_fields = []
    self.term_forms = {}
    self.tag_clouds = {}
    self.term_analysis = {}
    self.items = {}
    self.item_analysis = {}
    self.categories = {}
    self.coefficients = {}
    # bug fix: this attribute was initialized as `data_field_types`,
    # mismatching the assignment below
    self.dataset_field_types = {}
    self.numeric_fields = {}
    self.bias = None
    self.missing_numerics = None
    self.c = None
    self.eps = None
    self.lr_normalize = None
    self.regularization = None
    if not (isinstance(logistic_regression, dict) and
            'resource' in logistic_regression and
            logistic_regression['resource'] is not None):
        if api is None:
            api = BigML(storage=STORAGE)
        self.resource_id = get_logistic_regression_id(logistic_regression)
        if self.resource_id is None:
            raise Exception(
                api.error_message(logistic_regression,
                                  resource_type='logistic_regression',
                                  method='get'))
        query_string = ONLY_MODEL
        logistic_regression = retrieve_resource(
            api, self.resource_id, query_string=query_string)
    else:
        self.resource_id = get_logistic_regression_id(logistic_regression)
    if 'object' in logistic_regression and \
            isinstance(logistic_regression['object'], dict):
        logistic_regression = logistic_regression['object']
    try:
        self.input_fields = logistic_regression.get("input_fields", [])
        self.dataset_field_types = logistic_regression.get(
            "dataset_field_types", {})
        objective_field = logistic_regression['objective_fields'] if \
            logistic_regression['objective_fields'] else \
            logistic_regression['objective_field']
    except KeyError:
        raise ValueError("Failed to find the logistic regression expected"
                         " JSON structure. Check your arguments.")
    if 'logistic_regression' in logistic_regression and \
            isinstance(logistic_regression['logistic_regression'], dict):
        status = get_status(logistic_regression)
        if 'code' in status and status['code'] == FINISHED:
            logistic_regression_info = logistic_regression[
                'logistic_regression']
            fields = logistic_regression_info.get('fields', {})
            if not self.input_fields:
                # bug fix: `fields` is used here; `self.fields` is not
                # set until ModelFields.__init__ runs below
                self.input_fields = [
                    field_id for field_id, _ in
                    sorted(fields.items(),
                           key=lambda x: x[1].get("column_number"))]
            self.coefficients.update(
                logistic_regression_info.get('coefficients', []))
            self.bias = logistic_regression_info.get('bias', 0)
            self.c = logistic_regression_info.get('c')
            self.eps = logistic_regression_info.get('eps')
            self.lr_normalize = logistic_regression_info.get('normalize')
            self.regularization = logistic_regression_info.get(
                'regularization')
            # old models have no such attribute, so we set it to False in
            # this case
            self.missing_numerics = logistic_regression_info.get(
                'missing_numerics', False)
            objective_id = extract_objective(objective_field)
            for field_id, field in fields.items():
                if field['optype'] == 'text':
                    self.term_forms[field_id] = {}
                    self.term_forms[field_id].update(
                        field['summary']['term_forms'])
                    self.tag_clouds[field_id] = [
                        tag for [tag, _] in field['summary']['tag_cloud']]
                    self.term_analysis[field_id] = {}
                    self.term_analysis[field_id].update(
                        field['term_analysis'])
                if field['optype'] == 'items':
                    self.items[field_id] = [
                        item for item, _ in field['summary']['items']]
                    self.item_analysis[field_id] = {}
                    self.item_analysis[field_id].update(
                        field['item_analysis'])
                if field['optype'] == 'categorical':
                    self.categories[field_id] = [
                        category for [category, _] in
                        field['summary']['categories']]
                if self.missing_numerics and field['optype'] == 'numeric':
                    self.numeric_fields[field_id] = True
            ModelFields.__init__(self, fields, objective_id=objective_id)
            self.map_coefficients()
        else:
            raise Exception("The logistic regression isn't finished yet")
    else:
        raise Exception("Cannot create the LogisticRegression instance."
                        " Could not find the 'logistic_regression' key"
                        " in the resource:\n\n%s" % logistic_regression)
from bigml.api import BigML

api = BigML()
source1 = api.create_source("iris.csv")
api.ok(source1)
dataset1 = api.create_dataset(source1,
                              {'name': u'iris'})
api.ok(dataset1)
model1 = api.create_model(dataset1,
                          {'name': u'iris'})
api.ok(model1)
batchprediction1 = api.create_batch_prediction(
    model1, dataset1, {'name': u'my_batch_prediction_name'})
api.ok(batchprediction1)
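# Hedged follow-up: the finished batch prediction can be downloaded as a CSV
# file; the filename is a placeholder.
api.download_batch_prediction(batchprediction1, filename='predictions.csv')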
from bigml.api import BigML

api = BigML()
source1 = api.create_source("iris.csv")
api.ok(source1)
dataset1 = api.create_dataset(source1)
api.ok(dataset1)
cluster1 = api.create_cluster(dataset1)
api.ok(cluster1)
centroid1 = api.create_centroid(
    cluster1,
    {u"petal length": 0.5, u"petal width": 0.5, u"sepal length": 1,
     u"sepal width": 1, u"species": u"Iris-setosa"},
    {"name": u"my_centroid_name"})
api.ok(centroid1)
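# Hedged local counterpart: bigml.cluster.Cluster computes the same centroid
# without further API calls.
from bigml.cluster import Cluster

local_cluster = Cluster(cluster1)
local_centroid = local_cluster.centroid(
    {u"petal length": 0.5, u"petal width": 0.5, u"sepal length": 1,
     u"sepal width": 1, u"species": u"Iris-setosa"})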
# -*- coding: utf-8 -*-
# <nbformat>3.0</nbformat>

# <codecell>

import numpy as np
import pandas as pd

from bigml.api import BigML

# <codecell>

# Create a BigML instance
api = BigML()

# <codecell>

# Create source instance with train dataset
train_source = api.create_source('train.csv')

# <codecell>

# Create a BigML dataset from source instance
train_dataset = api.create_dataset(train_source)

# <codecell>

# Fit an ensemble to the dataset
model = api.create_ensemble(train_dataset)

# <codecell>
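# Assumed continuation (the original notebook is truncated at this cell):
# score a test file with the ensemble; 'test.csv' is a placeholder.
test_source = api.create_source('test.csv')
test_dataset = api.create_dataset(test_source)
batch_prediction = api.create_batch_prediction(model, test_dataset,
                                               {'all_fields': True})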
from bigml.api import BigML

api = BigML()
source1 = api.create_source("iris_sp_chars.csv",
                            {'name': 'my_sóurcè_sp_name'})
api.ok(source1)
source1 = api.update_source(
    source1,
    {'fields': {'000000': {'name': 'sépal.length', 'optype': 'numeric'},
                '000001': {'name': 'sépal&width', 'optype': 'numeric'},
                '000002': {'name': 'pétal.length', 'optype': 'numeric'},
                '000003': {'name': 'pétal&width\x00', 'optype': 'numeric'},
                '000004': {'name': 'spécies', 'optype': 'categorical'}}})
api.ok(source1)
from bigml.api import BigML

api = BigML()
source1 = api.create_source("iris.csv")
api.ok(source1)
dataset1 = api.create_dataset(source1,
                              {'name': u'iris dataset'})
api.ok(dataset1)
anomaly1 = api.create_anomaly(dataset1,
                              {'name': u"iris dataset's anomaly detector"})
api.ok(anomaly1)
batchanomalyscore1 = api.create_batch_anomaly_score(
    anomaly1, dataset1,
    {'name': u"Batch Anomaly Score of iris dataset's anomaly detector"
             u" with iris dataset",
     'output_dataset': True})
api.ok(batchanomalyscore1)
dataset2 = api.get_dataset(
    batchanomalyscore1['object']['output_dataset_resource'])
api.ok(dataset2)
dataset2 = api.update_dataset(
    dataset2,
    {'fields': {u'000000': {'name': u'score'}},
     'name': u'my_dataset_from_batch_anomaly_score_name'})
api.ok(dataset2)
from bigml.api import BigML

api = BigML()
source1_file = "iris.csv"
args = {u'fields': {u'000000': {u'name': u'sepal length',
                                u'optype': u'numeric'},
                    u'000001': {u'name': u'sepal width',
                                u'optype': u'numeric'},
                    u'000002': {u'name': u'petal length',
                                u'optype': u'numeric'},
                    u'000003': {u'name': u'petal width',
                                u'optype': u'numeric'},
                    u'000004': {u'name': u'species',
                                u'optype': u'categorical',
                                u'term_analysis': {u'enabled': True}}}}
source2 = api.create_source(source1_file, args)
api.ok(source2)

args = {u'objective_field': {u'id': u'000004'}}
dataset1 = api.create_dataset(source2, args)
api.ok(dataset1)

args = {u'anomaly_seed': u'bigml', u'seed': u'bigml'}
anomaly1 = api.create_anomaly(dataset1, args)
api.ok(anomaly1)

args = {u'fields_map': {u'000000': u'000000',
                        u'000001': u'000001',
                        u'000002': u'000002',
                        u'000003': u'000003',
                        u'000004': u'000004'},
        u'output_dataset': True}
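# Assumed completion (the snippet is truncated after the args above): the
# fields_map args feed a batch anomaly score, mirroring the previous script.
batchanomalyscore1 = api.create_batch_anomaly_score(anomaly1, dataset1, args)
api.ok(batchanomalyscore1)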