def _save_impl(self, pickler):
    """
    Write this model into an already opened GLPickle archive.

    The saved directory can be restored later with
    :py:func:`~graphlab.load_model`. Two objects are dumped, in this
    order: ``self._state`` and then ``self._knn_model``.

    Parameters
    ----------
    pickler : GLPickler
        An opened GLPickle archive (do not close the archive).

    See Also
    --------
    graphlab.load_model

    Examples
    --------
    >>> model.save('my_model_file')
    >>> loaded_model = graphlab.load_model('my_model_file')
    """
    tracker = _mt._get_metric_tracker()
    tracker.track('toolkit.classifier.nearest_neighbor_classifier.save')

    # Keep the dump order stable; presumably the loader reads the objects
    # back in the same sequence.
    pickler.dump(self._state)
    pickler.dump(self._knn_model)
def list_fields(self):
    """
    Return the names of every field stored in the model.

    This covers data, model, and training options. Any returned name can
    be passed to the ``get`` method.

    Returns
    -------
    out : list
        List of fields queryable with the ``get`` method.

    See Also
    --------
    get

    Examples
    --------
    >>> data = graphlab.SFrame('http://s3.amazonaws.com/dato-datasets/regression/houses.csv')
    >>> data['is_expensive'] = data['price'] > 30000
    >>> model = graphlab.svm_classifier.create(data, target='is_expensive',
    ...                                        features=['bath', 'bedroom', 'size'])
    >>> model.list_fields()
    """
    tracker = _mt._get_metric_tracker()
    tracker.track('toolkit.classifier.svm_classifier.list_fields')

    # Delegate the actual lookup to the parent implementation.
    field_names = super(_Classifier, self).list_fields()
    return field_names
def get_current_options(self):
    """
    Return a dictionary with the options used to define and create this
    graph analytics model instance.

    The default options for the model type are fetched first, then every
    option is overwritten with the value currently stored in the model.

    Returns
    -------
    out : dict
        Dictionary of options used to train this model.

    Raises
    ------
    RuntimeError
        If the model name is not in the dispatch table, or if fetching the
        defaults or querying a current value fails.

    See Also
    --------
    get_default_options, list_fields, get
    """
    _mt._get_metric_tracker().track('toolkit.graph_analytics.get_current_options')

    # Maps the model name to the toolkit function that returns its defaults.
    dispatch_table = {
        'ShortestPathModel': 'sssp_default_options',
        'GraphColoringModel': 'graph_coloring_default_options',
        'PagerankModel': 'pagerank_default_options',
        'ConnectedComponentsModel': 'connected_components_default_options',
        'TriangleCountingModel': 'triangle_counting_default_options',
        'KcoreModel': 'kcore_default_options'
    }

    try:
        model_options = _main.run(dispatch_table[self.name()], {})

        # For each default option, replace the default with the value the
        # model currently holds.
        for key in model_options:
            model_options[key] = self.get(key)
        return model_options
    except Exception:
        # `except Exception` (not a bare `except`) so KeyboardInterrupt and
        # SystemExit are no longer reported as a missing-options error.
        raise RuntimeError('Model %s does not have options' % self.name())
def frequency_count(self, element):
    """
    Estimate how many times ``element`` occurs, using the count sketch.

    The element must be of the same type as the input SArray; an
    exception is thrown if it is of the incorrect type.

    Parameters
    ----------
    element : val
        An element of the same type as the SArray.

    Raises
    ------
    RuntimeError
        Throws an exception if element is of the incorrect type.

    Returns
    -------
    out : int
        An estimate of the number of occurrences of the element.
    """
    tracker = _mt._get_metric_tracker()
    tracker.track('sketch.frequency_count')

    with cython_context():
        estimate = self.__proxy__.frequency_count(element)
        return int(estimate)
def show(obj, **kwargs):
    """
    Show ``obj`` in GraphLab Canvas through an ``SGraphView``.

    If a ``highlight`` keyword is given as an SArray it is converted to a
    list. If it is (or has become) a list, it is converted to a dict that
    maps each highlighted item to the ``highlight_color`` value (an empty
    list when no color was given), and ``highlight_color`` is replaced by
    the per-item list of colors. All keyword arguments are forwarded to
    the view as ``params``.

    Parameters
    ----------
    obj : object
        The object handed to ``SGraphView``.
    **kwargs
        View parameters; ``highlight`` and ``highlight_color`` are
        normalized as described above.

    Returns
    -------
    out
        Whatever ``graphlab.canvas.show`` returns for the created view.
    """
    import graphlab.connect as _mt
    _mt._get_metric_tracker().track('sgraph.show')

    import graphlab.canvas
    import graphlab.canvas.inspect
    import graphlab.canvas.views.sgraph

    graphlab.canvas.inspect.find_vars(obj)

    if 'highlight' in kwargs:
        selected = kwargs['highlight']

        # SArray -> list
        if isinstance(selected, SArray):
            selected = list(selected)
            kwargs['highlight'] = selected

        # list -> dict of {item: color}
        if isinstance(selected, list):
            base_color = kwargs.get('highlight_color', [])
            per_item_colors = [base_color] * len(selected)
            kwargs['highlight'] = dict(zip(selected, per_item_colors))
            kwargs['highlight_color'] = per_item_colors

    view = graphlab.canvas.views.sgraph.SGraphView(obj, params=kwargs)
    return graphlab.canvas.show(view)
def _save_impl(self, pickler):
    """
    Dump the model into an open GLPickle archive.

    The resulting directory can be loaded with the
    :py:func:`~graphlab.load_model` method. ``self._state`` is dumped
    first, followed by ``self._knn_model``.

    Parameters
    ----------
    pickler : GLPickler
        An opened GLPickle archive (do not close the archive).

    See Also
    --------
    graphlab.load_model

    Examples
    --------
    >>> model.save('my_model_file')
    >>> loaded_model = graphlab.load_model('my_model_file')
    """
    event_name = 'toolkit.classifier.nearest_neighbor_classifier.save'
    _mt._get_metric_tracker().track(event_name)

    # First the state, then the `_knn_model` attribute.
    pickler.dump(self._state)
    pickler.dump(self._knn_model)
def dict_key_summary(self):
    """
    Return the sketch summary computed over all dictionary keys.

    Only valid for a sketch built from an SArray of dict type. Dictionary
    keys are converted to strings before the sketch summary is computed.

    Returns
    -------
    out : Sketch
        A Sketch wrapping the proxy returned for the key summary.

    Examples
    --------
    >>> sa = graphlab.SArray([{'I':1, 'love': 2}, {'nature':3, 'beauty':4}])
    >>> sa.sketch_summary().dict_key_summary()
    +------------------+-------+----------+
    |       item       | value | is exact |
    +------------------+-------+----------+
    |      Length      |   4   |   Yes    |
    | # Missing Values |   0   |   Yes    |
    | # unique values  |   4   |    No    |
    +------------------+-------+----------+
    Most frequent items:
    +-------+---+------+--------+--------+
    | value | I | love | beauty | nature |
    +-------+---+------+--------+--------+
    | count | 1 |  1   |   1    |   1    |
    +-------+---+------+--------+--------+
    """
    tracker = _mt._get_metric_tracker()
    tracker.track('sketch.dict_key_summary')

    with cython_context():
        key_summary_proxy = self.__proxy__.dict_key_summary()
        return Sketch(_proxy=key_summary_proxy)
def predict_row(self, row):
    """
    Predict the sentiment of a single row of observations.

    The row is passed through the stored feature extractor, the stored
    classifier is asked for probabilities, and the first element of the
    result is returned.

    Parameters
    ----------
    row : dict
        A dictionary representing a single row of new observations. Must
        include keys with the same names as the features used for model
        training, but does not require a target column. Additional
        columns are ignored.

    Returns
    -------
    out : float
        Predicted sentiment, where smaller values (near 0) indicate
        negative sentiment and large values (approaching 1) indicate
        positive sentiment.

    Examples
    --------
    >>> m = gl.product_sentiment.create(sf, features=['review'])
    >>> m.predict_row({'review': "I really like this burrito."})
    """
    _mt._get_metric_tracker().track('{}.predict_row'.format(__name__))

    classifier = self.__proxy__['classifier']
    extract_features = self.__proxy__['feature_extractor']

    probabilities = classifier.predict(extract_features(row),
                                       output_type='probability')
    return probabilities[0]
def predict(self, data):
    """
    Predict the sentiment of every document in a collection.

    The data is passed through the stored feature extractor and the
    stored classifier is asked for probabilities.

    Parameters
    ----------
    data : SFrame
        Dataset of new observations. Must include columns with the same
        names as the features used for model training, but does not
        require a target column. Additional columns are ignored.

    Returns
    -------
    out : SArray of float
        Predicted sentiment, where smaller values (near 0) indicate
        negative sentiment and large values (approaching 1) indicate
        positive sentiment.

    Examples
    --------
    >>> import graphlab as gl
    >>> data = gl.SFrame({'rating': [1, 5], 'text': ['hate it', 'love it']})
    >>> m = gl.sentiment_analysis.create(data, 'rating', features=['text'])
    >>> m.predict(data)
    """
    _mt._get_metric_tracker().track('{}.predict'.format(__name__))

    classifier = self.__proxy__['classifier']
    extract_features = self.__proxy__['feature_extractor']

    extracted = extract_features(data)
    return classifier.predict(extracted, output_type='probability')
def add_exception(self, e):
    """
    Add a Python exception to show in the UI.

    The exception is printed (only while the unity_server process still
    appears to be running), reported to the metric tracker, and appended
    to the list of recorded exceptions under the variable lock.

    Parameters
    ----------
    e : (type, value, traceback)
        A tuple of exception values as returned by sys.exc_info.
    """
    # Don't print errors if the unity_server process is no longer running;
    # this is normal on Python process shutdown. The lookup is best-effort,
    # so ordinary failures are ignored, but KeyboardInterrupt/SystemExit are
    # no longer swallowed (previously a bare `except`).
    try:
        if (graphlab.connect.main.get_server().proc):
            print('[ERROR] GraphLab Canvas: %s' % str(e))
    except Exception:
        pass

    # Truncate type, message, stack_trace to a reasonable length so that we
    # don't hit any size limits on librato or mixpanel.
    # NOTE(review): traceback.format_tb returns a list of strings, so the
    # [:1000] slice below keeps at most 1000 frames rather than 1000
    # characters. Left unchanged because consumers may rely on the list type.
    properties = {
        'type': e[0].__name__[:100],
        'message': str(e[1])[:100],
        'stack_trace': traceback.format_tb(e[2])[:1000]
    }
    _mt._get_metric_tracker().track('canvas.unhandled_exception', properties=properties)

    with self.__var_lock:
        self.__exceptions.append(properties)
def start(self, num_tolerable_ping_failures=3):
    """
    Report engine-start metrics and try to establish a connection to the
    server.

    A client is created for ``self.server_addr``, optionally given the
    auth token, started, and then always stopped again.

    Parameters
    ----------
    num_tolerable_ping_failures : int, optional
        Forwarded to the ``Client`` constructor. Defaults to 3.
    """
    properties = dict(product_key=self.product_key)
    _get_metric_tracker().track('engine-started', value=1,
                                properties=properties, send_sys_info=True)
    _get_metric_tracker().track('engine-started-remote', value=1)

    # A client key pair is only generated when a server public key is set.
    (client_public_key, client_secret_key) = ('', '')
    if self.public_key != '':
        (client_public_key, client_secret_key) = get_public_secret_key_pair()

    # Build the client BEFORE entering the try block: if the constructor
    # raised inside it, `c` would be unbound in `finally` and the resulting
    # UnboundLocalError would mask the real error.
    c = Client([], self.server_addr, num_tolerable_ping_failures,
               public_key=client_public_key,
               secret_key=client_secret_key,
               server_public_key=self.public_key)
    try:
        if self.auth_token:
            c.add_auth_method_token(self.auth_token)
        c.start()
    finally:
        c.stop()
def set_selected_variable(self, var):
    """
    Marks the variable passed in by name as selected in the UI.

    A tuple is treated as a name and resolved through the internal
    variable lookup. Anything else is treated as a reference: its name is
    searched for, a temporary name is requested from the object when none
    is found, and the variable is registered with ``add_variable``.

    Parameters
    ----------
    var : str | unicode | tuple | SFrame | SArray
        The variable to select (by name or reference).
    """
    name = None
    if isinstance(var, tuple):
        # look up by name
        name = var
        var = self.__lookup_var(name)
    else:
        ref = var
        if isinstance(var, graphlab.canvas.views.base.BaseView):
            # use underlying object, not view wrapper
            ref = var.obj
        name = self.__find_name(ref)
        if name is None:
            # if we can't find the name, add it as anonymous
            name = var.get_temporary_name()
        # make sure this variable exists.
        # NOTE(review): the source was whitespace-collapsed; this call is
        # assumed to run for every non-tuple `var`, not only when the name
        # lookup failed. Confirm against the original file.
        self.add_variable(name, var)
    # tracks type of variable added to Canvas
    _mt._get_metric_tracker().track('canvas.set_selected_variable.%s' % type(var).__name__)
    # the selection is stored as a (name, looked-up variable) pair under the lock
    with self.__var_lock:
        self.__selected_var = (name, self.__lookup_var(name))
def list_fields(self):
    """
    Return the names of the fields stored in the model.

    Every returned name can be queried using the ``get`` function.

    Returns
    -------
    out : list
        A list of fields that can be queried using the ``get`` method.

    See Also
    --------
    get

    Examples
    --------
    >>> data = graphlab.SFrame('https://static.turi.com/datasets/regression/houses.csv')
    >>> model = graphlab.linear_regression.create(data, target='price',
    ...                                           features=['bath', 'bedroom', 'size'])
    >>> fields = model.list_fields()
    """
    tracker = _mt._get_metric_tracker()
    tracker.track('toolkit.regression.linear_regression.list_fields')

    # The parent class provides the actual field listing.
    field_names = super(LinearRegression, self).list_fields()
    return field_names
def get_current_options(self):
    """
    Return the options that were requested during training.

    Returns
    -------
    out : dict
        A dictionary with option (name, value) pairs requested during
        train time.

    See Also
    --------
    list_fields, get

    Examples
    --------
    >>> data = graphlab.SFrame('https://static.turi.com/datasets/regression/houses.csv')
    >>> model = graphlab.linear_regression.create(data, target='price',
    ...                                           features=['bath', 'bedroom', 'size'])
    >>> current_options = model.get_current_options()
    """
    tracker = _mt._get_metric_tracker()
    # The event is recorded under the name "get_options".
    tracker.track('toolkit.regression.linear_regression.get_options')

    options = super(LinearRegression, self).get_current_options()
    return options
def confusion_matrix(targets, predictions):
    r"""
    Compute the confusion matrix for classifier predictions.

    Parameters
    ----------
    targets : SArray
        Ground truth class labels.

    predictions : SArray
        The prediction that corresponds to each target value. This vector
        must have the same length as ``targets``.

    Returns
    -------
    out : SFrame
        An SFrame containing counts for 'target_label', 'predicted_label'
        and 'count' corresponding to each pair of true and predicted
        labels.

    See Also
    --------
    accuracy

    Examples
    --------
    >>> targets = graphlab.SArray([0, 1, 1, 0])
    >>> predictions = graphlab.SArray([0, 0, 1, 1])
    >>> graphlab.evaluation.confusion_matrix(targets, predictions)
    """
    _mt._get_metric_tracker().track('evaluation.confusion_matrix')

    # Validate the inputs before handing them to the evaluator.
    # NOTE(review): the second helper's name suggests that float-typed
    # labels are rejected -- confirm against its definition.
    _supervised_evaluation_error_checking(targets, predictions)
    _check_same_type_not_float(targets, predictions)

    evaluator = _graphlab.extensions._supervised_streaming_evaluator
    return evaluator(targets, predictions, "confusion_matrix_no_map", {})
def classify(self, data):
    """
    Classify the sentiment of every document in a text collection.

    The data is passed through the stored feature extractor and then
    handed to the stored classifier's ``classify`` method.

    Parameters
    ----------
    data : SFrame
        Dataset of new observations. Must include columns with the same
        names as the features used for model training, but does not
        require a target column. Additional columns are ignored.

    Returns
    -------
    out : SArray of int
        Predicted sentiment, where 0 indicates negative sentiment and 1
        indicates positive sentiment.

    Examples
    --------
    >>> import graphlab as gl
    >>> data = gl.SFrame({'rating': [1, 5], 'text': ['hate it', 'love it']})
    >>> m = gl.sentiment_analysis.create(data, 'rating', features=['text'])
    >>> m.classify(data)
    """
    _mt._get_metric_tracker().track('{}.classify'.format(__name__))

    classifier = self.__proxy__['classifier']
    extract_features = self.__proxy__['feature_extractor']

    extracted = extract_features(data)
    return classifier.classify(extracted)
def get_product_key(file=__default_config_path):
    """
    Return the product key from the GRAPHLAB_PRODUCT_KEY environment
    variable or, failing that, from the configuration file.

    The environment variable takes precedence over the config file. When
    the key is read from the file, it is also stored in the environment
    variable for the rest of this session (surrounding quotes stripped).

    Parameters
    ----------
    file : str, optional
        Which file to use for configuration (defaults to
        ~/.graphlab/config).

    Returns
    -------
    out : str or None
        Product key string, or None if the environment variable is not
        set and the config file does not exist.

    Raises
    ------
    KeyError
        If the config file exists but the product key cannot be parsed
        out of it.
    """
    PRODUCT_KEY_ENV = 'GRAPHLAB_PRODUCT_KEY'

    if PRODUCT_KEY_ENV not in os.environ:
        import graphlab.connect as _mt

        # see if in ~/.graphlab/config
        config_file = file
        if not os.path.isfile(config_file):
            return None

        try:
            import ConfigParser
            config = ConfigParser.ConfigParser()
            config.read(config_file)
            product_key = config.get(__section, __key)
            if product_key == -1:
                # Handled by the except block below. A ValueError replaces
                # the previous `raise BaseException()`.
                raise ValueError('product key is not set')
            # set the product key as an environment variable in this session
            os.environ[PRODUCT_KEY_ENV] = str(product_key).strip('"\'')
        except Exception:
            # `except Exception` (not a bare `except`) so KeyboardInterrupt
            # and SystemExit are not reported as a parse failure.
            msg = "Unable to parse product key out of %s. Make sure it is defined in the [%s] section, with key name: '%s'" % (config_file, __section, __key)
            _mt._get_metric_tracker().track('server_launch.config_parser_error')
            raise KeyError(msg)

    return os.environ[PRODUCT_KEY_ENV]
def get(self, field):
    """
    Look up a single field of the model and return its value.

    The queryable fields are listed below; the same list can be obtained
    programmatically with the
    :func:`~graphlab.frequent_pattern_mining.FrequentPatternMiner.list_fields`
    method.

    +------------------------+---------------------------------------------+
    | Field                  | Description                                 |
    +========================+=============================================+
    | features               | Feature column names                        |
    +------------------------+---------------------------------------------+
    | frequent_patterns      | Most frequent closed itemsets in the        |
    |                        | training data                               |
    +------------------------+---------------------------------------------+
    | item                   | Item column name                            |
    +------------------------+---------------------------------------------+
    | max_patterns           | Maximum number of itemsets to mine          |
    +------------------------+---------------------------------------------+
    | min_support            | Minimum number of transactions for an       |
    |                        | itemset to be frequent                      |
    +------------------------+---------------------------------------------+
    | num_examples           | Number of examples (transactions) in the    |
    |                        | dataset                                     |
    +------------------------+---------------------------------------------+
    | num_features           | Number of feature columns                   |
    +------------------------+---------------------------------------------+
    | num_frequent_patterns  | Number of frequent itemsets mined           |
    +------------------------+---------------------------------------------+
    | num_items              | Number of unique items in the training data |
    +------------------------+---------------------------------------------+
    | training_time          | Total time taken to mine the data           |
    +------------------------+---------------------------------------------+

    Parameters
    ----------
    field : string
        Name of the field to be retrieved.

    Returns
    -------
    out
        Value of the requested fields.

    See Also
    --------
    list_fields

    Examples
    --------
    .. sourcecode:: python

        >>> model['num_frequent_patterns']
        500
    """
    tracker = _mt._get_metric_tracker()
    tracker.track('toolkits.frequent_pattern_mining.get')

    # No validation here: the lookup is forwarded to the proxy as-is.
    value = self.__proxy__.get(field)
    return value
def quantile(self, quantile_val):
    """
    Return a sketched estimate of the value at a given quantile.

    The quantile lies between 0.0 and 1.0 and is guaranteed to be
    accurate within 1%: if you ask for the 0.55 quantile, the returned
    value is guaranteed to be between the true 0.54 quantile and the true
    0.56 quantile.

    Quantiles are only defined for numeric arrays; this function throws
    an exception if called on a sketch constructed for a non-numeric
    column.

    Parameters
    ----------
    quantile_val : float
        A value between 0.0 and 1.0 inclusive. Values below 0.0 will be
        interpreted as 0.0. Values above 1.0 will be interpreted as 1.0.

    Raises
    ------
    RuntimeError
        If the sarray is a non-numeric type.

    Returns
    -------
    out : float | str
        An estimate of the value at a quantile.
    """
    tracker = _mt._get_metric_tracker()
    # The requested quantile is part of the tracked event name.
    tracker.track('sketch.quantile.%g' % quantile_val)

    with cython_context():
        estimate = self.__proxy__.get_quantile(quantile_val)
        return estimate
def get_current_options(self):
    """
    Return the options used to define and create the current
    NearestNeighborsModel instance.

    The lookup is delegated to the ``_nearest_neighbors.get_current_options``
    toolkit function, which receives the model proxy and the model name.

    Returns
    -------
    out : dict
        Dictionary of options used to train the current instance of the
        NearestNeighborsModel.

    See Also
    --------
    get_default_options, list_fields, get

    Examples
    --------
    >>> sf = graphlab.SFrame({'label': range(3),
    ...                       'feature1': [0.98, 0.62, 0.11],
    ...                       'feature2': [0.69, 0.58, 0.36]})
    >>> model = graphlab.nearest_neighbors.create(sf, 'label')
    >>> model.get_current_options()
    {'distance': 'euclidean', 'leaf_size': 1000}
    """
    tracker = _mt._get_metric_tracker()
    tracker.track('toolkit.nearest_neighbors.get_current_options')

    toolkit_args = {'model': self.__proxy__, 'model_name': self.__name__}
    run_toolkit = _graphlab.toolkits._main.run
    return run_toolkit('_nearest_neighbors.get_current_options', toolkit_args)
def __init__(self, array=None, background=False, sub_sketch_keys=[], _proxy=None):
    """__init__(array)
    Construct a new Sketch from an SArray.

    Parameters
    ----------
    array : SArray
        Array to sketch.

    background : boolean, optional
        If true, run the sketch in background. The state of the sketch
        may be queried by calling
        (:func:`~graphlab.Sketch.sketch_ready`). Default is False.

    sub_sketch_keys : list
        The list of sub sketches to calculate. For an SArray of
        dictionary type the key needs to be a string; for an SArray of
        vector (array) type the key needs to be a positive integer.

    Raises
    ------
    TypeError
        If no ``_proxy`` is supplied and ``array`` is not an SArray.
    """
    _mt._get_metric_tracker().track('sketch.init')

    # Wrap an existing proxy directly; nothing needs to be constructed.
    if _proxy:
        self.__proxy__ = _proxy
        return

    self.__proxy__ = UnitySketchProxy(glconnect.get_client())
    if not isinstance(array, SArray):
        raise TypeError("Sketch object can only be constructed from SArrays")

    # `sub_sketch_keys` is only forwarded, never modified in this method.
    self.__proxy__.construct_from_sarray(array.__proxy__,
                                         background,
                                         sub_sketch_keys)
def create(function, parameter_set):
    """
    Create a DML job.

    For now, the map_job infrastructure is leveraged for DML jobs. The
    difference is indicated through the ``_job_type`` parameter. The job
    is submitted to the distributed execution environment directly.

    Parameters
    ----------
    function : function
        Function to be executed, with arguments to pass to this function
        specified by parameter_set.

    parameter_set : iterable of dict
        Each element of the list corresponds to an evaluation of the
        function with the dictionary argument. ``len()`` is called on it,
        so it must be a sized collection.

    Returns
    -------
    out
        Whatever the environment's ``run_job`` returns for the created job.

    Raises
    ------
    RuntimeError
        If no distributed execution environment has been set, or if
        ``parameter_set`` has more elements than the environment has
        workers.
    """
    environment = _gl._distributed_execution_environment.get_distributed_execution_environment()
    if environment is None:
        raise RuntimeError(
            "Please use graphlab.set_distributed_execution_environment() to set distributed execution environment first."
        )

    num_workers = environment.get_num_workers()
    if len(parameter_set) > num_workers:
        # The message used to read "cannot not exceed" (a double negative).
        raise RuntimeError("Length of parameter_set cannot exceed %d" % num_workers)

    _get_metric_tracker().track("jobs.dml_job")

    job = _gl.deploy.map_job._create_map_job(
        function, parameter_set=parameter_set, environment=environment, _job_type="DML"
    )

    # submit to DMLExecutionEngine directly
    return environment.run_job(job)
def __set_license_info(product_key, license_info, config_path=__default_config_path):
    """
    Store the product key and license info in the config file, which by
    default is ~/.graphlab/config, and in the GRAPHLAB_LICENSE_INFO
    environment variable.

    Overwrites any existing product key in that file. Writing the file is
    best-effort: on failure a warning is logged and the environment
    variable is still set.

    Parameters
    ----------
    product_key : str
        The product key to store alongside the license info.

    license_info : str
        The license info returned from the Turi server.

    config_path : str, optional
        Specifies which file to use for configuration (defaults to
        ~/.graphlab/config)
    """
    import graphlab.connect as _mt
    try:
        config = _ConfigParser.ConfigParser()
        config.read(config_path)
        if not config.has_section(__section):
            config.add_section(__section)
        config.set(__section, __key, product_key)
        config.set(__section, __license, license_info)
        with open(config_path, 'w') as config_file:
            config.write(config_file)
        _mt._get_metric_tracker().track('set_license_info.succeeded')
    except Exception:
        # `except Exception` (not a bare `except`) so KeyboardInterrupt and
        # SystemExit are not silently turned into a warning.
        _mt._get_metric_tracker().track('set_license_info.config_parser_error')
        # Adjacent literals replace the backslash continuation inside the
        # string, which embedded the next line's indentation in the message.
        __LOGGER__.warn(
            'Unable to write current GraphLab Create license to %s. '
            'Ensure that this user account has write permission to %s '
            'to save the license for offline use.' % (config_path, config_path))

    # NOTE(review): assumed to run whether or not the file write succeeded
    # (the source was whitespace-collapsed) -- confirm.
    _os.environ['GRAPHLAB_LICENSE_INFO'] = license_info
def __set_license_info(product_key, license_info, config_path=(_os.path.join(_os.path.expanduser("~"), ".graphlab", "config"))):
    """
    Store the product key and license info in the config file, which by
    default is ~/.graphlab/config, and in the GRAPHLAB_LICENSE_INFO
    environment variable.

    Overwrites any existing product key in that file. Writing the file is
    best-effort: on failure a warning is logged and the environment
    variable is still set.

    Parameters
    ----------
    product_key : str
        The product key to store alongside the license info.

    license_info : str
        The license info returned from the Dato server.

    config_path : str, optional
        Specifies which file to use for configuration (defaults to
        ~/.graphlab/config)
    """
    import graphlab.connect as _mt
    try:
        import ConfigParser
        config = ConfigParser.ConfigParser()
        config.read(config_path)
        if not config.has_section(__section):
            config.add_section(__section)
        config.set(__section, __key, product_key)
        config.set(__section, __license, license_info)
        with open(config_path, 'wb') as config_file:
            config.write(config_file)
        _mt._get_metric_tracker().track('set_license_info.succeeded')
    except Exception:
        # `except Exception` (not a bare `except`) so KeyboardInterrupt and
        # SystemExit are not silently turned into a warning.
        _mt._get_metric_tracker().track('set_license_info.config_parser_error')
        __LOGGER__.warn('Unable to write current GraphLab Create license to %s. Ensure that this user account has write permission to %s to save the license for offline use.' % (config_path, config_path))

    # NOTE(review): assumed to run whether or not the file write succeeded
    # (the source was whitespace-collapsed) -- confirm.
    _os.environ['GRAPHLAB_LICENSE_INFO'] = license_info
def connect_odbc(conn_str):
    """
    Open a stateful connection with a database.

    An ODBC driver manager program (unixODBC) must be installed with one
    or more functional drivers in order to use this feature. Please see
    the `User Guide
    <http://www.graphlab.com/learn/userguide.html#ODBC_Integration>`_ for
    more details.

    Parameters
    ----------
    conn_str : str
        A standard ODBC connection string.

    Returns
    -------
    out : graphlab.extensions._odbc_connection.unity_odbc_connection

    Examples
    --------
    >>> db = graphlab.connect_odbc("DSN=my_awesome_dsn;UID=user;PWD=mypassword")
    """
    connection = gl.extensions._odbc_connection.unity_odbc_connection()
    connection._construct_from_odbc_conn_str(conn_str)

    # Tracked after construction so the DBMS name/version can be reported.
    dbms_details = {'dbms_name': connection.dbms_name,
                    'dbms_version': connection.dbms_version}
    _mt._get_metric_tracker().track('connect_odbc', properties=dbms_details)

    return connection
def summary(self, output=None):
    """
    Print a summary of the model. The summary includes a description of
    training data, options, hyper-parameters, and statistics measured
    during model creation.

    Parameters
    ----------
    output : string, None
        The type of summary to return.
        None or 'stdout' : prints directly to stdout
        'str' : string of summary
        'dict' : a dict with 'sections' and 'section_titles' ordered
        lists. The entries in the 'sections' list are tuples of the form
        ('label', 'value').

        Any other value is handled like 'stdout'.

    Returns
    -------
    out : str, dict or None
        The summary string for 'str', the serialized summary for 'dict',
        and None after printing. If printing fails, the class name is
        returned instead.

    Examples
    --------
    >>> m.summary()
    """
    if output is None or output == 'stdout':
        pass
    elif output == 'str':
        return self.__repr__()
    elif output == 'dict':
        return _toolkit_serialize_summary_struct(
            self, *self._get_summary_struct())

    # Only the printing path is tracked; 'str' and 'dict' returned above.
    _mt._get_metric_tracker().track(self.__class__.__module__ + '.summary')
    try:
        # Parenthesized single-argument form works as a Python 2 print
        # statement and as a Python 3 function call.
        print(self.__repr__())
    except Exception:
        # `except Exception` (not a bare `except`) so KeyboardInterrupt and
        # SystemExit still propagate.
        return self.__class__.__name__
def list_fields(self):
    """
    Return the names of the fields stored in the model.

    Every returned name can be queried using the ``get`` function.

    Returns
    -------
    out : list
        A list of fields that can be queried using the ``get`` method.

    See Also
    --------
    get

    Examples
    --------
    >>> data = graphlab.SFrame('http://s3.amazonaws.com/GraphLab-Datasets/regression/houses.csv')
    >>> model = graphlab.linear_regression.create(data, target='price',
    ...                                           features=['bath', 'bedroom', 'size'])
    >>> fields = model.list_fields()
    """
    event_name = 'toolkit.regression.linear_regression.list_fields'
    _mt._get_metric_tracker().track(event_name)

    # The parent class provides the actual field listing.
    return super(LinearRegression, self).list_fields()
def set_target(target):
    """
    Set the target for GraphLab Canvas view output.

    By default, view output is set to 'browser'. To change the output
    target, use graphlab.canvas.set_target(target). Specifying 'ipynb'
    will attempt to render to an output cell in the IPython Notebook. If
    any other value is specified, the target will not be changed. The
    active target is only replaced when it is not already an instance of
    the requested target class.

    To set the output target to browser and use GraphLab Canvas as an
    interactive web application (default):

    >>> graphlab.canvas.set_target('browser')

    To set the output target to cells within the IPython Notebook:

    >>> graphlab.canvas.set_target('ipynb')

    To disable Canvas output:

    >>> graphlab.canvas.set_target('none')

    Parameters
    ----------
    target : "browser" | "ipynb" | "none"
    """
    import target as __target
    global _active_target

    # track metrics on target
    _mt._get_metric_tracker().track('canvas.set_target.%s' % target)

    if target == 'browser':
        if not isinstance(_active_target, __target.InteractiveTarget):
            _active_target = __target.InteractiveTarget()
    elif target == 'ipynb':
        if not isinstance(_active_target, __target.IPythonTarget):
            _active_target = __target.IPythonTarget()
    elif target == 'none':
        if not isinstance(_active_target, __target.NoneTarget):
            _active_target = __target.NoneTarget()
def __init__(self, num_hidden_layers, num_hidden_units, input_dimension=1, activation="sigmoid", **kwargs):
    """
    Build the layer list of a multi-layer perceptron.

    Every entry of ``num_hidden_units`` except the last produces a fully
    connected layer followed by an activation layer. The last entry sizes
    the output layer (added only when positive), and a softmax layer is
    appended when it is larger than 1. For 2-dimensional input a flatten
    layer is inserted at the front.

    Parameters
    ----------
    num_hidden_layers : int
        Must equal ``len(num_hidden_units)``.

    num_hidden_units : list of int
        Number of units for each layer; the last entry is the output size.

    input_dimension : int, optional
        Either 1 or 2. Defaults to 1.

    activation : str, optional
        Name resolved to an activation layer class. Defaults to "sigmoid".

    **kwargs
        Forwarded to ``_set_params`` of every layer created here, except
        the softmax and flatten layers.

    Raises
    ------
    ValueError
        If ``input_dimension`` is not 1 or 2, or if ``num_hidden_layers``
        differs from the length of ``num_hidden_units``.
    """
    # NOTE(review): super(self.__class__, self) recurses endlessly if this
    # class is ever subclassed; kept as-is because the enclosing class
    # name is not visible here.
    super(self.__class__, self).__init__()
    _mt._get_metric_tracker().track("toolkit.deeplearning.MultiLayerPerceptrons")

    if input_dimension not in (1, 2):
        raise ValueError("input_dimension must be either 1 or 2")

    ActivationLayer = _activation_layer_from_string(activation)

    if num_hidden_layers != len(num_hidden_units):
        raise ValueError("Length of num_hidden_units must equal to num_hidden_layers")

    def _append_configured(layer):
        # Append a layer, then apply the shared parameters to it.
        self._layers.append(layer)
        self._layers[-1]._set_params(**kwargs)

    # hidden layers: all but the last entry of num_hidden_units
    for idx in range(0, num_hidden_layers - 1):
        _append_configured(layers.FullConnectionLayer(num_hidden_units[idx]))
        _append_configured(ActivationLayer())

    # output layer
    output_units = num_hidden_units[-1]
    if output_units > 0:
        _append_configured(layers.FullConnectionLayer(num_hidden_units[-1]))
    if output_units > 1:
        self._layers.append(layers.SoftmaxLayer())

    # input layer
    if input_dimension == 2:
        self._layers.insert(0, layers.FlattenLayer())
def predict(self, dataset):
    """
    Predict the target column of the given dataset.

    The target column is provided during
    :func:`~graphlab.boosted_trees_regression.create`. If the target
    column is in the `dataset` it will be ignored.

    Parameters
    ----------
    dataset : SFrame
        A dataset that has the same columns that were used during
        training. If the target column exists in ``dataset`` it will be
        ignored while making predictions.

    Returns
    -------
    out : SArray
        Predicted target value for each example (i.e. row) in the
        dataset.

    See Also
    --------
    create

    Examples
    --------
    >>> m.predict(testdata)
    """
    tracker = _mt._get_metric_tracker()
    tracker.track('toolkit.regression.boosted_trees_regression.predict')

    # The parent prediction is always requested with output type 'margin'.
    parent = super(BoostedTreesRegression, self)
    return parent.predict(dataset, output_type='margin')
def __init__(
    self,
    num_convolution_layers,
    kernel_size,
    num_channels,
    num_output_units,
    stride=layers._LayerDefaults.STRIDE_DEFAULT,
    pooling="max_pooling",
    **kwargs
):
    """
    Build the layer list of a convolutional network.

    Each convolution stage adds a convolution layer followed by a pooling
    layer. A flatten layer follows the stages, then a fully connected
    output layer (only when ``num_output_units`` is positive), and a
    softmax layer when ``num_output_units`` is larger than 1.

    Parameters
    ----------
    num_convolution_layers : int
        Number of convolution + pooling stages.

    kernel_size : int
        Kernel size used for both the convolution and the pooling layers.

    num_channels : int
        Number of channels of every convolution layer.

    num_output_units : int
        Size of the fully connected output layer.

    stride : int, optional
        Stride used for both the convolution and the pooling layers.

    pooling : str, optional
        Name resolved to a pooling layer class. Defaults to "max_pooling".

    **kwargs
        Forwarded to ``_set_params`` of every layer created here, except
        the softmax layer.
    """
    _mt._get_metric_tracker().track("toolkit.deeplearning.ConvolutionNet")
    # NOTE(review): super(self.__class__, self) recurses endlessly if this
    # class is ever subclassed; kept as-is because the enclosing class
    # name is not visible here.
    super(self.__class__, self).__init__()

    PoolingLayer = _pooling_layer_from_string(pooling)

    def _append_configured(layer):
        # Append a layer, then apply the shared parameters to it.
        self._layers.append(layer)
        self._layers[-1]._set_params(**kwargs)

    # convolution stages
    for _stage in range(num_convolution_layers):
        _append_configured(layers.ConvolutionLayer(kernel_size=kernel_size,
                                                   num_channels=num_channels,
                                                   stride=stride))
        _append_configured(PoolingLayer(kernel_size=kernel_size, stride=stride))

    # NOTE(review): the flatten layer is assumed to be added once, after
    # all stages (the source was whitespace-collapsed) -- confirm.
    _append_configured(layers.FlattenLayer())

    # output layer
    if num_output_units > 0:
        _append_configured(layers.FullConnectionLayer(num_output_units))
    if num_output_units > 1:
        self._layers.append(layers.SoftmaxLayer())
def _load_version(cls, unpickler, version):
    """
    Rebuild a previously saved instance (per the original documentation,
    a SentenceSplitter) from an unpickler.

    The unpickler must yield a ``(state, _exclude, _features)`` triple.
    A new instance is created without calling ``__init__``, set up, and
    its proxy is updated with the loaded state.

    Parameters
    ----------
    unpickler : GLUnpickler
        A GLUnpickler file handler.

    version : int
        Version number maintained by the class writer. Not used here.

    Returns
    -------
    out
        The restored model instance.
    """
    _mt._get_metric_tracker().track(cls.__name__ + '.load_version')

    state, excluded, selected_features = unpickler.load()

    # These values are not used afterwards; the lookups are kept so that a
    # state missing either key still raises KeyError.
    _unused_features = state['features']
    _unused_excluded = state['excluded_features']

    restored = cls.__new__(cls)
    restored._setup()
    restored.__proxy__.update(state)
    restored._exclude = excluded
    restored._features = selected_features
    return restored
def fit(self, data):
    """
    Fit the transformer on the SFrame `data`.

    Parameters
    ----------
    data : SFrame
        The data used to fit the transformer.

    Returns
    -------
    self (A fitted version of the object)

    See Also
    --------
    transform, fit_transform

    Examples
    --------
    .. sourcecode:: python

    {examples}
    """
    # Reject anything that is not an SFrame before tracking or fitting.
    _raise_error_if_not_sframe(data, "data")

    tracker = _mt._get_metric_tracker()
    tracker.track(self.__class__.__module__ + '.fit')

    # Fitting happens on the proxy; `self` is returned to allow chaining.
    self.__proxy__.fit(data)
    return self
def set_product_key(product_key, file=(os.path.join(os.path.expanduser("~"), ".graphlab", "config"))):
    """
    Write the product key into the config file, which by default is
    ~/.graphlab/config.

    Overwrites any existing product key in that file. Any failure is
    tracked and then re-raised.

    Note: Environment variable GRAPHLAB_PRODUCT_KEY takes precedence over
    the config file and is not affected by this function.

    Parameters
    ----------
    product_key : str
        The product key, provided by registration on
        https://dato.com/register

    file : str, optional
        Specifies which file to use for configuration (defaults to
        ~/.graphlab/config)
    """
    import graphlab.connect as _mt
    try:
        import ConfigParser
        parser = ConfigParser.ConfigParser()
        parser.read(file)

        # Create the section on first use, then overwrite the key.
        if not parser.has_section(__section):
            parser.add_section(__section)
        parser.set(__section, __key, product_key)

        with open(file, 'wb') as handle:
            parser.write(handle)

        _mt._get_metric_tracker().track('set_product_key.succeeded')
    except:
        # Bare except is intentional: the failure is tracked and the
        # original exception is re-raised unchanged.
        _mt._get_metric_tracker().track('set_product_key.config_parser_error')
        raise
def transform(self, data):
    """
    Apply the fitted model to the SFrame `data`.

    Parameters
    ----------
    data : SFrame
        The data to be transformed.

    Returns
    -------
    A transformed SFrame.

    See Also
    --------
    fit, fit_transform

    Examples
    --------
    .. sourcecode:: python

    {examples}
    """
    # Reject anything that is not an SFrame before tracking or transforming.
    _raise_error_if_not_sframe(data, "data")

    tracker = _mt._get_metric_tracker()
    tracker.track(self.__class__.__module__ + '.transform')

    transformed = self.__proxy__.transform(data)
    return transformed
def __init__(self, array=None, background=False, sub_sketch_keys=[], _proxy=None):
    """__init__(array)
    Construct a new Sketch from an SArray.

    Parameters
    ----------
    array : SArray
        Array to sketch.

    background : boolean, optional
        If true, run the sketch in background. The state of the sketch
        may be queried by calling
        (:func:`~graphlab.Sketch.sketch_ready`). Default is False.

    sub_sketch_keys : list
        The list of sub sketches to calculate. For an SArray of
        dictionary type the key needs to be a string; for an SArray of
        vector (array) type the key needs to be a positive integer.

    Raises
    ------
    TypeError
        If no ``_proxy`` is supplied and ``array`` is not an SArray.
    """
    _mt._get_metric_tracker().track('sketch.init')

    # An existing proxy is wrapped as-is; nothing needs to be constructed.
    if _proxy:
        self.__proxy__ = _proxy
        return

    new_proxy = UnitySketchProxy(glconnect.get_client())
    self.__proxy__ = new_proxy
    if not isinstance(array, SArray):
        raise TypeError(
            "Sketch object can only be constructed from SArrays")

    # `sub_sketch_keys` is only forwarded, never modified in this method.
    self.__proxy__.construct_from_sarray(array.__proxy__, background,
                                         sub_sketch_keys)
def fit_transform(self, data):
    """
    Fit the transformer on the SFrame `data`, then return a transformed
    version of that same `data`.

    Parameters
    ----------
    data : SFrame
        The data used to fit the transformer. The same data is then also
        transformed.

    Returns
    -------
    Transformed SFrame.

    See Also
    --------
    fit, transform

    Notes
    ------
    - Fit transform modifies self.

    Examples
    --------
    .. sourcecode:: python

    {examples}
    """
    # Reject anything that is not an SFrame before tracking or fitting.
    _raise_error_if_not_sframe(data, "data")

    tracker = _mt._get_metric_tracker()
    tracker.track(self.__class__.__module__ + '.fit_transform')

    transformed = self.__proxy__.fit_transform(data)
    return transformed
def save(self, location):
    """
    Save the transformer into a GraphLab archive.

    The object is saved as a directory which can then be loaded using the
    :py:func:`~graphlab.load_model` method.

    Parameters
    ----------
    location : string
        Target destination for the model. Can be a local path or remote
        URL.

    Returns
    -------
    out
        Whatever the unity ``save_model`` call returns.

    See Also
    ----------
    graphlab.load_model

    Examples
    ----------
    .. sourcecode:: python

        >>> model.save('my_model_file')
        >>> loaded_model = gl.load_model('my_model_file')
    """
    tracker = _mt._get_metric_tracker()
    tracker.track(self.__class__.__module__ + '.save')

    unity = glconnect.get_unity()
    # The location is converted to an internal URL before saving.
    internal_url = _make_internal_url(location)
    wrapper = self._get_wrapper()
    return unity.save_model(self.__proxy__, internal_url, wrapper)
def connect_odbc(conn_str):
    """
    Create a stateful connection with a database.

    An ODBC driver manager program (unixODBC) must be installed with one or
    more functional drivers in order to use this feature.

    Please see the `User Guide <https://dato.com/learn/userguide/index.html#ODBC_Integration>`_
    for more details.

    Parameters
    ----------
    conn_str : str
        A standard ODBC connection string.

    Returns
    -------
    out : graphlab.extensions._odbc_connection.unity_odbc_connection

    Examples
    --------
    >>> db = graphlab.connect_odbc("DSN=my_awesome_dsn;UID=user;PWD=mypassword")
    """
    connection = gl.extensions._odbc_connection.unity_odbc_connection()
    connection._construct_from_odbc_conn_str(conn_str)

    # Record which DBMS was connected to, once the connection is built.
    tracker = _mt._get_metric_tracker()
    tracker.track('connect_odbc',
                  properties={'dbms_name': connection.dbms_name,
                              'dbms_version': connection.dbms_version})
    return connection
def get(self, field):
    """Return the value for the queried field.

    A field can be read in either of two equivalent ways:

    >>> out = m['field']
    >>> out = m.get('field')         # equivalent to previous line

    Parameters
    ----------
    field : string
        Name of the field to be retrieved.

    Returns
    -------
    out : value
        The current value of the requested field.

    Raises
    ------
    KeyError
        If ``field`` is not one of the names returned by ``list_fields``.

    See Also
    --------
    list_fields
    """
    _mt._get_metric_tracker().track(self.__class__.__module__ + '.get')

    # Guard clause: reject unknown fields up front.
    if field not in self.list_fields():
        raise KeyError('Field \"%s\" not in model. Available fields are '
                       '%s.' % (field, ', '.join(self.list_fields())))

    return self.__proxy__.get(field)
def get(self, field):
    """
    Return the value contained in the model's ``field``.

    Parameters
    ----------
    field : string
        Name of the field to be retrieved.

    Returns
    -------
    out
        Value of the requested field. The ``"scores"`` field is wrapped in
        a TimeSeries when the model's ``dataset_type`` is ``'TimeSeries'``.

    See Also
    --------
    list_fields
    """
    _mt._get_metric_tracker().track(
        'toolkits.anomaly_detection.bayesian_changepoints.get')

    wants_timeseries_scores = (
        field == "scores" and
        self.__proxy__.get('dataset_type') == 'TimeSeries')

    if not wants_timeseries_scores:
        return self.__proxy__.get(field)

    # Re-attach the index column so the scores come back as a TimeSeries.
    raw_scores = self.__proxy__.get('scores')
    return _gl.TimeSeries(raw_scores,
                          index=self.__proxy__.get_index_col_name())
def dict_key_summary(self):
    """
    Returns the sketch summary for all dictionary keys. This is only valid
    for a sketch object built from an SArray of dict type. Dictionary keys
    are converted to strings before the sketch summary is computed.

    Returns
    -------
    out : Sketch
        A Sketch wrapping the proxy's dictionary-key summary.

    Examples
    --------
    >>> sa = graphlab.SArray([{'I':1, 'love': 2}, {'nature':3, 'beauty':4}])
    >>> sa.sketch_summary().dict_key_summary()
    +------------------+-------+----------+
    |       item       | value | is exact |
    +------------------+-------+----------+
    |      Length      |   4   |   Yes    |
    | # Missing Values |   0   |   Yes    |
    | # unique values  |   4   |    No    |
    +------------------+-------+----------+
    Most frequent items:
    +-------+---+------+--------+--------+
    | value | I | love | beauty | nature |
    +-------+---+------+--------+--------+
    | count | 1 |  1   |   1    |   1    |
    +-------+---+------+--------+--------+
    """
    _mt._get_metric_tracker().track('sketch.dict_key_summary')
    with cython_context():
        key_summary_proxy = self.__proxy__.dict_key_summary()
        return Sketch(_proxy=key_summary_proxy)
def accuracy(targets, predictions):
    r"""
    Compute the proportion of correct predictions.

    Parameters
    ----------
    targets : SArray
        Ground truth class labels.

    predictions : SArray
        The prediction that corresponds to each target value. This vector
        must have the same length as ``targets``.

    Returns
    -------
    out : float
        The ratio of the number of correct classifications and the total
        number of data points.

    See Also
    --------
    confusion_matrix

    Examples
    --------
    >>> targets = graphlab.SArray([0, 1, 1, 0])
    >>> predictions = graphlab.SArray([0.1, 0.35, 0.7, 0.99])
    >>> graphlab.evaluation.accuracy(targets, predictions)
    """
    _mt._get_metric_tracker().track('evaluation.accuracy')

    # Shared input checks run before the evaluator is invoked.
    _supervised_evaluation_error_checking(targets, predictions)

    evaluator = _graphlab.extensions._supervised_streaming_evaluator
    return evaluator(targets, predictions, "accuracy")
def set_product_key(product_key, file=__default_config_path):
    """
    Sets the product key provided in file, which by default is
    ~/.graphlab/config, and in the current process.

    Overwrites any existing product key in that file. Writing the file is
    best-effort: if it fails, a warning is logged and no exception is
    raised.

    Note: Environment variable GRAPHLAB_PRODUCT_KEY takes precedence over
    the config file. This function also sets that variable in the current
    process's environment (``os.environ``) to ``str(product_key)``.

    Parameters
    ----------
    product_key : str
        The product key

    file : str, optional
        Specifies which file to use for configuration (defaults to
        ~/.graphlab/config)
    """
    import graphlab.connect as _mt
    try:
        config = _ConfigParser.ConfigParser()
        config.read(file)
        if not config.has_section(__section):
            config.add_section(__section)
        config.set(__section, __key, product_key)
        with open(file, 'w') as config_file:
            config.write(config_file)
        _mt._get_metric_tracker().track('set_product_key.succeeded')
    except Exception:
        # Deliberately best-effort, but ``Exception`` (not a bare except)
        # so KeyboardInterrupt / SystemExit are not swallowed.
        _mt._get_metric_tracker().track('set_product_key.config_parser_error')
        # Adjacent literals instead of a backslash continuation inside the
        # string, which would embed the next line's indentation in the text.
        __LOGGER__.warn(
            'Unable to write GraphLab Create product key to %s. Ensure that '
            'this user account has write permission to %s to save the '
            'product key locally.' % (file, file))
    # Set the environment variable regardless of whether the file write
    # succeeded.
    _os.environ['GRAPHLAB_PRODUCT_KEY'] = str(product_key)
def fit(self, data):
    """
    Fit a transformer using the SFrame `data`.

    Parameters
    ----------
    data : SFrame
        The data used to fit the transformer.

    Returns
    -------
    self (A fitted version of the object)

    See Also
    --------
    transform, fit_transform

    Examples
    --------
    .. sourcecode:: python

    {examples}
    """
    # Validate the input before anything is recorded or fitted.
    _raise_error_if_not_sframe(data, "data")

    metric_name = self.__class__.__module__ + '.fit'
    _mt._get_metric_tracker().track(metric_name)

    proxy = self.__proxy__
    proxy.fit(data)
    return self
def get_product_key(file=__default_config_path):
    """
    Returns the product key found in file, which by default is
    ~/.graphlab/config, or in environment variable GRAPHLAB_PRODUCT_KEY.

    Note: Environment variable takes precedence over config file. When the
    key is read from the config file it is also stored in the environment
    variable for the rest of the session, with surrounding quote characters
    stripped.

    Parameters
    ----------
    file : str, optional
        Specifies which file to use for configuration (defaults to
        ~/.graphlab/config)

    Returns
    -------
    out : str or None
        Product key string, or None if the environment variable is not set
        and the config file does not exist.

    Raises
    ------
    KeyError
        If the config file exists but the product key cannot be read from
        it.
    """
    PRODUCT_KEY_ENV = 'GRAPHLAB_PRODUCT_KEY'
    if PRODUCT_KEY_ENV not in os.environ:
        import graphlab.connect as _mt

        # see if in ~/.graphlab/config
        config_file = file
        if not os.path.isfile(config_file):
            return None

        try:
            import ConfigParser
            config = ConfigParser.ConfigParser()
            config.read(config_file)
            product_key = config.get(__section, __key)
            if product_key == -1:
                # Sentinel check kept from the original; routed through an
                # ordinary exception instead of a raw BaseException.
                raise ValueError('product key not set')
            # set the product key as an environment variable in this session
            os.environ[PRODUCT_KEY_ENV] = str(product_key).strip('"\'')
        except Exception:
            # ``Exception`` rather than a bare except so that
            # KeyboardInterrupt / SystemExit are not turned into KeyError.
            msg = "Unable to parse product key out of %s. Make sure it is defined in the [%s] section, with key name: '%s'" % (
                config_file, __section, __key)
            _mt._get_metric_tracker().track(
                'server_launch.config_parser_error')
            raise KeyError(msg)
    return os.environ[PRODUCT_KEY_ENV]
def transform(self, data):
    """
    Transform the SFrame `data` using a fitted model.

    Parameters
    ----------
    data : SFrame
        The data to be transformed.

    Returns
    -------
    A transformed SFrame.

    See Also
    --------
    fit, fit_transform

    Examples
    --------
    .. sourcecode:: python

    {examples}
    """
    # Validate the input before anything is recorded or transformed.
    _raise_error_if_not_sframe(data, "data")

    metric_name = self.__class__.__module__ + '.transform'
    _mt._get_metric_tracker().track(metric_name)

    transformed = self.__proxy__.transform(data)
    return transformed
def get_current_options(self):
    """
    Return a dictionary with the options used to define and train the model.

    Returns
    -------
    out : dict
        Dictionary with options used to define and train the model.

    See Also
    --------
    get_default_options, list_fields, get

    Examples
    --------
    >>> data = graphlab.SFrame('http://s3.amazonaws.com/dato-datasets/regression/houses.csv')
    >>> data['is_expensive'] = data['price'] > 30000
    >>> model = graphlab.svm_classifier.create(data, target='is_expensive', features=['bath', 'bedroom', 'size'])
    >>> current_options = model.get_current_options()
    """
    tracker = _mt._get_metric_tracker()
    tracker.track('toolkit.classifier.svm_classifier.get_current_options')

    # The lookup itself is delegated to the parent implementation.
    parent = super(_Classifier, self)
    return parent.get_current_options()
def fit_transform(self, data):
    """
    Fit the transformer on the SFrame `data`, then return `data`
    transformed by the freshly fitted transformer.

    Parameters
    ----------
    data : SFrame
        The data used to fit the transformer. The same data is then also
        transformed.

    Returns
    -------
    Transformed SFrame.

    See Also
    --------
    fit, transform

    Notes
    -----
    - Fit transform modifies self.

    Examples
    --------
    .. sourcecode:: python

    {examples}
    """
    # Validate the input before anything is recorded or fitted.
    _raise_error_if_not_sframe(data, "data")

    metric_name = self.__class__.__module__ + '.fit_transform'
    _mt._get_metric_tracker().track(metric_name)

    transformed = self.__proxy__.fit_transform(data)
    return transformed
def get_current_options(self):
    """
    A dictionary describing the options requested during training.

    Returns
    -------
    out : dict
         A dictionary with option (name, value) pairs requested during
         train time.

    See Also
    --------
    list_fields, get

    Examples
    --------
    >>> data = graphlab.SFrame('http://s3.amazonaws.com/GraphLab-Datasets/regression/houses.csv')
    >>> model = graphlab.linear_regression.create(data, target='price', features=['bath', 'bedroom', 'size'])
    >>> current_options = model.get_current_options()
    """
    tracker = _mt._get_metric_tracker()
    tracker.track('toolkit.regression.linear_regression.get_options')

    # The lookup itself is delegated to the parent implementation.
    parent = super(LinearRegression, self)
    return parent.get_current_options()
def get_current_options(self):
    """
    Return a dictionary with the options used to define and create the
    current KmeansModel instance.

    Returns
    -------
    out : dict
        Dictionary of options used to train the current instance of the
        KmeansModel.

    See Also
    --------
    get_default_options, list_fields, get

    Examples
    --------
    >>> sf = graphlab.SFrame({'a' : [0.1, 8, 3.5], 'b':[-3, 7.6, 3]})
    >>> model = graphlab.kmeans.create(sf, 2)
    >>> model.get_current_options()
    {'num_clusters': 2, 'max_iterations': 10}
    """
    _mt._get_metric_tracker().track('toolkit.kmeans.get_current_options')

    # Arguments handed to the toolkit function: the model proxy and name.
    request = {
        'model': self.__proxy__,
        'model_name': self.__name__,
    }
    run_toolkit = _graphlab.toolkits._main.run
    return run_toolkit('kmeans_get_current_options', request)
def get(self, field):
    """Return the value for the queried field.

    Each of these fields can be queried in one of two ways:

    >>> out = m['field']
    >>> out = m.get('field')         # equivalent to previous line

    Parameters
    ----------
    field : string
        Name of the field to be retrieved.

    Returns
    -------
    out : value
        The current value of the requested field.

    Raises
    ------
    KeyError
        If ``field`` is not one of the names returned by ``list_fields``.

    See Also
    --------
    list_fields
    """
    _mt._get_metric_tracker().track(self.__class__.__module__ + '.get')
    if field in self.list_fields():
        return self.__proxy__.get(field)
    else:
        # Trailing space after "are": without it the adjacent literals
        # concatenate into "Available fields are<first field>".
        raise KeyError('Field \"%s\" not in model. Available fields are '
                       '%s.' % (field, ', '.join(self.list_fields())))
def set_product_key(product_key, file=(os.path.join(os.path.expanduser("~"), ".graphlab", "config"))):
    """
    Sets the product key provided in file, which by default is
    ~/.graphlab/config

    Overwrites any existing product key in that file. Any error raised
    while reading or writing the configuration is tracked and then
    re-raised to the caller.

    Note: Environment variable GRAPHLAB_PRODUCT_KEY takes precedence over
    the config file and is not affected by this function.

    Parameters
    ----------
    product_key : str
        The product key, provided by registration on https://dato.com/register

    file : str, optional
        Specifies which file to use for configuration (defaults to
        ~/.graphlab/config)
    """
    import graphlab.connect as _mt
    try:
        import ConfigParser
        parser = ConfigParser.ConfigParser()
        parser.read(file)

        section_missing = not parser.has_section(__section)
        if section_missing:
            parser.add_section(__section)
        parser.set(__section, __key, product_key)

        with open(file, 'wb') as handle:
            parser.write(handle)

        _mt._get_metric_tracker().track('set_product_key.succeeded')
    except:
        # Track the failure, then let the original exception propagate.
        _mt._get_metric_tracker().track('set_product_key.config_parser_error')
        raise
def summary(self):
    """
    Display a summary of the TopicModel.

    Prints the model's ``__repr__()`` to standard output; nothing is
    returned.

    Examples
    --------
    >>> docs = graphlab.SArray('http://s3.amazonaws.com/GraphLab-Datasets/nips-text')
    >>> m = graphlab.topic_model.create(docs)
    >>> m.summary()
    Topic Model
        Data:
            Vocabulary size:     12375
        Settings:
            Number of topics:    10
            alpha:               5.0
            beta:                0.1
            Iterations:          10
            Verbose:             False
        Accessible attributes:
            m['topics']          An SFrame containing the topics.
            m['vocabulary']      An SArray containing the topics.
        Useful methods:
            m.get_topics()       Get the most probable words per topic.
            m.predict(new_docs)  Make predictions for new documents.
    """
    _mt._get_metric_tracker().track('toolkit.text.topic_model.summary')
    # Parenthesised single-argument form: identical output under the
    # Python 2 print statement and valid as a Python 3 function call.
    print(self.__repr__())
def _save_impl(self, pickler):
    """
    Save the model.

    The model is saved as a directory which can then be loaded using the
    :py:func:`~graphlab.load_model` method. This implementation dumps the
    model's proxy into the supplied pickler.

    Parameters
    ----------
    pickler : GLPickler
        An opened GLPickle archive (Do not close the archive.)

    See Also
    --------
    graphlab.load_model

    Examples
    --------
    >>> model.save('my_model_file')
    >>> loaded_model = graphlab.load_model('my_model_file')
    """
    metric_name = self.__module__ + '.save_impl'
    _mt._get_metric_tracker().track(metric_name)

    # The proxy is the only state written to the archive here.
    pickler.dump(self.__proxy__)
def __init__(self, num_convolution_layers, kernel_size, num_channels,
             num_output_units,
             stride=layers._LayerDefaults.STRIDE_DEFAULT,
             pooling='max_pooling', **kwargs):
    """
    Build the layer stack: ``num_convolution_layers`` pairs of
    (convolution, pooling) layers, one flatten layer, and an optional
    output section.

    Parameters
    ----------
    num_convolution_layers : int
        Number of (convolution, pooling) layer pairs to append.

    kernel_size
        Passed as ``kernel_size`` to every convolution and pooling layer.

    num_channels
        Passed as ``num_channels`` to every convolution layer.

    num_output_units : int
        If greater than 0, a full connection layer with this many units is
        appended; if greater than 1, a softmax layer is appended after it.

    stride : optional
        Passed as ``stride`` to every convolution and pooling layer.

    pooling : str, optional
        Name resolved to a pooling layer class via
        ``_pooling_layer_from_string``.

    **kwargs
        Forwarded to ``_set_params`` of every appended layer except the
        softmax layer.
    """
    _mt._get_metric_tracker().track('toolkit.deeplearning.ConvolutionNet')
    # NOTE(review): super(self.__class__, self) recurses forever if this
    # class is ever subclassed; naming the class explicitly would avoid
    # that, but the class name is not visible here -- confirm and fix at
    # the class level.
    super(self.__class__, self).__init__()

    pooling_layer_cls = _pooling_layer_from_string(pooling)

    def _append_configured(layer):
        # Append a layer, then apply the shared keyword parameters to it.
        self._layers.append(layer)
        self._layers[-1]._set_params(**kwargs)

    for _ in range(num_convolution_layers):
        _append_configured(
            layers.ConvolutionLayer(kernel_size=kernel_size,
                                    num_channels=num_channels,
                                    stride=stride))
        _append_configured(
            pooling_layer_cls(kernel_size=kernel_size, stride=stride))

    _append_configured(layers.FlattenLayer())

    # output layer
    if num_output_units > 0:
        _append_configured(layers.FullConnectionLayer(num_output_units))
    if num_output_units > 1:
        # The softmax layer is appended without _set_params, as before.
        self._layers.append(layers.SoftmaxLayer())