コード例 #1
0
ファイル: views.py プロジェクト: ramram1234/machine-learning
def remove_collection():
    '''

    This router function removes a collection, with respect to a database type.

    The POST body is expected to be a json object, with the following keys:

    @uid, user identifier, used to select what is removed.
    @type, either 'collection' (nosql implementation), or 'entity' (sql
        database).
    @collection, name of the collection to remove.

    Returns a json string of the form {'response': <result>}. The response
    value is None (or the falsy backend response), when nothing was removed,
    including the case where one of the above keys is missing from the
    request. A request using a method other than POST falls through, and
    returns None.

    '''

    if request.method == 'POST':
        # local variables
        response = None
        entity = Entity()
        collection = Collection()
        required = ('uid', 'type', 'collection')

        # programmatic-interface: parse the body once, and only proceed when
        #     every required key is supplied, instead of raising a KeyError.
        r = request.get_json()
        if isinstance(r, dict) and all(key in r for key in required):
            uid = r['uid']
            target_type = r['type']
            cname = r['collection']

            if cname and target_type == 'collection':
                payload = {'properties.uid': uid}
                response = collection.query(cname, 'drop_collection', payload)

            elif target_type == 'entity':
                response = entity.remove_entity(uid, cname)

        # only unwrap the backend response when it carries a truthy result
        if response and response['result']:
            return json.dumps({'response': response['result']})
        else:
            return json.dumps({'response': response})
コード例 #2
0
ファイル: views.py プロジェクト: ramram1234/machine-learning
def document_count():
    '''

    This router function retrieves the number of documents in a specified
    collection.

    The POST body is expected to be a json object, containing a 'collection'
    key, which names the collection to be counted.

    Returns a json string of the form {'count': <integer>}. The count is -1,
    when the collection was not supplied, the query did not report a truthy
    'status', or the query result is not an integer. A request using a method
    other than POST falls through, and returns None.

    '''

    # 'numbers.Integral' covers both 'int', and the python 2 'long', without
    #     referencing the 'long' builtin, which does not exist on python 3.
    import numbers

    if request.method == 'POST':
        # local variables
        count = None
        collection = Collection()

        # programmatic-interface: parse the body once, and skip the query
        #     when no collection was named, instead of raising a KeyError.
        r = request.get_json()
        if isinstance(r, dict) and 'collection' in r:
            count = collection.query(r['collection'], 'count_documents')

        if (
            count and
            count['status'] and
            isinstance(count['result'], numbers.Integral)
        ):
            return json.dumps({'count': count['result']})
        else:
            return json.dumps({'count': -1})
コード例 #3
0
    def save_premodel_dataset(self):
        '''

        This method stores the premodel properties, together with the
        dataset, as a single json document in the nosql implementation.

        The target collection name is taken from the premodel properties,
        lowercased, with spaces replaced by underscores.

        Returns a dict with a 'result', and an 'error' key. Exactly one of
        the two is populated. Any reported error is also appended to
        'self.list_error'.

        '''

        # resolve the target collection name
        properties = self.premodel_data['properties']
        target = properties['collection'].lower().replace(' ', '_')

        # insert the document
        nosql = Collection()
        outcome = nosql.query(
            target,
            'insert_one',
            {'properties': properties, 'dataset': self.dataset}
        )

        # nothing came back from the datastore
        if not outcome:
            return {'result': None, 'error': 'no dataset provided'}

        # an error takes precedence over a result
        if outcome['error']:
            self.list_error.append(outcome['error'])
            return {'result': None, 'error': outcome['error']}

        if outcome['result']:
            return {'result': outcome['result'], 'error': None}

        return {'result': None, 'error': 'no dataset provided'}
コード例 #4
0
ファイル: base_data.py プロジェクト: Vitao18/machine-learning
    def save_premodel_dataset(self):
        '''

        This method stores the premodel properties, together with the
        dataset, as a single json document in the nosql implementation,
        provided the collection, and document limits are not exceeded.

        Before saving, the 'datetime_saved' (utc), and 'uid' properties are
        added to the premodel properties.

        Returns a dict with a 'result', and an 'error' key. Exactly one of
        the two is populated. Any reported error is also appended to
        'self.list_error'.

        '''

        # local variables
        sql_entity = Entity()
        nosql = Collection()
        properties = self.premodel_data['properties']
        name = properties['collection'].lower().replace(' ', '_')
        total_collections = sql_entity.get_collection_count(self.uid)
        total_documents = nosql.query(name, 'count_documents')

        # enforce collection limit: only applies when 'self.uid' is falsy
        #     (presumably the anonymous user - confirm). The first collection
        #     name returned from the sql database (documented as the oldest)
        #     is dropped from the nosql database, then removed from the sql
        #     database, and both counts are refreshed.
        limit_reached = (
            not self.uid and
            total_collections and
            total_collections['result'] >= self.max_collection and
            name
        )

        if limit_reached:
            oldest = sql_entity.get_collections(self.uid)['result'][0]
            nosql.query(oldest, 'drop_collection')
            sql_entity.remove_entity(self.uid, oldest)
            total_collections = sql_entity.get_collection_count(self.uid)
            total_documents = nosql.query(name, 'count_documents')

        # save dataset, when both limits allow it
        outcome = None
        within_limits = (
            name and
            total_collections and
            total_collections['result'] < self.max_collection and
            total_documents and
            total_documents['result'] < self.max_document
        )

        if within_limits:
            timestamp = datetime.datetime.utcnow().strftime(
                "%Y-%m-%dT%H:%M:%S")
            properties['datetime_saved'] = timestamp
            properties['uid'] = self.uid

            outcome = nosql.query(
                name,
                'insert_one',
                {'properties': properties, 'dataset': self.dataset}
            )

        # return result: nothing was saved, or nothing came back
        if not outcome:
            return {'result': None, 'error': 'no dataset provided'}

        # an error takes precedence over a result
        if outcome['error']:
            self.list_error.append(outcome['error'])
            return {'result': None, 'error': outcome['error']}

        if outcome['result']:
            return {'result': outcome['result'], 'error': None}

        return {'result': None, 'error': 'no dataset provided'}
コード例 #5
0
    def save_entity(self, session_type, session_id):
        '''

        This method overrides the identical method from the inherited
        superclass, 'BaseData'. Specifically, this method updates an
        existing entity within the corresponding database table,
        'tbl_dataset_entity'.

        @session_type, passed through, unchanged, to the 'Entity'
            constructor.

        @session_id, is synonymous to 'entity_id', and provides context to
            update 'modified_xx' columns within the 'tbl_dataset_entity'
            database table.

        Returns a dict with a 'status', and an 'error' key. When the save
        fails, 'status' is False, and 'error' is 'self.list_error'. When the
        collection, or document limits prevent the save from being attempted,
        the method still reports a True 'status', with no error.

        '''

        # local variables
        entity = Entity()
        cursor = Collection()
        premodel_settings = self.premodel_data['properties']
        collection = premodel_settings['collection']
        collection_adjusted = collection.lower().replace(' ', '_')
        collection_count = entity.get_collection_count(self.uid)
        document_count = cursor.query(collection_adjusted, 'count_documents')

        # define entity properties
        premodel_entity = {
            'title': premodel_settings.get('session_name', None),
            'uid': self.uid,
            'id_entity': session_id,
        }

        # store entity values in database
        if (collection_adjusted and collection_count
                and collection_count['result'] < self.max_collection
                and document_count
                and document_count['result'] < self.max_document):
            db_save = Entity(premodel_entity, session_type)
            db_return = db_save.save()

            if db_return and db_return['status']:
                return {'status': True, 'error': None}

            # a falsy return (i.e. None) cannot be subscripted: record a
            #     generic message, instead of raising a TypeError.
            if db_return:
                self.list_error.append(db_return['error'])
            else:
                self.list_error.append('Entity was not saved')

            return {'status': False, 'error': self.list_error}

        else:
            return {'status': True, 'error': None}
コード例 #6
0
ファイル: sv.py プロジェクト: ramram1234/machine-learning
def generate(model, kernel_type, collection, payload, list_error):
    '''

    This method generates an sv (i.e. svm, or svr) model using feature data,
    retrieved from the database. The generated model, is then stored within the
    NoSQL datastore.

    @model, the model type, compared against the first (svm), and second
        (svr) entries of the 'MODEL_TYPE' application configuration.
    @kernel_type, kernel passed to the underlying 'svm.SVC', or 'svm.SVR'.
    @collection, collection name. A lowercased copy, with spaces replaced by
        underscores, is used to query the datasets, and as the key for most
        cached values.
    @payload, argument supplied to the 'aggregate' collection query.
    @list_error, error accumulator, returned to the caller. It is assumed
        to be a list (to be confirmed against callers).

    Internally, the following are constructed:

    - grouped_features, a matrix of observations, where each nested vector,
          or python list, is a collection of features within the containing
          observation, ordered by feature name.
    - encoded_labels, observation labels (dependent variable labels),
          encoded into a unique integer representation (svm only).

    Returns a dict, of the form {'error': list_error}. An unsupported 'model'
    appends a message to 'list_error', and returns without querying, or
    caching anything.

    '''

    # local variables
    sorted_labels = False
    list_model_type = current_app.config.get('MODEL_TYPE')
    collection_adjusted = collection.lower().replace(' ', '_')
    cursor = Collection()

    # resolve model type once: anything other than svm, or svr is reported,
    #     instead of failing later on an unbound local variable.
    is_svm = model == list_model_type[0]
    is_svr = not is_svm and model == list_model_type[1]

    if not (is_svm or is_svr):
        list_error.append('unsupported model type: ' + str(model))
        return {'error': list_error}

    # get datasets
    datasets = cursor.query(collection_adjusted, 'aggregate', payload)

    # restructure dataset into arrays
    observation_labels = []
    grouped_features = []

    for dataset in datasets['result']:
        for observation in dataset['dataset']:
            indep_variables = observation['independent-variables']

            for features in indep_variables:
                # order features by name, once per feature vector
                ordered = sorted(features.items())

                # svm case
                if is_svm:
                    observation_labels.append(
                        observation['dependent-variable'])
                    grouped_features.append([v for _, v in ordered])

                # svr case: labels, and features must be numeric
                else:
                    observation_labels.append(
                        float(observation['dependent-variable']))
                    grouped_features.append([float(v) for _, v in ordered])

                # feature labels are taken from the first non-empty vector
                if not sorted_labels:
                    sorted_labels = [k for k, _ in ordered]

    # generate svm model
    if is_svm:
        # convert observation labels to a unique integer representation
        label_encoder = preprocessing.LabelEncoder()
        label_encoder.fit(observation_labels)
        encoded_labels = label_encoder.transform(observation_labels)

        # create model
        clf = svm.SVC(kernel=kernel_type, probability=True)

        # cache encoded labels
        Model(label_encoder).cache(model + '_labels', collection_adjusted)

        # fit model
        clf.fit(grouped_features, encoded_labels)

    # generate svr model
    else:
        # create model
        clf = svm.SVR(kernel=kernel_type)

        # fit model
        clf.fit(grouped_features, observation_labels)

        # compute, and cache coefficient of determination
        r2 = clf.score(grouped_features, observation_labels)
        Hset().cache(model + '_r2', collection_adjusted, r2)

    # cache model
    Model(clf).cache(model + '_model', collection_adjusted)

    # cache feature labels, with respect to given collection
    # NOTE(review): this key uses the raw 'collection', whereas the other
    #     cached values use 'collection_adjusted' - confirm against readers
    #     of this cache, before changing.
    Hset().cache(model + '_feature_labels', collection,
                 json.dumps(sorted_labels))

    # return error(s) if exists
    return {'error': list_error}
コード例 #7
0
    def save_entity(self, session_type, id_entity=None):
        '''

        This method overrides the identical method from the inherited
        superclass, 'BaseData'. Specifically, this method saves an entity
        to the corresponding database table, 'tbl_dataset_entity', when the
        collection, and document limits have not been reached.

        @session_type, passed through, unchanged, to the 'Entity'
            constructor.

        @id_entity, accepted for interface compatibility, and not used
            within this method.

        The stored 'model_type' is numeric: list indices begin at 0, and are
        corrected by adding 1. This allows the numeric representation of the
        'model_type' to relate to another database table, which maps integer
        values with the corresponding 'model_type' name. The integer column
        of the mapping table begins at 1.

        Returns a dict with a 'status', and an 'error' key, as well as an
        'id' key, unless the save reported an error.

        '''

        # local variables
        entity = Entity()
        cursor = Collection()
        settings = self.premodel_data['properties']
        raw_name = settings['collection']
        name = raw_name.lower().replace(' ', '_')
        total_collections = entity.get_collection_count(self.uid)
        total_documents = cursor.query(name, 'count_documents')

        # assign numerical representation
        model_index = self.list_model_type.index(self.model_type) + 1

        # define entity properties
        record = {
            'title': settings.get('session_name'),
            'collection': raw_name,
            'model_type': model_index,
            'uid': self.uid,
        }

        # store entity values in database, when both limits allow it
        saved = None
        within_limits = (
            name and
            total_collections and
            total_collections['result'] < self.max_collection and
            total_documents and
            total_documents['result'] < self.max_document
        )

        if within_limits:
            saved = Entity(record, session_type).save()

        # return: the save was skipped, or returned nothing
        if not saved:
            return {'status': True, 'error': 'Entity was not saved', 'id': None}

        # an error takes precedence over a successful status
        if saved['error']:
            self.list_error.append(saved['error'])
            return {'status': False, 'error': self.list_error}

        if saved['status']:
            return {'status': True, 'error': None, 'id': saved['id']}

        return {'status': True, 'error': 'Entity was not saved', 'id': None}