Python MongoClient.delete_many Exemples, pymongo.MongoClient.delete_many Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : test_api.py Projet : igormcsouza/full-stack-todo

def _setup(samples: bool = True):
    try:
        collection = MongoClient(getenv('MONGO_URI'))['todos']['main']
        collection.delete_many({})
    except Exception as e:
        print("There was an error Setting Up:", e)

    if samples:
        todos = [
            {
                'task': 'Finish This',
                'when': datetime.timestamp(datetime.now()),
                'by': 'me',
                'done': False
            },
            {
                'task': 'Goto the Supermarket',
                'when': datetime.timestamp(datetime.now()),
                'by': 'me',
                'done': True
            },
            {
                'task': 'Implement from Database',
                'when': datetime.timestamp(datetime.now()),
                'by': 'someone',
                'done': False
            },
        ]

        r = collection.insert_many(todos)

    return collection

Exemple #2

0

Afficher le fichier

Fichier : cache.py Projet : Nixon-/Matsuo

class DatabaseCache:
    """Cache of binary payloads kept in a MongoDB collection.

    Items are stored as ``DatabaseCacheItem`` documents. When an expiration
    delta is configured, each item is stamped with ``now + delta`` and the
    collection gets indexes on the expiration field.
    """

    def __init__(self,
                 database_name="Matsuo",
                 collection_name="ImageCache",
                 expiration_time=datetime.timedelta(minutes=10)):
        """Open the collection and, if expiry is enabled, create its indexes.

        :param database_name:   Name of the database holding the cache
        :param collection_name: Name of the collection holding the cache
        :param expiration_time: ``timedelta`` added to the insertion time of
                                every item, or ``None`` to skip index creation
        """
        # Despite its name, ``client`` holds the collection, not the client.
        self.client = MongoClient()[database_name][collection_name]
        self.expiration_delta = expiration_time
        if self.expiration_delta is not None:
            self.client.create_index(
                [(DatabaseCacheItem.expiration_index, pymongo.ASCENDING)],
                expireAfterSeconds=0)
            # NOTE(review): a text index on the same expiration field looks
            # unusual -- confirm it is intentional.
            self.client.create_index([(DatabaseCacheItem.expiration_index,
                                       pymongo.TEXT)])

    def add_item(self, key, data):
        """Insert or replace the item stored under ``key``.

        :param key:  Key identifying the cached item
        :param data: Object with a ``read()`` method returning the payload
        """
        payload = bson.binary.Binary(data.read())
        if self.expiration_delta is None:
            # Expiry disabled: previously ``now() + None`` raised TypeError.
            # NOTE(review): assumes DatabaseCacheItem accepts None as the
            # expiration value -- confirm against its definition.
            expires_at = None
        else:
            # NOTE(review): now() is naive local time; verify that this is
            # what the expiration index should be compared against.
            expires_at = datetime.datetime.now() + self.expiration_delta
        item = DatabaseCacheItem(key, payload, expires_at)
        self.client.update_one(item.get_id(),
                               update=item.get_update_form(),
                               upsert=True)

    def get_item(self, key):
        """Return the stored data for ``key``, or ``None`` if nothing is found."""
        element = self.client.find_one({DatabaseCacheItem.filename_key: key})
        return element[DatabaseCacheItem.data_key] if element else None

    def remove_item(self, key):
        """Delete every document stored under ``key``."""
        self.client.delete_many({DatabaseCacheItem.filename_key: key})

Exemple #3

0

Afficher le fichier

def setup():
    """Call ``load_dotenv()`` and then empty the ``todos.test`` collection.

    The connection string comes from the ``MONGO_URI`` environment variable.
    Any error raised while connecting or deleting is printed, not re-raised.
    """
    load_dotenv()

    try:
        database = MongoClient(getenv('MONGO_URI'))['todos']
        database['test'].delete_many({})
    except Exception as error:
        print("There was an error Setting Up:", error)

Exemple #4

0

Afficher le fichier

def teardown():
    """Call ``load_dotenv()`` and then empty the ``todos.test`` collection.

    The connection string comes from the ``MONGO_URI`` environment variable.
    Any error raised while connecting or deleting is printed, not re-raised.
    """
    load_dotenv()

    try:
        database = MongoClient(getenv('MONGO_URI'))['todos']
        database['test'].delete_many({})
    except Exception as error:
        print("There was an error Tearing Down:", error)

Exemple #5

0

Afficher le fichier

Fichier : db.py Projet : alextanton/pythonClient

class DB:
    """Small wrapper around the ``local.connections`` MongoDB collection."""

    def __init__(self):
        """Open the ``connections`` collection of the ``local`` database."""
        self.collection = MongoClient().local.connections

    def REMOVEALL(self):
        """Delete every document in the collection."""
        self.collection.delete_many({})

    def remove(self, connection, field):
        """Delete the documents matching ``connection`` on the chosen field.

        :param connection: Object exposing ``ip`` and ``hostname`` attributes
        :param field:      Selects the match key with a membership test:
                           ``"ip"`` is checked first, then ``"hostname"``
        :return: True when a delete was issued, False when ``field`` names
                 neither key
        """
        if "ip" in field:
            self.collection.delete_many({"ip": connection.ip})
        elif "hostname" in field:
            self.collection.delete_many({"hostname": connection.hostname})
        else:
            return False
        return True

    def insert(self, connection):
        """Store the ip, hostname and stringified unique id of ``connection``.

        The previous version first serialised ``connection.socket`` with
        ``json.dumps`` into an unused variable; that call failed before the
        insert could run, so it has been removed.
        """
        self.collection.insert_one({
            "ip": connection.ip,
            "hostname": connection.hostname,
            "uniq": str(connection.unique)
        })

    def getAllConnectionsPrint(self):
        """Return all documents of the collection as a list."""
        return list(self.collection.find())

    def getCollection(self):
        """Return the underlying collection object."""
        return self.collection

Exemple #6

0

Afficher le fichier

Fichier : dialogue.py Projet : Kumo-YZX/kumoRail

class diaDb(object):
    """Stores dialogue records (time, user, word, reply) in ``diadb.diaset``."""

    def __init__(self, address=config.databaseIp, port=config.databasePort):
        """Open the ``diaset`` collection of the ``diadb`` database.

        :param address: Host of the MongoDB server
        :param port:    Port of the MongoDB server
        """
        from pymongo import MongoClient
        self.__diaset = MongoClient(address, port).diadb.diaset
        # Parenthesised single argument prints the same text on Python 2 and 3.
        print('diadb init done')

    def write(self, user, word, reply):
        """Insert one dialogue record stamped with the current time.

        :return: Always 1
        """
        import datetime
        self.__diaset.insert_one({
            "time": datetime.datetime.now(),
            "user": user,
            "word": word,
            "reply": reply
        })
        return 1

    def read(self, startTime, endTime):
        """Find records whose time lies strictly between the two bounds.

        :return: ``(1, cursor)`` when at least one record matches, otherwise
                 ``(0, [])``
        """
        # NOTE(review): Cursor.count() is not available in newer PyMongo
        # releases -- confirm the driver version this runs against.
        res = self.__diaset.find({"time": {"$gt": startTime, "$lt": endTime}})
        if res.count():
            return 1, res
        else:
            return 0, []

    def delete(self, user):
        """Delete dialogue records and return how many were removed.

        :param user: ``''`` deletes every record; any other value deletes
                     only the records written for that user
        :return: Number of deleted documents
        """
        # Both branches used to pass an empty filter, so naming a user still
        # wiped the whole collection.
        query = {} if user == '' else {"user": user}
        return self.__diaset.delete_many(query).deleted_count

    def all(self):
        """Return every stored record.

        :return: ``(1, cursor)`` when the collection is non-empty, otherwise
                 ``(0, [])``
        """
        res = self.__diaset.find({})
        if res.count():
            return 1, res
        else:
            return 0, []

Exemple #7

0

Afficher le fichier

        segments_37[line[0]] = line[1:]

# Generate new cnv collections by copying originals and updating coordinates
cnv_37 = []
for sample in collection_in.find({},{'_id':0}):
    key = sample['id']
    # Keep only the samples that have an entry in segments_37.
    if key in segments_37:
        sample['start'] = int(segments_37[key][1])
        sample['end'] = int(segments_37[key][2])
        sample['variantset_id'] = 'AM_VS_GRCH37'
        cnv_37.append(sample)


# write to db
collection_out = MongoClient()['arraymap_ga4gh']['variants_cnv_grch37']
collection_out.delete_many({})
# insert_many() rejects an empty document list, so skip it when no sample
# matched instead of failing right after the collection was emptied.
if cnv_37:
    collection_out.insert_many(cnv_37)


##########################################
##### Read in lifted file of grch38 ######
##########################################
segments_38 = {}
with open('/Users/bogao/DataFiles/tmp/segments_38.txt', 'r') as fi:
    # Discard the first line (presumably a header row) before reading records.
    next(fi)
    for line in fi:
        line = line.strip().split('\t')
        # First tab-separated field is the key; the rest is stored as a list.
        segments_38.update({line[0]: line[1:]})
        

# Generate new cnv collections by copying originals and updating coordinates

Exemple #8

0

Afficher le fichier

class Server(object):
    def __init__(self):
        """Open the ``data.service`` collection and train the classifier on an empty list."""
        client = MongoClient()
        self.db = client.data.service
        forest = MyRandomForest({}, {}, DATA_MODEL_PATH)
        self.classifier = forest
        self.classifier.train([])

    ################ Stuff for use in this file ################

    def _create_column(self,
                       column,
                       type_id,
                       column_name,
                       source_name,
                       model,
                       force=False):
        """
        Store a column document for a semantic type and return the column's id.

        Notes: If an identical column document already exists and force is not true, a 409 is returned and nothing is modified.

        :param column:      Column object whose to_json() output is merged into the stored document
        :param type_id:     Id of the semantic type this column belongs to
        :param column_name: Name of the column to be created
        :param source_name: Name of the source of the column to be created
        :param model:       Model of the column to be created
        :param force:       If true and the column exists, the existing documents are deleted before the new one is inserted
        :return: The id of the new column with a 201, or the message "Column already exists" with a 409
        """
        column_id = get_column_id(type_id, column_name, source_name, model)
        document = {
            ID: column_id,
            DATA_TYPE: DATA_TYPE_COLUMN,
            TYPE_ID: type_id,
            COLUMN_NAME: column_name,
            SOURCE_NAME: source_name,
            MODEL: model
        }
        already_stored = self.db.find_one(document)
        if already_stored and not force:
            return "Column already exists", 409
        if already_stored:
            # Forced re-creation: drop the old documents first.
            self.db.delete_many(document)
        document.update(column.to_json())
        self.db.insert_one(document)
        return column_id, 201

    def _predict_column(self, column_name, source_names, data):
        """
        Predicts the semantic type of a column.

        A temporary Column is built from the first source name, filled with the data, prepared, and handed to the classifier together with the search results for the given sources.

        :param column_name:  Name of the column
        :param source_names: List of source names (the first one is used to build the column)
        :param data:         The values to predict based upon
        :return: Whatever Column.predict_type returns for the prepared column
        """
        candidate = Column(column_name, source_names[0])
        for value in data:
            candidate.add_value(value)
        candidate.semantic_type = "to_predict"
        candidate.prepare_data()

        types_data = searcher.search_types_data(INDEX_NAME, source_names)
        similar_text = searcher.search_similar_text_data(
            INDEX_NAME, candidate.value_text, source_names)
        return candidate.predict_type(types_data, similar_text,
                                      self.classifier)

    def _update_bulk_add_model(self, model, column_model):
        """
        Recomputes the learned semantic types of a bulk add model, stores the model in the db and returns it.

        Nodes without a column name, and the node carrying the file-name column, are left untouched.

        :param model:        The current bulk add model
        :param column_model: The model of the columns which are being updated against
        :return: The updated bulk add model
        """
        for node in model[BAC_GRAPH][BAC_NODES]:
            if not node.get(BAC_COLUMN_NAME):
                continue
            if node[BAC_COLUMN_NAME] == BAC_COLUMN_NAME_FILE_NAME:
                continue

            # The column id is derived from the node's first user semantic type.
            user_type = node[BAC_USER_SEMANTIC_TYPES][0]
            type_id = get_type_id(user_type[BAC_CLASS][BAC_URI],
                                  user_type[BAC_PROPERTY][BAC_URI])
            column_id = get_column_id(type_id, node[BAC_COLUMN_NAME],
                                      model[BAC_NAME], column_model)
            stored_column = self.db.find_one({
                DATA_TYPE: DATA_TYPE_COLUMN,
                ID: column_id
            })
            prediction = self._predict_column(node[BAC_COLUMN_NAME],
                                              [model[BAC_NAME]],
                                              stored_column[DATA])

            node[BAC_LEARNED_SEMANTIC_TYPES] = []
            for predicted in prediction:
                class_uri, property_uri = decode_type_id(
                    predicted[SL_SEMANTIC_TYPE])[:2]
                learned = collections.OrderedDict()
                learned[BAC_CLASS] = {BAC_URI: class_uri}
                learned[BAC_PROPERTY] = {BAC_URI: property_uri}
                learned[BAC_CONFIDENCE_SCORE] = predicted[SL_CONFIDENCE_SCORE]
                node[BAC_LEARNED_SEMANTIC_TYPES].append(learned)

        model_query = {DATA_TYPE: DATA_TYPE_MODEL, ID: model[BAC_ID]}
        self.db.update_one(model_query,
                           {"$set": {
                               BULK_ADD_MODEL_DATA: model
                           }})
        return model

    ################ Predict ################

    def predict_post(self,
                     data,
                     namespaces=None,
                     column_names=None,
                     source_names=None,
                     models=None):
        """
        Predicts the semantic type of the given data.

        The data values are stripped and empty ones dropped; the remaining values are run through _predict_column and the results are optionally restricted to the types allowed by the namespaces/models filters, then sorted by score (highest first).

        :param namespaces:   List of allowed namespaces
        :param column_names: List of allowed column names (only the first entry is used, as the name of the column to predict)
        :param source_names: List of allowed source names; when None, all source names found on the columns in the db are used
        :param models:       List of allowed column models
        :param data:         List of the data values to predict.
        :return: A return message (if it is successful this will be a list of the predicted types) and a return code
        """
        # Normalise the input: strip whitespace and drop empty values
        data = [x.strip() for x in data]
        data = [x for x in data if x]
        if not data:
            return "Predicting data cannot be empty", 500
        if source_names is None:
            # If no source names are given just use all of the source names in the db
            source_names = set()
            for col in self.db.find({DATA_TYPE: DATA_TYPE_COLUMN}):
                source_names.add(col[SOURCE_NAME])
            source_names = list(source_names)
        if len(source_names) < 1:
            return "You must have columns to be able to predict", 400

        #### Predict the types
        ## Do the actual predicting using the semantic labeler
        # NOTE(review): column_names defaults to None, and indexing it here raises TypeError in that case -- confirm callers always pass it
        predictions = self._predict_column(column_names[0], source_names, data)
        if len(predictions) < 1: return "No matches found", 404

        ## Filter the results
        # Each filter yields a set of allowed type ids, or stays None when the filter was not requested
        allowed_ids_namespaces = None
        allowed_ids_models = None
        all_allowed_ids = None
        if namespaces is not None:
            allowed_ids_namespaces = set()
            current_allowed_types = list(
                self.db.find({
                    DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE,
                    NAMESPACE: {
                        "$in": namespaces
                    }
                }))
            for prediction in current_allowed_types:
                allowed_ids_namespaces.add(prediction[ID])
        # NOTE(review): models is tested for truthiness while namespaces is tested against None, so an empty models list disables this filter -- confirm that is intended
        if models:
            allowed_ids_models = set()
            current_allowed_types = list(
                self.db.find({
                    DATA_TYPE: DATA_TYPE_COLUMN,
                    MODEL: {
                        "$in": models
                    }
                }))
            for c in current_allowed_types:
                allowed_ids_models.add(c[TYPE_ID])
        # Combine the filters: intersect when both are present, otherwise use whichever one exists
        if allowed_ids_namespaces is not None and allowed_ids_models is not None:
            all_allowed_ids = allowed_ids_namespaces & allowed_ids_models
        elif allowed_ids_namespaces is not None and allowed_ids_models is None:
            all_allowed_ids = allowed_ids_namespaces
        elif allowed_ids_namespaces is None and allowed_ids_models is not None:
            all_allowed_ids = allowed_ids_models
        return_body = []
        for prediction in predictions:
            # Debug output of every raw prediction
            print(prediction)
            for type_id, exact_score in prediction[1]:
                if all_allowed_ids is not None:
                    # NOTE(review): the filter looks at prediction[SL_SEMANTIC_TYPE] rather than the type_id being emitted -- verify which of the two is meant
                    if prediction[SL_SEMANTIC_TYPE] not in all_allowed_ids:
                        continue
                obj_dict = {TYPE_ID_PATH: type_id, SCORE: exact_score}
                type_class_property = decode_type_id(type_id)
                obj_dict[CLASS] = type_class_property[0]
                obj_dict[PROPERTY] = type_class_property[1]
                return_body.append(obj_dict)
        # Highest score first
        return_body.sort(key=lambda x: x[SCORE], reverse=True)
        return json_response(return_body, 200)

    ################ SemanticTypes ################

    def semantic_types_get(self,
                           class_=None,
                           property_=None,
                           namespaces=None,
                           source_names=None,
                           column_names=None,
                           column_ids=None,
                           models=None,
                           return_columns=False,
                           return_column_data=False):
        """
        Returns the semantic types (optionally with their columns and the columns' data) that match the given filters.

        :param class_:             The class of the semantic types to get
        :param property_:          The property of the semantic types to get
        :param namespaces:         The possible namespaces of the semantic types to get
        :param source_names:       The possible source names at least one column of a semantic type must have
        :param column_names:       The possible column names at least one column of a semantic type must have
        :param column_ids:         The possible column ids at least one column of a semantic type must have
        :param models:             The possible column models at least one column of a semantic type must have
        :param return_columns:     True if the columns should be returned with the semantic types
        :param return_column_data: Passed on to clean_columns_output when the columns are attached
        :return: The matching semantic types with a 200, or an error message with a 404 when none match
        """
        # Semantic types matching the class, property and namespace filters
        type_query = {DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE}
        if class_ is not None:
            type_query[CLASS] = class_
        if property_ is not None:
            type_query[PROPERTY] = property_
        if namespaces is not None:
            type_query[NAMESPACE] = {"$in": namespaces}
        candidates = list(self.db.find(type_query))
        matching_ids = {t[ID] for t in candidates}

        # Narrow down by the type ids of the columns matching the column filters
        if source_names or column_names or column_ids or models:
            column_query = {DATA_TYPE: DATA_TYPE_COLUMN}
            column_filters = ((SOURCE_NAME, source_names),
                              (COLUMN_NAME, column_names),
                              (ID, column_ids),
                              (MODEL, models))
            for field, allowed in column_filters:
                if allowed is not None:
                    column_query[field] = {"$in": allowed}
            ids_from_columns = {
                col[TYPE_ID] for col in self.db.find(column_query)
            }
            matching_ids = matching_ids & ids_from_columns

        # Build one ordered entry per surviving semantic type
        return_body = []
        for t in candidates:
            if t[ID] not in matching_ids:
                continue
            entry = collections.OrderedDict()
            entry[TYPE_ID_PATH] = t[ID]
            entry[CLASS] = t[CLASS]
            entry[PROPERTY] = t[PROPERTY]
            entry[NAMESPACE] = t[NAMESPACE]
            return_body.append(entry)

        # Attach the columns of every returned type if requested
        if return_columns:
            columns_of_type = {DATA_TYPE: DATA_TYPE_COLUMN}
            for entry in return_body:
                columns_of_type[TYPE_ID] = entry[TYPE_ID_PATH]
                entry[COLUMNS] = clean_columns_output(
                    self.db.find(columns_of_type), return_column_data)

        if not return_body:
            return "No Semantic types matching the given parameters were found", 404
        return json_response(return_body, 200)

    def semantic_types_post_put(self, class_, property_, force=False):
        """
        Creates a semantic type and returns its id.

        Notes: If the type already exists and force is not true, the id is returned with a 409 and nothing is modified.

        :param class_:    The class of the semantic type; trailing slashes are removed and the namespace is derived from it
        :param property_: The property of the semantic type; trailing slashes are removed
        :param force:     If true and the type already exists, the existing type and all of its columns are deleted before creation
        :return: The id of the semantic type with a 201 when it was created, or with a 409 when it already existed and force was false
        """
        class_ = class_.rstrip("/")
        property_ = property_.rstrip("/")

        # The namespace is the class with its last "/"- or "#"-separated segment removed
        segments = class_.replace("#", "/").split("/")
        namespace = "/".join(segments[:-1])

        type_id = get_type_id(class_, property_)
        document = {
            ID: type_id,
            DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE,
            CLASS: class_,
            PROPERTY: property_,
            NAMESPACE: namespace
        }
        already_stored = self.db.find_one(document)
        if already_stored and not force:
            return type_id, 409
        if already_stored:
            # Forced re-creation: remove the type's columns, then the type itself
            columns_of_type = {DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: type_id}
            self.db.delete_many(columns_of_type)
            self.db.delete_many(document)
        self.db.insert_one(document)
        return type_id, 201

    def semantic_types_delete(self,
                              class_=None,
                              property_=None,
                              type_ids=None,
                              namespaces=None,
                              source_names=None,
                              column_names=None,
                              column_ids=None,
                              models=None,
                              delete_all=False):
        """
        Deletes all of the semantic types (and all of their columns/data) that fit the given parameters.

        Notes: When no filter is given at all, nothing is deleted unless delete_all is true; in that case every semantic type and every column is removed.

        :param class_:       The class of the semantic types to delete
        :param property_:    The property of the semantic types to delete
        :param type_ids:     The possible ids of the semantic types to delete
        :param namespaces:   The possible namespaces of the semantic types to delete
        :param source_names: The possible source names of at least one column of a semantic type must have
        :param column_names: The possible column names of at least one column of a semantic type must have
        :param column_ids:   The possible column ids of at least one column of a semantic type must have
        :param models:       The possible column model of at least one column of a semantic type must have
        :param delete_all:   Set this to true if all semantic types should be deleted
        :return: The amount of semantic types deleted and a 200 if it worked, otherwise an error message with the appropriate code (400 when no filter was given and delete_all is false, 404 when nothing matched)
        """
        no_filters = (class_ is None and property_ is None
                      and type_ids is None and namespaces is None
                      and source_names is None and column_names is None
                      and column_ids is None and models is None)
        if no_filters:
            if not delete_all:
                return "To delete all semantic types give deleteAll as true", 400
            # This delete-all path used to sit behind the return above and was
            # unreachable, so delete_all never removed the columns.
            deleted_everything = self.db.delete_many({
                DATA_TYPE: {
                    "$in": [DATA_TYPE_SEMANTIC_TYPE, DATA_TYPE_COLUMN]
                }
            }).deleted_count
            return "All " + str(
                deleted_everything
            ) + " semantic types and their data were deleted", 200

        # Debug output of the received filters (single parenthesised argument
        # prints the same text on Python 2 and 3)
        print(str(class_) + " " + str(property_) + " " + str(
            type_ids) + " " + str(namespaces) + " " + str(
                source_names) + " " + str(column_names) + " " + str(
                    column_ids) + " " + str(models) + " " + str(delete_all))

        # Find the parent semantic types and everything below them of everything which meets column requirements
        type_ids_to_delete = []
        db_body = {DATA_TYPE: DATA_TYPE_COLUMN}
        db_body_id = {DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE}
        if type_ids is not None:
            db_body[TYPE_ID] = {"$in": type_ids}
            db_body_id[ID] = {"$in": type_ids}

        if source_names is not None:
            db_body[SOURCE_NAME] = {"$in": source_names}
        if column_names is not None:
            db_body[COLUMN_NAME] = {"$in": column_names}
        if column_ids is not None:
            # NOTE(review): the other methods filter column ids on ID, this one on COLUMN_ID_PATH -- confirm which key the stored documents use
            db_body[COLUMN_ID_PATH] = {"$in": column_ids}
        if models is not None: db_body[MODEL] = {"$in": models}
        for col in self.db.find(db_body):
            print("col[TYPE_ID] = " + str(col[TYPE_ID]))
            if col[TYPE_ID] not in type_ids_to_delete:
                type_ids_to_delete.append(col[TYPE_ID])
        # NOTE(review): when type_ids is None this query matches every semantic type, so all of them become deletion candidates -- verify that is intended
        for col in self.db.find(db_body_id):
            print("col[ID] = " + str(col[ID]))
            if col[ID] not in type_ids_to_delete:
                type_ids_to_delete.append(col[ID])
        # Find the semantic types which meet the other requirements and delete all types which need to be
        possible_types = []
        db_body = {DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE}
        if class_ is not None: db_body[CLASS] = class_
        if property_ is not None: db_body[PROPERTY] = property_
        if namespaces is not None: db_body[NAMESPACE] = {"$in": namespaces}

        if type_ids is None and source_names is None and column_names is None and column_ids is None and models is None:
            # NOTE(review): only the semantic type documents are removed here; their columns are left in place -- confirm against the docstring
            deleted = self.db.delete_many(db_body).deleted_count
        else:
            for t in self.db.find(db_body):
                if t[ID] not in possible_types:
                    possible_types.append(t[ID])
            for t in self.db.find(db_body_id):
                if t[ID] not in possible_types:
                    possible_types.append(t[ID])
            # Build a new list instead of removing entries while iterating,
            # which skipped the element following every removed one.
            type_ids_to_delete = [
                id_ for id_ in type_ids_to_delete if id_ in possible_types
            ]
            db_body = {
                DATA_TYPE: DATA_TYPE_COLUMN,
                TYPE_ID: {
                    "$in": type_ids_to_delete
                }
            }
            self.db.delete_many(db_body)
            deleted = self.db.delete_many({
                DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE,
                ID: {
                    "$in": type_ids_to_delete
                }
            }).deleted_count
        if deleted < 1:
            return "No semantic types with the given parameters were found", 404
        return str(
            deleted
        ) + " semantic types matched parameters and were deleted", 200

    ################ SemanticTypesColumns ################

    def semantic_types_columns_get(self,
                                   type_id,
                                   column_ids=None,
                                   column_names=None,
                                   source_names=None,
                                   models=None,
                                   return_column_data=False):
        """
        Returns the columns of a semantic type that match the given filters.

        :param type_id:            The id of the semantic type
        :param column_ids:         The possible ids of the columns to be returned
        :param column_names:       The possible names of the columns to be returned
        :param source_names:       The possible source names of the columns to be returned
        :param models:             The possible models of the columns to be returned
        :param return_column_data: Passed on to clean_columns_output when building the response
        :return: The matching columns with a 200, or an error message with a 404 when none match
        """
        print(type_id)
        query = {DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: type_id}
        # Every filter that was supplied becomes an "$in" condition
        optional_filters = ((SOURCE_NAME, source_names),
                            (COLUMN_NAME, column_names),
                            (ID, column_ids),
                            (MODEL, models))
        for field, allowed in optional_filters:
            if allowed is not None:
                query[field] = {"$in": allowed}
        columns = list(self.db.find(query))
        if not columns:
            return "No columns matching the given parameters were found", 404
        cleaned = clean_columns_output(columns, return_column_data)
        return json_response(cleaned, 200)

    def semantic_types_columns_post_put(self,
                                        type_id,
                                        column_name,
                                        source_name,
                                        model,
                                        data=None,
                                        force=False):
        """
        Create a column in a semantic type, optionally with data.

        :param type_id:     Id of the semantic type to create the column in
        :param column_name: The name of the column to be created
        :param source_name: The name of the source of the column to be created
        :param model:       The model of the column to be created
        :param data:        The (optional) list of data to put into the column on creation; None means no data
        :param force:       True if the column should be replaced if it already exists
        :return: The id of the newly created column with a 201 if it was successful, otherwise an error message with the appropriate error code
        """
        # A None default replaces the former shared mutable default list.
        if data is None:
            data = []

        column = Column(column_name, source_name)
        column.semantic_type = type_id

        # If there is more training data than SAMPLE_SIZE, keep only a random sample of that size
        if len(data) > SAMPLE_SIZE:
            data = random.sample(data, SAMPLE_SIZE)

        for value in data:
            column.add_value(value)
        return self._create_column(column, type_id, column_name, source_name,
                                   model, force)

    def semantic_types_columns_delete(self,
                                      type_id,
                                      column_ids=None,
                                      column_names=None,
                                      source_names=None,
                                      models=None):
        """
        Deletes the columns of a semantic type that match the given filters.

        :param type_id:      The id of the semantic type to delete the columns from
        :param column_ids:   The possible ids of the columns to delete
        :param column_names: The possible names of the columns to delete
        :param source_names: The possible source names of the columns to delete
        :param models:       The possible models of the columns to delete
        :return: The number of columns deleted with a 200 if successful, otherwise an error message with a 404 when no column matches
        """
        query = {DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: type_id}
        # Every filter that was supplied becomes an "$in" condition
        optional_filters = ((SOURCE_NAME, source_names),
                            (COLUMN_NAME, column_names),
                            (ID, column_ids),
                            (MODEL, models))
        for field, allowed in optional_filters:
            if allowed is not None:
                query[field] = {"$in": allowed}
        # Look first so that "nothing matched" can be reported as a 404
        if not list(self.db.find(query)):
            return "No columns were found with the given parameters", 404
        removed = self.db.delete_many(query).deleted_count
        return str(removed) + " columns deleted successfully", 200

    ################ SemanticTypesColumnData ################

    def semantic_types_column_data_get(self, column_id):
        """
        Returns the column with the given id, including its data.

        :param column_id: Id of the column to get the data from
        :return: The cleaned column with a 200, a 404 message when no column has that id, or a 500 message when several do
        """
        query = {DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id}
        matches = list(self.db.find(query))
        if not matches:
            return "No column with that id was found", 404
        if len(matches) > 1:
            return "More than one column was found with that id", 500
        only_column = matches[0]
        return json_response(clean_column_output(only_column), 200)

    def semantic_types_column_data_post_put(self,
                                            column_id,
                                            body,
                                            force=False):
        """
        Add or replace data on an existing column

        Notes: If the column does not exist a 404 will be returned

        :param column_id: Id of the column to add/replace the data of
        :param body:      An array of the new data
        :param force:     True if the current data in the column should be replaced, false if the new data should just be appended
        :return: A confirmation with a 201 if it was added successfully or an error message with an appropriate error code if it was not successful (404 when no column has that id, 500 when several do)
        """
        query = {DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id}

        # find_one() yields a document or None and has no matched_count, so
        # the matches are listed and counted instead.
        matches = list(self.db.find(query))
        if len(matches) < 1:
            return "No column with that id was found", 404
        if len(matches) > 1:
            return "More than one column was found with that id", 500
        column_data = matches[0]

        column = Column(column_data[COLUMN_NAME], column_data[SOURCE_NAME],
                        get_type_from_column_id(column_id))
        if not force:
            # Appending: start from the values that are already stored
            column.read_json_to_column(column_data)

        for value in body:
            column.add_value(value)

        # The update call needs both a filter and an update document; the
        # column's JSON fields are written onto the stored document.
        # NOTE(review): assumes to_json() returns only fields meant to be stored on the column document -- confirm
        data = column.to_json()
        self.db.update_one(query, {"$set": data})

        return "Column data updated", 201

    def semantic_types_column_data_delete(self, column_id):
        """
        Delete the data from the column with the given id

        The column's data is first reset to an empty list; afterwards one
        column document with the same semantic type id and the column document
        with the given id are removed.

        :param column_id: Id of the column to delete the data from
        :return: A deletion confirmation with a 200 if successful, otherwise an error message with an appropriate error code
        """
        column_query = {DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id}

        result = self.db.update_many(column_query, {"$set": {DATA: []}})
        if result.matched_count < 1:
            return "No column with that id was found", 404
        if result.matched_count > 1:
            return "More than one column was found with that id", 500

        # NOTE(review): this removes a single column document that shares the
        # semantic type of column_id, which is not necessarily the column that
        # was requested - confirm this is intended.
        self.db.delete_one({
            DATA_TYPE: DATA_TYPE_COLUMN,
            TYPE_ID: get_type_from_column_id(column_id)
        })

        # NOTE(review): the docstring only promises to clear the data, yet the
        # column document itself is removed here as well - confirm.
        self.db.delete_one(column_query)
        return "Column data deleted", 200

    ################ BulkAddModels ################

    def bulk_add_models_get(self,
                            model_ids=None,
                            model_names=None,
                            model_desc=None,
                            show_all=False,
                            crunch_data=True):
        """
        Fetch the bulk add models that match the given filters.

        :param model_ids:   The possible ids of the models to get
        :param model_names: The possible names of the models to get
        :param model_desc:  The possible descriptions of the models to get
        :param show_all:    True if the whole model should be included in each entry
        :param crunch_data: When true the learned semantic types are regenerated (which also updates the copy in the db); when false the version stored in the db is returned as is
        :return: A JSON response with every matching model and a 200, or a 404 message when nothing matches
        """
        query = {DATA_TYPE: DATA_TYPE_MODEL}
        if model_ids is not None:
            query[ID] = {"$in": model_ids}
        if model_names is not None:
            query[NAME] = {"$in": model_names}
        if model_desc is not None:
            query[MODEL_DESC] = model_desc

        found = list(self.db.find(query))
        if not found:
            return "No models were found with the given parameters", 404

        # Build one ordered entry per model
        return_body = []
        for stored in found:
            entry = collections.OrderedDict([
                (MODEL_ID, stored[ID]),
                (NAME, stored[NAME]),
                (DESC, stored[DESC]),
            ])
            if show_all:
                if crunch_data:
                    entry[MODEL] = self._update_bulk_add_model(
                        stored[BULK_ADD_MODEL_DATA], stored[MODEL])
                else:
                    entry[MODEL] = stored[BULK_ADD_MODEL_DATA]
            return_body.append(entry)
        return json_response(return_body, 200)

    def bulk_add_models_post(self, model, column_model=DEFAULT_BULK_MODEL):
        """
        Register a bulk add model together with its semantic types and columns.

        :param model:        A dictionary of the model
        :param column_model: The model that all of the created columns should have
        :return: A summary of what was created with a 201, otherwise an error message with the appropriate code
        """
        #### Validate the model before touching the db
        required_keys = (
            (BAC_ID, "The given model must have an id"),
            (BAC_NAME, "The given model must have a name"),
            (BAC_DESC, "The given model must have a description"),
            (BAC_GRAPH, "The given model must have a graph"),
        )
        for key, complaint in required_keys:
            if key not in model:
                return complaint, 400
        if BAC_NODES not in model[BAC_GRAPH]:
            return "The given model must have nodes within the graph", 400
        if list(self.db.find({ID: model[BAC_ID]})):
            return "Model id already exists", 409

        #### Create the semantic types and columns described by the nodes
        new_type_count = 0
        existed_type_count = 0
        new_column_count = 0
        existed_column_count = 0
        for node in model[BAC_GRAPH][BAC_NODES]:
            if not node.get(BAC_USER_SEMANTIC_TYPES):
                continue
            for ust in node[BAC_USER_SEMANTIC_TYPES]:
                # 201 means created, 409 means it already existed, a 400 is
                # passed through untouched and anything else is a server error
                semantic_status = self.semantic_types_post_put(
                    ust[BAC_CLASS][BAC_URI], ust[BAC_PROPERTY][BAC_URI], False)
                type_code = semantic_status[1]
                if type_code == 201:
                    new_type_count += 1
                elif type_code == 409:
                    existed_type_count += 1
                else:
                    if type_code == 400:
                        return semantic_status
                    return "Error occurred while adding semantic type: " + str(
                        ust), 500

                type_id = get_type_id(ust[BAC_CLASS][BAC_URI],
                                      ust[BAC_PROPERTY][BAC_URI])
                column_status = self._create_column(
                    type_id, node[BAC_COLUMN_NAME], model[BAC_NAME],
                    column_model)
                column_code = column_status[1]
                if column_code == 201:
                    new_column_count += 1
                elif column_code == 409:
                    existed_column_count += 1
                else:
                    if column_code == 400:
                        return column_status
                    return "Error occurred while adding column for semantic type: " + str(
                        ust), 500

        #### Everything was created, so the model itself can be stored
        # NOTE(review): the id and description are read through literal keys
        # here while the validation above uses BAC_ID / BAC_DESC - confirm they
        # name the same fields.
        self.db.insert_one({
            DATA_TYPE: DATA_TYPE_MODEL,
            ID: model["id"],
            NAME: model[BAC_NAME],
            DESC: model["description"],
            MODEL: column_model,
            BULK_ADD_MODEL_DATA: model
        })
        summary = "".join([
            "Model and columns added, ",
            str(new_type_count), " semantic types created, ",
            str(existed_type_count), " semantic types already existed, ",
            str(new_column_count), " columns created, and ",
            str(existed_column_count), " columns already existed.",
        ])
        return summary, 201

    def bulk_add_models_delete(self,
                               model_ids=None,
                               model_names=None,
                               model_desc=None):
        """
        Remove every bulk add model that matches the given filters.

        :param model_ids:   The possible ids of the models to delete
        :param model_names: The possible names of the models to delete
        :param model_desc:  The possible descriptions of the models to delete
        :return: The number of models deleted with a 200, or a 404 message when no model matched
        """
        query = {DATA_TYPE: DATA_TYPE_MODEL}
        if model_ids is not None:
            query[ID] = {"$in": model_ids}
        if model_names is not None:
            query[NAME] = {"$in": model_names}
        if model_desc is not None:
            query[MODEL_DESC] = model_desc

        outcome = self.db.delete_many(query)
        removed = outcome.deleted_count
        if removed < 1:
            return "No models were found with the given parameters", 404
        return str(removed) + " models deleted successfully", 200

    ################ BulkAddModelData ################

    def bulk_add_model_data_get(self, model_id, crunch_data):
        """
        Fetch the current state of a single bulk add model.

        :param model_id:    The id of the model to get
        :param crunch_data: When true the learned semantic types are regenerated (which also updates the copy in the db); when false the version stored in the db is returned as is
        :return: The bulk add model as a JSON response with a 200, a 404 message when the id is unknown, or a 500 message when the id is not unique
        """
        matches = list(self.db.find({DATA_TYPE: DATA_TYPE_MODEL, ID: model_id}))
        if not matches:
            return "A model was not found with the given id", 404
        if len(matches) > 1:
            return "More than one model was found with the given id", 500

        stored = matches[0]
        if crunch_data:
            payload = self._update_bulk_add_model(stored[BULK_ADD_MODEL_DATA],
                                                  stored[MODEL])
        else:
            payload = stored[BULK_ADD_MODEL_DATA]
        return json_response(payload, 200)

    def bulk_add_model_data_post(self, model_id, column_model, data):
        """
        Feed rows of data into the columns that belong to a bulk add model.

        :param model_id:     The id of the model to add off of
        :param column_model: The model of the columns being used with that model
        :param data:         The list of dictionaries with all of the data to add
        :return: A confirmation message with a 201 if it was successful, otherwise an error message with the appropriate code
        """
        # Resolve the model; exactly one document is expected for the id
        matches = list(self.db.find({DATA_TYPE: DATA_TYPE_MODEL, ID: model_id}))
        if not matches:
            return "The given model was not found", 404
        if len(matches) > 1:
            return "More than one model was found with the id", 500
        model = matches[0][BULK_ADD_MODEL_DATA]

        for node in model[BAC_GRAPH][BAC_NODES]:
            # Collect this node's value from every row (only for nodes that
            # name a column)
            column_data = [
                line[node[BAC_COLUMN_NAME]] for line in data
                if node.get(BAC_COLUMN_NAME)
            ]

            if not node.get(BAC_USER_SEMANTIC_TYPES):
                continue
            # Append the collected values to the column of each semantic type
            for ust in node[BAC_USER_SEMANTIC_TYPES]:
                type_id = get_type_id(ust[BAC_CLASS][BAC_URI],
                                      ust[BAC_PROPERTY][BAC_URI])
                column_id = get_column_id(type_id, node[BAC_COLUMN_NAME],
                                          model[BAC_NAME], column_model)
                status = self.semantic_types_column_data_post_put(
                    column_id, column_data, False)[1]
                if status == 201:
                    continue
                if status == 404:
                    return "A required column was not found", 404
                return "Error occurred while adding data to the column", 500

        return "Data successfully added to columns", 201

Exemple #9

0

Afficher le fichier

    def post(self, file_id):
        """
        Store the text sent in the request as the content of a file.

        The JSON body's 'data' entry is written to the 'file_text' field of
        the document whose _id is file_id; the document is created when it
        does not exist yet (upsert).

        :param file_id: String form of the ObjectId identifying the file
        :return: An empty body with a 204
        """
        payload = request.get_json()
        new_text = payload['data']

        selector = {'_id': ObjectId(file_id)}
        change = {'$set': {'file_text': new_text}}
        files_collection.update_one(selector, change, upsert=True)
        return '', 204


# Serve FileServer under /<file_id>
api.add_resource(FileServer, '/<string:file_id>')

if __name__ == '__main__':
    if len(sys.argv) != 3:
        print('Supply an IP and Port')
    else:
        node_ip, node_port = sys.argv[1], sys.argv[2]

        # Announce this node to the directory service; presumably the
        # WERKZEUG_RUN_MAIN check limits this to the reloader's serving
        # process - verify.
        if os.environ.get('WERKZEUG_RUN_MAIN') == 'true':
            print('Initing node')
            requests.post(
                server_util.url_builder(DS_ADDR[0], DS_ADDR[1], 'config'),
                json={'ip': node_ip, 'port': node_port}
            )

        # Blocks until the server stops
        app.run(debug=True, host=node_ip, port=int(node_port))

        # The server has stopped: withdraw the node from the directory service
        requests.delete(
            server_util.url_builder(DS_ADDR[0], DS_ADDR[1], 'config'),
            json={'ip': node_ip, 'port': node_port}
        )

        # Catastrophic delete EVERYTHING if node goes down! (Purposeful)
        files_collection.delete_many({})

Exemple #10

0

Afficher le fichier

Fichier : spiderman.py Projet : YotamAlon/Crawl-and-PageRank

from scrapy.spiders import Spider
from scrapy.selector import Selector
from basic_crawler.items import BasicCrawlerItem
from scrapy.http import Request
import re
from pymongo import MongoClient

# Collection holding the crawled links; the spider below reaches it through
# ``global db``.  (A ``global`` statement at module level has no effect, so
# none is needed here.)
db = MongoClient().db.links
# PyMongo collections do not support truth-value testing (bool() raises
# NotImplementedError on PyMongo 4), and the original ``if not db`` test was
# inverted anyway.  Compare against None explicitly.
# NOTE(review): this only confirms that a collection handle was created, not
# that the server is reachable - the delete below is the first real round trip.
if db is not None:
    print('Connected to DB')

# Start every run from an empty collection
res = db.delete_many({})
if res.acknowledged:
    print('Clean successfull')
else:
    print('Clean unsuccessfull')

# Shared list the spider accesses through ``global visited_links``
visited_links = []


class MySpider(Spider):
    """Spider named ``basic_crawler`` that starts from the HMC fun-facts page."""

    # Spider identifier
    name = "basic_crawler"
    # Domains listed for this spider
    allowed_domains = ['math.hmc.edu']
    # Page(s) the crawl starts from
    start_urls = ["https://www.math.hmc.edu/funfacts/"]

    def parse(self, response):
        """
        Handle a downloaded page.

        NOTE(review): only the opening statements of this callback are visible
        in this excerpt; what is done with the selector and the url cannot be
        told from here.

        :param response: The response object for the downloaded page
        """
        # Module-level state shared across calls
        global db
        global visited_links
        # Selector over the response, and the url the response came from
        hxs = Selector(response)
        url = response.url

Exemple #11

0

Afficher le fichier

class TestDeviceDatabaseMongoDB(unittest.TestCase):
    """Checks the device database wrapper against a real MongoDB collection.

    ``self._database`` is the object under test (from ``tests_util``) and
    ``self._direct_database`` is a raw PyMongo handle on the
    ``Hestia.testing`` collection, used to arrange and verify state directly.
    """

    def setUp(self):
        self._database = tests_util.get_mongo_database()
        self._direct_database = MongoClient()["Hestia"]["testing"]

    def tearDown(self):
        # Leave an empty collection behind for the next test
        self._direct_database.delete_many({})

    def _insert_device(self, device_data=None):
        """Insert a device under a fresh string id and return its data.

        :param device_data: Device dictionary to insert; a new one is built
            when omitted. Its "_id" is overwritten with a new id.
        """
        if device_data is None:
            device_data = self._get_device_data()
        device_data["_id"] = str(ObjectId())
        self._direct_database.insert_one(device_data)
        return device_data

    def _count_devices(self):
        """Return the number of documents in the test collection.

        Uses count_documents() because Collection.count() was removed in
        PyMongo 4.
        """
        return self._direct_database.count_documents({})

    def test_get_all_devices(self):
        device_data = self._insert_device()

        retrieved_devices = self._database.get_all_devices()

        self.assertEqual(1, len(retrieved_devices))

        # Same payload again under a new id
        self._insert_device(device_data)

        retrieved_devices = self._database.get_all_devices()

        self.assertEqual(2, len(retrieved_devices))

    def test_get_device(self):
        device_data = self._insert_device()

        device = self._database.get_device(device_data["_id"])

        self.assertEqual(device_data["name"], device.name)
        self.assertIsInstance(device, Device)

    def test_add_device(self):
        device_data = self._get_device_data()
        initial_count = self._count_devices()

        self._database.add_device(device_data)

        self.assertEqual(initial_count + 1, self._count_devices())

    def test_delete_device(self):
        device_data = self._insert_device()
        initial_count = self._count_devices()

        self._database.delete_device(device_data["_id"])

        self.assertEqual(initial_count - 1, self._count_devices())

    def test_update_field(self):
        device_data = self._insert_device()
        new_name = "Hestia"

        self._database.update_field(device_data["_id"], "name", new_name)

        device = self._direct_database.find_one({"_id": device_data["_id"]})
        self.assertEqual(device["name"], new_name)

    def test_get_field(self):
        device_data = self._insert_device()

        name = self._database.get_field(device_data["_id"], "name")

        self.assertEqual(device_data["name"], name)

    def test_get_activator_field(self):
        device_data = self._insert_device()

        activators = self._direct_database.find_one(
            {"_id": device_data["_id"]})["activators"]
        act_id = list(activators.keys())[0]

        activator_name = self._database.get_activator_field(
            device_data["_id"], act_id, "name")

        real_name = device_data["activators"][act_id]["name"]

        self.assertEqual(real_name, activator_name)

    def test_update_activator_field(self):
        device_data = self._insert_device()

        activators = self._direct_database.find_one(
            {"_id": device_data["_id"]})["activators"]
        act_id = list(activators.keys())[0]

        new_name = "new_name"
        self._database.update_activator_field(device_data["_id"], act_id,
                                              "name", new_name)

        device_in_db = self._direct_database.find_one(
            {"_id": device_data["_id"]})
        activator_name_in_db = device_in_db["activators"][act_id]["name"]

        self.assertEqual(activator_name_in_db, new_name)

    def _get_device_data(self):
        """Build a mock lock device whose activators are keyed by fresh ids."""
        activators = [{
            "module": "plugins.mock.activators.ActivateLock",
            "rank": 0,
            "class": "ActivateLock",
            "name": "Activate",
            "type": "bool",
            "state": True
        }]
        return {
            "module": "plugins.mock.devices.lock.Lock",
            "class": "Lock",
            "type": "Lock",
            "name": "TestDevice",
            "options": {
                "bridge_ip": "127.0.2.1",
                "bridge_port": 90
            },
            # Stored as a mapping from a generated id to the activator
            "activators": {
                str(ObjectId()): activator
                for activator in activators
            },
        }

Exemple #12

0

Afficher le fichier

class DataBase:
    """Questions and their answers stored in one MongoDB collection.

    Each document has the form
    ``{'_id': <int>, 'question': <str>, 'answers': [<str>, ...]}``
    (the shape written by :meth:`add`).
    """

    def __init__(self, name: str, auth_data: dict):
        """Connect to the collection configured for *name*.

        :param name: Key into ``auth_data['questionnaire']`` that selects the
            collection name
        :param auth_data: Dictionary with the keys 'user', 'password' and
            'questionnaire'
        """
        db_name = auth_data['questionnaire'][name]
        user = auth_data['user']
        password = auth_data['password']

        appeal = f'mongodb+srv://{user}:{password}@cluster0.sonqc.mongodb.net/{db_name}?retryWrites=true&w=majority'

        self.db = MongoClient(appeal)['questionnaire'][db_name]

    def _get_last_id(self):
        """Return the highest question id in use, or -1 when there is none.

        The highest stored ``_id`` is used rather than the document count: once
        a question has been removed the count falls behind the highest id, and
        ``count`` would then hand out an id that is already taken.
        NOTE(review): assumes every ``_id`` is an int, as written by add().
        """
        newest = self.db.find_one(sort=[('_id', -1)])
        return newest['_id'] if newest is not None else -1

    def get_list_data(self):
        """Return every document of the collection as a list."""
        return list(self.db.find())

    def get_questions_ids(self):
        """Return the ids of all stored questions."""
        return [el['_id'] for el in self.get_list_data()]

    def remove_all_data(self):
        """Delete every document and return the delete result."""
        return self.db.delete_many({})

    def remove_questions(self, *ids):
        """Delete the questions with the given ids; returns one result per id."""
        return [self.db.delete_one({'_id': id_}) for id_ in ids]

    def _remove_arg(self, question_id, arg):
        """Pull the answer value *arg* out of a question's answer list."""
        return self.db.update_one({
            '_id': question_id
        }, {
            '$pull': {'answers': arg}
        })

    def add(self, data: str, question_id: int = None):
        """Add a question, or an answer to an existing question.

        :param data: Question text when *question_id* is None, otherwise the
            answer text
        :param question_id: Id of the question to append the answer to
        """
        if question_id is None:
            last_id = self._get_last_id()
            post = {'_id': last_id + 1, 'question': data, 'answers': []}

            self.db.insert_one(post)
            return

        self.db.update_one({'_id': question_id}, {'$push': {'answers': data}})

    def show_all(self, file_name: str):
        """Dump every document to ``<file_name>.csv`` (';' separated).

        The header is taken from the keys of the first document.
        """
        rows = self.get_list_data()
        columns = [*rows[0].keys()] if rows else []

        with open(f'{file_name}.csv', 'w', newline='') as file:
            writer = csv.DictWriter(file, delimiter=';', fieldnames=columns)
            writer.writeheader()
            writer.writerows(rows)

    def show_ans(self, question_id: int):
        """Dump the answers of one question to ``<question_id>.csv``.

        Each row holds the answer's position in the list and its text.
        """
        file_name = str(question_id)

        columns = ['answer_id', 'answer']
        rows = enumerate(self.db.find_one({'_id': question_id})['answers'])

        with open(f'{file_name}.csv', 'w', newline='') as file:
            writer = csv.writer(file, delimiter=';')
            writer.writerow(columns)
            writer.writerows(rows)

    def remove_answers(self, question_id: int, *ids: int):
        """Remove answers of a question, addressed by their list positions.

        Does nothing when the question does not exist.

        :param question_id: Id of the question whose answers are removed
        :param ids: Positions (as shown by show_ans) of the answers to remove
        """
        # Fetch only the question concerned instead of the whole collection
        question = self.db.find_one({'_id': question_id})
        if question is None:
            return

        # Positions are resolved against this snapshot, so they stay valid
        # while answers are being removed one after another.
        answers = question['answers']
        for id_ in ids:
            answer = answers[id_]
            # Removal is by value ($pull)
            self._remove_arg(question_id, answer)

Exemple #13

0

Afficher le fichier

                  charset='utf8')
    cursor = cnx.cursor(dictionary=True)

    # shop_ids = [12988, 12382, 11077, 12823, 10377, 15081, 2397]
    shop_ids = [17065, 17066]
    sql = 'select s.*, o.tel ' \
          'from f_shop.shop s ' \
          'inner join f_shop.shop_owner o on s.shop_owner_id = o.id ' \
          'where s.id in (%s)' % ','.join(['%s'] * len(shop_ids))
    print(sql)
    cursor.execute(sql, shop_ids)

    mc = MongoClient(host='123.56.117.75', port=27017)['profile']['shop']

    # 清理旧数据
    mc.delete_many({'deprecated.id': {'$in': shop_ids}})
    new_shop_records = [{
        'tel': record['tel'],
        'password': record['password'],
        'name': record['shop_name'],
        'avatar': record['avatar'],
        'status': 'STATUS_INIT',
        'create_time': record['create_time'],
        'accounts': [],
        'loc': {
            'province': record['province'],
            'province_code': record['province_code'],
            'district': record['district'],
            'city': record['city'],
            'address': ifnull(record['address']),
            'street_code': record['street_code'],

Exemple #14

0

Afficher le fichier

Fichier : serverLogic.py Projet : usc-isi-i2/SemanticLabelingService

class Server(object):
    def __init__(self):
        """
        Connect to the service collection and set up the classifier.
        """
        mongo = MongoClient()
        self.db = mongo.data.service

        # The classifier is trained once, with an empty training list
        self.classifier = MyRandomForest({}, {}, DATA_MODEL_PATH)
        self.classifier.train([])

    ################ Stuff for use in this file ################

    def _create_column(self, column, type_id, column_name, source_name, model, force=False):
        """
        Store a new column document for a semantic type.

        Notes: If the column already exists and force is not set to true, a 409 will be returned and no data will be modified.

        :param column:      Column object whose to_json() output is merged into the stored document
        :param type_id:     Id of the semantic type this column belongs to
        :param column_name: Name of the column to be created
        :param source_name: Name of the source of the column to be created
        :param model:       Model of the column to be created
        :param force:       Force create the column, if this is true and the column exists the old column will be deleted (with all of its data) before creation
        :return: The id of the new column and a response code of 201 if the creation was successful, otherwise it will be an error message with the appropriate error code
        """
        column_id = get_column_id(type_id, column_name, source_name, model)
        document = {
            ID: column_id,
            DATA_TYPE: DATA_TYPE_COLUMN,
            TYPE_ID: type_id,
            COLUMN_NAME: column_name,
            SOURCE_NAME: source_name,
            MODEL: model,
        }

        already_there = self.db.find_one(document)
        if already_there and not force:
            return "Column already exists", 409
        if already_there:
            # Forced creation: drop the old column before inserting the new one
            self.db.delete_many(document)

        document.update(column.to_json())
        self.db.insert_one(document)
        return column_id, 201

    def _predict_column(self, column_name, source_names, data):
        """
        Run the semantic labeler over one column of values.

        :param column_name:  Name of the column
        :param source_names: List of source names (the first one is used as the column's source)
        :param data:         The values to base the prediction on
        :return: The result of the column's predict_type call
        """
        candidate = Column(column_name, source_names[0])
        for value in data:
            candidate.add_value(value)
        candidate.semantic_type = "to_predict"
        candidate.prepare_data()

        types_data = searcher.search_types_data(INDEX_NAME, source_names)
        similar_text_data = searcher.search_similar_text_data(
            INDEX_NAME, candidate.value_text, source_names)
        return candidate.predict_type(types_data, similar_text_data,
                                      self.classifier)

    def _update_bulk_add_model(self, model, column_model):
        """
        Refresh the learned semantic types of a bulk add model, store it and return it.

        :param model:        The current bulk add model (modified in place)
        :param column_model: The model of the columns which are being updated against
        :return: The updated bulk add model
        """
        for node in model[BAC_GRAPH][BAC_NODES]:
            column_name = node.get(BAC_COLUMN_NAME)
            # Nodes without a column name and the file-name column are left alone
            if not column_name or column_name == BAC_COLUMN_NAME_FILE_NAME:
                continue

            # The column is identified through the node's first user semantic type
            first_type = node[BAC_USER_SEMANTIC_TYPES][0]
            type_id = get_type_id(first_type[BAC_CLASS][BAC_URI],
                                  first_type[BAC_PROPERTY][BAC_URI])
            column_id = get_column_id(type_id, column_name, model[BAC_NAME],
                                      column_model)

            stored_column = self.db.find_one({DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id})
            prediction = self._predict_column(column_name, [model[BAC_NAME]],
                                              stored_column[DATA])

            learned = node[BAC_LEARNED_SEMANTIC_TYPES] = []
            for predicted in prediction:
                type_info = decode_type_id(predicted[SL_SEMANTIC_TYPE])
                learned.append(collections.OrderedDict([
                    (BAC_CLASS, {BAC_URI: type_info[0]}),
                    (BAC_PROPERTY, {BAC_URI: type_info[1]}),
                    (BAC_CONFIDENCE_SCORE, predicted[SL_CONFIDENCE_SCORE]),
                ]))

        # Persist the refreshed model
        self.db.update_one({DATA_TYPE: DATA_TYPE_MODEL, ID: model[BAC_ID]},
                           {"$set": {BULK_ADD_MODEL_DATA: model}})
        return model

    ################ Predict ################

    def predict_post(self, data, namespaces=None, column_names=None, source_names=None, models=None):
        """
        Predict the semantic type of the given values and filter the results.

        :param data:         List of the data values to predict
        :param namespaces:   List of allowed namespaces
        :param column_names: List of allowed column names (the first entry is used as the column name for the prediction)
        :param source_names: List of allowed source names; when omitted every source name found in the db is used
        :param models:       List of allowed column models
        :return: A return message (if it is successful this will be a list of the predicted types) and a return code
        """
        # Trim every value and drop the ones that end up empty
        data = [value for value in (raw.strip() for raw in data) if value]
        if not data:
            return "Predicting data cannot be empty", 500

        if source_names is None:
            # Fall back to every distinct source name stored in the db
            source_names = list({
                col[SOURCE_NAME]
                for col in self.db.find({DATA_TYPE: DATA_TYPE_COLUMN})
            })
        if len(source_names) < 1:
            return "You must have columns to be able to predict", 400

        #### Predict the types
        # NOTE(review): column_names defaults to None but is indexed
        # unconditionally here, so callers must always supply it.
        predictions = self._predict_column(column_names[0], source_names, data)
        if len(predictions) < 1:
            return "No matches found", 404

        #### Work out which type ids are allowed
        namespace_ids = None
        if namespaces is not None:
            namespace_ids = {
                type_[ID]
                for type_ in self.db.find({DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE, NAMESPACE: {"$in": namespaces}})
            }
        model_ids = None
        if models:
            model_ids = {
                col[TYPE_ID]
                for col in self.db.find({DATA_TYPE: DATA_TYPE_COLUMN, MODEL: {"$in": models}})
            }

        if namespace_ids is not None and model_ids is not None:
            all_allowed_ids = namespace_ids & model_ids
        elif namespace_ids is not None:
            all_allowed_ids = namespace_ids
        else:
            # Either the model restriction alone, or None for "no restriction"
            all_allowed_ids = model_ids

        #### Build the response
        return_body = []
        for prediction in predictions:
            print(prediction)
            for type_id, exact_score in prediction[1]:
                if all_allowed_ids is not None and prediction[SL_SEMANTIC_TYPE] not in all_allowed_ids:
                    continue
                type_class_property = decode_type_id(type_id)
                return_body.append({
                    TYPE_ID_PATH: type_id,
                    SCORE: exact_score,
                    CLASS: type_class_property[0],
                    PROPERTY: type_class_property[1],
                })

        # Best score first
        return_body.sort(key=lambda entry: entry[SCORE], reverse=True)
        return json_response(return_body, 200)

    ################ SemanticTypes ################

    def semantic_types_get(self, class_=None, property_=None, namespaces=None, source_names=None, column_names=None,
                           column_ids=None, models=None, return_columns=False, return_column_data=False):
        """
        List the semantic types (optionally with their columns and the columns' data) that match the given filters.

        :param class_:             The class of the semantic types to get
        :param property_:          The property of the semantic types to get
        :param namespaces:         The possible namespaces of the semantic types to get
        :param source_names:       The possible source names of at least one column of a semantic type must have
        :param column_names:       The possible column names of at least one column of a semantic type must have
        :param column_ids:         The possible column ids of at least one column of a semantic type must have
        :param models:             The possible column model of at least one column of a semantic type must have
        :param return_columns:     True if all of the columns should be returned with the semantic types
        :param return_column_data: Passed on to clean_columns_output; true if the columns' data should be returned as well
        :return: A JSON response with the matching semantic types and a 200, or a 404 message when nothing matches
        """
        # Semantic types that satisfy the class, property and namespace filters
        type_query = {DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE}
        if class_ is not None:
            type_query[CLASS] = class_
        if property_ is not None:
            type_query[PROPERTY] = property_
        if namespaces is not None:
            type_query[NAMESPACE] = {"$in": namespaces}
        possible_result = list(self.db.find(type_query))
        possible_type_ids = {type_[ID] for type_ in possible_result}

        # Narrow down to the types owning at least one column that satisfies the column filters
        if source_names or column_names or column_ids or models:
            column_query = {DATA_TYPE: DATA_TYPE_COLUMN}
            if source_names is not None:
                column_query[SOURCE_NAME] = {"$in": source_names}
            if column_names is not None:
                column_query[COLUMN_NAME] = {"$in": column_names}
            if column_ids is not None:
                column_query[ID] = {"$in": column_ids}
            if models is not None:
                column_query[MODEL] = {"$in": models}
            ids_from_columns = {col[TYPE_ID] for col in self.db.find(column_query)}
            possible_type_ids = possible_type_ids & ids_from_columns

        # One ordered entry per surviving semantic type
        return_body = []
        for type_ in possible_result:
            if type_[ID] not in possible_type_ids:
                continue
            return_body.append(collections.OrderedDict([
                (TYPE_ID_PATH, type_[ID]),
                (CLASS, type_[CLASS]),
                (PROPERTY, type_[PROPERTY]),
                (NAMESPACE, type_[NAMESPACE]),
            ]))

        if len(return_body) < 1:
            return "No Semantic types matching the given parameters were found", 404

        # Attach the columns when asked for
        if return_columns:
            columns_query = {DATA_TYPE: DATA_TYPE_COLUMN}
            for entry in return_body:
                columns_query[TYPE_ID] = entry[TYPE_ID_PATH]
                entry[COLUMNS] = clean_columns_output(self.db.find(columns_query), return_column_data)

        return json_response(return_body, 200)

    def semantic_types_post_put(self, class_, property_, force=False):
        """
        Register a semantic type and hand back its id.

        Notes: When the type is already stored and force is false, nothing is modified and a 409 is returned

        :param class_:    The class of the semantic type (trailing slashes are stripped before use)
        :param property_: The property of the semantic type (trailing slashes are stripped before use)
        :param force:     If true and the type is already stored, the stored type and every column that references it are deleted before the type is inserted again
        :return: The type id with a 201 when the type was inserted, or the type id with a 409 when it already existed and force was false
        """
        class_ = class_.rstrip("/")
        property_ = property_.rstrip("/")

        # The namespace is whatever precedes the last segment of the class once "#" is treated like "/"
        namespace = class_.replace("#", "/").rpartition("/")[0]

        type_id = get_type_id(class_, property_)
        type_document = {ID: type_id, DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE, CLASS: class_, PROPERTY: property_,
                         NAMESPACE: namespace}

        already_stored = self.db.find_one(type_document)
        if already_stored and not force:
            return type_id, 409
        if already_stored:
            # Forced re-creation: drop the columns hanging off the type first, then the type itself
            self.db.delete_many({DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: type_id})
            self.db.delete_many(type_document)

        self.db.insert_one(type_document)
        return type_id, 201

    def semantic_types_delete(self, class_=None, property_=None, type_ids=None, namespaces=None, source_names=None,
                              column_names=None, column_ids=None, models=None, delete_all=False):
        """
        Deletes all of the semantic types (and all of their columns/data) that fit the given parameters.

        A semantic type is deleted when it satisfies every type-level filter that was given (class, property,
        type ids, namespaces) and, if any column-level filter was given (source names, column names, column ids,
        models), it also owns at least one column satisfying all of those column-level filters.

        :param class_:       The class of the semantic types to delete
        :param property_:    The property of the semantic types to delete
        :param type_ids:     The possible ids of the semantic types to delete
        :param namespaces:   The possible namespaces of the semantic types to delete
        :param source_names: The possible source names of at least one column of a semantic type must have
        :param column_names: The possible column names of at least one column of a semantic type must have
        :param column_ids:   The possible column ids of at least one column of a semantic type must have
        :param models:       The possible column model of at least one column of a semantic type must have
        :param delete_all:   Set this to true if all semantic types should be deleted; only consulted when no other filter is given
        :return: The amount of semantic types deleted and a 200 if it worked, a 400 if no filter was given and
                 delete_all is false, or a 404 if nothing matched
        """
        has_type_filter = any(f is not None for f in (class_, property_, type_ids, namespaces))
        has_column_filter = any(f is not None for f in (source_names, column_names, column_ids, models))

        # Without any filter the caller must opt in explicitly before everything is wiped
        if not has_type_filter and not has_column_filter:
            if not delete_all:
                return "To delete all semantic types give deleteAll as true", 400
            return "All " + str(self.db.delete_many({DATA_TYPE: {"$in": [DATA_TYPE_SEMANTIC_TYPE,
                                                                         DATA_TYPE_COLUMN]}}).deleted_count) + " semantic types and their data were deleted", 200

        # Semantic types which satisfy the type-level filters
        db_body = {DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE}
        if class_ is not None: db_body[CLASS] = class_
        if property_ is not None: db_body[PROPERTY] = property_
        if namespaces is not None: db_body[NAMESPACE] = {"$in": namespaces}
        if type_ids is not None: db_body[ID] = {"$in": type_ids}
        type_ids_to_delete = set(t[ID] for t in self.db.find(db_body))

        # Narrow down to the types owning at least one column which satisfies the column-level filters
        if has_column_filter:
            db_body = {DATA_TYPE: DATA_TYPE_COLUMN}
            if type_ids is not None: db_body[TYPE_ID] = {"$in": type_ids}
            if source_names is not None: db_body[SOURCE_NAME] = {"$in": source_names}
            if column_names is not None: db_body[COLUMN_NAME] = {"$in": column_names}
            # Columns are looked up by ID in the other column queries of this class, so do the same here
            if column_ids is not None: db_body[ID] = {"$in": column_ids}
            if models is not None: db_body[MODEL] = {"$in": models}
            type_ids_to_delete &= set(col[TYPE_ID] for col in self.db.find(db_body))

        if not type_ids_to_delete: return "No semantic types with the given parameters were found", 404

        # Delete the columns first so that no column is left pointing at a type which no longer exists
        ids = list(type_ids_to_delete)
        self.db.delete_many({DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: {"$in": ids}})
        deleted = self.db.delete_many({DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE, ID: {"$in": ids}}).deleted_count
        if deleted < 1: return "No semantic types with the given parameters were found", 404
        return str(deleted) + " semantic types matched parameters and were deleted", 200

    ################ SemanticTypesColumns ################

    def semantic_types_columns_get(self, type_id, column_ids=None, column_names=None, source_names=None, models=None,
                                   return_column_data=False):
        """
        Look up the columns of one semantic type, optionally narrowed by further filters.

        :param type_id:            The id of the semantic type whose columns are queried
        :param column_ids:         If given, only columns whose id is in this list are returned
        :param column_names:       If given, only columns whose name is in this list are returned
        :param source_names:       If given, only columns whose source name is in this list are returned
        :param models:             If given, only columns whose model is in this list are returned
        :param return_column_data: Forwarded to clean_columns_output together with the matching columns
        :return: A 200 json response built from the matching columns, or an error message with a 404 if none matched
        """
        print(type_id)
        query = {DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: type_id}
        optional_filters = (
            (SOURCE_NAME, source_names),
            (COLUMN_NAME, column_names),
            (ID, column_ids),
            (MODEL, models),
        )
        for field, allowed_values in optional_filters:
            if allowed_values is not None:
                query[field] = {"$in": allowed_values}

        matches = list(self.db.find(query))
        if not matches:
            return "No columns matching the given parameters were found", 404
        return json_response(clean_columns_output(matches, return_column_data), 200)

    def semantic_types_columns_post_put(self, type_id, column_name, source_name, model, data=None, force=False):
        """
        Create a column in a semantic type, optionally with data.

        :param type_id:     Id of the semantic type to create the column in
        :param column_name: The name of the column to be created
        :param source_name: The name of the source of the column to be created
        :param model:       The model of the column to be created
        :param data:        The (optional) list of data to put into the column on creation; None is treated as no data
        :param force:       True if the column should be replaced if it already exists
        :return: Whatever self._create_column returns for the built column
        """
        # A None sentinel replaces the former shared mutable default list
        if data is None:
            data = []

        column = Column(column_name, source_name)
        column.semantic_type = type_id

        # If there is more training data than SAMPLE_SIZE, keep only a random sample of SAMPLE_SIZE values
        if len(data) > SAMPLE_SIZE:
            data = random.sample(data, SAMPLE_SIZE)

        for value in data:
            column.add_value(value)
        return self._create_column(column, type_id, column_name, source_name, model, force)

    def semantic_types_columns_delete(self, type_id, column_ids=None, column_names=None, source_names=None,
                                      models=None):
        """
        Delete all of the columns in a semantic type that match the given parameters.

        :param type_id:      The id of the semantic type to delete the columns from
        :param column_ids:   The possible ids of the columns to delete
        :param column_names: The possible names of the columns to delete
        :param source_names: The possible source names of the columns to delete
        :param models:       The possible models of the columns to delete
        :return: The number of columns deleted with a 200 if successful, otherwise an error message with a 404 if no column matched
        """
        db_body = {DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: type_id}
        if source_names is not None: db_body[SOURCE_NAME] = {"$in": source_names}
        if column_names is not None: db_body[COLUMN_NAME] = {"$in": column_names}
        if column_ids is not None: db_body[ID] = {"$in": column_ids}
        if models is not None: db_body[MODEL] = {"$in": models}

        # The delete result already says whether anything matched, so no separate lookup is needed
        deleted_count = self.db.delete_many(db_body).deleted_count
        if deleted_count < 1: return "No columns were found with the given parameters", 404
        return str(deleted_count) + " columns deleted successfully", 200

    ################ SemanticTypesColumnData ################

    def semantic_types_column_data_get(self, column_id):
        """
        Fetch the single column stored under the given id.

        :param column_id: Id of the column to look up
        :return: A 200 json response built from the column, a 404 message if no column has that id, or a 500 message if several do
        """
        matches = list(self.db.find({DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id}))
        match_count = len(matches)
        if match_count < 1:
            return "No column with that id was found", 404
        if match_count > 1:
            return "More than one column was found with that id", 500
        only_column = matches[0]
        return json_response(clean_column_output(only_column), 200)

    def semantic_types_column_data_post_put(self, column_id, body, force=False):
        """
        Add or replace data on an existing column

        Notes: If the column does not exist a 404 will be returned

        :param column_id: Id of the column to add/replace the data of
        :param body:      An array of the new data
        :param force:     True if the current data in the column should be replaced, false if the new data should just be appended
        :return: A confirmation with a 201 if it was added successfully or an error message with an appropriate error code if it was not successful
        """
        column_filter = {DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id}

        # find_one() yields a document or None and carries no match count, so count the matches explicitly
        matches = list(self.db.find(column_filter))
        if len(matches) < 1: return "No column with that id was found", 404
        if len(matches) > 1: return "More than one column was found with that id", 500
        column_data = matches[0]

        column = Column(column_data[COLUMN_NAME], column_data[SOURCE_NAME], get_type_from_column_id(column_id))
        if not force:
            # Appending: start from what is already stored for the column
            column.read_json_to_column(column_data)

        for value in body:
            column.add_value(value)

        # update_many() needs both a filter and an update document.
        # NOTE(review): assumes Column.to_json() returns a dict of field -> value suitable for "$set" - confirm
        data = column.to_json()
        self.db.update_many(column_filter, {"$set": data})

        return "Column data updated", 201

    def semantic_types_column_data_delete(self, column_id):
        """
        Clear the data of the column with the given id and then remove column documents from the database.

        :param column_id: Id of the column to delete the data from
        :return: A deletion confirmation with a 200 if successful, a 404 message if no column has that id, or a 500 message if several do
        """
        column_filter = {DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id}

        result = self.db.update_many(column_filter, {"$set": {DATA: []}})
        if result.matched_count < 1: return "No column with that id was found", 404
        if result.matched_count > 1: return "More than one column was found with that id", 500

        # NOTE(review): this removes ONE column document of the same semantic type, which is not
        # necessarily the column identified by column_id - confirm this is intended
        self.db.delete_one({DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: get_type_from_column_id(column_id)})

        # NOTE(review): the column document itself is removed here, not just its data - confirm against the API contract
        self.db.delete_one(column_filter)
        return "Column data deleted", 200

    ################ BulkAddModels ################

    def bulk_add_models_get(self, model_ids=None, model_names=None, model_desc=None, show_all=False, crunch_data=True):
        """
        List the stored bulk add models that satisfy the given filters.

        :param model_ids:   If given, only models whose id is in this list are returned
        :param model_names: If given, only models whose name is in this list are returned
        :param model_desc:  If given, only models whose description equals this value are returned
        :param show_all:    True if the model itself should be included in each returned entry
        :param crunch_data: When show_all is true: if true the model is passed through self._update_bulk_add_model, if false the stored model data is returned as is
        :return: A 200 json response with one entry per matching model, or an error message with a 404 if none matched
        """
        query = {DATA_TYPE: DATA_TYPE_MODEL}
        if model_ids is not None:
            query[ID] = {"$in": model_ids}
        if model_names is not None:
            query[NAME] = {"$in": model_names}
        if model_desc is not None:
            query[MODEL_DESC] = model_desc

        stored_models = list(self.db.find(query))
        if not stored_models:
            return "No models were found with the given parameters", 404

        def summarize(mod):
            # Id, name and description are always reported; the model only on request
            summary = collections.OrderedDict([(MODEL_ID, mod[ID]), (NAME, mod[NAME]), (DESC, mod[DESC])])
            if show_all:
                if crunch_data:
                    summary[MODEL] = self._update_bulk_add_model(mod[BULK_ADD_MODEL_DATA], mod[MODEL])
                else:
                    summary[MODEL] = mod[BULK_ADD_MODEL_DATA]
            return summary

        return json_response([summarize(mod) for mod in stored_models], 200)

    def bulk_add_models_post(self, model, column_model=DEFAULT_BULK_MODEL):
        """
        Add a bulk add model.

        For every user semantic type on every node of the model graph the semantic type is created (if it does
        not exist yet) and a column for the node is created in it; afterwards the model itself is stored.

        :param column_model: The model that all of the created columns should have
        :param model:        A dictionary of the model
        :return: Stats of the data added with a 201, a 400 message if a required element is missing, a 409 message
                 if the model id is already used, or the failing status of a semantic type/column creation
        """
        #### Assert the required elements exist
        if BAC_ID not in model: return "The given model must have an id", 400
        if BAC_NAME not in model: return "The given model must have a name", 400
        if BAC_DESC not in model: return "The given model must have a description", 400
        if BAC_GRAPH not in model: return "The given model must have a graph", 400
        if BAC_NODES not in model[BAC_GRAPH]: return "The given model must have nodes within the graph", 400
        if self.db.find_one({ID: model[BAC_ID]}) is not None: return "Model id already exists", 409

        #### Parse and add the model
        # Try to add all of the given semantic types and columns
        new_type_count = 0
        new_column_count = 0
        existed_type_count = 0
        existed_column_count = 0
        for n in model[BAC_GRAPH][BAC_NODES]:
            for ust in n.get(BAC_USER_SEMANTIC_TYPES) or []:
                class_uri = ust[BAC_CLASS][BAC_URI]
                property_uri = ust[BAC_PROPERTY][BAC_URI]

                semantic_status = self.semantic_types_post_put(class_uri, property_uri, False)
                if semantic_status[1] == 201:
                    new_type_count += 1
                elif semantic_status[1] == 409:
                    existed_type_count += 1
                elif semantic_status[1] == 400:
                    return semantic_status
                else:
                    return "Error occurred while adding semantic type: " + str(ust), 500

                # Build the column the same way semantic_types_columns_post_put does before handing it over.
                # NOTE(review): _create_column is defined outside this view; the argument list mirrors the
                # call made in semantic_types_columns_post_put - confirm against its signature
                type_id = get_type_id(class_uri, property_uri)
                column = Column(n[BAC_COLUMN_NAME], model[BAC_NAME])
                column.semantic_type = type_id
                column_status = self._create_column(column, type_id, n[BAC_COLUMN_NAME], model[BAC_NAME],
                                                    column_model, False)
                if column_status[1] == 201:
                    new_column_count += 1
                elif column_status[1] == 409:
                    existed_column_count += 1
                elif column_status[1] == 400:
                    return column_status
                else:
                    return "Error occurred while adding column for semantic type: " + str(ust), 500

        # Nothing bad happened when creating the semantic types and columns, so add the model to the DB.
        # The same BAC_* keys that were validated above are used to read the id and description.
        self.db.insert_one(
            {DATA_TYPE: DATA_TYPE_MODEL, ID: model[BAC_ID], NAME: model[BAC_NAME], DESC: model[BAC_DESC],
             MODEL: column_model, BULK_ADD_MODEL_DATA: model})
        return "Model and columns added, " + str(new_type_count) + " semantic types created, " + \
               str(existed_type_count) + " semantic types already existed, " + \
               str(new_column_count) + " columns created, and " + \
               str(existed_column_count) + " columns already existed.", 201

    def bulk_add_models_delete(self, model_ids=None, model_names=None, model_desc=None):
        """
        Remove every stored bulk add model that satisfies the given filters.

        :param model_ids:   If given, only models whose id is in this list are deleted
        :param model_names: If given, only models whose name is in this list are deleted
        :param model_desc:  If given, only models whose description equals this value are deleted
        :return: The number of models deleted with a 200, or an error message with a 404 if nothing was deleted
        """
        query = {DATA_TYPE: DATA_TYPE_MODEL}
        for field, allowed_values in ((ID, model_ids), (NAME, model_names)):
            if allowed_values is not None:
                query[field] = {"$in": allowed_values}
        if model_desc is not None:
            query[MODEL_DESC] = model_desc

        removed = self.db.delete_many(query).deleted_count
        if removed >= 1:
            return str(removed) + " models deleted successfully", 200
        return "No models were found with the given parameters", 404

    ################ BulkAddModelData ################

    def bulk_add_model_data_get(self, model_id, crunch_data):
        """
        Fetch the single bulk add model stored under the given id.

        :param model_id:    The id of the model to get
        :param crunch_data: If true the stored model is passed through self._update_bulk_add_model before being returned, if false the stored model data is returned as is
        :return: A 200 json response with the model, a 404 message if no model has that id, or a 500 message if several do
        """
        matches = list(self.db.find({DATA_TYPE: DATA_TYPE_MODEL, ID: model_id}))
        if not matches:
            return "A model was not found with the given id", 404
        if len(matches) > 1:
            return "More than one model was found with the given id", 500

        stored = matches[0]
        if crunch_data:
            payload = self._update_bulk_add_model(stored[BULK_ADD_MODEL_DATA], stored[MODEL])
        else:
            payload = stored[BULK_ADD_MODEL_DATA]
        return json_response(payload, 200)

    def bulk_add_model_data_post(self, model_id, column_model, data):
        """
        Feed rows of data into the columns described by a stored bulk add model.

        :param model_id:     The id of the model which describes the columns
        :param column_model: The model of the columns being used with that model
        :param data:         The list of dictionaries (one per row) holding the values, keyed by column name
        :return: A confirmation message with a 201 if every column accepted its data, a 404 message if the model or a required column is missing, otherwise a 500 message
        """
        # Exactly one stored model must carry the given id
        matches = list(self.db.find({DATA_TYPE: DATA_TYPE_MODEL, ID: model_id}))
        if not matches:
            return "The given model was not found", 404
        if len(matches) > 1:
            return "More than one model was found with the id", 500
        model = matches[0][BULK_ADD_MODEL_DATA]

        for n in model[BAC_GRAPH][BAC_NODES]:
            # Pull this node's value out of every row (nothing is collected for nodes without a column name)
            node_column = n.get(BAC_COLUMN_NAME)
            column_data = [line[node_column] for line in data if node_column]

            # Append the collected values to the column of each user semantic type of the node
            for ust in n.get(BAC_USER_SEMANTIC_TYPES) or []:
                type_id = get_type_id(ust[BAC_CLASS][BAC_URI], ust[BAC_PROPERTY][BAC_URI])
                column_id = get_column_id(type_id, n[BAC_COLUMN_NAME], model[BAC_NAME], column_model)
                status = self.semantic_types_column_data_post_put(column_id, column_data, False)[1]
                if status == 404:
                    return "A required column was not found", 404
                if status != 201:
                    return "Error occurred while adding data to the column", 500

        return "Data successfully added to columns", 201

Exemple #15

0

Afficher le fichier

Fichier : entry.py Projet : chsivateja/bitfinex-arbitrage

from client import TradeClient, Client
import time
import sys
sys.path.append('../')
import config

# Instance of the Client class imported from the local `client` module above.
bfxclient = Client()

# Handle to the `orders` collection of the `wtracker` database on the default MongoClient host.
# NOTE(review): MongoClient is not imported in the visible part of this script - confirm it is in scope.
order = MongoClient().wtracker.orders
# Window size; it is subtracted from time.time(), so the unit is seconds.
lookback = 1

# Poll the collection forever.
while (1):

    # Orders stamped after `t` are read; orders stamped before `t - lookback` are purged.
    t = int(time.time()) - lookback
    cur = order.find({'ts': {'$gt': t}})
    order.delete_many({'ts': {'$lt': t - lookback}})

    # look for most expensive in asks.
    # look for cheapest in bids.
    group = {}
    disqualified = []
    count = 0
    base_cur = {}

    # Only the first two documents of the cursor are examined per pass.
    for x in cur:
        count += 1
        if count > 2:
            break

        # NOTE(review): the excerpt ends here; `_id` and `pair` are presumably used by code not shown.
        _id = x['ts']
        pair = x['pair']

Exemple #16

0

Afficher le fichier

Fichier : IO.py Projet : lcynju/ChineseTokenizerService-1

class ConfigurationIO:
    """Data-access helper for the ``tokenizer`` MongoDB database.

    Three collections are used: ``TextLibrary`` (``config_db``), ``TextTrained``
    (``train_db``) and ``Tasks`` (``task_db``).
    """

    def __init__(self, host='localhost', port=20000):
        """Open the three collections on the MongoDB server at *host*:*port*."""
        # A single client serves every collection; there is no need for one
        # client (and connection pool) per collection.
        database = MongoClient(host, port).get_database("tokenizer")
        self.config_db = database.get_collection('TextLibrary')
        self.train_db = database.get_collection('TextTrained')
        self.task_db = database.get_collection('Tasks')

        print('configuration initialization done')

    @staticmethod
    def _unique_databases(cursor):
        """Return the distinct ``database`` values of *cursor* in first-seen order."""
        seen = set()
        names = []
        for item in cursor:
            name = item['database']
            if name not in seen:
                seen.add(name)
                names.append(name)
        return names

    def insertTextIntoDatabase(self, sentences, database):
        """Store *sentences* in ``config_db`` under the name *database*.

        Each sentence gets the next free integer ``_id``, counting on from the
        largest ``_id`` found in either ``config_db`` or ``train_db`` (or from 0
        when both are empty). Nothing is written when *sentences* is empty.
        """
        # find_one() returns None for an empty collection, so the newest
        # document of each collection tells both "is it empty" and "max id".
        newest_docs = [collection.find_one(sort=[("_id", -1)])
                       for collection in (self.config_db, self.train_db)]
        known_ids = [doc["_id"] for doc in newest_docs if doc is not None]
        max_id = max(known_ids) if known_ids else 0

        sentence_state = [{"_id": index + 1 + max_id, "text": s, "database": database}
                          for index, s in enumerate(sentences)]
        if not sentence_state:
            # An empty batch cannot be inserted; there is simply nothing to do.
            return
        print(json.dumps(sentence_state, ensure_ascii=False))
        self.config_db.insert_many(sentence_state)

    def insertTask(self, databaseName, type, tags, description):
        """Record a task for *databaseName*, stamped with the current UTC time."""
        task = {
            'database': databaseName,
            'category': type,
            'description': description,
            'tags': tags,
            'timeAdded': strftime("%Y-%m-%d %H:%M:%S", gmtime()),
        }
        self.task_db.insert_one(task)

    def getCategoryOfDatabase(self, database):
        """Return the ``category`` of the first task stored for *database*.

        Raises ``TypeError`` when no task exists for *database*, because
        ``find_one`` then returns ``None``.
        """
        print(database)
        task = self.task_db.find_one({'database': database}, {'category': 1})
        print(task)
        return task['category']

    def deleteDb(self, dbName):
        """Delete every ``config_db`` document that belongs to *dbName*."""
        self.config_db.delete_many({'database': dbName})

    def getTrainedDatabases(self):
        """Return the distinct database names present in ``train_db``."""
        cursor = self.train_db.find({'_id': {'$gt': 0}}, {'database': 1})
        return self._unique_databases(cursor)

    def getUntrainedDatabases(self):
        """Return the distinct database names present in ``config_db``."""
        cursor = self.config_db.find({'_id': {'$gt': 0}}, {'database': 1})
        return self._unique_databases(cursor)

    def getUntrainedDatabasesGroupByCategory(self):
        """Return the distinct ``{"name", "category"}`` pairs found in ``config_db``."""
        cursor = self.config_db.find({'_id': {'$gt': 0}}, {'database': 1, 'category': 1})
        pairs = []
        for item in cursor:
            # NOTE(review): documents written by insertTextIntoDatabase carry no
            # 'category' field - confirm where it is expected to come from.
            doc = {"name": item['database'], "category": item['category']}
            # Membership is tested on the list itself because the category
            # value is not known to be hashable.
            if doc not in pairs:
                pairs.append(doc)
        return pairs

    def getSubmittedSentencesFromDatabase(self, database):
        """Return every ``train_db`` document of *database* as a list."""
        return list(self.train_db.find({"database": database}, {"database": 0, "id": 0}))

Exemple #17

0

Afficher le fichier

Fichier : test_mongo_utils.py Projet : diogenes1oliveira/lambda-mongo-utils

def test_mongo_dump_and_restore(docker_container, tmp_path):
    """Dump ``db1.col1`` inside one container, then restore it into ``db2.col2`` inside another.

    The first ``docker_container`` block inserts three documents, writes a dump
    of them to ``tmp_path`` and checks the dump stats and error handling. The
    second block restores that dump several times into a collection with
    different pre-existing content and checks the restore stats.
    """
    # Dummy data insertion
    docs = [
        {'name': 'col1doc1'},
        {'name': 'col1doc2'},
        {'name': 'col1doc3'},
    ]
    inserted_doc_ids = None
    port = 27020
    # NOTE(review): the client is created before any container is started - presumably
    # MongoClient connects lazily; confirm.
    client = MongoClient(f'mongodb://localhost:{port}')
    # URI handed to the dump/restore helpers, whose commands are prefixed with `docker exec`
    uri = 'mongodb://localhost/tmpdb'
    dump_path = str(tmp_path / 'dump1.tgz')

    with docker_container('mongo:4.0', ports={'27017/tcp': str(port)}, appdir=str(tmp_path)) as container:  # noqa: E501
        wait_for_mongo_to_be_up(container)
        cmd_prefix = f'docker exec -i {container.id} '
        inserted_doc_ids = client.db1['col1'].insert_many(docs).inserted_ids

        # Get a dump after inserting the documents
        with mongo_utils.mongo_dump(cmd_prefix=cmd_prefix, uri=uri, collection='col1', db='db1') as (stream, stats):  # noqa: E501
            with open(dump_path, 'wb') as fp:
                fp.write(stream.read())

        assert stats.num_docs == 3

        # Doesn't count the number of docs if requested not to
        with mongo_utils.mongo_dump(cmd_prefix=cmd_prefix, uri=uri, collection='col1', db='db1', count=False) as (_, stats):  # noqa: E501
            pass
        assert not stats.num_docs

        # Test if a dummy falsey command throws
        with pytest.raises(Exception) as exc:
            with mongo_utils.mongo_dump(cmd_prefix=cmd_prefix + ' false ', uri=uri, collection='col1', db='db1') as _:  # noqa: E501
                pass
        assert re.search('exited with error code', str(exc))

    with docker_container('mongo:4.0', ports={'27017/tcp': str(port)}, appdir=str(tmp_path)) as container:  # noqa: E501
        wait_for_mongo_to_be_up(container)

        def restore_dump(**kwargs):
            # Restore the dump file written above into db2.col2 of the current container
            with open(dump_path, 'rb') as fp:
                return mongo_utils.mongo_restore(
                    stream=fp,
                    cmd_prefix=f'docker exec -i {container.id} ',
                    uri=uri,
                    collection='col2',
                    db='db2',
                    **kwargs,
                )

        # Insert one document and check if it wasn't overwritten
        col = MongoClient(f'mongodb://localhost:{port}').db2['col2']
        col.insert_one({
            '_id': inserted_doc_ids[0],
            'name': 'test',
        })
        stats = restore_dump()
        assert {d['name'] for d in col.find()} == {
            'test', 'col1doc2', 'col1doc3',
        }
        assert stats.num_docs == 2

        # Checking if duplicated docs are properly returned
        col.drop()
        col.insert_one({
            '_id': inserted_doc_ids[0],
            'name': 'new doc 1',
        })
        col.insert_one({'name': 'new doc 2'})
        stats = restore_dump()

        assert stats.duplicated_ids == [inserted_doc_ids[0]]

        # Remove the clashing document, then restore again: only that one document is
        # expected to be new, the other two dumped ids are expected as duplicates
        r = col.delete_many({'_id': {'$in': stats.duplicated_ids}})
        LOGGER.warning(r.raw_result)
        stats = restore_dump()
        assert stats.num_docs == 1
        assert set(stats.duplicated_ids) == set(inserted_doc_ids[1:])
        assert {d['name'] for d in col.find()} == {
            'col1doc1', 'new doc 2', 'col1doc2', 'col1doc3',
        }

        # Now drop the collection
        col.insert_one({'name': 'new doc'})
        stats = restore_dump(drop=True)
        assert {d['name'] for d in col.find()} == {
            'col1doc1', 'col1doc2', 'col1doc3',
        }
        assert stats.num_docs == 3

Exemple #18

0

Afficher le fichier

class MongoDatabase(Database):
    """
    MongoDB-backed implementation of the abstract ``Database`` class.

    Devices are stored as documents in a single collection of the ``Hestia``
    database; every method reads from or writes to that collection.
    """

    def __init__(self, collection):
        """Bind this instance to ``collection`` in the ``Hestia`` database.

        ``MongoClient()`` is called without arguments, so the driver's
        default connection settings apply.
        """
        self._devices = MongoClient()["Hestia"][collection]

    def get_all_devices(self):
        """Return a list with one instantiated device per stored document.

        The device class is resolved with ``self._get_class`` (not defined in
        this class; presumably inherited from ``Database``) from the
        document's ``module`` and ``class`` entries.
        """
        return [
            self._get_class(data["module"], data["class"])(self, data["_id"])
            for data in self._devices.find()
        ]

    def get_device(self, device_id):
        """Instantiate the device with the given ``device_id``.

        Raises:
            NotFoundException: if no document has this id.
        """
        data = self.__get_device_data(device_id)
        device_class = self._get_class(data["module"], data["class"])
        return device_class(self, device_id)

    def add_device(self, plugin):
        """Store ``plugin`` as a new device document.

        A fresh ``ObjectId`` is generated, converted to ``str`` and written
        into ``plugin["_id"]`` before inserting, so the caller's dict is
        modified in place.
        """
        plugin["_id"] = str(ObjectId())
        self._devices.insert_one(plugin)

    def delete_device(self, device_id):
        """Delete the document whose ``_id`` equals ``device_id``."""
        self._devices.delete_one({"_id": device_id})

    def update_field(self, device_id, field, new_value):
        """Set ``field`` to ``new_value`` on the device ``device_id``."""
        self._devices.find_one_and_update(
            {"_id": device_id},
            {"$set": {field: new_value}},
        )

    def get_field(self, device_id, field):
        """Return the value stored under ``field`` for device ``device_id``.

        Raises:
            NotFoundException: if no document has this id.
            KeyError: if the document has no such field.
        """
        data = self.__get_device_data(device_id)
        return data[field]

    def get_activator_field(self, device_id, activator_id, field):
        """Return ``field`` of activator ``activator_id`` on a device.

        Raises:
            NotFoundException: if the device or the activator is missing.
            KeyError: if the activator has no such field.
        """
        data = self.__get_device_data(device_id)
        activator = self.__get_activator(data, activator_id)
        return activator[field]

    def update_activator_field(self, device_id, activator_id, field,
                               new_value):
        """Set ``field`` of activator ``activator_id`` to ``new_value``.

        The nested value is addressed with the dotted path
        ``activators.<activator_id>.<field>``.
        """
        dotted_path = ".".join(("activators", activator_id, field))
        self._devices.find_one_and_update(
            {"_id": device_id},
            {"$set": {dotted_path: new_value}},
        )

    def delete_all_devices(self):
        """Delete every document in the collection."""
        self._devices.delete_many({})

    def __get_device_data(self, device_id):
        """Return the stored document for ``device_id``.

        Raises:
            NotFoundException: if no document has this id.
        """
        # Filter explicitly on ``_id``: passing the bare value to ``find_one``
        # would make a ``None`` id match an arbitrary document and a dict id
        # act as a raw query. This also matches the other methods' filters.
        data = self._devices.find_one({"_id": device_id})
        if data is None:
            raise NotFoundException("device")
        return data

    @staticmethod
    def __get_activator(data, activator_id):
        """Return the activator ``activator_id`` from a device document.

        Raises:
            NotFoundException: if the document has no ``activators`` entry
                or that entry has no ``activator_id`` key.
        """
        try:
            return data["activators"][activator_id]
        except KeyError as exception:
            # Chain the original KeyError so the missing key stays visible.
            raise NotFoundException("activator") from exception