def _setup(samples: bool = True):
    """Open the ``todos.main`` collection and optionally seed it with samples.

    The connection string is read from the ``MONGO_URI`` environment variable.

    NOTE(review): this block was rebuilt from a copy in which the ``try:``
    line, the braces/commas of the sample documents and the argument of
    ``datetime.timestamp(`` were missing. ``datetime.now()`` is assumed for
    the lost argument -- confirm against the upstream source.

    :param samples: When true, insert three sample todo documents.
    :return: The collection handle.
    :raises Exception: whatever the connection attempt raised; it is printed
        and then re-raised, because continuing would only fail later on the
        unbound ``collection`` name.
    """
    try:
        collection = MongoClient(getenv('MONGO_URI'))['todos']['main']
    except Exception as e:
        print("There was an error Setting Up:", e)
        raise

    if samples:
        todos = [
            {
                'task': 'Finish This',
                'when': datetime.timestamp(datetime.now()),
                'by': 'me',
                'done': False,
            },
            {
                'task': 'Goto the Supermarket',
                'when': datetime.timestamp(datetime.now()),
                'by': 'me',
                'done': True,
            },
            {
                'task': 'Implement from Database',
                'when': datetime.timestamp(datetime.now()),
                'by': 'someone',
                'done': False,
            },
        ]
        collection.insert_many(todos)

    return collection
# Example #2
# Cache stored in a MongoDB collection; entries are looked up by the
# ``DatabaseCacheItem.filename_key`` field.
# NOTE(review): this copy of the class is missing source lines (the
# ``__init__`` parameter list, the call that receives the index spec, and most
# of ``add_item``), so it does not parse as written. Restore them from the
# upstream source before relying on it.
class DatabaseCache:
    # NOTE(review): signature truncated. The body reads ``database_name``,
    # ``collection_name`` and ``expiration_time``, presumably the lost
    # parameters -- confirm.
    def __init__(self,
        # Despite its name, ``self.client`` holds the collection handle.
        self.client = MongoClient()[database_name][collection_name]
        self.expiration_delta = expiration_time
        if self.expiration_delta is not None:
            # NOTE(review): the call that this index spec was an argument of
            # is missing (presumably an index-creation call -- confirm).
                [(DatabaseCacheItem.expiration_index, pymongo.ASCENDING)],

    # NOTE(review): truncated. The argument of ``Binary(``, the left operand of
    # ``+ self.expiration_delta`` and whatever stored ``item`` are all missing.
    def add_item(self, key, data):
        item = DatabaseCacheItem(
            key, bson.binary.Binary(,
   + self.expiration_delta)

    def get_item(self, key):
        """Return the ``data_key`` field of the first document whose
        ``filename_key`` equals *key*, or ``None`` when no document matches."""
        element = self.client.find_one({DatabaseCacheItem.filename_key: key})
        return element[DatabaseCacheItem.data_key] if element else None

    def remove_item(self, key):
        """Delete every document whose ``filename_key`` equals *key*."""
        self.client.delete_many({DatabaseCacheItem.filename_key: key})
# Example #3
def setup():
    """Open the ``todos.test`` collection named by ``MONGO_URI``.

    Any exception raised while building the handle is printed rather than
    propagated (best-effort, as in the original ``except`` clause).

    NOTE(review): the ``try:`` line was missing from this copy and has been
    restored. Statements that used ``collection`` (e.g. seeding test data) may
    also have been lost -- confirm against the upstream source.
    """
    try:
        collection = MongoClient(getenv('MONGO_URI'))['todos']['test']
    except Exception as e:
        print("There was an error Setting Up:", e)
# Example #4
def teardown():
    """Open the ``todos.test`` collection named by ``MONGO_URI`` for cleanup.

    Any exception raised while building the handle is printed rather than
    propagated (best-effort, as in the original ``except`` clause).

    NOTE(review): the ``try:`` line was missing from this copy and has been
    restored. The statement that actually cleaned the collection appears to
    have been lost as well -- confirm against the upstream source.
    """
    try:
        collection = MongoClient(getenv('MONGO_URI'))['todos']['test']
    except Exception as e:
        print("There was an error Tearing Down:", e)
# Example #5
# Thin wrapper around the ``local.connections`` MongoDB collection.
# NOTE(review): several method bodies lost lines in this copy (``REMOVEALL``,
# ``insert``, the loop in ``getAllConnectionsPrint``), so the class does not
# parse as written.
class DB:
    def __init__(self):
        # Default client (no host/port given); collection ``connections`` in
        # database ``local``.
        self.collection = MongoClient().local.connections

    # NOTE(review): body missing in this copy.
    def REMOVEALL(self):

    def remove(self, connection, field):
        """Delete documents matching *connection* on the field named in *field*.

        As written: returns ``False`` after deleting by hostname, ``True``
        otherwise (including when *field* matches neither key).
        NOTE(review): an ``else:`` before ``return False`` was probably lost --
        confirm.
        """
        if ("ip" in field):
            self.collection.delete_many({"ip": connection.ip})
        elif ("hostname" in field):
            self.collection.delete_many({"hostname": connection.hostname})
            return False
        return True

    # NOTE(review): the call that received the document literal below (and its
    # closing brace) is missing; ``data`` is never used in the visible lines.
    def insert(self, connection):
        data = json.dumps(connection.socket, -1)
            "ip": connection.ip,
            "hostname": connection.hostname,
            "uniq": str(connection.unique)

    # NOTE(review): the loop body is missing, so nothing is added to ``darr``
    # in the visible lines.
    def getAllConnectionsPrint(self):
        darr = []
        docs = self.collection.find()
        for doc in docs:
        return darr

    def getCollection(self):
        """Return the underlying collection handle."""
        return self.collection
# Example #6
# Access layer for the ``diadb.diaset`` MongoDB collection.
# NOTE(review): Python 2 source (``print`` statement). Several lines are
# missing in this copy (the insert in ``write``, the ``else:`` lines in
# ``read``/``delete``/``all``), so the class does not parse as written.
class diaDb(object):
    def __init__(self, address=config.databaseIp, port=config.databasePort):
        # Imported locally, so pymongo is only required once an instance is
        # created.
        from pymongo import MongoClient
        self.__diaset = MongoClient(address, port).diadb.diaset
        print 'diadb init done'

    # NOTE(review): the call that received the document below is missing;
    # as visible, the method only returns 1.
    def write(self, user, word, reply):
        import datetime
            "user": user,
            "word": word,
            "reply": reply
        return 1

    def read(self, startTime, endTime):
        # Documents whose "time" lies strictly between startTime and endTime.
        res = self.__diaset.find({"time": {"$gt": startTime, "$lt": endTime}})
        # NOTE(review): an ``else:`` before ``return 0, []`` appears to be
        # lost; as written that line is unreachable.
        if res.count():
            return 1, res
            return 0, []

    def delete(self, user):
        # NOTE(review): an ``else:`` is missing, and both visible calls use an
        # empty filter (delete everything); the second presumably filtered on
        # ``user`` -- confirm.
        if user == '':
            deleteObj = self.__diaset.delete_many({})
            return deleteObj.deleted_count
            deleteObj = self.__diaset.delete_many({})
            return deleteObj.deleted_count

    def all(self):
        res = self.__diaset.find({})
        # NOTE(review): same lost ``else:`` as in ``read``.
        if res.count():
            return 1, res
            return 0, []
# Example #7
        segments_37[line[0]] = line[1:]

# Generate new cnv collections by copying originals and updating coordinates
# NOTE(review): in the visible lines nothing is ever appended to ``cnv_37`` and
# ``collection_out`` is never written to; those statements appear to have been
# lost from this copy.
cnv_37 = []
for sample in collection_in.find({},{'_id':0}):
    key = sample['id']
    if key in segments_37:
        # segments_37[key] holds the fields that followed the id on the lifted
        # line; fields 1 and 2 are used as start/end.
        sample['start'] = int(segments_37[key][1])
        sample['end'] = int(segments_37[key][2])
        sample['variantset_id'] = 'AM_VS_GRCH37'

# write to db
collection_out = MongoClient()['arraymap_ga4gh']['variants_cnv_grch37']

##### Read in lifted file of grch38 ######
# Tab-separated file: first field is the key, remaining fields are the value.
segments_38 = {}
with open('/Users/bogao/DataFiles/tmp/segments_38.txt', 'r') as fi:
    for line in fi:
        line = line.strip().split('\t')
        segments_38[line[0]] = line[1:]

# Generate new cnv collections by copying originals and updating coordinates
# Example #8
class Server(object):
    def __init__(self):
        """Bind the ``data.service`` collection and build the classifier."""
        # Default client (no host/port given).
        mongo = MongoClient()
        self.db = mongo.data.service
        self.classifier = MyRandomForest({}, {}, DATA_MODEL_PATH)

    ################ Stuff for use in this file ################

    # NOTE(review): damaged in this copy. The parameter list is truncated, the
    # docstring quotes are gone, the ``db_body`` literal is never closed, and
    # the statements under ``if force:`` (plus the matching ``else:`` and the
    # insert itself) are missing. Callers visible in this file pass a Column
    # object first, which the parameter docs below do not list -- confirm the
    # real signature upstream before editing.
    def _create_column(self,
        Create a column in a semantic type and return the column's id if it was created successfully.

        Notes: If the column already exists and force is not set to true, a 409 will be returned and no data will be modified.

        :param type_id:     Id of the semantic type this column belongs to
        :param column_name: Name of the column to be created
        :param source_name: Name of the source of the column to be created
        :param model:       Model of the column to be created
        :param data:        Data which will be added to the column on creation
        :param force:       Force create the column, if this is true and the column exists the old column will be deleted (with all of its data) before creation
        :return: The id of the new column and a response code of 201 if the creation was successful, otherwise it will be an error message with the appropriate error code
        column_id = get_column_id(type_id, column_name, source_name, model)
        db_body = {
            ID: column_id,
            TYPE_ID: type_id,
            COLUMN_NAME: column_name,
            SOURCE_NAME: source_name,
            MODEL: model
        if self.db.find_one(db_body):
            if force:
                return "Column already exists", 409
        return column_id, 201

    # NOTE(review): damaged in this copy. The docstring quotes are gone and the
    # body of ``for value in data:`` is missing (presumably it fed each value
    # into ``att`` -- confirm).
    def _predict_column(self, column_name, source_names, data):
        Predicts the semantic type of a column.

        :param column_name:  Name of the column
        :param source_names: List of source names
        :param data:         The data to predict based opon
        :return: A list of dictionaries which each contain the semantic type and confidence score
        # Only the first source name is used to build the column.
        att = Column(column_name, source_names[0])

        # print(data)
        for value in data:
        att.semantic_type = "to_predict"
        return att.predict_type(
            searcher.search_types_data(INDEX_NAME, source_names),
            searcher.search_similar_text_data(INDEX_NAME, att.value_text,
                                              source_names), self.classifier)

    # NOTE(review): damaged in this copy. The docstring quotes are gone; the
    # first argument of ``get_column_id``, the data argument of
    # ``_predict_column`` (only fragments of a query remain), the statement
    # appending ``od`` to the learned types, and the opening of the final
    # update call are all missing.
    def _update_bulk_add_model(self, model, column_model):
        Updates the bulk add model in the db and also returns it.

        :param model:        The current bulk add model
        :param column_model: The model of the columns which are being updated against
        :return: The updated bulk add model
        for n in model[BAC_GRAPH][BAC_NODES]:
            # Only nodes that carry a column name are processed.
            if n.get(BAC_COLUMN_NAME):
                column_id = get_column_id(
                    n[BAC_COLUMN_NAME], model[BAC_NAME], column_model)
                prediction = self._predict_column(
                    n[BAC_COLUMN_NAME], [model[BAC_NAME]],
                        DATA_TYPE: DATA_TYPE_COLUMN,
                        ID: column_id
                # Learned types are rebuilt from scratch for the node.
                n[BAC_LEARNED_SEMANTIC_TYPES] = []
                for t in prediction:
                    type_info = decode_type_id(t[SL_SEMANTIC_TYPE])
                    od = collections.OrderedDict()
                    od[BAC_CLASS] = {BAC_URI: type_info[0]}
                    od[BAC_PROPERTY] = {BAC_URI: type_info[1]}
            ID: model[BAC_ID]
        }, {"$set": {
            BULK_ADD_MODEL_DATA: model
        return model

    ################ Predict ################

    # NOTE(review): damaged in this copy. The parameter list is truncated, the
    # docstring quotes are gone, three ``for`` loops have no body, both
    # ``list(`` queries are unterminated, and the statements that skip
    # disallowed predictions and append ``obj_dict`` to ``return_body`` are
    # missing.
    def predict_post(self,
        Predicts the semantic type of the given data.

        :param namespaces:   List of allowed namespaces
        :param column_names: List of allowed column names
        :param source_names: List of allowed source names
        :param models:       List of allowed column models
        :param data:         List of the data values to predict.
        :return: A return message (if it is successful this will be a list of the predicted types) and a return code
        # Drop surrounding whitespace, then discard values that end up empty.
        data = [x.strip() for x in data]
        data = [x for x in data if x]
        if not data:
            return "Predicting data cannot be empty", 500
        if source_names is None:
            # If no source names are given just use all of the source names in the db
            source_names = set()
            for col in self.db.find({DATA_TYPE: DATA_TYPE_COLUMN}):
            source_names = list(source_names)
        if len(source_names) < 1:
            return "You must have columns to be able to predict", 400

        #### Predict the types
        ## Do the actual predicting using the semantic labeler
        # Only the first entry of column_names is used.
        predictions = self._predict_column(column_names[0], source_names, data)
        if len(predictions) < 1: return "No matches found", 404

        ## Filter the results
        allowed_ids_namespaces = None
        allowed_ids_models = None
        all_allowed_ids = None
        if namespaces is not None:
            allowed_ids_namespaces = set()
            current_allowed_types = list(
                    NAMESPACE: {
                        "$in": namespaces
            for prediction in current_allowed_types:
        if models:
            allowed_ids_models = set()
            current_allowed_types = list(
                    DATA_TYPE: DATA_TYPE_COLUMN,
                    MODEL: {
                        "$in": models
            for c in current_allowed_types:
        # Combine the filters: intersect when both are given, otherwise use
        # whichever one is set; None means "no filtering".
        if allowed_ids_namespaces is not None and allowed_ids_models is not None:
            all_allowed_ids = allowed_ids_namespaces & allowed_ids_models
        elif allowed_ids_namespaces is not None and allowed_ids_models is None:
            all_allowed_ids = allowed_ids_namespaces
        elif allowed_ids_namespaces is None and allowed_ids_models is not None:
            all_allowed_ids = allowed_ids_models
        return_body = []
        for prediction in predictions:
            for type_id, exact_score in prediction[1]:
                if all_allowed_ids is not None:
                    if prediction[SL_SEMANTIC_TYPE] not in all_allowed_ids:
                obj_dict = {TYPE_ID_PATH: type_id, SCORE: exact_score}
                type_class_property = decode_type_id(type_id)
                obj_dict[CLASS] = type_class_property[0]
                obj_dict[PROPERTY] = type_class_property[1]
        # Highest score first.
        return_body.sort(key=lambda x: x[SCORE], reverse=True)
        return json_response(return_body, 200)

    ################ SemanticTypes ################

    # NOTE(review): damaged in this copy. The parameter list is truncated, the
    # docstring quotes are gone, the initial ``db_body`` assignment is missing,
    # two ``for`` loops have no body, the append of ``o`` to ``return_body`` is
    # missing, and the ``clean_columns_output(`` call is unterminated.
    def semantic_types_get(self,
        Returns all of the semantic types (and optionally their columns and columns' data) filtered by the given parameters.

        :param class_:             The class of the semantic types to get
        :param property_:          The property of the semantic types to get
        :param namespaces:         The possible namespaces of the semantic types to get
        :param source_names:       The possible source names of at least one column of a semantic type must have
        :param column_names:       The possible column names of at least one column of a semantic type must have
        :param column_ids:         The possible column ids of at least one column of a semantic type must have
        :param models:             The possible column model of at least one column of a semantic type must have
        :param return_columns:     True if all of the columns (but not the data in the columns) should be returned with the semantic types
        :param return_column_data: True if all of the columns and their data should be returned with the semantic types
        :return: All of the semantic types which fit the following parameters
        # Find all of the type ids that satisfy the class, property, and namespaces
        if class_ is not None: db_body[CLASS] = class_
        if property_ is not None: db_body[PROPERTY] = property_
        if namespaces is not None: db_body[NAMESPACE] = {"$in": namespaces}
        possible_result = list(self.db.find(db_body))
        possible_type_ids = set()
        for t in possible_result:

        # Find all of the type ids from the columns which satisfy the other parameters
        # (the truthiness test below means empty lists do not trigger this
        # branch, while the inner checks test ``is not None``)
        if source_names or column_names or column_ids or models:
            db_body = {DATA_TYPE: DATA_TYPE_COLUMN}
            if source_names is not None:
                db_body[SOURCE_NAME] = {"$in": source_names}
            if column_names is not None:
                db_body[COLUMN_NAME] = {"$in": column_names}
            if column_ids is not None: db_body[ID] = {"$in": column_ids}
            if models is not None: db_body[MODEL] = {"$in": models}
            other_possible_ids = set()
            for col in self.db.find(db_body):
            possible_type_ids = possible_type_ids & other_possible_ids

        # Construct the return body
        return_body = []
        for t in possible_result:
            if t[ID] in possible_type_ids:
                o = collections.OrderedDict()
                o[TYPE_ID_PATH] = t[ID]
                o[CLASS] = t[CLASS]
                o[PROPERTY] = t[PROPERTY]
                o[NAMESPACE] = t[NAMESPACE]

        # Add the column data if requested
        if return_columns:
            db_body = {DATA_TYPE: DATA_TYPE_COLUMN}
            for type_ in return_body:
                db_body[TYPE_ID] = type_[TYPE_ID_PATH]
                type_[COLUMNS] = clean_columns_output(self.db.find(db_body),

        if len(return_body) < 1:
            return "No Semantic types matching the given parameters were found", 404
        return json_response(return_body, 200)

    # NOTE(review): damaged in this copy. The docstring quotes are gone, the
    # URI validation announced by the "Verify" comment is missing, ``db_body``
    # is never closed, and under ``if force:`` only fragments of a delete query
    # remain (the delete call, the ``else:`` and the insert are missing).
    def semantic_types_post_put(self, class_, property_, force=False):
        Creates a semantic type and returns the id if it was successful.

        Notes: If the type already exists and force is not set to true a 409 will be returned and no data will be modified

        :param class_:    The class of the semantic type, note that this must be a valid URL
        :param property_: The property of the semantic type
        :param force:     Force create the semantic type, if this is true and the type already exists the existing type (and all of its columns and data) will be deleted before creation
        :return: The id of the new semantic type and a response code of 201 if the creation was successful, otherwise it will be an error message with the appropriate error code
        # Trailing slashes are stripped from both URIs.
        class_ = class_.rstrip("/")
        property_ = property_.rstrip("/")

        ## Verify that class is a valid uri and namespace is a valid uri
        # Namespace = class URI with its last path/fragment component removed.
        namespace = "/".join(class_.replace("#", "/").split("/")[:-1])

        ## Actually add the type
        type_id = get_type_id(class_, property_)
        db_body = {
            ID: type_id,
            CLASS: class_,
            PROPERTY: property_,
            NAMESPACE: namespace
        if self.db.find_one(db_body):
            if force:
                    DATA_TYPE: DATA_TYPE_COLUMN,
                    TYPE_ID: type_id
                return type_id, 409
        return type_id, 201

    # NOTE(review): damaged in this copy. The parameter list is truncated, the
    # docstring quotes are gone, the ``delete_all`` branch and several
    # ``else:`` lines are missing, the ``if ... not in ...`` tests have lost
    # their append bodies, two query literals are unterminated, and the final
    # ``str(`` has no argument. Python 2 source (``print`` statements).
    def semantic_types_delete(self,
        Deletes all of the semantic types (and all of their columns/data) that fit the given parameters.

        :param class_:       The class of the semantic types to delete
        :param property_:    The property of the semantic types to delete
        :param type_ids:     The possible ids of the semantic types to delete
        :param namespaces:   The possible namespaces of the semantic types to delete
        :param source_names: The possible source names of at least one column of a semantic type must have
        :param column_names: The possible column names of at least one column of a semantic type must have
        :param column_ids:   The possible column ids of at least one column of a semantic type must have
        :param models:       The possible column model of at least one column of a semantic type must have
        :param delete_all:   Set this to true if all semantic types should be deleted
        :return: The amount of semantic types deleted and a 200 if it worked, otherwise and error message with the appropriate code
        # Guard: with no filter at all, deleting requires delete_all.
        if class_ is None and property_ is None and type_ids is None and namespaces is None and source_names is None and column_names is None and column_ids is None and models is None and not delete_all:
            return "To delete all semantic types give deleteAll as true", 400
            return "All " + str(
                    DATA_TYPE: {
                        "$in": [DATA_TYPE_SEMANTIC_TYPE, DATA_TYPE_COLUMN]
            ) + " semantic types and their data were deleted", 200

        # Debug output of the received parameters.
        print str(class_) + " " + str(property_) + " " + str(
            type_ids) + " " + str(namespaces) + " " + str(
                source_names) + " " + str(column_names) + " " + str(
                    column_ids) + " " + str(models) + " " + str(delete_all)

        # Find the parent semantic types and everything below them of everything which meets column requirements
        type_ids_to_delete = []
        db_body = {DATA_TYPE: DATA_TYPE_COLUMN}
        db_body_id = {DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE}
        if type_ids is not None:
            db_body[TYPE_ID] = {"$in": type_ids}
            db_body_id[ID] = {"$in": type_ids}

        if source_names is not None:
            db_body[SOURCE_NAME] = {"$in": source_names}
        if column_names is not None:
            db_body[COLUMN_NAME] = {"$in": column_names}
        if column_ids is not None:
            db_body[COLUMN_ID_PATH] = {"$in": column_ids}
        if models is not None: db_body[MODEL] = {"$in": models}
        for col in self.db.find(db_body):
            print "col[TYPE_ID] = " + str(col[TYPE_ID])
            if col[TYPE_ID] not in type_ids_to_delete:
        for col in self.db.find(db_body_id):
            print "col[ID] = " + str(col[ID])
            if col[ID] not in type_ids_to_delete:
        # Find the semantic types which meet the other requirements and delete all types which need to be
        possible_types = []
        if class_ is not None: db_body[CLASS] = class_
        if property_ is not None: db_body[PROPERTY] = property_
        if namespaces is not None: db_body[NAMESPACE] = {"$in": namespaces}

        if type_ids is None and source_names is None and column_names is None and column_ids is None and models is None:
            deleted = self.db.delete_many(db_body).deleted_count
            for t in self.db.find(db_body):
                if t[ID] not in possible_types:
            for t in self.db.find(db_body_id):
                if t[ID] not in possible_types:
            for id_ in type_ids_to_delete:
                if id_ not in possible_types:
            db_body = {
                DATA_TYPE: DATA_TYPE_COLUMN,
                TYPE_ID: {
                    "$in": type_ids_to_delete
            deleted = self.db.delete_many({
                ID: {
                    "$in": type_ids_to_delete
        if deleted < 1:
            return "No semantic types with the given parameters were found", 404
        return str(
        ) + " semantic types matched parameters and were deleted", 200

    ################ SemanticTypesColumns ################

    def semantic_types_columns_get(self,
                                   type_id,
                                   column_ids=None,
                                   column_names=None,
                                   source_names=None,
                                   models=None,
                                   return_column_data=False):
        """
        Returns all of the columns in a semantic type that fit the given parameters.

        NOTE(review): the parameter list, the docstring quotes and the status
        code closing the final ``json_response`` call were missing from this
        copy. Parameter order/defaults are rebuilt from the parameter docs
        below, and 200 is assumed for the success status -- confirm against the
        upstream source.

        :param type_id:            The id of the semantic type
        :param column_ids:         The possible ids of the columns to be returned
        :param column_names:       The possible names of the columns to be returned
        :param source_names:       The possible source names of the columns to be returned
        :param models:             The possible models of the columns to be returned
        :param return_column_data: True if all of the data in the column should be returned with the columns
        :return: All of the columns in the semantic type that fit the given parameters
        """
        # Every optional filter that was supplied narrows the query via "$in".
        db_body = {DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: type_id}
        if source_names is not None:
            db_body[SOURCE_NAME] = {"$in": source_names}
        if column_names is not None:
            db_body[COLUMN_NAME] = {"$in": column_names}
        if column_ids is not None:
            db_body[ID] = {"$in": column_ids}
        if models is not None:
            db_body[MODEL] = {"$in": models}
        result = list(self.db.find(db_body))
        if not result:
            return "No columns matching the given parameters were found", 404
        return json_response(clean_columns_output(result, return_column_data),
                             200)

    # NOTE(review): damaged in this copy. The parameter list is truncated, the
    # docstring quotes are gone and the body of ``for value in data:`` is
    # missing (presumably it fed each value into ``column`` -- confirm).
    def semantic_types_columns_post_put(self,
        Create a column in a semantic type, optionally with data.

        :param type_id:     Id of the semantic type to create the column in
        :param column_name: The name of the column to be created
        :param source_name: The name of the source of the column to be created
        :param model:       The model of the column to be created
        :param data:        The (optional) list of data to put into the column on creation
        :param force:       True if the column should be replaced if it already exists
        :return: The id of the newly created with a 201 if it was successful, otherwise an error message with the appropriate error code
        column = Column(column_name, source_name)
        column.semantic_type = type_id

        #if the size of the training data is MORE than a threshold value, then sample the threshold values randomly
        if (len(data) > SAMPLE_SIZE): data = random.sample(data, SAMPLE_SIZE)

        for value in data:
        # The Column object is passed as the first positional argument here.
        result = self._create_column(column, type_id, column_name, source_name,
                                     model, force)
        return result

    def semantic_types_columns_delete(self,
                                      type_id,
                                      column_ids=None,
                                      source_names=None,
                                      column_names=None,
                                      models=None):
        """
        Delete all of the columns in a semantic type that match the given parameters.

        NOTE(review): the parameter list and the docstring quotes were missing
        from this copy. Parameter order/defaults are rebuilt from the parameter
        docs below -- confirm against the upstream source.

        :param type_id:      The id of the semantic type to delete the columns from
        :param column_ids:   The possible ids of the columns to delete
        :param source_names: The possible source names of the columns to delete
        :param column_names: The possible names of the columns to delete
        :param models:       The possible models of the columns to delete
        :return: The number of columns deleted with a 200 if successful, otherwise an error message with an appropriate error code
        """
        db_body = {DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: type_id}
        if source_names is not None:
            db_body[SOURCE_NAME] = {"$in": source_names}
        if column_names is not None:
            db_body[COLUMN_NAME] = {"$in": column_names}
        if column_ids is not None:
            db_body[ID] = {"$in": column_ids}
        if models is not None:
            db_body[MODEL] = {"$in": models}
        # Delete directly and decide from the reported count, instead of
        # running a separate find first (one round trip, no window between
        # the check and the delete).
        deleted_count = self.db.delete_many(db_body).deleted_count
        if deleted_count < 1:
            return "No columns were found with the given parameters", 404
        return str(deleted_count) + " columns deleted successfully", 200

    ################ SemanticTypesColumnData ################

    def semantic_types_column_data_get(self, column_id):
        """
        Returns all of the data in the column

        NOTE(review): the docstring quotes and the call wrapped by ``list(``
        were missing from this copy. ``self.db.find`` is restored because the
        result is used as a list of documents and the remaining fragments are
        a query filter -- confirm against the upstream source.

        :param column_id: Id of the column to get the data from
        :return: The column and all of its info
        """
        result = list(
            self.db.find({
                DATA_TYPE: DATA_TYPE_COLUMN,
                ID: column_id
            }))
        if len(result) < 1:
            return "No column with that id was found", 404
        # Ids are expected to be unique; more than one hit is reported as a
        # server-side error.
        if len(result) > 1:
            return "More than one column was found with that id", 500
        return json_response(clean_column_output(result[0]), 200)

    # NOTE(review): damaged in this copy. The parameter list is truncated, the
    # docstring quotes are gone, the ``find_one`` filter and the ``Column(``
    # call are unterminated, the bodies of ``if not force:`` and
    # ``for value in body:`` are missing, and the statement that stores
    # ``data`` is missing.
    # NOTE(review): ``find_one`` returns a single document or None, so the
    # ``matched_count`` checks below look wrong as written -- confirm upstream.
    def semantic_types_column_data_post_put(self,
        Add or replace data on an existing column

        Notes: If the column does not exist a 404 will be returned

        :param column_id: Id of the column to add/replace the data of
        :param body:      An array of the new data
        :param force:     True if the current data in the column should be replaced, false if the new data should just be appended
        :return: A conformation with a 201 if it was added successfully or an error message with an appropriate error code if it was not successful

        column_data = self.db.find_one({
            ID: column_id
        if column_data.matched_count < 1:
            return "No column with that id was found", 404
        if column_data.matched_count > 1:
            return "More than one column was found with that id", 500

        column = Column(column_data[COLUMN_NAME], column_data[SOURCE_NAME],
        if not force:

        for value in body:

        data = column.to_json()

        return "Column data updated", 201

    # NOTE(review): damaged in this copy. The docstring quotes are gone, the
    # ``update_many`` call is unterminated, and the statement that the lone
    # ``TYPE_ID: ...`` fragment belonged to is missing. As visible, the method
    # empties DATA and then deletes the column document itself, which is more
    # than the docstring describes -- confirm upstream.
    def semantic_types_column_data_delete(self, column_id):
        Delete the data from the column with the given id

        :param column_id: Id of the column to delete the data from
        :return: A deletion conformation with a 200 if successful, otherwise an error message with an appropriate error code
        # Reset the DATA field of the matching column(s) to an empty list.
        result = self.db.update_many(
                DATA_TYPE: DATA_TYPE_COLUMN,
                ID: column_id
            }, {"$set": {
                DATA: []
        if result.matched_count < 1:
            return "No column with that id was found", 404
        if result.matched_count > 1:
            return "More than one column was found with that id", 500
        column = self.db.find_one({DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id})

            TYPE_ID: get_type_from_column_id(column_id)

        self.db.delete_one({DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id})
        return "Column data deleted", 200

    ################ BulkAddModels ################

    # NOTE(review): damaged in this copy. The parameter list is truncated, the
    # docstring quotes are gone, the append of ``o`` to ``return_body`` is
    # missing, and ``_update_bulk_add_model`` is called with one argument
    # although it takes ``(model, column_model)`` -- a line of that call
    # appears to be lost.
    def bulk_add_models_get(self,
        Returns the current state of all of the bulk add models.

        :param model_ids:   The possible ids of the models to get
        :param model_names: The possible names of the models to get
        :param model_desc:  The possible descriptions of the models to get
        :param show_all:    True if the whole model should be returned
        :param crunch_data: False if learnedSemanticTypes should not be generated and the version in the db should be used instead, note that the data in the db is updated every time a get is run with crunch_data=true
        :return: All of the models that fit the given parameters
        db_body = {DATA_TYPE: DATA_TYPE_MODEL}
        if model_ids is not None: db_body[ID] = {"$in": model_ids}
        if model_names is not None: db_body[NAME] = {"$in": model_names}
        # Unlike ids and names, the description is matched exactly (no "$in").
        if model_desc is not None: db_body[MODEL_DESC] = model_desc
        db_result = list(self.db.find(db_body))
        if len(db_result) < 1:
            return "No models were found with the given parameters", 404

        # Construct the return body
        return_body = []
        for mod in db_result:
            o = collections.OrderedDict()
            o[MODEL_ID] = mod[ID]
            o[NAME] = mod[NAME]
            o[DESC] = mod[DESC]
            if show_all:
                o[MODEL] = self._update_bulk_add_model(
                    mod[MODEL]) if crunch_data else mod[BULK_ADD_MODEL_DATA]
        return json_response(return_body, 200)

    # NOTE(review): damaged in this copy. The docstring quotes are gone, the
    # ``semantic_types_post_put(`` call is unterminated, two ``else:`` lines
    # before the 500 returns are missing, the leading arguments of
    # ``_create_column(`` are missing, and the insert call around the final
    # document literal is missing.
    def bulk_add_models_post(self, model, column_model=DEFAULT_BULK_MODEL):
        Add a bulk add model.

        :param column_model: The model that all of the created columns should have
        :param model:        A dictionary of the model
        :return: Stats of the data added
        #### Assert the required elements exist
        if BAC_ID not in model: return "The given model must have an id", 400
        if BAC_NAME not in model:
            return "The given model must have a name", 400
        if BAC_DESC not in model:
            return "The given model must have a description", 400
        if BAC_GRAPH not in model:
            return "The given model must have a graph", 400
        if BAC_NODES not in model[BAC_GRAPH]:
            return "The given model must have nodes within the graph", 400
        if len(list(self.db.find({ID: model[BAC_ID]}))) > 0:
            return "Model id already exists", 409

        #### Parse and add the model
        # Try to add of the given semantic types and columns
        new_type_count = 0
        new_column_count = 0
        existed_type_count = 0
        existed_column_count = 0
        for n in model[BAC_GRAPH][BAC_NODES]:
            if n.get(BAC_USER_SEMANTIC_TYPES):
                for ust in n[BAC_USER_SEMANTIC_TYPES]:
                    semantic_status = self.semantic_types_post_put(
                        ust[BAC_CLASS][BAC_URI], ust[BAC_PROPERTY][BAC_URI],
                    # 201 = created, 409 = already existed, 400 = passed
                    # through to the caller unchanged.
                    if semantic_status[1] == 201:
                        new_type_count += 1
                    elif semantic_status[1] == 409:
                        existed_type_count += 1
                    elif semantic_status[1] == 400:
                        return semantic_status
                        return "Error occurred while adding semantic type: " + str(
                            ust), 500
                    column_status = self._create_column(
                        n[BAC_COLUMN_NAME], model[BAC_NAME], column_model)
                    if column_status[1] == 201:
                        new_column_count += 1
                    elif column_status[1] == 409:
                        existed_column_count += 1
                    elif column_status[1] == 400:
                        return column_status
                        return "Error occurred while adding column for semantic type: " + str(
                            ust), 500

        # Nothing bad happened when creating the semantic types and columns, so add the model to the DB
            ID: model["id"],
            NAME: model[BAC_NAME],
            DESC: model["description"],
            MODEL: column_model,
            BULK_ADD_MODEL_DATA: model
        return "Model and columns added, " + str(new_type_count) + " semantic types created, " + \
               str(existed_type_count) + " semantic types already existed, " + \
               str(new_column_count) + " columns created, and " + \
               str(existed_column_count) + " columns already existed.", 201

    def bulk_add_models_delete(self,
                               model_ids=None,
                               model_names=None,
                               model_desc=None):
        """
        Delete all of the bulk add models which fit the given parameters

        NOTE(review): the parameter list and the docstring quotes were missing
        from this copy. Parameter order/defaults are rebuilt from the parameter
        docs below -- confirm against the upstream source.

        :param model_ids:   The possible ids of the models to delete
        :param model_names: The possible names of the models to delete
        :param model_desc:  The possible descriptions of the models to delete
        :return: The amount of models deleted with a 200 if successful, otherwise an error message with the appropriate code
        """
        db_body = {DATA_TYPE: DATA_TYPE_MODEL}
        if model_ids is not None:
            db_body[ID] = {"$in": model_ids}
        if model_names is not None:
            db_body[NAME] = {"$in": model_names}
        # The description is matched exactly (no "$in").
        # NOTE(review): this filters on MODEL_DESC; confirm that it is the
        # same key the model documents are stored under.
        if model_desc is not None:
            db_body[MODEL_DESC] = model_desc
        deleted_count = self.db.delete_many(db_body).deleted_count

        if deleted_count < 1:
            return "No models were found with the given parameters", 404
        return str(deleted_count) + " models deleted successfully", 200

    ################ BulkAddModelData ################

    # NOTE(review): damaged in this copy. The docstring quotes are gone, the
    # call wrapped by ``list(`` is missing (only the filter fragments remain),
    # and the first operand of the conditional inside ``json_response(`` is
    # missing.
    def bulk_add_model_data_get(self, model_id, crunch_data):
        Returns the current state of the bulk add model

        :param model_id:    The id of the model to get
        :param crunch_data: False if learnedSemanticTypes should not be generated and the version in the db should be used instead, note that the data in the db is updated every time a get is run with crunch_data=true
        :return: The current state of the bulk add model
        db_result = list(
                DATA_TYPE: DATA_TYPE_MODEL,
                ID: model_id
        if len(db_result) < 1:
            return "A model was not found with the given id", 404
        if len(db_result) > 1:
            return "More than one model was found with the given id", 500
        db_result = db_result[0]
        return json_response(
            if crunch_data else db_result[BULK_ADD_MODEL_DATA], 200)

    # NOTE(review): damaged in this copy. The docstring quotes are gone, the
    # body of ``if n.get(BAC_COLUMN_NAME):`` (collecting ``column_data``) is
    # missing, the first argument of ``semantic_types_column_data_post_put(``
    # is truncated, the body of ``if result == 201:`` is missing, and the
    # ``else:`` before the 500 return is missing.
    def bulk_add_model_data_post(self, model_id, column_model, data):
        Add data to the service with a bulk add model

        :param model_id:     The id of the model to add off of
        :param column_model: The model of the columns being used with that model
        :param data:         The list of dictionaries with all of the data to add
        :return: A conformation message with a 201 if it was successful, otherwise an error message with the appropriate code
        # Get the model and parse the json lines
        model = list(self.db.find({DATA_TYPE: DATA_TYPE_MODEL, ID: model_id}))
        if len(model) < 1:
            return "The given model was not found", 404
        if len(model) > 1:
            return "More than one model was found with the id", 500
        # From here on ``model`` is the stored bulk-add model data, not the
        # database document.
        model = model[0][BULK_ADD_MODEL_DATA]
        # Get all of the data in each column
        for n in model[BAC_GRAPH][BAC_NODES]:
            column_data = []
            for line in data:
                if n.get(BAC_COLUMN_NAME):
            # Add it to the db
            if n.get(BAC_USER_SEMANTIC_TYPES):
                for ust in n[BAC_USER_SEMANTIC_TYPES]:
                    # Only the status code ([1]) of the result is kept.
                    result = self.semantic_types_column_data_post_put(
                            n[BAC_COLUMN_NAME], model[BAC_NAME], column_model),
                        column_data, False)[1]
                    if result == 201:
                    elif result == 404:
                        return "A required column was not found", 404
                        return "Error occurred while adding data to the column", 500

        return "Data successfully added to columns", 201
# Example #9
    # Handler for POST on a file resource: reads the new text from the JSON
    # request body under the "data" key and answers 204 with an empty body.
    # NOTE(review): the call that received the filter/update documents below
    # is missing from this copy (presumably a single-document update on the
    # files collection -- confirm).
    def post(self, file_id):
        new_text = request.get_json()['data']
            {'_id': ObjectId(file_id)},
            {'$set': {'file_text': new_text}},
        return '', 204

# Route: the file id is taken from the URL path.
api.add_resource(FileServer, '/<string:file_id>')

# Entry point: expects an IP and a port on the command line.
# NOTE(review): this block lost lines in this copy. The calls that received
# the url/json arguments, the call ending in ``host=..., port=...)``, and the
# ``else:`` before the usage message are missing.
if __name__ == '__main__':
    if len(sys.argv) == 3:
        # WERKZEUG_RUN_MAIN == 'true' presumably identifies the reloader's
        # child process, so the init runs once -- confirm.
        if os.environ.get('WERKZEUG_RUN_MAIN') == 'true':
            print('Initing node')
                server_util.url_builder(DS_ADDR[0], DS_ADDR[1], 'config'),
                json={'ip': sys.argv[1], 'port': sys.argv[2]}
            ), host=sys.argv[1], port=int(sys.argv[2]))
            server_util.url_builder(DS_ADDR[0], DS_ADDR[1], 'config'),
            json={'ip': sys.argv[1], 'port': sys.argv[2]}

        # Catastrophic delete EVERYTHING if node goes down! (Purposeful)
        print('Supply an IP and Port')
from scrapy.spiders import Spider
from scrapy.selector import Selector
from basic_crawler.items import BasicCrawlerItem
from scrapy.http import Request
import re
from pymongo import MongoClient

# Module-level state shared with MySpider.parse: the ``links`` collection of the
# ``db`` database and the list of already visited links.
# NOTE: ``global`` at module level has no effect; the names are module globals
# either way.
global db
db = MongoClient().db.links
# NOTE(review): as shown, 'Connected to DB' is printed when ``db`` is falsy and
# both "clean" messages are printed on success -- the failure message and the
# ``else:`` lines appear to be missing. Also, PyMongo 4 collections raise
# NotImplementedError on truth testing (compare with None instead) -- confirm
# against the PyMongo version in use.
if not db:
    print('Connected to DB')
# Remove every document from the collection on import.
res = db.delete_many({})
if res.acknowledged:
    print('Clean successfull')
    print('Clean unsuccessfull')

global visited_links
visited_links = []

class MySpider(Spider):
    # Scrapy spider registered under the name "basic_crawler".
    # NOTE(review): allowed_domains and start_urls hold empty strings here;
    # presumably the real values were removed -- fill them in before running.
    name = "basic_crawler"
    allowed_domains = ['']
    start_urls = [""]

    def parse(self, response):
        # Callback for a downloaded page: wraps the response in a Selector and
        # records its URL. Uses the module-level ``db`` and ``visited_links``.
        # NOTE(review): the method appears to be cut off after these
        # statements; ``hxs`` and ``url`` are not used in the visible part.
        global db
        global visited_links
        hxs = Selector(response)
        url = response.url
Exemple #11
class TestDeviceDatabaseMongoDB(unittest.TestCase):
    # Tests the device database wrapper returned by
    # tests_util.get_mongo_database() and cross-checks it through a direct
    # PyMongo handle on the "Hestia"/"testing" collection.
    # NOTE(review): several statements appear to be missing from this class
    # (the tearDown body, the statements that store ``device_data`` before each
    # check, and brackets in _get_device_data); it does not parse as shown.
    def setUp(self):
        # Wrapper under test, plus a raw collection handle for verification.
        self._database = tests_util.get_mongo_database()
        self._direct_database = MongoClient()["Hestia"]["testing"]

    def tearDown(self):
        # NOTE(review): body missing -- presumably clears the test collection.

    def test_get_all_devices(self):
        # Expects get_all_devices() to return 1 and then 2 devices.
        # NOTE(review): no visible statement stores device_data between the
        # id assignment and each lookup.
        device_data = self._get_device_data()
        device_data["_id"] = str(ObjectId())

        retrieved_devices = self._database.get_all_devices()

        self.assertEqual(1, len(retrieved_devices))

        device_data["_id"] = str(ObjectId())

        retrieved_devices = self._database.get_all_devices()

        self.assertEqual(2, len(retrieved_devices))

    def test_get_device(self):
        # Expects get_device() to return a Device instance for a known id.
        device_data = self._get_device_data()
        device_data["_id"] = str(ObjectId())

        device = self._database.get_device(device_data["_id"])

        self.assertIsInstance(device, Device)

    def test_add_device(self):
        # Expects the collection to grow by one document.
        # NOTE(review): the call that adds the device is not visible.
        # NOTE(review): Collection.count() was removed in PyMongo 4;
        # count_documents({}) is the replacement -- confirm target version.
        device_data = self._get_device_data()
        initial_count = self._direct_database.count()


        self.assertEqual(initial_count + 1, self._direct_database.count())

    def test_delete_device(self):
        # Expects the collection to shrink by one document.
        # NOTE(review): the insert and the delete call are not visible.
        device_data = self._get_device_data()
        device_data["_id"] = str(ObjectId())
        initial_count = self._direct_database.count()


        self.assertEqual(initial_count - 1, self._direct_database.count())

    def test_update_field(self):
        # update_field() must change the "name" stored for the device.
        device_data = self._get_device_data()
        device_data["_id"] = str(ObjectId())
        new_name = "Hestia"

        self._database.update_field(device_data["_id"], "name", new_name)

        device = self._direct_database.find_one({"_id": device_data["_id"]})
        self.assertEqual(device["name"], new_name)

    def test_get_field(self):
        # get_field() must return the stored "name".
        # NOTE(review): _get_device_data() as shown defines no "name" key.
        device_data = self._get_device_data()
        device_data["_id"] = str(ObjectId())

        name = self._database.get_field(device_data["_id"], "name")

        self.assertEqual(device_data["name"], name)

    def test_get_activator_field(self):
        # get_activator_field() must return the activator's stored "name".
        device_data = self._get_device_data()
        device_data["_id"] = str(ObjectId())

        activators = self._direct_database.find_one(
            {"_id": device_data["_id"]})["activators"]
        # Activators are keyed by id; take the first one.
        act_id = list(activators.keys())[0]

        activator_name = self._database.get_activator_field(
            device_data["_id"], act_id, "name")

        real_name = device_data["activators"][act_id]["name"]

        self.assertEqual(real_name, activator_name)

    def test_update_activator_field(self):
        # update_activator_field() must change the activator's stored "name".
        device_data = self._get_device_data()
        device_data["_id"] = str(ObjectId())

        activators = self._direct_database.find_one(
            {"_id": device_data["_id"]})["activators"]
        act_id = list(activators.keys())[0]

        new_name = "new_name"
        self._database.update_activator_field(device_data["_id"], act_id,
                                              "name", new_name)

        device_in_db = self._direct_database.find_one(
            {"_id": device_data["_id"]})
        activator_name_in_db = device_in_db["activators"][act_id]["name"]

        self.assertEqual(activator_name_in_db, new_name)

    def _get_device_data(self):
        # Builds a device document and re-keys its activator list into a dict
        # keyed by freshly generated ObjectId strings.
        # NOTE(review): closing brackets (and possibly further keys) of the
        # literal below are missing.
        device_data = {
            "options": {
                "bridge_ip": "",
                "bridge_port": 90
            "activators": [{
                "module": "plugins.mock.activators.ActivateLock",
                "rank": 0,
                "class": "ActivateLock",
                "name": "Activate",
                "type": "bool",
                "state": True
        activators = device_data.pop("activators", None)
        device_data["activators"] = {}
        for activator in activators:
            _id = str(ObjectId())
            device_data["activators"][_id] = activator
        return device_data
Exemple #12
class DataBase:
    # Thin wrapper around one MongoDB collection of the 'questionnaire'
    # database; documents have the keys '_id', 'question' and 'answers'
    # (see add()).
    # NOTE(review): some statements appear to be missing from this class (the
    # closing of the update_one call in __lambda_fun, the insert in add(), and
    # the writer calls in show_all()/show_ans()); it does not parse as shown.
    def __init__(self, name: str, auth_data: dict):
        # auth_data must provide 'user', 'password' and a 'questionnaire'
        # mapping from ``name`` to the collection name.
        db_name = auth_data['questionnaire'][name]
        user = auth_data['user']
        password = auth_data['password']

        # NOTE(review): the URI has no '@host/' part between the password and
        # db_name; presumably it was removed -- restore before use.
        appeal = f'mongodb+srv://{user}:{password}{db_name}?retryWrites=true&w=majority'

        self.db = MongoClient(appeal)['questionnaire'][db_name]

    def __lambda_fun(self):
        # Installs small helpers as instance attributes.
        # NOTE(review): no call to this method is visible in the class, yet
        # the other methods rely on the attributes it defines.
        # Document count minus one, used as "last id"; loads all documents.
        self._get_last_id = lambda: len(list(self.db.find())) - 1
        self.get_list_data = lambda: list(self.db.find())
        self.get_questions_ids = lambda: [el['_id'] for el in self.get_list_data()]
        self.remove_all_data = lambda: self.db.delete_many({})
        self.remove_questions = lambda *ids: [self.db.delete_one({'_id': id_}) for id_ in ids]

        # Pulls one value out of a question's 'answers' array.
        # NOTE(review): the closing '})' of this call is missing.
        self._remove_arg = lambda question_id, arg: self.db.update_one({
            '_id': question_id
        }, {
            '$pull': {'answers': arg}

    def add(self, data: str, question_id: int = None):
        # Without question_id: builds a new question document with the next id.
        # With question_id: pushes ``data`` onto that question's answers.
        # NOTE(review): ``post`` is built but never stored or returned in the
        # visible code, and the update below also runs when question_id is None.
        if question_id is None:
            last_id = self._get_last_id()
            post = {'_id': last_id + 1, 'question': data, 'answers': []}


        self.db.update_one({'_id': question_id}, {'$push': {'answers': data}})

    def show_all(self, file_name: str):
        # Opens '<file_name>.csv' for a ';'-delimited dump of all documents;
        # the columns are the keys of the first document.
        # NOTE(review): no header/row write calls are visible.
        rows = self.get_list_data()
        columns = [*rows[0].keys()] if rows else []

        with open(f'{file_name}.csv', 'w', newline='') as file:
            writer = csv.DictWriter(file, delimiter=';', fieldnames=columns)

    def show_ans(self, question_id: int):
        # Opens '<question_id>.csv' for the enumerated answers of one question.
        # NOTE(review): no write calls are visible; find_one() returns None for
        # an unknown id, which would make the subscript below raise.
        file_name = str(question_id)

        columns = ['answer_id', 'answer']
        rows = enumerate(self.db.find_one({'_id': question_id})['answers'])

        with open(f'{file_name}.csv', 'w', newline='') as file:
            writer = csv.writer(file, delimiter=';')

    def remove_answers(self, question_id: int, *ids: int):
        # Removes the answers at the given list positions from one question.
        # NOTE(review): ``answers`` stays unbound if no question matches, and
        # '$pull' removes every answer equal to the selected value.
        questions = self.get_list_data()

        for question in questions:
            if question['_id'] == question_id:
                answers = question['answers']

        for id_ in ids:
            answer = answers[id_]
            self._remove_arg(question_id, answer)
Exemple #13
    cursor = cnx.cursor(dictionary=True)

    # shop_ids = [12988, 12382, 11077, 12823, 10377, 15081, 2397]
    shop_ids = [17065, 17066]
    sql = 'select s.*, ' \
          'from s ' \
          'inner join f_shop.shop_owner o on s.shop_owner_id = ' \
          'where in (%s)' % ','.join(['%s'] * len(shop_ids))
    cursor.execute(sql, shop_ids)

    mc = MongoClient(host='', port=27017)['profile']['shop']

    # Clean up old data
    mc.delete_many({'': {'$in': shop_ids}})
    new_shop_records = [{
        'tel': record['tel'],
        'password': record['password'],
        'name': record['shop_name'],
        'avatar': record['avatar'],
        'status': 'STATUS_INIT',
        'create_time': record['create_time'],
        'accounts': [],
        'loc': {
            'province': record['province'],
            'province_code': record['province_code'],
            'district': record['district'],
            'city': record['city'],
            'address': ifnull(record['address']),
            'street_code': record['street_code'],
class Server(object):
    def __init__(self):
        """Open the ``data.service`` collection and build the classifier."""
        # MongoClient is created with no arguments, i.e. its default settings.
        mongo_client = MongoClient()
        self.db = mongo_client.data.service
        self.classifier = MyRandomForest({}, {}, DATA_MODEL_PATH)

    ################ Stuff for use in this file ################

    def _create_column(self, column, type_id, column_name, source_name, model, force=False):
        # NOTE(review): the triple-quote delimiters around the description
        # below are missing, so it is parsed as code. The text documents a
        # ``data`` parameter that is not in the signature and omits ``column``.
        Create a column in a semantic type and return the column's id if it was created successfully.

        Notes: If the column already exists and force is not set to true, a 409 will be returned and no data will be modified.

        :param type_id:     Id of the semantic type this column belongs to
        :param column_name: Name of the column to be created
        :param source_name: Name of the source of the column to be created
        :param model:       Model of the column to be created
        :param data:        Data which will be added to the column on creation
        :param force:       Force create the column, if this is true and the column exists the old column will be deleted (with all of its data) before creation
        :return: The id of the new column and a response code of 201 if the creation was successful, otherwise it will be an error message with the appropriate error code
        column_id = get_column_id(type_id, column_name, source_name, model)
        db_body = {ID: column_id, DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: type_id, COLUMN_NAME: column_name,
                   SOURCE_NAME: source_name, MODEL: model}
        # NOTE(review): as shown, the 409 is returned when ``force`` is true,
        # which contradicts the description above; the delete-on-force
        # statement, the ``else:`` line and the insert of the new column
        # appear to be missing.
        if self.db.find_one(db_body):
            if force:
                return "Column already exists", 409
        return column_id, 201

    def _predict_column(self, column_name, source_names, data):
        # NOTE(review): the triple-quote delimiters around the description
        # below are missing, so it is parsed as code.
        Predicts the semantic type of a column.

        :param column_name:  Name of the column
        :param source_names: List of source names
        :param data:         The data to predict based opon
        :return: A list of dictionaries which each contain the semantic type and confidence score
        # Only the first source name is used to build the Column.
        att = Column(column_name, source_names[0])

        # print(data)
        # NOTE(review): the loop body is missing -- presumably each value is
        # added to ``att``.
        for value in data:
        att.semantic_type = "to_predict"
        return att.predict_type(searcher.search_types_data(INDEX_NAME, source_names), searcher.search_similar_text_data(INDEX_NAME, att.value_text, source_names), self.classifier)

    def _update_bulk_add_model(self, model, column_model):
        # NOTE(review): the triple-quote delimiters around the description
        # below are missing, so it is parsed as code.
        Updates the bulk add model in the db and also returns it.

        :param model:        The current bulk add model
        :param column_model: The model of the columns which are being updated against
        :return: The updated bulk add model
        for n in model[BAC_GRAPH][BAC_NODES]:
            # Only nodes that carry a column name are re-predicted.
            if n.get(BAC_COLUMN_NAME):
                # Only the first user semantic type of the node is used.
                # NOTE(review): the get_type_id(...) call below is not closed;
                # its second argument line appears to be missing.
                column_id = get_column_id(get_type_id(n[BAC_USER_SEMANTIC_TYPES][0][BAC_CLASS][BAC_URI],
                                          n[BAC_COLUMN_NAME], model[BAC_NAME], column_model)
                prediction = self._predict_column(n[BAC_COLUMN_NAME], [model[BAC_NAME]],
                                                  self.db.find_one({DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id})[DATA])
                n[BAC_LEARNED_SEMANTIC_TYPES] = []
                # NOTE(review): ``od`` is built but never stored in the visible
                # code; an append to n[BAC_LEARNED_SEMANTIC_TYPES] (and possibly
                # a score entry) appears to be missing.
                for t in prediction:
                    type_info = decode_type_id(t[SL_SEMANTIC_TYPE])
                    od = collections.OrderedDict()
                    od[BAC_CLASS] = {BAC_URI: type_info[0]}
                    od[BAC_PROPERTY] = {BAC_URI: type_info[1]}
        # Persist the (mutated) model back onto its document.
        self.db.update_one({DATA_TYPE: DATA_TYPE_MODEL, ID: model[BAC_ID]}, {"$set": {BULK_ADD_MODEL_DATA: model}})
        return model

    ################ Predict ################

    def predict_post(self, data, namespaces=None, column_names=None, source_names=None, models=None):
        # NOTE(review): the triple-quote delimiters around the description
        # below are missing, so it is parsed as code. Several loop bodies
        # further down are missing as well; each is marked where it occurs.
        Predicts the semantic type of the given data.

        :param namespaces:   List of allowed namespaces
        :param column_names: List of allowed column names
        :param source_names: List of allowed source names
        :param models:       List of allowed column models
        :param data:         List of the data values to predict.
        :return: A return message (if it is successful this will be a list of the predicted types) and a return code
        # Strip whitespace and drop empty values before predicting.
        data = [x.strip() for x in data]
        data = [x for x in data if x]
        if not data:
            return "Predicting data cannot be empty", 500
        if source_names is None:
            # If no source names are given just use all of the source names in the db
            source_names = set()
            # NOTE(review): loop body missing -- presumably collects each
            # column's source name into the set.
            for col in self.db.find({DATA_TYPE: DATA_TYPE_COLUMN}):
            source_names = list(source_names)
        if len(source_names) < 1: return "You must have columns to be able to predict", 400

        #### Predict the types
        ## Do the actual predicting using the semantic labeler
        # NOTE(review): column_names defaults to None, so column_names[0]
        # raises TypeError unless the caller always passes a non-empty list.
        predictions = self._predict_column(column_names[0], source_names, data)
        if len(predictions) < 1: return "No matches found", 404

        ## Filter the results
        allowed_ids_namespaces = None
        allowed_ids_models = None
        all_allowed_ids = None
        if namespaces is not None:
            allowed_ids_namespaces = set()
            current_allowed_types = list(
                self.db.find({DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE, NAMESPACE: {"$in": namespaces}}))
            # NOTE(review): loop body missing.
            for prediction in current_allowed_types:
        if models:
            allowed_ids_models = set()
            current_allowed_types = list(self.db.find({DATA_TYPE: DATA_TYPE_COLUMN, MODEL: {"$in": models}}))
            # NOTE(review): loop body missing.
            for c in current_allowed_types:
        # Combine the two filters: intersect when both are given, otherwise use
        # whichever one is present (None means "no filtering").
        if allowed_ids_namespaces is not None and allowed_ids_models is not None:
            all_allowed_ids = allowed_ids_namespaces & allowed_ids_models
        elif allowed_ids_namespaces is not None and allowed_ids_models is None:
            all_allowed_ids = allowed_ids_namespaces
        elif allowed_ids_namespaces is None and allowed_ids_models is not None:
            all_allowed_ids = allowed_ids_models
        return_body = []
        for prediction in predictions:
            for type_id, exact_score in prediction[1]:
                if all_allowed_ids is not None:
                    # NOTE(review): the body of this check is missing --
                    # presumably it skips disallowed types.
                    if prediction[SL_SEMANTIC_TYPE] not in all_allowed_ids:
                obj_dict = {TYPE_ID_PATH: type_id, SCORE: exact_score}
                type_class_property = decode_type_id(type_id)
                obj_dict[CLASS] = type_class_property[0]
                obj_dict[PROPERTY] = type_class_property[1]
                # NOTE(review): obj_dict is never added to return_body in the
                # visible code.
        # Highest score first.
        return_body.sort(key=lambda x: x[SCORE], reverse=True)
        return json_response(return_body, 200)

    ################ SemanticTypes ################

    def semantic_types_get(self, class_=None, property_=None, namespaces=None, source_names=None, column_names=None,
                           column_ids=None, models=None, return_columns=False, return_column_data=False):
        # NOTE(review): the triple-quote delimiters around the description
        # below are missing, so it is parsed as code.
        Returns all of the semantic types (and optionally their columns and columns' data) filtered by the given parameters.

        :param class_:             The class of the semantic types to get
        :param property_:          The property of the semantic types to get
        :param namespaces:         The possible namespaces of the semantic types to get
        :param source_names:       The possible source names of at least one column of a semantic type must have
        :param column_names:       The possible column names of at least one column of a semantic type must have
        :param column_ids:         The possible column ids of at least one column of a semantic type must have
        :param models:             The possible column model of at least one column of a semantic type must have
        :param return_columns:     True if all of the columns (but not the data in the columns) should be returned with the semantic types
        :param return_column_data: True if all of the columns and their data should be returned with the semantic types
        :return: All of the semantic types which fit the following parameters
        # Find all of the type ids that satisfy the class, property, and namespaces
        # NOTE(review): ``db_body`` is used below without a visible
        # initialisation -- presumably a semantic-type query dict.
        if class_ is not None: db_body[CLASS] = class_
        if property_ is not None: db_body[PROPERTY] = property_
        if namespaces is not None: db_body[NAMESPACE] = {"$in": namespaces}
        possible_result = list(self.db.find(db_body))
        possible_type_ids = set()
        # NOTE(review): loop body missing -- presumably collects each type id.
        for t in possible_result:

        # Find all of the type ids from the columns which satisfy the other parameters
        if source_names or column_names or column_ids or models:
            db_body = {DATA_TYPE: DATA_TYPE_COLUMN}
            if source_names is not None: db_body[SOURCE_NAME] = {"$in": source_names}
            if column_names is not None: db_body[COLUMN_NAME] = {"$in": column_names}
            if column_ids is not None: db_body[ID] = {"$in": column_ids}
            if models is not None: db_body[MODEL] = {"$in": models}
            other_possible_ids = set()
            # NOTE(review): loop body missing -- presumably collects the type
            # id of each matching column.
            for col in self.db.find(db_body):
            # Keep only types that satisfy both the type and column filters.
            possible_type_ids = possible_type_ids & other_possible_ids

        # Construct the return body
        return_body = []
        for t in possible_result:
            if t[ID] in possible_type_ids:
                o = collections.OrderedDict()
                o[TYPE_ID_PATH] = t[ID]
                o[CLASS] = t[CLASS]
                o[PROPERTY] = t[PROPERTY]
                o[NAMESPACE] = t[NAMESPACE]
                # NOTE(review): ``o`` is never added to return_body in the
                # visible code.

        # Add the column data if requested
        # NOTE(review): only return_columns gates this block; return_column_data
        # is merely forwarded to clean_columns_output.
        if return_columns:
            db_body = {DATA_TYPE: DATA_TYPE_COLUMN}
            for type_ in return_body:
                db_body[TYPE_ID] = type_[TYPE_ID_PATH]
                type_[COLUMNS] = clean_columns_output(self.db.find(db_body), return_column_data)

        if len(return_body) < 1: return "No Semantic types matching the given parameters were found", 404
        return json_response(return_body, 200)

    def semantic_types_post_put(self, class_, property_, force=False):
        # NOTE(review): the triple-quote delimiters around the description
        # below are missing, so it is parsed as code.
        Creates a semantic type and returns the id if it was successful.

        Notes: If the type already exists and force is not set to true a 409 will be returned and no data will be modified

        :param class_:    The class of the semantic type, note that this must be a valid URL
        :param property_: The property of the semantic type
        :param force:     Force create the semantic type, if this is true and the type already exists the existing type (and all of its columns and data) will be deleted before creation
        :return: The id of the new semantic type and a response code of 201 if the creation was successful, otherwise it will be an error message with the appropriate error code
        # Normalise by dropping trailing slashes.
        class_ = class_.rstrip("/")
        property_ = property_.rstrip("/")

        ## Verify that class is a valid uri and namespace is a valid uri
        # The namespace is the class with '#' treated as '/' and the last path
        # segment removed.
        # NOTE(review): no validation statement is visible despite the comment
        # above.
        namespace = "/".join(class_.replace("#", "/").split("/")[:-1])

        ## Actually add the type
        type_id = get_type_id(class_, property_)
        db_body = {ID: type_id, DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE, CLASS: class_, PROPERTY: property_,
                   NAMESPACE: namespace}
        # NOTE(review): as shown, force=True deletes the type's columns and then
        # returns 409; the deletion of the type itself, the ``else:`` line and
        # the insert of the new type appear to be missing.
        if self.db.find_one(db_body):
            if force:
                self.db.delete_many({DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: type_id})
                return type_id, 409
        return type_id, 201

    def semantic_types_delete(self, class_=None, property_=None, type_ids=None, namespaces=None, source_names=None,
                              column_names=None, column_ids=None, models=None, delete_all=False):
        # NOTE(review): the triple-quote delimiters around the description
        # below are missing, so it is parsed as code. Several branch headers
        # and loop bodies are missing too; each is marked where it occurs.
        # NOTE: the print statements below use Python 2 syntax.
        Deletes all of the semantic types (and all of their columns/data) that fit the given parameters.

        :param class_:       The class of the semantic types to delete
        :param property_:    The property of the semantic types to delete
        :param type_ids:     The possible ids of the semantic types to delete
        :param namespaces:   The possible namespaces of the semantic types to delete
        :param source_names: The possible source names of at least one column of a semantic type must have
        :param column_names: The possible column names of at least one column of a semantic type must have
        :param column_ids:   The possible column ids of at least one column of a semantic type must have
        :param models:       The possible column model of at least one column of a semantic type must have
        :param delete_all:   Set this to true if all semantic types should be deleted
        :return: The amount of semantic types deleted and a 200 if it worked, otherwise and error message with the appropriate code
        # With no filter at all, deletion must be requested explicitly.
        if class_ is None and property_ is None and type_ids is None and namespaces is None and source_names is None and column_names is None and column_ids is None and models is None and not delete_all:
            return "To delete all semantic types give deleteAll as true", 400
            # NOTE(review): unreachable as shown; presumably guarded by a
            # missing ``if delete_all:`` line.
            return "All " + str(self.db.delete_many({DATA_TYPE: {"$in": [DATA_TYPE_SEMANTIC_TYPE,
                                                                         DATA_TYPE_COLUMN]}}).deleted_count) + " semantic types and their data were deleted", 200

        print str(class_)+" "+str(property_)+" "+str(type_ids)+" "+str(namespaces)+" "+str(source_names)+" "+str(column_names)+" "+str(column_ids)+" "+str(models)+" "+str(delete_all)

        # Find the parent semantic types and everything below them of everything which meets column requirements
        type_ids_to_delete = []
        db_body = {DATA_TYPE: DATA_TYPE_COLUMN}
        db_body_id = {DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE}
        if type_ids is not None:
            db_body[TYPE_ID] = {"$in": type_ids}
            db_body_id[ID] = {"$in": type_ids}

        if source_names is not None: db_body[SOURCE_NAME] = {"$in": source_names}
        if column_names is not None: db_body[COLUMN_NAME] = {"$in": column_names}
        if column_ids is not None: db_body[COLUMN_ID_PATH] = {"$in": column_ids}
        if models is not None: db_body[MODEL] = {"$in": models}
        for col in self.db.find(db_body):
            print "col[TYPE_ID] = "+str(col[TYPE_ID])
            # NOTE(review): body missing -- presumably appends the type id.
            if col[TYPE_ID] not in type_ids_to_delete:
        for col in self.db.find(db_body_id):
            print "col[ID] = "+str(col[ID])
            # NOTE(review): body missing -- presumably appends the id.
            if col[ID] not in type_ids_to_delete:
        # Find the semantic types which meet the other requirements and delete all types which need to be
        possible_types = []
        # NOTE(review): these filters are added to the column query dict from
        # above; a re-initialisation of db_body may be missing here.
        if class_ is not None: db_body[CLASS] = class_
        if property_ is not None: db_body[PROPERTY] = property_
        if namespaces is not None: db_body[NAMESPACE] = {"$in": namespaces}

        if type_ids is None and source_names is None and column_names is None and column_ids is None and models is None:
            deleted = self.db.delete_many(db_body).deleted_count
            # NOTE(review): an ``else:`` line and the bodies of the three
            # membership checks below appear to be missing.
            for t in self.db.find(db_body):
                if t[ID] not in possible_types:
            for t in self.db.find(db_body_id):
                if t[ID] not in possible_types:
            for id_ in type_ids_to_delete:
                if id_ not in possible_types:
            # NOTE(review): this column query is built but no delete using it
            # is visible.
            db_body = {DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: {"$in": type_ids_to_delete}}
            deleted = self.db.delete_many(
                {DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE, ID: {"$in": type_ids_to_delete}}).deleted_count
        if deleted < 1: return "No semantic types with the given parameters were found", 404
        return str(deleted) + " semantic types matched parameters and were deleted", 200

    ################ SemanticTypesColumns ################

    def semantic_types_columns_get(self, type_id, column_ids=None, column_names=None, source_names=None, models=None,
                                   return_column_data=False):
        """
        Returns all of the columns in a semantic type that fit the given parameters.

        :param type_id:            The id of the semantic type
        :param column_ids:         The possible ids of the columns to be returned
        :param column_names:       The possible names of the columns to be returned
        :param source_names:       The possible source names of the columns to be returned
        :param models:             The possible models of the columns to be returned
        :param return_column_data: True if all of the data in the column should be returned with the columns
        :return: All of the columns in the semantic type that fit the given parameters, or an error message with a 404 if none match
        """
        # Every filter that was supplied narrows the query with an "$in" match;
        # filters left as None do not constrain the result.
        db_body = {DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: type_id}
        if source_names is not None: db_body[SOURCE_NAME] = {"$in": source_names}
        if column_names is not None: db_body[COLUMN_NAME] = {"$in": column_names}
        if column_ids is not None: db_body[ID] = {"$in": column_ids}
        if models is not None: db_body[MODEL] = {"$in": models}
        result = list(self.db.find(db_body))
        if not result: return "No columns matching the given parameters were found", 404
        return json_response(clean_columns_output(result, return_column_data), 200)

    def semantic_types_columns_post_put(self, type_id, column_name, source_name, model, data=[], force=False):
        # NOTE(review): the triple-quote delimiters around the description
        # below are missing, so it is parsed as code.
        # NOTE: ``data=[]`` is a shared mutable default; the visible code only
        # reads or rebinds it, never mutates it in place.
        Create a column in a semantic type, optionally with data.

        :param type_id:     Id of the semantic type to create the column in
        :param column_name: The name of the column to be created
        :param source_name: The name of the source of the column to be created
        :param model:       The model of the column to be created
        :param data:        The (optional) list of data to put into the column on creation
        :param force:       True if the column should be replaced if it already exists
        :return: The id of the newly created with a 201 if it was successful, otherwise an error message with the appropriate error code
        column = Column(column_name, source_name)
        column.semantic_type = type_id

        #if the size of the training data is MORE than a threshold value, then sample the threshold values randomly
        if(len(data)>SAMPLE_SIZE): data = random.sample(data, SAMPLE_SIZE)

        # NOTE(review): the loop body is missing -- presumably each value is
        # added to ``column``.
        for value in data:
        result = self._create_column(column, type_id, column_name, source_name, model, force)
        return result

    def semantic_types_columns_delete(self, type_id, column_ids=None, column_names=None, source_names=None,
                                      models=None):
        """
        Delete all of the columns in a semantic type that match the given parameters.

        :param type_id:      The id of the semantic type to delete the columns from
        :param column_ids:   The possible ids of the columns to delete
        :param column_names: The possible names of the columns to delete
        :param source_names: The possible source names of the columns to delete
        :param models:       The possible models of the columns to delete
        :return: The number of columns deleted with a 200 if successful, otherwise an error message with an appropriate error code
        """
        db_body = {DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: type_id}
        if source_names is not None: db_body[SOURCE_NAME] = {"$in": source_names}
        if column_names is not None: db_body[COLUMN_NAME] = {"$in": column_names}
        if column_ids is not None: db_body[ID] = {"$in": column_ids}
        if models is not None: db_body[MODEL] = {"$in": models}
        # Delete directly and inspect the count instead of first loading every
        # matching document just to test for existence.
        deleted = self.db.delete_many(db_body).deleted_count
        if deleted < 1: return "No columns were found with the given parameters", 404
        return str(deleted) + " columns deleted successfully", 200

    ################ SemanticTypesColumnData ################

    def semantic_types_column_data_get(self, column_id):
        """
        Returns all of the data in the column

        :param column_id: Id of the column to get the data from
        :return: The column and all of its info with a 200, a 404 if no column has that id, or a 500 if the id is not unique
        """
        result = list(self.db.find({DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id}))
        if len(result) < 1: return "No column with that id was found", 404
        # Column ids are expected to be unique; more than one hit means the
        # stored data is inconsistent.
        if len(result) > 1: return "More than one column was found with that id", 500
        return json_response(clean_column_output(result[0]), 200)

    def semantic_types_column_data_post_put(self, column_id, body, force=False):
        # NOTE(review): the triple-quote delimiters around the description
        # below are missing, so it is parsed as code.
        Add or replace data on an existing column

        Notes: If the column does not exist a 404 will be returned

        :param column_id: Id of the column to add/replace the data of
        :param body:      An array of the new data
        :param force:     True if the current data in the column should be replaced, false if the new data should just be appended
        :return: A conformation with a 201 if it was added successfully or an error message with an appropriate error code if it was not successful

        # NOTE(review): find_one() returns a document (dict) or None, neither
        # of which has ``matched_count``; the two checks below would raise
        # AttributeError instead of returning 404/500. A ``None`` check (or a
        # count query) is probably what was intended -- confirm.
        column_data = self.db.find_one({DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id})
        if column_data.matched_count < 1: return "No column with that id was found", 404
        if column_data.matched_count > 1: return "More than one column was found with that id", 500

        column = Column(column_data[COLUMN_NAME], column_data[SOURCE_NAME], get_type_from_column_id(column_id))
        # NOTE(review): the bodies of the ``if`` and the ``for`` below are
        # missing -- presumably the existing data is loaded into ``column``
        # when not forcing, then each new value is added.
        if not force:

        for value in body:

        # NOTE(review): ``data`` is not written back to the db in the visible
        # code.
        data = column.to_json()

        return "Column data updated", 201

    def semantic_types_column_data_delete(self, column_id):
        """
        Delete the data from the column with the given id

        :param column_id: Id of the column to delete the data from
        :return: A deletion conformation with a 200 if successful, otherwise an error message with an appropriate error code
        """
        # Empty the DATA array of every column document with this id; the
        # match count doubles as the existence/uniqueness check.
        result = self.db.update_many({DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id}, {"$set": {DATA: []}})
        if result.matched_count < 1: return "No column with that id was found", 404
        if result.matched_count > 1: return "More than one column was found with that id", 500
        # NOTE(review): ``column`` is fetched but not used below; a statement
        # that consumed it may be missing from this method -- confirm.
        column = self.db.find_one({DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id})

        # NOTE(review): this removes one column document selected by the
        # semantic type id only, not by column id -- confirm this is intended.
        self.db.delete_one({DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: get_type_from_column_id(column_id)})

        self.db.delete_one({DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id})
        return "Column data deleted", 200

    ################ BulkAddModels ################

    def bulk_add_models_get(self, model_ids=None, model_names=None, model_desc=None, show_all=False, crunch_data=True):
        """
        Returns the current state of all of the bulk add models.

        :param model_ids:   The possible ids of the models to get
        :param model_names: The possible names of the models to get
        :param model_desc:  The possible descriptions of the models to get
        :param show_all:    True if the whole model should be returned
        :param crunch_data: False if learnedSemanticTypes should not be generated and the version in the db should be used instead, note that the data in the db is updated every time a get is run with crunch_data=true
        :return: All of the models that fit the given parameters
        """
        db_body = {DATA_TYPE: DATA_TYPE_MODEL}
        if model_ids is not None: db_body[ID] = {"$in": model_ids}
        if model_names is not None: db_body[NAME] = {"$in": model_names}
        # Unlike ids and names, the description is matched exactly.
        if model_desc is not None: db_body[MODEL_DESC] = model_desc
        db_result = list(self.db.find(db_body))
        if len(db_result) < 1: return "No models were found with the given parameters", 404

        # Construct the return body
        return_body = []
        for mod in db_result:
            o = collections.OrderedDict()
            o[MODEL_ID] = mod[ID]
            o[NAME] = mod[NAME]
            o[DESC] = mod[DESC]
            if show_all:
                if crunch_data:
                    # Re-runs prediction and persists the refreshed model.
                    o[MODEL] = self._update_bulk_add_model(mod[BULK_ADD_MODEL_DATA], mod[MODEL])
                else:
                    o[MODEL] = mod[BULK_ADD_MODEL_DATA]
            # Without this the response would always be an empty list.
            return_body.append(o)
        return json_response(return_body, 200)

    def bulk_add_models_post(self, model, column_model=DEFAULT_BULK_MODEL):
        """
        Add a bulk add model.

        For every user semantic type attached to a node of the model's graph the
        semantic type is created (or found to exist already) and a column is
        created for it.  Only when all of them succeeded is the model itself
        written to the database.

        :param column_model: The model that all of the created columns should have
        :param model:        A dictionary of the model
        :return: A ``(message, status)`` tuple: stats of the data added with a 201, a 400 if a required key is
                 missing (or a sub-call reported 400), a 409 if the model id already exists, otherwise a 500
        """
        #### Assert the required elements exist
        if BAC_ID not in model: return "The given model must have an id", 400
        if BAC_NAME not in model: return "The given model must have a name", 400
        if BAC_DESC not in model: return "The given model must have a description", 400
        if BAC_GRAPH not in model: return "The given model must have a graph", 400
        if BAC_NODES not in model[BAC_GRAPH]: return "The given model must have nodes within the graph", 400
        if len(list(self.db.find({ID: model[BAC_ID]}))) > 0: return "Model id already exists", 409

        #### Parse and add the model
        # Try to add all of the given semantic types and columns
        new_type_count = 0
        new_column_count = 0
        existed_type_count = 0
        existed_column_count = 0
        for n in model[BAC_GRAPH][BAC_NODES]:
            if n.get(BAC_USER_SEMANTIC_TYPES):
                for ust in n[BAC_USER_SEMANTIC_TYPES]:
                    # NOTE(review): this call was truncated after its second argument in the
                    # recovered source; confirm whether further arguments (e.g. a force flag)
                    # have to be passed.
                    semantic_status = self.semantic_types_post_put(ust[BAC_CLASS][BAC_URI], ust[BAC_PROPERTY][BAC_URI])
                    if semantic_status[1] == 201:
                        new_type_count += 1
                    elif semantic_status[1] == 409:
                        existed_type_count += 1
                    elif semantic_status[1] == 400:
                        # Bad input: hand the sub-call's own message back to the caller
                        return semantic_status
                    else:
                        return "Error occurred while adding semantic type: " + str(ust), 500
                    column_status = self._create_column(
                        get_type_id(ust[BAC_CLASS][BAC_URI], ust[BAC_PROPERTY][BAC_URI]), n[BAC_COLUMN_NAME],
                        model[BAC_NAME], column_model)
                    if column_status[1] == 201:
                        new_column_count += 1
                    elif column_status[1] == 409:
                        existed_column_count += 1
                    elif column_status[1] == 400:
                        return column_status
                    else:
                        return "Error occurred while adding column for semantic type: " + str(ust), 500

        # Nothing bad happened when creating the semantic types and columns, so add the model to the DB
        # NOTE(review): the head of this call was lost in the recovered source; insert_one is
        # assumed because a single document is passed - confirm against the db wrapper in use.
        self.db.insert_one(
            {DATA_TYPE: DATA_TYPE_MODEL, ID: model["id"], NAME: model[BAC_NAME], DESC: model["description"],
             MODEL: column_model, BULK_ADD_MODEL_DATA: model})
        return "Model and columns added, " + str(new_type_count) + " semantic types created, " + \
               str(existed_type_count) + " semantic types already existed, " + \
               str(new_column_count) + " columns created, and " + \
               str(existed_column_count) + " columns already existed.", 201

    def bulk_add_models_delete(self, model_ids=None, model_names=None, model_desc=None):
        """
        Delete all of the bulk add models which fit the given parameters

        Parameters left as None do not restrict the selection, so calling this
        without arguments deletes every bulk add model.

        :param model_ids:   The possible ids of the models to delete
        :param model_names: The possible names of the models to delete
        :param model_desc:  The possible descriptions of the models to delete
        :return: The amount of models deleted with a 200 if successful, otherwise an error message with the appropriate code
        """
        # Only documents of the model data type are ever candidates for deletion
        db_body = {DATA_TYPE: DATA_TYPE_MODEL}
        if model_ids is not None:
            db_body[ID] = {"$in": model_ids}
        if model_names is not None:
            db_body[NAME] = {"$in": model_names}
        if model_desc is not None:
            # Unlike ids and names, the description is matched for equality, not membership
            db_body[MODEL_DESC] = model_desc
        deleted_count = self.db.delete_many(db_body).deleted_count

        if deleted_count < 1:
            return "No models were found with the given parameters", 404
        return str(deleted_count) + " models deleted successfully", 200

    ################ BulkAddModelData ################

    def bulk_add_model_data_get(self, model_id, crunch_data):
        """
        Returns the current state of the bulk add model

        :param model_id:    The id of the model to get
        :param crunch_data: False if learnedSemanticTypes should not be generated and the version in the db should be used instead, note that the data in the db is updated every time a get is run with crunch_data=true
        :return: The current state of the bulk add model, a 404 message if the id is unknown, or a 500 message if the id is not unique
        """
        db_result = list(self.db.find({DATA_TYPE: DATA_TYPE_MODEL, ID: model_id}))
        if len(db_result) < 1:
            return "A model was not found with the given id", 404
        if len(db_result) > 1:
            return "More than one model was found with the given id", 500

        stored = db_result[0]
        if crunch_data:
            # Regenerate the model data from the stored model instead of trusting the stored copy
            body = self._update_bulk_add_model(stored[BULK_ADD_MODEL_DATA], stored[MODEL])
        else:
            body = stored[BULK_ADD_MODEL_DATA]
        return json_response(body, 200)

    def bulk_add_model_data_post(self, model_id, column_model, data):
        """
        Add data to the service with a bulk add model

        :param model_id:     The id of the model to add off of
        :param column_model: The model of the columns being used with that model
        :param data:         The list of dictionaries with all of the data to add
        :return: A confirmation message with a 201 if it was successful, otherwise an error message with the appropriate code
        """
        # Get the model and parse the json lines
        model = list(self.db.find({DATA_TYPE: DATA_TYPE_MODEL, ID: model_id}))
        if len(model) < 1:
            return "The given model was not found", 404
        if len(model) > 1:
            return "More than one model was found with the id", 500
        model = model[0][BULK_ADD_MODEL_DATA]
        # Get all of the data in each column
        for n in model[BAC_GRAPH][BAC_NODES]:
            column_data = []
            for line in data:
                if n.get(BAC_COLUMN_NAME):
                    # NOTE(review): the body of this branch was lost in the recovered source.
                    # Collecting the node's column value from each line is assumed - confirm.
                    column_data.append(line[n[BAC_COLUMN_NAME]])
            # Add it to the db
            if n.get(BAC_USER_SEMANTIC_TYPES):
                for ust in n[BAC_USER_SEMANTIC_TYPES]:
                    result = self.semantic_types_column_data_post_put(
                        get_column_id(get_type_id(ust[BAC_CLASS][BAC_URI], ust[BAC_PROPERTY][BAC_URI]),
                                      n[BAC_COLUMN_NAME], model[BAC_NAME], column_model), column_data, False)[1]
                    # 201 means the data was stored; anything else aborts the whole request
                    if result == 404:
                        return "A required column was not found", 404
                    if result != 201:
                        return "Error occurred while adding data to the column", 500

        return "Data successfully added to columns", 201
from client import TradeClient, Client
import time
import sys
import config

# Client instance from the local `client` module; it is not used in the lines visible here.
bfxclient = Client()

# Handle on the `orders` collection of the `wtracker` database.
# NOTE(review): MongoClient is not imported in the visible import lines - confirm it is in scope.
order = MongoClient().wtracker.orders
# Width of the time window subtracted from time.time(); presumably seconds - confirm the unit of `ts`.
lookback = 1

# Endless polling loop: every pass reads the recent orders and prunes the old ones.
while (1):

    # Orders with a `ts` newer than `t` are read; orders with a `ts` older than
    # `t - lookback` are deleted from the collection.
    t = int(time.time()) - lookback
    cur = order.find({'ts': {'$gt': t}})
    order.delete_many({'ts': {'$lt': t - lookback}})

    # look for most expensive in asks.
    # look for cheapest in bids.
    group = {}
    disqualified = []
    count = 0
    base_cur = {}

    for x in cur:
        count += 1
        # NOTE(review): the body of this `if` is missing from the recovered source, so the
        # block is not valid Python as it stands; the intended action after two orders is unknown.
        if count > 2:

        _id = x['ts']
        pair = x['pair']
class ConfigurationIO:
    """Access layer over collections of the "tokenizer" MongoDB database on localhost:20000.

    NOTE(review): several lines of this class were lost in the recovered source
    (collection names, insert calls, loop bodies).  The class is not valid
    Python until they are restored; each gap is marked below.
    """

    def __init__(self):
        # NOTE(review): the collection-name argument and closing parenthesis of each
        # get_collection( call below are missing and must be restored.
        self.config_db = MongoClient('localhost', 20000).get_database("tokenizer").get_collection(

        self.train_db = MongoClient('localhost', 20000).get_database("tokenizer").get_collection(

        # self.label_db = MongoClient('localhost', 20000).get_database("tokenizer").get_collection(
        #     'Labels')

        self.task_db = MongoClient('localhost', 20000).get_database("tokenizer").get_collection(

        print('configuration initialization done')

    # Builds one document per sentence, numbering `_id` upwards from the largest `_id`
    # currently found in config_db and train_db (0 when both are empty).
    def insertTextIntoDatabase(self, sentences, database):
        # The four branches cover every empty/non-empty combination of the two collections.
        if (self.config_db.find().count() == 0 and self.train_db.find().count() == 0):
            max_id = 0

        elif (self.config_db.find().count() == 0 and self.train_db.find().count() != 0):
            max_id = self.train_db.find_one(sort=[("_id", -1)])["_id"]

        elif (self.config_db.find().count() != 0 and self.train_db.find().count() == 0):
            max_id = self.config_db.find_one(sort=[("_id", -1)])["_id"]

        elif (self.config_db.find().count() != 0 and self.train_db.find().count() != 0):
            max_id = max(self.config_db.find_one(sort=[("_id", -1)])["_id"],
                         self.train_db.find_one(sort=[("_id", -1)])["_id"])

        # NOTE(review): the iterable of this comprehension is missing (presumably an
        # enumeration of `sentences`), and no call that stores the documents is visible.
        sentence_state = [{"_id": index + 1 + max_id, "text": s, "database": database} for index, s in
        saveJsonObj = json.dumps(sentence_state, ensure_ascii=False)
        # self.config_db.delete_many({})

    # Assembles a task document stamped with the current UTC time.
    # NOTE(review): no call that stores `task` is visible - presumably an insert into task_db.
    def insertTask(self, databaseName, type, tags, description):
        task = {}
        task['database'] = databaseName
        task['category'] = type
        task['description'] = description
        task['tags'] = tags
        task['timeAdded'] = strftime("%Y-%m-%d %H:%M:%S", gmtime())

    # Returns the 'category' field of one task_db document whose 'database' matches.
    # Subscripting fails if no such document exists, because find_one then yields None.
    def getCategoryOfDatabase(self, database):
        cursor = self.task_db.find_one({'database': database}, {'category': 1})
        return cursor['category']

    # Removes every config_db document belonging to `dbName`; the other collections are not touched here.
    def deleteDb(self, dbName):
        self.config_db.delete_many({'database': dbName})

    # Walks the train_db documents with `_id` > 0, reading only their 'database' field.
    # NOTE(review): the body of the `if` is missing - presumably it appends unseen names to the result.
    def getTrainedDatabases(self):
        cursor = self.train_db.find({'_id': {'$gt': 0}}, {'database': 1})
        list = []
        for item in cursor:
            if item['database'] not in list:
        return list

    # Same walk as getTrainedDatabases, but over config_db.
    # NOTE(review): the body of the `if` is missing here as well.
    def getUntrainedDatabases(self):
        cursor = self.config_db.find({'_id': {'$gt': 0}}, {'database': 1})
        list = []
        for item in cursor:
            if item['database'] not in list:
        return list

    # Builds {"name", "category"} pairs from the config_db documents with `_id` > 0.
    # NOTE(review): the body of the `if` is missing - presumably it appends unseen pairs.
    def getUntrainedDatabasesGroupByCategory(self):
        cursor = self.config_db.find({'_id': {'$gt': 0}}, {'database': 1, 'category': 1})
        # list = [{"name": doc['database'], "category": doc['category']} for doc in cursor]
        list = []
        for item in cursor:
            doc = {"name": item['database'], "category": item['category']}
            if doc not in list:
        return list

    # Returns all train_db documents of `database` as a list, with the 'database'
    # and 'id' fields excluded by the projection.  `text` is built but never used.
    def getSubmittedSentencesFromDatabase(self, database):
        text = 'database:' + database
        trainCursor = self.train_db.find({"database": database}, {"database": 0, "id": 0})
        data = [doc for doc in trainCursor]
        return data
def test_mongo_dump_and_restore(docker_container, tmp_path):
    # Round-trip test: documents are inserted into a containerised MongoDB, dumped to a
    # file under tmp_path, and restored in a second container while the reported stats
    # are checked.
    # NOTE(review): many lines were lost in the recovered source (closing brackets,
    # f-string fields, `with` bodies, call heads); the function is not valid Python
    # until they are restored.  Each gap is marked below.
    # Dummy data insertion
    # NOTE(review): the closing bracket of this list is missing.
    docs = [
        {'name': 'col1doc1'},
        {'name': 'col1doc2'},
        {'name': 'col1doc3'},
    inserted_doc_ids = None
    # Host port that the container's 27017/tcp is published on.
    port = 27020
    client = MongoClient(f'mongodb://localhost:{port}')
    uri = 'mongodb://localhost/tmpdb'
    dump_path = str(tmp_path / 'dump1.tgz')

    with docker_container('mongo:4.0', ports={'27017/tcp': str(port)}, appdir=str(tmp_path)) as container:  # noqa: E501
        # NOTE(review): the replacement field `{}` is empty - presumably it named the container.
        cmd_prefix = f'docker exec -i {} '
        inserted_doc_ids = client.db1['col1'].insert_many(docs).inserted_ids

        # Get a dump after inserting the documents
        # NOTE(review): the body of the inner `with open(...)` is missing - presumably it
        # writes the dump stream to `fp`.
        with mongo_utils.mongo_dump(cmd_prefix=cmd_prefix, uri=uri, collection='col1', db='db1') as (stream, stats):  # noqa: E501
            with open(dump_path, 'wb') as fp:

        assert stats.num_docs == 3

        # Doesn't count the number of docs if requested not to
        # NOTE(review): the body of this `with` is missing.
        with mongo_utils.mongo_dump(cmd_prefix=cmd_prefix, uri=uri, collection='col1', db='db1', count=False) as (_, stats):  # noqa: E501
        assert not stats.num_docs

        # Test if a dummy falsey command throws
        # NOTE(review): the body of the inner `with` is missing, and the assert below lost
        # the head of its call (an unmatched `)` remains) - presumably a regex search on str(exc).
        with pytest.raises(Exception) as exc:
            with mongo_utils.mongo_dump(cmd_prefix=cmd_prefix + ' false ', uri=uri, collection='col1', db='db1') as _:  # noqa: E501
        assert'exited with error code', str(exc))

    with docker_container('mongo:4.0', ports={'27017/tcp': str(port)}, appdir=str(tmp_path)) as container:  # noqa: E501

        # Helper that feeds the dump file written above to mongo_restore.
        # NOTE(review): the remaining arguments and the closing parenthesis of the call
        # are missing, as is the f-string field in cmd_prefix.
        def restore_dump(**kwargs):
            with open(dump_path, 'rb') as fp:
                return mongo_utils.mongo_restore(
                    cmd_prefix=f'docker exec -i {} ',

        # Insert one document and check if it wasn't overwritten
        # NOTE(review): the head and tail of the insert call around the two key lines
        # below are missing, and so is the closing brace of the expected set.
        col = MongoClient(f'mongodb://localhost:{port}').db2['col2']
            '_id': inserted_doc_ids[0],
            'name': 'test',
        stats = restore_dump()
        assert {d['name'] for d in col.find()} == {
            'test', 'col1doc2', 'col1doc3',
        assert stats.num_docs == 2

        # Checking if duplicated docs are properly returned
        # NOTE(review): the call wrapping the two key lines below is missing - presumably
        # an update/replace of the document with the first inserted id.
        # NOTE(review): the head and tail of the call around these two keys are missing.
            '_id': inserted_doc_ids[0],
            'name': 'new doc 1',
        col.insert_one({'name': 'new doc 2'})
        stats = restore_dump()

        assert stats.duplicated_ids == [inserted_doc_ids[0]]

        # Remove the reported duplicate so that the next restore can insert it again.
        r = col.delete_many({'_id': {'$in': stats.duplicated_ids}})
        stats = restore_dump()
        assert stats.num_docs == 1
        assert set(stats.duplicated_ids) == set(inserted_doc_ids[1:])
        # NOTE(review): the closing brace of the expected set is missing.
        assert {d['name'] for d in col.find()} == {
            'col1doc1', 'new doc 2', 'col1doc2', 'col1doc3',

        # Now drop the collection
        # With drop=True only the three dumped documents are expected to remain.
        col.insert_one({'name': 'new doc'})
        stats = restore_dump(drop=True)
        # NOTE(review): the closing brace of the expected set is missing.
        assert {d['name'] for d in col.find()} == {
            'col1doc1', 'col1doc2', 'col1doc3',
        assert stats.num_docs == 3
Exemple #18
class MongoDatabase(Database):
    """
    This class implements the abstract class Database and communicates with the MongoDB database.
    It has several methods for this communication.

    NOTE(review): several lines of this class were lost in the recovered source and
    have been reconstructed from context; the reconstructed spots are marked below
    and should be confirmed against the upstream project.
    """

    def __init__(self, collection):
        """Bind to the given collection of the "Hestia" database on the default client."""
        self._devices = MongoClient()["Hestia"][collection]

    def get_all_devices(self):
        """Instantiates all devices in database"""
        devices = []
        for data in self._devices.find():
            _id = data["_id"]
            device = self._get_class(data["module"], data["class"])(self, _id)
            # NOTE(review): reconstructed - without it the method always returned an empty list
            devices.append(device)
        return devices

    def get_device(self, device_id):
        """Instantiates the device with the given device_id"""
        data = self.__get_device_data(device_id)
        device = self._get_class(data["module"], data["class"])
        return device(self, device_id)

    def add_device(self, plugin):
        """Adds the given plugin info as a new device"""
        plugin["_id"] = str(ObjectId())
        # NOTE(review): reconstructed - the call that stores the plugin was missing
        self._devices.insert_one(plugin)

    def delete_device(self, device_id):
        """Removes the device with the given device_id"""
        self._devices.delete_one({"_id": device_id})

    def update_field(self, device_id, field, new_value):
        """Sets a top-level field of the device to new_value"""
        self._devices.find_one_and_update({"_id": device_id},
                                          {"$set": {
                                              field: new_value
                                          }})

    def get_field(self, device_id, field):
        """Returns the value of a top-level field of the device"""
        data = self.__get_device_data(device_id)
        return data[field]

    def get_activator_field(self, device_id, activator_id, field):
        """Returns the value of a field of one activator of the device"""
        data = self.__get_device_data(device_id)
        activator = self.__get_activator(data, activator_id)
        return activator[field]

    def update_activator_field(self, device_id, activator_id, field,
                               new_value):
        """Sets a field of one activator of the device to new_value"""
        # NOTE(review): the call head was missing; find_one_and_update is assumed to
        # mirror update_field above.
        self._devices.find_one_and_update(
            {"_id": device_id},
            {"$set": {
                "activators." + activator_id + "." + field: new_value
            }})

    def delete_all_devices(self):
        """Removes every device from the collection"""
        # NOTE(review): reconstructed - the method body was missing entirely
        self._devices.delete_many({})

    def __get_device_data(self, device_id):
        """Get data of device based on its id"""
        data = self._devices.find_one(device_id)
        if data is None:
            raise NotFoundException("device")
        return data

    @staticmethod
    def __get_activator(data, activator_id):
        """Get the activator with the given id out of the device data"""
        # Static because the call site passes (data, activator_id) through `self`;
        # as a plain method `self` would have been bound to `data`.
        try:
            return data["activators"][activator_id]
        except KeyError as exception:
            raise NotFoundException("activator") from exception