コード例 #1
0
 def __iter__(self):
     """Yield every document from each configured collection, in order.

     One cursor is built per ``(collection, limit)`` pair found in
     ``self._mongo_collections``.  A sort is passed to the cursor only
     when ``self._sort`` is truthy.  Every cursor is closed once
     iteration finishes or is abandoned.
     """
     opened = []
     for coll, max_docs in self._mongo_collections:
         query = self._query()
         options = {
             "no_cursor_timeout": self._no_cursor_timeout,
             "limit": max_docs,
         }
         if self._sort:
             options["sort"] = [self._sort]
         opened.append(Cursor(coll, query, **options))

     try:
         for current in opened:
             for document in current:
                 yield document
     finally:
         # Close all cursors, including the ones never iterated.
         for current in opened:
             current.close()
コード例 #2
0
 def ensure_cursor_death(self, collection, cursor_id, retrieved):
     """Fail the test if the cursor ``cursor_id`` can still be advanced.

     A cursor is rebuilt around the given id and one fetch is attempted.
     ``StopIteration`` and ``OperationFailure`` both count as the
     cursor being gone; a successful fetch fails the test.
     """
     probe = Cursor(
         collection,
         _cursor_id=cursor_id,
         limit=self.real_app.config['CURSOR_BATCH_SIZE'],
         _retrieved=retrieved,
     )
     try:
         probe.next()
     except (StopIteration, OperationFailure):
         return
     self.fail('Cursor was not killed')
コード例 #3
0
 def next(self):
     """Return the next result, remembering it in the iteration cache.

     Combined with the slightly different `__iter__`, this lets the
     cursor be iterated more than once.  Tailable cursors bypass the
     cache entirely.
     """
     if not self.__tailable:
         try:
             item = PymongoCursor.next(self)
         except StopIteration:
             # Exhausted: the cache now holds the complete result set.
             self.__fullcache = True
             raise
         self.__itercache.append(item)
         return item
     return PymongoCursor.next(self)
コード例 #4
0
 def ensure_cursor_death(self, collection, cursor_id, retrieved):
     """Assert that the cursor identified by ``cursor_id`` is dead.

     Tries to pull one more document through a cursor rebuilt from the
     given id.  Exhaustion (``StopIteration``) or a server error
     (``OperationFailure``) is the expected outcome; anything else
     fails the test.
     """
     size = self.real_app.config['CURSOR_BATCH_SIZE']
     revived = Cursor(collection, _cursor_id=cursor_id, limit=size,
                      _retrieved=retrieved)
     still_alive = True
     try:
         revived.next()
     except (StopIteration, OperationFailure):
         still_alive = False
     if still_alive:
         self.fail('Cursor was not killed')
コード例 #5
0
def get_pagination_from_cursor(cursor: Cursor, start: int, limit: int):
    """Build a pagination payload for one page of ``cursor``.

    Args:
        cursor: Cursor to page through; ``count()``, ``skip()`` and
            ``limit()`` are called on it.
        start: Number of leading documents to skip.
        limit: Page size forwarded to ``cursor.limit()``.  ``0`` is
            forwarded unchanged, which pymongo treats as "no limit".

    Returns:
        A dict with the keys ``"start"``, ``"limit"``, ``"total"``,
        ``"hasNext"`` and ``"results"`` (the page as a list).
    """
    total = cursor.count()
    page = cursor.skip(start).limit(limit)

    # With limit == 0 the cursor returns everything after `start`, so
    # there can never be a following page.
    has_next = limit != 0 and total > (start + limit)

    return {
        "start": start,
        "limit": limit,
        "total": total,
        "hasNext": has_next,
        "results": list(page),
    }
コード例 #6
0
ファイル: monkey.py プロジェクト: vfulco/mrq
        def _Cursor__send_message(self, *args, **kwargs):
            """Send the cursor message while exposing it as the job's current IO.

            When a job is running, its current IO is set to a
            ``mongodb.<subtype>`` record naming ``<database>.<collection>``
            for the duration of the call, and reset to ``None`` afterwards
            even if the underlying send raises.

            Returns whatever ``Cursor._Cursor__send_message`` returns.
            """
            job = get_current_job()

            if not job:
                return Cursor._Cursor__send_message(self, *args, **kwargs)  # pylint: disable=no-member

            subtype = "cursor"
            collection = self._Cursor__collection.name  # pylint: disable=no-member

            if collection == "$cmd":
                # list() keeps the indexing below valid on Python 3, where
                # dict.items() returns a non-subscriptable view.
                items = list(self._Cursor__spec.items())  # pylint: disable=no-member
                if items:
                    subtype, collection = items[0]

            job.set_current_io({
                "type": "mongodb.%s" % subtype,
                "data": {
                    "collection": "%s.%s" % (self._Cursor__collection.database.name, collection)  # pylint: disable=no-member
                }
            })
            try:
                return Cursor._Cursor__send_message(self, *args, **kwargs)  # pylint: disable=no-member
            finally:
                # Always clear the IO marker so a failed query does not leave
                # the job reporting stale MongoDB activity.
                job.set_current_io(None)
コード例 #7
0
ファイル: monkey.py プロジェクト: frankrousseau/mrq
        def _Cursor__send_message(self, *args, **kwargs):
            """Send the cursor message while exposing it as the job's current IO.

            When a job is running, its current IO is set to a
            ``mongodb.<subtype>`` record (default subtype ``find``) naming
            ``<database>.<collection>`` for the duration of the call, and
            reset to ``None`` afterwards even if the underlying send raises.

            Returns whatever ``Cursor._Cursor__send_message`` returns.
            """
            job = get_current_job()

            if not job:
                return Cursor._Cursor__send_message(self, *args, **kwargs)  # pylint: disable=no-member

            subtype = "find"
            collection = self._Cursor__collection.name  # pylint: disable=no-member

            if collection == "$cmd":
                # list() keeps the indexing below valid on Python 3, where
                # dict.items() returns a non-subscriptable view.
                items = list(self._Cursor__spec.items())  # pylint: disable=no-member
                if items:
                    subtype, collection = items[0]

            database_name = self._Cursor__collection.database.name  # pylint: disable=no-member
            job.set_current_io({
                "type": "mongodb.%s" % subtype,
                "data": {
                    "collection": "%s.%s" % (database_name, collection)
                }
            })
            try:
                return Cursor._Cursor__send_message(self, *args, **kwargs)  # pylint: disable=no-member
            finally:
                # Always clear the IO marker so a failed query does not leave
                # the job reporting stale MongoDB activity.
                job.set_current_io(None)
コード例 #8
0
ファイル: monkey.py プロジェクト: AshBT/mrq
        def _Cursor__send_message(self, *args, **kwargs):
            """Send the cursor message with job IO tracking and pre/post hooks.

            * If a job is running, its current IO is set to a
              ``mongodb.<subtype>`` record for the duration of the call and
              cleared afterwards.
            * If ``config["mongodb_pre_hook"]`` / ``config["mongodb_post_hook"]``
              are set, they are called with a dict describing the operation;
              the post hook also receives ``"result"`` (``False`` if the send
              raised) and ``"time"`` (elapsed seconds).

            Returns whatever ``Cursor._Cursor__send_message`` returns.
            """
            job = get_current_job()

            # Computed unconditionally: the hooks need these values even when
            # no job is running (previously that path raised a NameError).
            subtype = "cursor"
            collection = self._Cursor__collection.name  # pylint: disable=no-member

            if collection == "$cmd":
                # list() keeps the indexing below valid on Python 3, where
                # dict.items() returns a non-subscriptable view.
                items = list(self._Cursor__spec.items())  # pylint: disable=no-member
                if items:
                    subtype, collection = items[0]

            full_name = "%s.%s" % (self._Cursor__collection.database.name, collection)  # pylint: disable=no-member

            if job:
                job.set_current_io({
                    "type": "mongodb.%s" % subtype,
                    "data": {
                        "collection": full_name
                    }
                })

            # Tolerate a call without positional arguments instead of raising
            # IndexError inside the hook plumbing.
            hook_args = (getattr(args[0], "spec", None) if args else None, )

            pre_hook = config.get("mongodb_pre_hook")
            if pre_hook:
                pre_hook({
                    "collection": full_name,
                    "method": subtype,
                    "args": hook_args,
                    "kwargs": kwargs,
                    "client": self._Cursor__collection.database.client,
                    "job": job
                })

            start_time = time.time()
            ret = False
            try:
                ret = Cursor._Cursor__send_message(self, *args, **kwargs)  # pylint: disable=no-member
            finally:
                stop_time = time.time()
                if job:
                    job.set_current_io(None)

                post_hook = config.get("mongodb_post_hook")
                if post_hook:
                    post_hook({
                        "collection": full_name,
                        "method": subtype,
                        "args": hook_args,
                        "kwargs": kwargs,
                        "client": self._Cursor__collection.database.client,
                        "job": job,
                        "result": ret,
                        "time": stop_time - start_time
                    })

            return ret
コード例 #9
0
def limit_data(data: Cursor, request_params: dict) -> list:
    """Apply the request's ``limit`` parameter to a cursor.

    Args:
        data: The cursor to limit.
        request_params: Parameters sent with the request (query string
            or body); the ``'limit'`` entry is converted with ``int()``.

    Returns:
        ``data.limit(n)`` when the limit is non-zero, otherwise ``data``
        itself.
    """
    requested = int(request_params.get('limit', 0))
    return data.limit(requested) if requested else data
コード例 #10
0
def sort_data(data: Cursor, request_params: dict) -> list:
    """Apply the request's ``sort`` parameter to a cursor.

    Args:
        data: The cursor to sort.
        request_params: Parameters sent with the request (query string
            or body); ``'sort'`` is expected to map a field name to a
            direction that ``int()`` can convert.

    Returns:
        ``data.sort([...])`` when a sort spec is present, otherwise
        ``data`` itself.
    """
    sort_spec = request_params.get('sort', {})
    if sort_spec == {}:
        return data

    ordering = []
    for field, direction in sort_spec.items():
        ordering.append((field, int(direction)))
    return data.sort(ordering)
コード例 #11
0
    def get_inner_oppty_dur_stats(cls, mm1_ob_cur: Cursor, mm2_ob_cur: Cursor):
        """Collect mid-price and amount statistics for both markets.

        An empty nested skeleton is built for ``"mm1"`` and ``"mm2"`` and
        then handed to ``get_mid_price_stat`` and
        ``get_amount_stat_by_depth``, each call receiving a fresh clone
        of the matching cursor.  Per the original note, the helpers add
        avg / var / std under each of the deepest keys.

        Returns:
            The ``inner_stats`` dict after the helpers have run.
        """
        markets = (("mm1", mm1_ob_cur), ("mm2", mm2_ob_cur))
        shared_depths = ("top5", "top10", "total")

        # Default structure; the helpers populate the empty leaves.
        inner_stats = {}
        for market, _ in markets:
            inner_stats[market] = {
                "mid_price": {},
                "amount": {
                    "asks": {key: {} for key in ("min_ask",) + shared_depths},
                    "bids": {key: {} for key in ("max_bid",) + shared_depths},
                },
            }

        # Mid-price stats for every market first ...
        for market, ob_cur in markets:
            cls.get_mid_price_stat(market, ob_cur.clone(), inner_stats)

        # ... then amount stats, asks before bids, market by market.
        for market, ob_cur in markets:
            for side in ("asks", "bids"):
                cls.get_amount_stat_by_depth(market, ob_cur.clone(),
                                             inner_stats, side)

        return inner_stats
コード例 #12
0
ファイル: collection.py プロジェクト: bolinette/bolinette
 def _apply_params(self, cursor: Cursor):
     """Apply this query's offset, limit and ordering to ``cursor``.

     The offset is always applied; the limit only when it is not
     ``None``; the sort only when ``self._order_by`` is non-empty.
     Returns the same ``cursor`` object.
     """
     cursor.skip(self._offset)

     if self._limit is not None:
         cursor.limit(self._limit)

     if self._order_by:
         directions = []
         for column, desc in self._order_by:
             directions.append((column, DESCENDING if desc else ASCENDING))
         cursor.sort(directions)

     return cursor
コード例 #13
0
 def count_docs(self):
     """Append the count of populated docs to ``self.output``."""
     populated = self.search('filled', 'cmt_count')
     line = "Populated: %d" % Cursor.count(populated)
     self.output.append(line)
コード例 #14
0
from pymongo import MongoClient
from pymongo.cursor import Cursor
from graph_builder import GraphBuilder
from dbpedia_subjects_extractor import DbpediaSubjectsExtractor
import sys

# Name of the MongoDB database the script reads from.
DATABASE_NAME = 'socialnetworks'
# Collection to process, taken from the first command-line argument
# (raises IndexError at import time when no argument is given).
COLLECTION_NAME = sys.argv[1]


def preprocessing(x):
    """Print the URI of the document's first link and return its body."""
    first_link = x['Links'][0]
    print("Processing: ", first_link['Uri'])
    return first_link['Body']


if __name__ == "__main__":
    # Build a subject graph from every document of the chosen collection
    # and save it as "<collection>.gml".
    client = MongoClient('localhost', 27017)
    database = client[DATABASE_NAME]

    documents_collection = database[COLLECTION_NAME]
    # no_cursor_timeout=True means the server will not reap this cursor on
    # its own, so it must be closed explicitly, even if the build fails.
    cursor = Cursor(documents_collection, no_cursor_timeout=True)
    try:
        graph_builder = GraphBuilder(DbpediaSubjectsExtractor, preprocessing=preprocessing)
        graph_builder.build(cursor)
    finally:
        cursor.close()
    graph_builder.save_graph(COLLECTION_NAME + ".gml")
コード例 #15
0
ファイル: cursor.py プロジェクト: eristoddle/mogo
 def next(self):
     """Return the next document wrapped in ``self._model``."""
     return self._model(**PyCursor.next(self))
コード例 #16
0
    def find(self, *args, **kwargs):
        """Query the database.

        The `spec` argument is a prototype document that every result
        must match. For example:

        >>> db.test.find({"hello": "world"})

        only matches documents that have a key "hello" with value
        "world".  Matches may have other keys *in addition* to
        "hello". The `fields` argument selects a subset of fields to
        include in the result documents; restricting the fields cuts
        down on network traffic and decoding time.

        Raises :class:`TypeError` if any of the arguments are of
        improper type. Returns an instance of
        :class:`~pymongo.cursor.Cursor` corresponding to this query.

        :Parameters:
          - `spec` (optional): a SON object specifying elements which
            must be present for a document to be included in the
            result set
          - `fields` (optional): a list of field names that should be
            returned in the result set ("_id" will always be
            included), or a dict specifying the fields to return
          - `skip` (optional): the number of documents to omit (from
            the start of the result set) when returning the results
          - `limit` (optional): the maximum number of results to
            return
          - `timeout` (optional): if True, any returned cursor will be
            subject to the normal timeout behavior of the mongod
            process. Otherwise, the returned cursor will never time
            out at the server. Care should be taken to ensure that
            cursors with timeout turned off are properly closed.
          - `snapshot` (optional): if True, snapshot mode will be used
            for this query. Snapshot mode assures no duplicates are
            returned, or objects missed, which were present at both
            the start and end of the query's execution. For details,
            see the `snapshot documentation
            <http://dochub.mongodb.org/core/snapshot>`_.
          - `tailable` (optional): the result of this find call will
            be a tailable cursor - tailable cursors aren't closed when
            the last data is retrieved but are kept open, and the
            cursor's location marks the final document's position. If
            more data is received, iteration of the cursor continues
            from the last document received. For details, see the
            `tailable cursor documentation
            <http://www.mongodb.org/display/DOCS/Tailable+Cursors>`_.
          - `sort` (optional): a list of (key, direction) pairs
            specifying the sort order for this query. See
            :meth:`~pymongo.cursor.Cursor.sort` for details.
          - `max_scan` (optional): limit the number of documents
            examined when performing the query
          - `as_class` (optional): class to use for documents in the
            query result (default is
            :attr:`~pymongo.connection.Connection.document_class`)
          - `slave_okay` (optional): if True, allows this query to
            be run against a replica secondary.
          - `network_timeout` (optional): specify a timeout to use for
            this query, which will override the
            :class:`~pymongo.connection.Connection`-level default

        .. note:: The `max_scan` parameter requires server
           version **>= 1.5.1**

        .. versionadded:: 1.8
           The `network_timeout` parameter.

        .. versionadded:: 1.7
           The `sort`, `max_scan` and `as_class` parameters.

        .. versionchanged:: 1.7
           The `fields` parameter can now be a dict or any iterable in
           addition to a list.

        .. versionadded:: 1.1
           The `tailable` parameter.

        .. mongodoc:: find
        """
        # Inherit the collection-level slave_okay unless the caller chose
        # a value explicitly.
        if self.slave_okay:
            kwargs.setdefault('slave_okay', True)
        return Cursor(self, *args, **kwargs)
コード例 #17
0
ファイル: monkey.py プロジェクト: zhengge2017/mrq
        def _Cursor__send_message(self, *args, **kwargs):
            """Send the cursor message with job IO tracking and pre/post hooks.

            * If a job is running, its current IO is set to a
              ``mongodb.<subtype>`` record for the duration of the call and
              cleared afterwards.
            * If ``config["mongodb_pre_hook"]`` / ``config["mongodb_post_hook"]``
              are set, they are called with a dict describing the operation;
              the post hook also receives ``"result"`` (``False`` if the send
              raised) and ``"time"`` (elapsed seconds).

            Returns whatever ``Cursor._Cursor__send_message`` returns.
            """
            job = get_current_job()

            # Computed unconditionally: the hooks need these values even when
            # no job is running (previously that path raised a NameError).
            subtype = "cursor"
            collection = self._Cursor__collection.name  # pylint: disable=no-member

            if collection == "$cmd":
                items = list(self._Cursor__spec.items())  # pylint: disable=no-member
                if items:
                    subtype, collection = items[0]

            full_name = "%s.%s" % (
                self._Cursor__collection.database.name, collection)  # pylint: disable=no-member

            if job:
                job.set_current_io({
                    "type": "mongodb.%s" % subtype,
                    "data": {
                        "collection": full_name
                    }
                })

            # Tolerate a call without positional arguments instead of raising
            # IndexError inside the hook plumbing.
            hook_args = (getattr(args[0], "spec", None) if args else None, )

            pre_hook = config.get("mongodb_pre_hook")
            if pre_hook:
                pre_hook({
                    "collection": full_name,
                    "method": subtype,
                    "args": hook_args,
                    "kwargs": kwargs,
                    "client": self._Cursor__collection.database.client,
                    "job": job
                })

            start_time = time.time()
            ret = False
            try:
                ret = Cursor._Cursor__send_message(self, *args, **kwargs)  # pylint: disable=no-member
            finally:
                stop_time = time.time()
                if job:
                    job.set_current_io(None)

                post_hook = config.get("mongodb_post_hook")
                if post_hook:
                    post_hook({
                        "collection": full_name,
                        "method": subtype,
                        "args": hook_args,
                        "kwargs": kwargs,
                        "client": self._Cursor__collection.database.client,
                        "job": job,
                        "result": ret,
                        "time": stop_time - start_time
                    })

            return ret
コード例 #18
0
 def r_comments(self, rng):
     """Append the comment count for the range ``rng`` to ``self.output``.

     ``rng[0]`` and ``rng[1]`` are passed to ``datetime`` as its first
     two arguments (presumably year and month; verify against callers).
     """
     since = datetime(rng[0], rng[1], 1)
     matches = self.search('select_gtv', 'comment_date', since, 0,
                           'comment_id')
     self.output.append("Comment Amount: %d" % Cursor.count(matches))
コード例 #19
0
 def commentc(self):
     """Append the number of docs having ``comment_id`` to ``self.output``."""
     with_comment = self.search('exists', 'comment_id')
     line = "Comment Amount: %d" % Cursor.count(with_comment)
     self.output.append(line)
コード例 #20
0
ファイル: read.py プロジェクト: lycantropos/admin
def paginate(cursor: Cursor, *, offset: int, limit: int) -> Cursor:
    """Return ``cursor`` restricted to one page.

    ``skip`` is applied only for a truthy ``offset``; ``limit`` is
    applied whenever it is not ``None``.
    """
    page = cursor.skip(offset) if offset else cursor
    return page if limit is None else page.limit(limit)
コード例 #21
0
 def __init__(self, model, spec=None, *args, **kwargs):
     """Create a cursor over ``model``'s collection for the query ``spec``.

     The model and the query are remembered on the instance; remaining
     arguments are forwarded to ``PyCursor.__init__``.
     """
     self._model = model
     self._query = spec
     self._order_entries = []
     collection = model._get_collection()
     PyCursor.__init__(self, collection, spec, *args, **kwargs)
コード例 #22
0
ファイル: document.py プロジェクト: Lothiraldan/picomongo
 def next(self):
     """Return the next raw result wrapped by ``self._document``."""
     raw = PymongoCursor.next(self)
     return self._document(raw, **self._kwargs)
コード例 #23
0
ファイル: document.py プロジェクト: Lothiraldan/picomongo
 def __getattr__(self, attr_name):
     """Resolve ``attr_name`` through ``PymongoCursor.__getattribute__``."""
     resolved = PymongoCursor.__getattribute__(self, attr_name)
     return resolved
コード例 #24
0
ファイル: document.py プロジェクト: Lothiraldan/picomongo
 def __getitem__(self, index):
     """Return the item at ``index`` wrapped by ``self._document``."""
     raw = PymongoCursor.__getitem__(self, index)
     return self._document(raw, **self._kwargs)
コード例 #25
0
ファイル: cursor.py プロジェクト: eristoddle/mogo
 def __getitem__(self, *args, **kwargs):
     """Index or slice the cursor.

     A result of exactly this cursor's class is returned as is; any
     other result is expanded into ``self._model``.
     """
     item = PyCursor.__getitem__(self, *args, **kwargs)
     is_cursor = type(item) == self.__class__
     return item if is_cursor else self._model(**item)
コード例 #26
0
 def miss_docs(self):
     """Append the count of docs with an empty ``cmt_count`` to ``self.output``."""
     missing = self.search('empty', 'cmt_count')
     line = "Missing: %d" % Cursor.count(missing)
     self.output.append(line)
コード例 #27
0
 def articlec(self):
     """Append the number of docs having ``nr`` to ``self.output``."""
     articles = self.search('exists', 'nr')
     line = "Article Amount: %d" % Cursor.count(articles)
     self.output.append(line)
コード例 #28
0
 def find(self, *args, **kwargs):
     """Return a ``Cursor`` over this object, forwarding all arguments."""
     cursor = Cursor(self, *args, **kwargs)
     return cursor
コード例 #29
0
 def r_articles(self, rng):
     """Append the article count for the range ``rng`` to ``self.output``.

     ``rng[0]`` and ``rng[1]`` are passed to ``datetime`` as its first
     two arguments (presumably year and month; verify against callers).
     """
     since = datetime(rng[0], rng[1], 1)
     matches = self.search('select_gtv', 'date', since, 0, 'nr')
     self.output.append("Article Amount: %d" % Cursor.count(matches))
コード例 #30
0
ファイル: cursor.py プロジェクト: jjmalina/mogo
 def next(self):
     """Fetch the next document and build a ``self._model`` from its fields."""
     document = PyCursor.next(self)
     model_instance = self._model(**document)
     return model_instance
コード例 #31
0
ファイル: cursor.py プロジェクト: jellonek/mogo
 def __init__(self, model, spec=None, *args, **kwargs):
     """Set up a model-aware cursor.

     Stores ``model`` and the query ``spec`` on the instance, then
     initialises the underlying ``PyCursor`` on the model's collection
     with the remaining arguments.
     """
     self._model, self._query = model, spec
     self._order_entries = []
     target = model._get_collection()
     PyCursor.__init__(self, target, spec, *args, **kwargs)
コード例 #32
0
ファイル: document.py プロジェクト: whitebox-it/picomongo
 def next(self):
     """Advance the cursor and wrap the result with ``self._document``."""
     fetched = PymongoCursor.next(self)
     wrapped = self._document(fetched, **self._kwargs)
     return wrapped
コード例 #33
0
ファイル: __mongodb.py プロジェクト: dwdb/python-notes
    if ignore_id:
        if '_id' in df.columns:
            df.drop('_id', axis=1, inplace=True)
    df.fillna('', inplace=True)

    if drop_exist:
        db[coll_name].drop()
    db[coll_name].insert_many(df.T.to_dict().values())

    # 插入新集合,json将Dataframe更改为dictionary,导致行顺序不定
    # import json
    # db[coll_name].insert_many(json.loads(df.T.to_json()).values())

    # 直接插入时,Dataframe中的数字无法直接插入数据库
    # del doc['orderID']
    # doc['orderID'] = int(doc['orderID'])


if __name__ == '__main__':
    # Smoke check: connect, open one collection and print its size.
    conn = connect2mongodb()
    db = conn.get_database(DATA_BASE)
    coll = db.get_collection('CTP-090923')

    # NOTE(review): `cur` is not used in the visible code; it is kept
    # because other parts of the file may read module globals.
    cur = Cursor(coll)

    everything = coll.find({})
    print(everything.count())
コード例 #34
0
ファイル: document.py プロジェクト: whitebox-it/picomongo
 def __getattr__(self, attr_name):
     """Delegate the lookup of ``attr_name`` to ``PymongoCursor.__getattribute__``."""
     value = PymongoCursor.__getattribute__(self, attr_name)
     return value
コード例 #35
0
ファイル: cursor.py プロジェクト: jjmalina/mogo
 def __getitem__(self, *args, **kwargs):
     """Index or slice the cursor.

     When the underlying lookup yields an object of exactly this
     cursor's class it is returned unchanged; otherwise the result is
     unpacked into ``self._model``.
     """
     result = PyCursor.__getitem__(self, *args, **kwargs)
     if not type(result) == self.__class__:
         return self._model(**result)
     return result
コード例 #36
0
ファイル: cursor.py プロジェクト: eristoddle/mogo
 def __init__(self, model, *args, **kwargs):
     """Create a cursor over ``model``'s collection.

     ``model`` is kept for wrapping results; all other arguments go to
     ``PyCursor.__init__``.
     """
     self._model = model
     self._order_entries = []
     collection = model._get_collection()
     PyCursor.__init__(self, collection, *args, **kwargs)
コード例 #37
0
 def next(self, *args, **kwargs):
     """Return the next result wrapped in a ``CSObject``.

     The wrapper class is looked up on ``winter.objects`` by the
     collection's name.  A ``None`` result is passed through as is.
     """
     raw = Cursor.next(self, *args, **kwargs)
     if raw is None:
         return raw
     wrapper_cls = getattr(winter.objects, self.collection.name)
     return CSObject(wrapper_cls(raw), self.collection.name)
コード例 #38
0
ファイル: document.py プロジェクト: whitebox-it/picomongo
 def __getitem__(self, index):
     """Look up ``index`` on the cursor and wrap it with ``self._document``."""
     fetched = PymongoCursor.__getitem__(self, index)
     wrapped = self._document(fetched, **self._kwargs)
     return wrapped
コード例 #39
0
ファイル: cursor.py プロジェクト: jjmalina/mogo
 def __init__(self, model, *args, **kwargs):
     """Initialise a model-aware cursor.

     Remembers ``model`` and starts with no order entries, then
     initialises ``PyCursor`` on the model's collection with the
     remaining arguments.
     """
     self._order_entries, self._model = [], model
     target = model._get_collection()
     PyCursor.__init__(self, target, *args, **kwargs)
コード例 #40
0
ファイル: collection.py プロジェクト: isdb/idb.py
    def find(self, spec=None, fields=None, skip=0, limit=0, timeout=True,
             snapshot=False, tailable=False, _sock=None,
             _must_use_master=False, _is_command=False):
        """Query the database.

        The `spec` argument is a prototype document that every result must
        match. For example:

        >>> db.test.find({"hello": "world"})

        only matches documents that have a key "hello" with value "world".
        Matches may have other keys *in addition* to "hello". The `fields`
        argument selects a subset of fields to include in the result
        documents; restricting the fields cuts down on network traffic and
        decoding time.

        Raises TypeError if any of the arguments are of improper type. Returns
        an instance of Cursor corresponding to this query.

        :Parameters:
          - `spec` (optional): a SON object specifying elements which must be
            present for a document to be included in the result set
          - `fields` (optional): a list of field names that should be returned
            in the result set ("_id" will always be included)
          - `skip` (optional): the number of documents to omit (from the start
            of the result set) when returning the results
          - `limit` (optional): the maximum number of results to return
          - `timeout` (optional): if True, any returned cursor will be subject
            to the normal timeout behavior of the mongod process. Otherwise,
            the returned cursor will never time out at the server. Care should
            be taken to ensure that cursors with timeout turned off are
            properly closed.
          - `snapshot` (optional): if True, snapshot mode will be used for this
            query. Snapshot mode assures no duplicates are returned, or objects
            missed, which were present at both the start and end of the query's
            execution. For details, see the `snapshot documentation
            <http://www.mongodb.org/display/DOCS/How+to+do+Snapshotting+in+the+Mongo+Database>`_.
          - `tailable` (optional): the result of this find call will be a
            tailable cursor - tailable cursors aren't closed when the last data
            is retrieved but are kept open, and the cursor's location marks the
            final document's position. If more data is received, iteration of
            the cursor continues from the last document received. For
            details, see the `tailable cursor documentation
            <http://www.mongodb.org/display/DOCS/Tailable+Cursors>`_.

        .. versionadded:: 1.1
           The `tailable` parameter.

        .. mongodoc:: find
        """
        if spec is None:
            spec = SON()

        slave_okay = self.__database.connection.slave_okay

        if not isinstance(spec, dict):
            raise TypeError("spec must be an instance of dict")
        if not (fields is None or isinstance(fields, list)):
            raise TypeError("fields must be an instance of list")

        # Remaining scalar options, checked in signature order.
        scalar_checks = (
            (skip, int, "skip must be an instance of int"),
            (limit, int, "limit must be an instance of int"),
            (timeout, bool, "timeout must be an instance of bool"),
            (snapshot, bool, "snapshot must be an instance of bool"),
            (tailable, bool, "tailable must be an instance of bool"),
        )
        for value, expected, message in scalar_checks:
            if not isinstance(value, expected):
                raise TypeError(message)

        if fields is not None:
            # An empty list still selects "_id" only.
            fields = self._fields_list_to_dict(fields or ["_id"])

        return Cursor(self, spec, fields, skip, limit, slave_okay, timeout,
                      tailable, snapshot,
                      _sock=_sock,
                      _must_use_master=_must_use_master,
                      _is_command=_is_command)