def __iter__(self):
    """Iterate over all documents across the configured Mongo collections.

    Builds one cursor per (collection, limit) pair, optionally sorted,
    and yields each document in turn. All cursors are closed on exit,
    even if iteration is abandoned early or raises.
    """
    # Build the shared keyword arguments once instead of duplicating the
    # whole cursor-construction expression for the sorted/unsorted cases.
    cursor_kwargs = {"no_cursor_timeout": self._no_cursor_timeout}
    if self._sort:
        # pymongo expects `sort` as a list of (key, direction) pairs.
        cursor_kwargs["sort"] = [self._sort]
    cursors = [
        Cursor(collection, self._query(), limit=limit, **cursor_kwargs)
        for collection, limit in self._mongo_collections
    ]
    try:
        for cursor in cursors:
            for tweet in cursor:
                yield tweet
    finally:
        # no_cursor_timeout cursors stay alive server-side unless
        # explicitly closed, so always clean up.
        for cursor in cursors:
            cursor.close()
def ensure_cursor_death(self, collection, cursor_id, retrieved):
    """Assert that the server-side cursor identified by `cursor_id` is dead.

    Re-attaches a client-side cursor to the given id and tries to pull one
    more batch; if any document comes back the cursor is still alive and
    the test fails.
    """
    batch_size = self.real_app.config['CURSOR_BATCH_SIZE']
    revived = Cursor(collection, _cursor_id=cursor_id,
                     limit=batch_size, _retrieved=retrieved)
    try:
        revived.next()
    except (StopIteration, OperationFailure):
        # Either exhaustion or a server-side "cursor not found" error
        # means the cursor was killed — exactly what we expect.
        return
    self.fail('Cursor was not killed')
def next(self):
    """A `next` that caches the returned results. Together with the
    slightly different `__iter__`, these cursors can be iterated over
    more than once."""
    # Tailable cursors are never cached; delegate straight through.
    if self.__tailable:
        return PymongoCursor.next(self)
    try:
        document = PymongoCursor.next(self)
    except StopIteration:
        # Underlying cursor exhausted: the cache now holds every result,
        # so subsequent iterations can be served entirely from it.
        self.__fullcache = True
        raise
    self.__itercache.append(document)
    return document
def get_pagination_from_cursor(cursor: Cursor, start: int, limit: int):
    """Build a pagination envelope from a Mongo cursor.

    --> cursor : the unmodified query cursor.
    --> start  : number of documents to skip.
    --> limit  : maximum number of documents in the page.
    <-- dict with start/limit/total/hasNext and the materialized page.
    """
    # NOTE(review): Cursor.count() was deprecated and removed in
    # PyMongo >= 4; this assumes an older driver — confirm on upgrade.
    total = cursor.count()
    page = cursor.skip(start).limit(limit)
    return {
        "start": start,
        "limit": limit,
        "total": total,
        "hasNext": total > (start + limit),
        "results": list(page),
    }
def _Cursor__send_message(self, *args, **kwargs):
    """Monkeypatched Cursor.__send_message that records the query as the
    current job I/O before delegating to the real implementation."""
    job = get_current_job()
    if job:
        subtype = "cursor"
        collection = self._Cursor__collection.name  # pylint: disable=no-member
        if collection == "$cmd":
            # Commands encode the real operation as the first key of the
            # spec. list() is required: dict views are not indexable on
            # Python 3, so `items[0]` would raise TypeError otherwise.
            items = list(self._Cursor__spec.items())  # pylint: disable=no-member
            if len(items) > 0:
                subtype, collection = items[0]
        job.set_current_io({
            "type": "mongodb.%s" % subtype,
            "data": {
                "collection": "%s.%s" % (
                    self._Cursor__collection.database.name, collection)  # pylint: disable=no-member
            }
        })
    ret = Cursor._Cursor__send_message(self, *args, **kwargs)  # pylint: disable=no-member
    if job:
        # Clear the I/O marker once the network round-trip completes.
        job.set_current_io(None)
    return ret
def _Cursor__send_message(self, *args, **kwargs):
    """Monkeypatched Cursor.__send_message that records the find/command as
    the current job I/O before delegating to the real implementation."""
    job = get_current_job()
    if job:
        subtype = "find"
        collection = self._Cursor__collection.name  # pylint: disable=no-member
        if collection == "$cmd":
            # Commands encode the real operation as the first key of the
            # spec. list() is required: dict views are not indexable on
            # Python 3, so `items[0]` would raise TypeError otherwise.
            items = list(self._Cursor__spec.items())  # pylint: disable=no-member
            if len(items) > 0:
                subtype, collection = items[0]
        job.set_current_io({
            "type": "mongodb.%s" % subtype,
            "data": {
                "collection": "%s.%s" % (
                    self._Cursor__collection.database.name, collection)  # pylint: disable=no-member
            }
        })
    ret = Cursor._Cursor__send_message(self, *args, **kwargs)  # pylint: disable=no-member
    if job:
        # Clear the I/O marker once the network round-trip completes.
        job.set_current_io(None)
    return ret
def _Cursor__send_message(self, *args, **kwargs):
    """Monkeypatched Cursor.__send_message: records the query as the current
    job I/O and fires the optional mongodb pre/post hooks around the real
    network send, timing the round-trip."""
    job = get_current_job()
    if job:
        subtype = "cursor"
        collection = self._Cursor__collection.name  # pylint: disable=no-member
        if collection == "$cmd":
            # Commands encode the real operation as the first key of the
            # spec. list() is required: dict views are not indexable on
            # Python 3, so `items[0]` would raise TypeError otherwise.
            items = list(self._Cursor__spec.items())  # pylint: disable=no-member
            if len(items) > 0:
                subtype, collection = items[0]
        full_name = "%s.%s" % (
            self._Cursor__collection.database.name, collection)  # pylint: disable=no-member
        job.set_current_io({
            "type": "mongodb.%s" % subtype,
            "data": {
                "collection": full_name
            }
        })
        if config.get("mongodb_pre_hook"):
            config.get("mongodb_pre_hook")({
                "collection": full_name,
                "method": subtype,
                "args": (getattr(args[0], "spec", None), ),
                "kwargs": kwargs,
                "client": self._Cursor__collection.database.client,
                "job": job
            })
    start_time = time.time()
    ret = False
    try:
        ret = Cursor._Cursor__send_message(self, *args, **kwargs)  # pylint: disable=no-member
    finally:
        # Post-processing runs even when the send raises, so the job's
        # I/O marker is always cleared and the post hook always fires.
        stop_time = time.time()
        if job:
            job.set_current_io(None)
            if config.get("mongodb_post_hook"):
                config.get("mongodb_post_hook")({
                    "collection": full_name,
                    "method": subtype,
                    "args": (getattr(args[0], "spec", None), ),
                    "kwargs": kwargs,
                    "client": self._Cursor__collection.database.client,
                    "job": job,
                    "result": ret,
                    "time": stop_time - start_time
                })
    return ret
def limit_data(data: Cursor, request_params: dict) -> list:
    '''
    Limits the number of results in a response based on the parameters
    sent in an HTTP request.

    --> data : The cursor of data to apply the limit to.
    --> request_params : The parameters sent with the request (in
        querystring or body).

    <-- A queryset containing the limited data.
    '''
    requested_limit = int(request_params.get('limit', 0))
    # A limit of 0 (or absent) means "no limit": return the cursor as-is.
    if not requested_limit:
        return data
    return data.limit(requested_limit)
def sort_data(data: Cursor, request_params: dict) -> list:
    '''
    Sorts a data according to the parameters sent in an HTTP request.

    --> data : The cursor of data to apply the sort to.
    --> request_params : The parameters sent with the request (in
        querystring or body).

    <-- A queryset containing the sorted data.
    '''
    mongo_sort = request_params.get('sort', {})
    # No sort requested: hand the cursor back untouched.
    if mongo_sort == {}:
        return data
    # Directions arrive as strings/numbers; pymongo wants ints (1 / -1).
    sort_spec = [(field, int(direction)) for field, direction in mongo_sort.items()]
    return data.sort(sort_spec)
def get_inner_oppty_dur_stats(cls, mm1_ob_cur: Cursor, mm2_ob_cur: Cursor):
    """Build per-market orderbook statistics for the two market makers.

    --> mm1_ob_cur / mm2_ob_cur : orderbook cursors for each market; each
        consumer receives its own clone so the cursors can be re-read.
    <-- nested dict keyed by market ("mm1"/"mm2") with mid_price and
        per-depth amount slots; avg/var/std are added to each leaf by the
        helper calls.
    """
    def _amount_template():
        # One fresh structure per market; asks lead with min_ask,
        # bids with max_bid.
        return {
            "asks": {"min_ask": {}, "top5": {}, "top10": {}, "total": {}},
            "bids": {"max_bid": {}, "top5": {}, "top10": {}, "total": {}},
        }

    # Generate the (previously hand-duplicated) default structure.
    inner_stats = {
        market: {"mid_price": {}, "amount": _amount_template()}
        for market in ("mm1", "mm2")
    }
    cursors = {"mm1": mm1_ob_cur, "mm2": mm2_ob_cur}

    # Preserve the original call order: both mid-price passes first,
    # then the amount passes per market and side.
    for market, cursor in cursors.items():
        cls.get_mid_price_stat(market, cursor.clone(), inner_stats)
    for market, cursor in cursors.items():
        for side in ("asks", "bids"):
            cls.get_amount_stat_by_depth(market, cursor.clone(), inner_stats, side)
    return inner_stats
def _apply_params(self, cursor: Cursor):
    """Apply the stored offset, limit and ordering to `cursor`.

    Mutates the cursor in place (pymongo's skip/limit/sort return self)
    and returns it for chaining.
    """
    cursor.skip(self._offset)
    if self._limit is not None:
        cursor.limit(self._limit)
    if self._order_by:
        # Translate (column, desc?) pairs into pymongo sort directions.
        directions = []
        for column, descending in self._order_by:
            directions.append((column, DESCENDING if descending else ASCENDING))
        cursor.sort(directions)
    return cursor
def count_docs(self):
    """ Count the amount of populated docs. """
    populated = Cursor.count(self.search('filled', 'cmt_count'))
    self.output.append("Populated: %d" % populated)
from pymongo import MongoClient
from pymongo.cursor import Cursor
from graph_builder import GraphBuilder
from dbpedia_subjects_extractor import DbpediaSubjectsExtractor
import sys

DATABASE_NAME = 'socialnetworks'
# Collection to process is taken from the first command-line argument.
COLLECTION_NAME = sys.argv[1]


def preprocessing(x):
    # Log which document is being processed, then hand its body text to
    # the graph builder.
    print("Processing: ", x['Links'][0]['Uri'])
    return x['Links'][0]['Body']


if __name__ == "__main__":
    client = MongoClient('localhost', 27017)
    database = client[DATABASE_NAME]
    documents_collection = database[COLLECTION_NAME]
    # no_cursor_timeout keeps the server-side cursor alive for the whole
    # (potentially long) graph-building pass.
    cursor = Cursor(documents_collection, no_cursor_timeout=True)
    graph_builder = GraphBuilder(DbpediaSubjectsExtractor, preprocessing=preprocessing)
    graph_builder.build(cursor)
    graph_builder.save_graph(COLLECTION_NAME + ".gml")
def next(self):
    """Fetch the next raw document and wrap it in the model class."""
    raw = PyCursor.next(self)
    return self._model(**raw)
def find(self, *args, **kwargs):
    """Query the database.

    The `spec` argument is a prototype document that all results
    must match. For example:

    >>> db.test.find({"hello": "world"})

    only matches documents that have a key "hello" with value
    "world". Matches can have other keys *in addition* to "hello".
    The `fields` argument is used to specify a subset of fields that
    should be included in the result documents. By limiting results
    to a certain subset of fields you can cut down on network traffic
    and decoding time.

    Raises :class:`TypeError` if any of the arguments are of improper
    type. Returns an instance of :class:`~pymongo.cursor.Cursor`
    corresponding to this query.

    :Parameters:
      - `spec` (optional): a SON object specifying elements which
        must be present for a document to be included in the result
        set
      - `fields` (optional): a list of field names that should be
        returned in the result set ("_id" will always be included),
        or a dict specifying the fields to return
      - `skip` (optional): the number of documents to omit (from the
        start of the result set) when returning the results
      - `limit` (optional): the maximum number of results to return
      - `timeout` (optional): if True, any returned cursor will be
        subject to the normal timeout behavior of the mongod process.
        Otherwise, the returned cursor will never timeout at the
        server. Care should be taken to ensure that cursors with
        timeout turned off are properly closed.
      - `snapshot` (optional): if True, snapshot mode will be used
        for this query. Snapshot mode assures no duplicates are
        returned, or objects missed, which were present at both the
        start and end of the query's execution. For details, see the
        `snapshot documentation
        <http://dochub.mongodb.org/core/snapshot>`_.
      - `tailable` (optional): the result of this find call will be
        a tailable cursor - tailable cursors aren't closed when the
        last data is retrieved but are kept open and the cursors
        location marks the final document's position. if more data is
        received iteration of the cursor will continue from the last
        document received. For details, see the `tailable cursor
        documentation
        <http://www.mongodb.org/display/DOCS/Tailable+Cursors>`_.
      - `sort` (optional): a list of (key, direction) pairs
        specifying the sort order for this query. See
        :meth:`~pymongo.cursor.Cursor.sort` for details.
      - `max_scan` (optional): limit the number of documents examined
        when performing the query
      - `as_class` (optional): class to use for documents in the
        query result (default is
        :attr:`~pymongo.connection.Connection.document_class`)
      - `slave_okay` (optional): if True, allows this query to be run
        against a replica secondary.
      - `network_timeout` (optional): specify a timeout to use for
        this query, which will override the
        :class:`~pymongo.connection.Connection`-level default

    .. note:: The `max_scan` parameter requires server
       version **>= 1.5.1**

    .. versionadded:: 1.8
       The `network_timeout` parameter.

    .. versionadded:: 1.7
       The `sort`, `max_scan` and `as_class` parameters.

    .. versionchanged:: 1.7
       The `fields` parameter can now be a dict or any iterable in
       addition to a list.

    .. versionadded:: 1.1
       The `tailable` parameter.

    .. mongodoc:: find
    """
    # Inherit the connection-level slave_okay default unless the caller
    # set it explicitly. (Idiom fix: `not in`, not `not ... in ...`.)
    if 'slave_okay' not in kwargs and self.slave_okay:
        kwargs['slave_okay'] = True
    return Cursor(self, *args, **kwargs)
def _Cursor__send_message(self, *args, **kwargs):
    """Monkeypatched Cursor.__send_message: records the query as the current
    job's I/O and fires the optional mongodb pre/post hooks around the real
    network send, timing the round-trip."""
    # print self.__dict__
    job = get_current_job()
    if job:
        subtype = "cursor"
        collection = self._Cursor__collection.name  # pylint: disable=no-member
        if collection == "$cmd":
            # Commands carry the real operation as the first key of the
            # spec; list() makes the dict view indexable on Python 3.
            items = list(self._Cursor__spec.items())  # pylint: disable=no-member
            if len(items) > 0:
                subtype, collection = items[0]
        full_name = "%s.%s" % (
            self._Cursor__collection.database.name, collection)  # pylint: disable=no-member
        job.set_current_io({
            "type": "mongodb.%s" % subtype,
            "data": {
                "collection": full_name
            }
        })
        if config.get("mongodb_pre_hook"):
            # Pre-hook sees the query spec (if any) before the send.
            config.get("mongodb_pre_hook")({
                "collection": full_name,
                "method": subtype,
                "args": (getattr(args[0], "spec", None), ),
                "kwargs": kwargs,
                "client": self._Cursor__collection.database.client,
                "job": job
            })
    start_time = time.time()
    ret = False
    try:
        ret = Cursor._Cursor__send_message(self, *args, **kwargs)  # pylint: disable=no-member
    finally:
        # Runs even when the send raises: the job's I/O marker is always
        # cleared and the post-hook always observes the elapsed time.
        stop_time = time.time()
        if job:
            job.set_current_io(None)
            if config.get("mongodb_post_hook"):
                config.get("mongodb_post_hook")({
                    "collection": full_name,
                    "method": subtype,
                    "args": (getattr(args[0], "spec", None), ),
                    "kwargs": kwargs,
                    "client": self._Cursor__collection.database.client,
                    "job": job,
                    "result": ret,
                    "time": stop_time - start_time
                })
    return ret
def r_comments(self, rng):
    """Append the comment count for the month given by rng=(year, month)."""
    month_start = datetime(rng[0], rng[1], 1)
    matches = self.search('select_gtv', 'comment_date', month_start, 0, 'comment_id')
    self.output.append("Comment Amount: %d" % Cursor.count(matches))
def commentc(self):
    """Append the total number of documents that have a comment_id."""
    total = Cursor.count(self.search('exists', 'comment_id'))
    self.output.append("Comment Amount: %d" % total)
def paginate(cursor: Cursor, *, offset: int, limit: int) -> Cursor:
    """Apply pagination to a cursor.

    A falsy offset skips nothing. A limit of None leaves the cursor
    unbounded; note that limit=0 is still forwarded to pymongo (which
    treats it as "no limit").
    """
    paged = cursor
    if offset:
        paged = paged.skip(offset)
    if limit is not None:
        paged = paged.limit(limit)
    return paged
def __init__(self, model, spec=None, *args, **kwargs):
    """Wrap a pymongo cursor over `model`'s collection, remembering the
    originating query spec and model class."""
    self._model = model
    self._query = spec
    self._order_entries = []
    PyCursor.__init__(self, model._get_collection(), spec, *args, **kwargs)
def next(self):
    """Advance the underlying cursor and wrap the raw document."""
    raw = PymongoCursor.next(self)
    return self._document(raw, **self._kwargs)
def __getattr__(self, attr_name):
    # Fallback lookup (only reached when normal attribute resolution
    # fails): delegate to the real pymongo cursor's attribute access so
    # the wrapper stays transparent.
    return PymongoCursor.__getattribute__(self, attr_name)
def __getitem__(self, index):
    """Index into the cursor, wrapping the raw result in the document class."""
    raw = PymongoCursor.__getitem__(self, index)
    return self._document(raw, **self._kwargs)
def __getitem__(self, *args, **kwargs):
    """Index or slice the cursor.

    Slicing yields another cursor of this class, which is returned
    unchanged; a single raw document is wrapped in the model class.
    """
    value = PyCursor.__getitem__(self, *args, **kwargs)
    # isinstance is the idiomatic type check (vs. `type(...) ==`).
    if isinstance(value, self.__class__):
        return value
    return self._model(**value)
def miss_docs(self):
    """Append the number of documents whose cmt_count is empty."""
    missing = Cursor.count(self.search('empty', 'cmt_count'))
    self.output.append("Missing: %d" % missing)
def articlec(self):
    """Append the total number of documents that have an article nr."""
    total = Cursor.count(self.search('exists', 'nr'))
    self.output.append("Article Amount: %d" % total)
def find(self, *args, **kwargs):
    """Run a query against this collection, returning the custom Cursor."""
    cursor = Cursor(self, *args, **kwargs)
    return cursor
def r_articles(self, rng):
    """Append the article count for the month given by rng=(year, month)."""
    month_start = datetime(rng[0], rng[1], 1)
    matches = self.search('select_gtv', 'date', month_start, 0, 'nr')
    self.output.append("Article Amount: %d" % Cursor.count(matches))
def __init__(self, model, spec=None, *args, **kwargs):
    """Initialise the cursor over the model's backing collection."""
    self._order_entries = []
    self._query = spec
    self._model = model
    collection = model._get_collection()
    PyCursor.__init__(self, collection, spec, *args, **kwargs)
    # NOTE(review): the lines below are the tail of a DataFrame-to-MongoDB
    # insert helper whose `def` line lies outside this chunk; indentation
    # reconstructed — confirm against the original file.
    if ignore_id:
        # Drop Mongo's _id column so re-insertion generates fresh ids.
        if '_id' in df.columns:
            df.drop('_id', axis=1, inplace=True)
    df.fillna('', inplace=True)
    if drop_exist:
        # Replace the existing collection wholesale.
        db[coll_name].drop()
    db[coll_name].insert_many(df.T.to_dict().values())
    # Insert as a new collection; converting the DataFrame to a dict via
    # json loses the row order.
    # import json
    # db[coll_name].insert_many(json.loads(df.T.to_json()).values())
    # Numbers inside a DataFrame cannot be inserted into the database
    # directly when inserting row dicts as-is.
    # del doc['orderID']
    # doc['orderID'] = int(doc['orderID'])


if __name__ == '__main__':
    conn = connect2mongodb()
    db = conn.get_database(DATA_BASE)
    coll = db.get_collection('CTP-090923')
    # Raw pymongo Cursor constructed directly over the collection.
    cur = Cursor(coll)
    print(coll.find({}).count())
    pass
def __init__(self, model, *args, **kwargs):
    """Initialise the cursor over `model`'s backing collection (no spec)."""
    self._model = model
    self._order_entries = []
    PyCursor.__init__(self, model._get_collection(), *args, **kwargs)
def next(self, *args, **kwargs):
    """Advance the cursor, wrapping non-None documents in a CSObject.

    The object class is looked up on `winter.objects` by the name of
    this cursor's collection; a None result (end-of-data sentinel) is
    passed through unchanged.
    """
    result = Cursor.next(self, *args, **kwargs)
    # Idiom fix: `is not None` rather than `not ... is None`.
    if result is not None:
        object_cls = getattr(winter.objects, self.collection.name)
        return CSObject(object_cls(result), self.collection.name)
    return result
def find(self, spec=None, fields=None, skip=0, limit=0,
         timeout=True, snapshot=False, tailable=False, _sock=None,
         _must_use_master=False, _is_command=False):
    """Query the database.

    The `spec` argument is a prototype document that all results
    must match. For example:

    >>> db.test.find({"hello": "world"})

    only matches documents that have a key "hello" with value
    "world". Matches can have other keys *in addition* to "hello".
    The `fields` argument is used to specify a subset of fields that
    should be included in the result documents. By limiting results
    to a certain subset of fields you can cut down on network traffic
    and decoding time.

    Raises TypeError if any of the arguments are of improper type.
    Returns an instance of Cursor corresponding to this query.

    :Parameters:
      - `spec` (optional): a SON object specifying elements which
        must be present for a document to be included in the result
        set
      - `fields` (optional): a list of field names that should be
        returned in the result set ("_id" will always be included)
      - `skip` (optional): the number of documents to omit (from the
        start of the result set) when returning the results
      - `limit` (optional): the maximum number of results to return
      - `timeout` (optional): if True, any returned cursor will be
        subject to the normal timeout behavior of the mongod process.
        Otherwise, the returned cursor will never timeout at the
        server. Care should be taken to ensure that cursors with
        timeout turned off are properly closed.
      - `snapshot` (optional): if True, snapshot mode will be used
        for this query. Snapshot mode assures no duplicates are
        returned, or objects missed, which were present at both the
        start and end of the query's execution. For details, see the
        `snapshot documentation
        <http://www.mongodb.org/display/DOCS/How+to+do+Snapshotting+in+the+Mongo+Database>`_.
      - `tailable` (optional): the result of this find call will be
        a tailable cursor - tailable cursors aren't closed when the
        last data is retrieved but are kept open and the cursors
        location marks the final document's position. if more data is
        received iteration of the cursor will continue from the last
        document received. For details, see the `tailable cursor
        documentation
        <http://www.mongodb.org/display/DOCS/Tailable+Cursors>`_.

    .. versionadded:: 1.1
       The `tailable` parameter.

    .. mongodoc:: find
    """
    # Default to an empty (match-everything) query.
    if spec is None:
        spec = SON()

    # Inherit the connection-level slave_okay setting for this query.
    slave_okay = self.__database.connection.slave_okay

    # Strict argument validation: fail early with a clear TypeError
    # rather than sending a malformed query to the server.
    if not isinstance(spec, dict):
        raise TypeError("spec must be an instance of dict")
    if fields is not None and not isinstance(fields, list):
        raise TypeError("fields must be an instance of list")
    if not isinstance(skip, int):
        raise TypeError("skip must be an instance of int")
    if not isinstance(limit, int):
        raise TypeError("limit must be an instance of int")
    if not isinstance(timeout, bool):
        raise TypeError("timeout must be an instance of bool")
    if not isinstance(snapshot, bool):
        raise TypeError("snapshot must be an instance of bool")
    if not isinstance(tailable, bool):
        raise TypeError("tailable must be an instance of bool")

    if fields is not None:
        # An explicitly empty field list still returns "_id".
        if not fields:
            fields = ["_id"]
        fields = self._fields_list_to_dict(fields)

    return Cursor(self, spec, fields, skip, limit, slave_okay, timeout,
                  tailable, snapshot, _sock=_sock,
                  _must_use_master=_must_use_master,
                  _is_command=_is_command)