def record_progress(mongo, config, kind, start_dt, end_dt, status):
    """Record the download status of one (entity, time-range) chunk.

    gae_download.py downloads data one chunk at a time, each chunk being a
    time-range of a single entity kind.  As a chunk is started, completed,
    etc., gae_download.py calls this function with the chunk identity and
    its current status, and the status is written to the 'ProgressLogs'
    collection of the download control db.

    Each stored document keeps the latest status plus a 'history' mapping
    from str(status) to the time that status was most recently recorded.

    Arguments:
      mongo: mongo connection
      config: the download control db config (reads config['control_db'])
      kind: datastore entity type
      start_dt, end_dt: backup_timestamp range of the entity type
      status: one of the enum values in DownloadStatus
    """

    def _record_progress(conn, cfg, entity_kind, range_start, range_end,
                         state):
        # The chunk key doubles as the document _id, so a chunk always maps
        # to exactly one progress document.
        chunk_key = get_key(entity_kind, range_start, range_end)
        progress_logs = conn[cfg['control_db']]['ProgressLogs']

        # Carry forward any previously recorded history for this chunk.
        previous = progress_logs.find_one(chunk_key)
        if previous and 'history' in previous:
            history = previous['history']
        else:
            history = {}
        history[str(state)] = dt.datetime.now()

        progress_logs.save({
            '_id': chunk_key,
            'kind': entity_kind,
            'start_dt': range_start,
            'end_dt': range_end,
            'status': state,
            'history': history,
        })

    # db_decorator wraps the write with max_tries=5 (presumably a retry
    # wrapper -- see its definition for the exact semantics).
    guarded_write = db_decorator(max_tries=5, func=_record_progress)
    guarded_write(mongo, config, kind, start_dt, end_dt, status)
def put_document(entity, config, mongo):
    """Write a GAE entity into mongodb as a document.

    The document goes to the collection named after the entity's kind, in
    the database chosen by get_db_name(config, kind).  Its '_id' is the
    entity's 'key' field when present, otherwise str(entity.key()).

    Arguments:
      entity: the GAE entity to store
      config: download config; reads config['kinds'][kind][2] (mutable
        flag) and config['max_tries']
      mongo: mongo connection

    Duplicate-key errors are ignored; InvalidDocument and InvalidStringData
    errors are logged and not re-raised.
    """

    def _put_document(gae_entity, cfg, conn):
        kind = gae_entity.key().kind()
        document = dict(gae_entity)
        is_mutable = int(cfg['kinds'][kind][2])

        # Make sure all records use the key field as the index key.
        document['_id'] = (
            document['key'] if 'key' in document else str(gae_entity.key())
        )
        document = apply_transform(document)

        try:
            collection = conn[get_db_name(cfg, kind)][kind]
            # Kinds flagged 0 are inserted; everything else is saved
            # (insert-or-replace by _id).
            store = collection.insert if is_mutable == 0 else collection.save
            store(document)
        except DuplicateKeyError:
            # Already stored -- nothing to do.
            pass
        except InvalidDocument:
            g_logger.error("InvalidDocument %s" % (document))
        except InvalidStringData as e:
            g_logger.error("Problem inserting doc: %s \n error: %s" %
                           (document, e))

    guarded_put = db_decorator(config['max_tries'], _put_document)
    guarded_put(entity, config, mongo)
def open_db_conn(config):
    """Open a mongodb connection to config['dbhost']:config['dbport'].

    The call is wrapped by db_decorator with config['max_tries'].

    NOTE(review): the previous summary said the connection is reused, but
    no caching is visible in this function -- confirm whether callers or
    db_decorator handle reuse.
    NOTE(review): a second, identical open_db_conn is defined later in this
    file and shadows this one at import time.

    Returns:
      whatever the wrapped pymongo.Connection(...) call returns.
    """

    def _open_db_conn(cfg):
        connection_cls = pymongo.Connection
        return connection_cls(cfg['dbhost'], cfg['dbport'])

    guarded_open = db_decorator(config['max_tries'], _open_db_conn)
    return guarded_open(config)
def ensure_db_index(config, mongo, kind, index):
    """Ensure `index` exists on the collection that stores `kind`.

    Arguments:
      config: download config; reads config['max_tries'] and is passed to
        get_db_name to pick the database
      mongo: mongo connection
      kind: datastore entity type; also the collection name
      index: index specification handed straight to ensure_index
    """

    def _ensure_db_index(cfg, conn, entity_kind, index_spec):
        collection = conn[get_db_name(cfg, entity_kind)][entity_kind]
        collection.ensure_index(index_spec)

    guarded_ensure = db_decorator(config['max_tries'], _ensure_db_index)
    guarded_ensure(config, mongo, kind, index)
def open_db_conn(config):
    """Open a mongodb connection to config["dbhost"]:config["dbport"].

    The call is wrapped by db_decorator with config["max_tries"].

    NOTE(review): the previous summary said the connection is reused, but
    no caching is visible in this function -- confirm whether callers or
    db_decorator handle reuse.
    NOTE(review): this repeats an identical open_db_conn defined earlier in
    this file; being later, this definition is the one that takes effect.

    Returns:
      whatever the wrapped pymongo.Connection(...) call returns.
    """

    def _open_db_conn(cfg):
        connection_cls = pymongo.Connection
        return connection_cls(cfg["dbhost"], cfg["dbport"])

    guarded_open = db_decorator(config["max_tries"], _open_db_conn)
    return guarded_open(config)
def get_failed_jobs(mongo, config):
    """Find gae download tasks whose status is below SUCCESS.

    Queries the 'ProgressLogs' collection of the download control db for
    documents with status < DownloadStatus.SUCCESS.

    Arguments:
      mongo: mongo connection
      config: the download control db config (reads config['control_db'])

    Returns:
      the result of the collection's find() call for that query.
    """

    def _get_failed_jobs(conn, cfg):
        progress_logs = conn[cfg['control_db']]['ProgressLogs']
        below_success = {"status": {"$lt": DownloadStatus.SUCCESS}}
        return progress_logs.find(below_success)

    guarded_find = db_decorator(5, _get_failed_jobs)
    return guarded_find(mongo, config)