Beispiel #1
0
def archive_success_make_subrecord(record):
    '''
    Builds the 'submission' subrecord stored on a successfully archived record.
      Takes record (dict), which must hold a truthy value for each of
        'job_id', 'when_ready_for_pbs', 'when_archival_queued' and
        'when_archival_started'.
      Returns the subrecord (dict): those four values copied from record,
        plus 'when_archival_completed' set to util.get_timestamp().
      Raises Exception if a required key is missing or its value is falsy.
    '''

    required_keys = (
        'job_id',
        'when_ready_for_pbs',
        'when_archival_queued',
        'when_archival_started',
    )
    id1 = record.get('_id')  ## only used to identify the record in messages

    ## validate everything first; nothing is copied from a bad record
    for key in required_keys:
        if key not in record:
            msg = util.gen_msg(
                f"Expected key '{key}' not found in record w/ id '{id1}'.")
            raise Exception(msg)
        if not record[key]:
            msg = util.gen_msg(
                f"Unexpected value '{record[key]}' for key '{key}' in record w/ id '{id1}'."
            )
            raise Exception(msg)

    subrecord = {key: record[key] for key in required_keys}
    subrecord['when_archival_completed'] = util.get_timestamp()
    return subrecord
Beispiel #2
0
def get_current_username(mongo_record):
    '''
    Extracts the username of the record's current user.
      Takes mongo_record (dict), expected to hold a dict under 'current_user'
        which in turn holds a non-empty str under 'username'.
      Returns the username (str).
      Raises Exception if any of those expectations is not met.
    '''

    if not isinstance(mongo_record, dict):
        msg = util.gen_msg(
            f"expected mongo_record to be dict; got: '{type(mongo_record)}'; record: {mongo_record}"
        )
        raise Exception(msg)

    if 'current_user' not in mongo_record:
        msg = util.gen_msg(
            f"key 'current_user' not found in record: {mongo_record}")
        raise Exception(msg)

    user_obj = mongo_record['current_user']
    if not isinstance(user_obj, dict):
        msg = util.gen_msg(
            f"current_user expected to be dict, got '{type(user_obj)}'.")
        raise Exception(msg)

    if 'username' not in user_obj:
        msg = util.gen_msg(
            f"key 'username' not found in current_user in record: {mongo_record}"
        )
        raise Exception(msg)

    name = user_obj['username']
    if name and isinstance(name, str):
        return name

    ## present but empty, or not a string
    raise Exception(
        util.gen_msg(
            f"username not specified or not str in current_user in record: {mongo_record}"
        ))
Beispiel #3
0
def retrieve_processing(args, user_dict, mongo_collection):
    '''
    Changes status of retrieval job matching args['obj_id'] and args['job_id']
      from 'queued' to 'processing'.
    Takes:
      args (dict) with obj_id (str), job_id (str);
      user_dict (dict) not used;
      mongo_collection: MongoDB database.collection
    Returns:
      job_id (str)
    Raises:
      Exception if the number of records matching the _id is not exactly 1,
      if no single matching 'queued' retrieval is found (see
      get_retrievals_idx), or if the update is not acknowledged.

    MongoDB record changed from:
    "retrievals": [{
      "job_id": "8649.ctarchive.jax.org",
      "retrieval_status": "queued",
      "when_ready_for_pbs": "2020-01-02 07:34:38 EDT-0400",
      "when_retrieval_queued": "2020-01-02 07:34:39 EDT-0400",
      "when_retrieval_started": null,
      "when_retrieval_completed": null
    }]

    to:
    "retrievals": [{
      "job_id": "8649.ctarchive.jax.org",
      *"retrieval_status": "processing",
      "when_ready_for_pbs": "2020-01-02 07:34:38 EDT-0400",
      "when_retrieval_queued": "2020-01-02 07:34:39 EDT-0400",
      *"when_retrieval_started": "2020-01-02 07:36:25 EDT-0400",
      "when_retrieval_completed": null
    }]
    '''

    expected_status = 'queued'

    obj_id, job_id = get_args_objid_jobid(args)

    condition = {'_id': obj_id}
    cursor = mongo_collection.find(condition)
    ## bind the count to a name: the error message below reports it
    count = cursor.count()
    if count != 1:
        raise Exception(util.gen_msg(f"{count} records match {condition}.\n"))

    idx = get_retrievals_idx(job_id, expected_status, cursor[0])
    ## dotted path addressing one element of the 'retrievals' array
    prefix = 'retrievals.' + str(idx)

    result = mongo_collection.update_one({'_id': obj_id}, {
        '$set': {
            f'{prefix}.retrieval_status': 'processing',
            f'{prefix}.when_retrieval_started': util.get_timestamp()
        }
    })

    if not result.acknowledged:
        raise Exception(
            util.gen_msg(
                f"MongoDB update on _id '{obj_id}' not acknowledged."))

    return job_id
Beispiel #4
0
def archive_failed(args, user_dict, mongo_collection):
    '''
    Marks archive record as having failed.
      Takes
        args (dict) with job_id (str);
        user_dict: not used;
        mongo_collection: pymongo object representing MongoDB collection;
      Returns job_id (str).
      Raises Exception if job_id is missing, if the number of records
        matching job_id is not exactly 1, or if the update is not
        acknowledged.
    from:
    "ready_for_pbs": false,
    "when_ready_for_pbs": "2019-12-31 22:41:00 EDT-0400",
    "when_archival_queued": "2019-12-31 22:41:01 EDT-0400",
    "when_archival_started": "2019-12-31 22:44:08 EDT-0400",
    "when_archival_completed": null,
    "failed_multiple": null,
    "archival_status": "processing",
    "job_id": "8638.ctarchive.jax.org",

    to:
    "ready_for_pbs": false,
    "when_ready_for_pbs": "2019-12-31 22:41:00 EDT-0400",
    "when_archival_queued": "2019-12-31 22:41:01 EDT-0400",
    "when_archival_started": "2019-12-31 22:41:02 EDT-0400",
    +"when_archival_failed": "2019-12-31 22:46:08 EDT-0400",
    "when_archival_completed": null,
    "failed_multiple": null,
    *"archival_status": "failed",
    "job_id": "8638.ctarchive.jax.org",
    '''

    job_id = args.get('job_id')
    if not job_id:
        raise Exception(util.gen_msg("No job_id passed."))

    condition = {'job_id': job_id}
    ## only the _id is needed to address the record in the update below
    cursor = mongo_collection.find(condition, {'_id': 1})
    count = cursor.count()
    if count != 1:
        raise Exception(util.gen_msg(f"{count} records match {condition}.\n"))

    id1 = cursor[0]['_id']  ## '_id' field from 1st record (dict)
    result = mongo_collection.update_one(
        {'_id': id1},  ## match condition
        {
            '$set': {
                'when_archival_failed': util.get_timestamp(),
                'archival_status': 'failed'
            }
        })

    if not result.acknowledged:
        ## f-string so the message carries the actual _id
        raise Exception(
            util.gen_msg(f"MongoDB update on _id '{id1}' not acknowledged."))

    return job_id
Beispiel #5
0
def archive_success(args, user_dict, mongo_collection):
    '''
    Marks the record matching args['job_id'] as successfully archived.
    Takes
      args (dict) arguments from api call; includes:
        job_id (str), sourceSize (int), archivedSize (int);
      user_dict (dict): not used;
      mongo_collection: pymongo object representing MongoDB collection;
    Returns str() of the updated record, as re-read from the collection.
    Raises Exception if the number of records matching job_id (before the
      update) or matching _id (after it) is not exactly 1.

    -"ready_for_pbs": false,
    *"when_ready_for_pbs": "2019-12-31 22:41:00 EDT-0400",
    *"when_archival_queued": "2019-12-31 22:41:01 EDT-0400",
    *"when_archival_started": "2019-12-31 22:44:08 EDT-0400",
    +"when_archival_completed": "2020-01-01 03:01:59 EDT-0400",
    -"failed_multiple": null,
    +"archival_status": "completed",
    *"job_id": "8638.ctarchive.jax.org"

    then:

    "archival_status": "completed",
    "archivedSize": { "$numberInt": "396700549" },
    "dateArchived": "2020-01-01",
    "sourceSize": { "$numberInt": "797725536" },
    "submission": {
        "job_id": "8638.ctarchive.jax.org",
        "when_ready_for_pbs": "2019-12-31 22:41:00 EDT-0400",
        "when_archival_queued": "2019-12-31 22:41:01 EDT-0400",
        "when_archival_started": "2019-12-31 22:44:08 EDT-0400",
        "when_archival_completed": "2020-01-01 03:01:59 EDT-0400"
    }
    '''

    job_id, source_size, archived_size = archive_success_proc_args(args)

    ## locate the single record carrying this job_id
    condition = {'job_id': job_id}
    matches = mongo_collection.find(condition)
    count = matches.count()
    if count != 1:
        msg = util.gen_msg(f"{count} records match {condition}.")
        raise Exception(msg)

    id1 = archive_success_update_record(
        matches[0], job_id, source_size, archived_size, mongo_collection)

    ## read the record back so the caller sees its post-update state
    refreshed = mongo_collection.find({'_id': id1})
    count = refreshed.count()
    if count != 1:
        msg = util.gen_msg(f"{count} records match _id '{id1}'.")
        raise Exception(msg)

    return str(refreshed[0])
Beispiel #6
0
def get_retrievals_indices(job_id, status, mongo_record):
    '''
    Collects the indices of matching entries in mongo_record's retrievals list.
      If status is 'ready_for_pbs', an entry matches when its
        'retrieval_status' equals status (job_id is not consulted).
      Otherwise an entry matches when its 'job_id' equals job_id; its
        'retrieval_status' must then equal status.
      Entries that are not dicts are skipped.
    Returns list of matching indices (int), possibly empty.
    Raises Exception if an entry with the given job_id has a different status.
    '''

    pre_jobid_status = 'ready_for_pbs'
    match_on_status_only = (status == pre_jobid_status)

    idx_list = []
    for idx, entry in enumerate(get_retrievals_list(mongo_record)):
        if not isinstance(entry, dict):
            continue
        status_idx = entry.get('retrieval_status')

        if match_on_status_only:
            if status_idx == status:
                idx_list.append(idx)
        elif job_id == entry.get('job_id'):
            if status_idx != status:
                msg = util.gen_msg(
                    f"Found job_id '{job_id}', but retrieval status '{status_idx}', "
                    + f"expected '{status}'; record: {mongo_record}")
                raise Exception(msg)
            idx_list.append(idx)

    return idx_list
Beispiel #7
0
def get_retrievals_idx(job_id, status, mongo_record):
    '''
    Finds the single subrecord of mongo_record.retrievals (list of dicts)
      selected by get_retrievals_indices(job_id, status, mongo_record).
    Takes:
      job_id for retrieval
      status: expected retrieval_status for the matching retrieval
      mongo_record: mongodb record (dict) to be searched.
    Returns:
      index (int >= 0) of the one matching subrecord in mongo_record.retrievals.
    Raises:
      Exception if no subrecord matches, or if more than one matches.
    '''

    pre_jobid_status = 'ready_for_pbs'

    idx_list = get_retrievals_indices(job_id, status, mongo_record)

    if len(idx_list) == 1:
        return idx_list[0]

    if not idx_list:
        ## for 'ready_for_pbs' the search was by status alone, so the
        ##   message does not mention job_id
        if status == pre_jobid_status:
            raise Exception(
                util.gen_msg(
                    f"Could not find status '{status}' in record: {mongo_record}"
                ))
        raise Exception(
            util.gen_msg(
                f"Could not find retrieval job_id '{job_id}' w/ status '{status}' "
                + f"in record: {mongo_record}"))

    ## more than one match: ambiguous whatever the status
    raise Exception(
        util.gen_msg(
            f"{len(idx_list)} matches {idx_list} found w/ status '{status}' "
            + f"in record: {mongo_record}"))
Beispiel #8
0
def get_args_objid_jobid(args):
    '''
    Takes args (dict) with str values for keys 'obj_id' and 'job_id';
    Returns values for 'obj_id' (bson.objectid.ObjectId) and 'job_id' (str);
    Raises Exception if either value is missing/empty, or if obj_id cannot
      be converted to an ObjectId.
    '''

    obj_id = args.get('obj_id')
    if not obj_id:
        raise Exception(util.gen_msg("No obj_id passed."))

    try:
        id1 = bson.objectid.ObjectId(obj_id)
    except Exception as e:
        ## f-string so the offending value and the cause are reported
        raise Exception(
            util.gen_msg(f"obj_id '{obj_id}' not valid: {e}")) from e

    job_id = args.get('job_id')
    if not job_id:
        raise Exception(util.gen_msg("No job_id passed."))

    return id1, job_id
Beispiel #9
0
def get_last_document(args, user_dict, mongo_collection):
    '''
    Returns the first document of mongo_collection when sorted by '_id'
      descending, with its '_id' converted to str.
      args, user_dict: not used.
      Raises Exception if the collection has no records.
    '''

    newest_first = mongo_collection.find().sort("_id", pymongo.DESCENDING)

    if newest_first.count() == 0:
        msg = util.gen_msg(f"No records in collection.\n")
        raise Exception(msg)

    latest = newest_first[0]
    latest["_id"] = str(latest.get("_id"))
    return latest
Beispiel #10
0
def get_retrievals_list(mongo_record):
    '''
    Returns the value stored under 'retrievals' in mongo_record.
      Takes mongo_record (dict).
      Returns the non-empty list found under 'retrievals'.
      Raises Exception if mongo_record is not a dict, if 'retrievals' is
        missing or falsy, or if it is not a list.
    '''

    if not isinstance(mongo_record, dict):
        msg = util.gen_msg(
            f"expected mongo_record to be dict; got: '{type(mongo_record)}'; record: {mongo_record}"
        )
        raise Exception(msg)

    retrievals = mongo_record.get('retrievals')

    ## a missing key and an empty value are reported the same way
    if not retrievals:
        msg = util.gen_msg(
            f"unexpectedly mongo_record does not have 'retrievals' key; record: {mongo_record}"
        )
        raise Exception(msg)

    if isinstance(retrievals, list):
        return retrievals

    raise Exception(
        util.gen_msg(
            f"expected 'retrievals' to be list; got: '{type(retrievals)}'; record: {mongo_record}"
        ))
Beispiel #11
0
def archive_success_proc_args(args):
    '''
    Validates and converts the arguments 'job_id', 'sourceSize' and 'archivedSize'.
      Takes args (dict) with arguments from flask.request.
      Returns the tuple (job_id, sourceSize, archivedSize), with both sizes
        converted by int().
      Raises Exception if any of the three is missing/falsy, or if a size
        cannot be converted by int().
    '''

    job_id = args.get('job_id')
    if not job_id:
        msg = util.gen_msg("Required parameter 'job_id' not received.")
        raise Exception(msg)

    ## sourceSize: presence first, then conversion
    source_size_str = args.get('sourceSize')
    if not source_size_str:
        msg = util.gen_msg(f"Required parameter 'sourceSize' not received.")
        raise Exception(msg)
    try:
        source_size = int(source_size_str)
    except Exception:
        msg = util.gen_msg(
            f"sourceSize must be an integer; got '{source_size_str}'.")
        raise Exception(msg)

    ## archivedSize: same two checks
    archived_size_str = args.get('archivedSize')
    if not archived_size_str:
        msg = util.gen_msg("Required parameter 'archivedSize' not received.")
        raise Exception(msg)
    try:
        archived_size = int(archived_size_str)
    except Exception:
        msg = util.gen_msg(
            f"archivedSize must be an integer; got '{archived_size_str}'.")
        raise Exception(msg)

    return (job_id, source_size, archived_size)
Beispiel #12
0
def archive_queued(args, user_dict, mongo_collection):
    '''
    Records on the record matching args['obj_id'] that its archival job
      has been queued.
    Takes
      args (dict) with obj_id (str), job_id (str);
      user_dict: not used;
      mongo_collection: pymongo object representing MongoDB collection;
    Returns job_id
    Raises Exception if the number of records matching the _id is not
      exactly 1, or if the update is not acknowledged.

    "ready_for_pbs": false,
    "when_ready_for_pbs": "2019-12-31 22:41:00 EDT-0400",
    "+when_archival_queued": "2019-12-31 22:41:01 EDT-0400",
    "when_archival_started": null,
    "when_archival_completed": null,
    "failed_multiple": null,
    "*archival_status": "queued",
    "+job_id": "8638.ctarchive.jax.org",
    '''

    obj_id, job_id = get_args_objid_jobid(args)

    ## existence check only, so project just the _id
    condition = {'_id': obj_id}
    count = mongo_collection.find(condition, {'_id': 1}).count()
    if count != 1:
        msg = util.gen_msg(f"{count} records match {condition}.\n")
        raise Exception(msg)

    new_values = {
        'when_archival_queued': util.get_timestamp(),
        'archival_status': 'queued',
        'job_id': job_id,
    }
    result = mongo_collection.update_one({'_id': obj_id},
                                         {'$set': new_values})

    if result.acknowledged:
        return job_id

    raise Exception(
        util.gen_msg(f"MongoDB update on _id '{obj_id}' not acknowledged."))
Beispiel #13
0
def get_document_by_objectid_1(obj_id, user_dict, mongo_collection):
    '''
    Fetches the document whose _id is obj_id, if the user may see it.
      Takes
        obj_id: value convertible to bson.objectid.ObjectId;
        user_dict (dict): passed to get_permitted_records_list;
        mongo_collection: pymongo object representing MongoDB collection;
      Returns the document (dict) with '_id' converted to str.
      Raises Exception if obj_id is not valid, if the number of records
        matching it is not exactly 1, or if the permission filter does not
        leave exactly one record.
    '''
    try:
        id1 = bson.objectid.ObjectId(obj_id)
    except Exception as e:
        msg = util.gen_msg(f"obj_id '{obj_id}' not valid: {e}")
        raise Exception(msg)

    condition = {"_id": id1}
    cursor = mongo_collection.find(condition)
    if cursor.count() != 1:
        msg = util.gen_msg(f"{cursor.count()} records match {condition}.\n")
        raise Exception(msg)

    permitted = get_permitted_records_list(user_dict, cursor)
    if len(permitted) == 1:
        doc = permitted[0]
        doc["_id"] = str(doc.get("_id"))
        return doc

    ## the record exists but was filtered out by the permission check
    raise Exception(
        util.gen_msg(
            f"You do not have permission to retrieve record w/ object_id '{obj_id}'"
        )
    )
Beispiel #14
0
def get_permitted_records_list(user_dict, cursor):
    '''
    Filters the records in cursor down to those the user may see.
      Takes
        user_dict (dict): if 'admin' is truthy every record is permitted;
          otherwise 'groups_list' must be a list of the user's groups;
        cursor: iterable of records (dicts).
      Returns list of permitted records. A non-admin user is permitted a
        record when at least one entry of the record's 'system_groups'
        list is in the user's 'groups_list'.
      Raises Exception if a non-admin user_dict's 'groups_list' is not a list.
      Records lacking 'system_groups', or holding a non-list there, are
        reported via util.log_email and skipped.
    '''

    if user_dict.get("admin"):
        return list(cursor)

    user_groups_list = user_dict.get("groups_list")
    if not isinstance(user_groups_list, list):
        raise Exception(
            util.gen_msg(
                f"user_dict['groups_list'] '{user_groups_list}' is not a list; user_dict: '{user_dict}'"
            )
        )

    records = []
    for record in cursor:
        if "system_groups" not in record:
            util.log_email(
                util.gen_msg(f"record has no system_groups key; record: '{record}'")
            )
            continue

        allowed_groups_list = record.get("system_groups")
        if not isinstance(allowed_groups_list, list):
            util.log_email(
                util.gen_msg(
                    f"record['system_groups'] '{allowed_groups_list}' is not a list; record: '{record}'"
                )
            )
            ## skip the malformed record rather than iterating a non-list
            ##   (None would raise; a str would be matched char by char)
            continue

        if any(group in user_groups_list for group in allowed_groups_list):
            records.append(record)

    return records
Beispiel #15
0
def archive_success_update_record(record, job_id, source_size, archived_size,
                                  mongo_collection):
    '''
    Does the actual mongodb update.
      Takes:
        record, which is the pymongo version of the MongoDB record to be updated.
        job_id (str); not used here (the subrecord takes job_id from record)
        source_size (int)
        archived_size (int)
        mongo_collection: pymongo object representing MongoDB collection
      Returns _id on success; raises Exception on error (invalid record, see
        archive_success_make_subrecord, or update not acknowledged).
    '''

    id1 = record.get('_id')  ## '_id' field of the record being updated

    subrecord = archive_success_make_subrecord(record)

    result = mongo_collection.update_one(
        {'_id': id1},  ## match condition
        {
            '$set': {
                'archival_status': 'completed',
                'archivedSize': archived_size,
                'dateArchived':
                util.get_timestamp(format=config.time.get('format_day')),
                'sourceSize': source_size,
                'submission': subrecord
            },
            ## top-level bookkeeping fields are removed; four of them
            ##   (job_id and three when_* stamps) were copied into 'submission'
            '$unset': {
                'ready_for_pbs': '',
                'failed_multiple': '',
                'when_ready_for_pbs': '',
                'when_archival_queued': '',
                'when_archival_started': '',
                'when_archival_completed': '',
                'job_id': ''
            }
        })

    if not result.acknowledged:
        ## f-string so the message carries the actual _id
        raise Exception(
            util.gen_msg(f"MongoDB update on _id '{id1}' not acknowledged."))

    return id1
Beispiel #16
0
def get_document_by_objectid(args, user_dict, mongo_collection):
    '''
    Looks up a document by args['obj_id'] via get_document_by_objectid_1.
      Takes args (dict) with obj_id; user_dict and mongo_collection are
        passed through unchanged.
      Returns whatever get_document_by_objectid_1 returns.
      Raises Exception if obj_id is missing or falsy.
    '''
    obj_id = args.get("obj_id")
    if obj_id:
        return get_document_by_objectid_1(obj_id, user_dict, mongo_collection)
    raise Exception(util.gen_msg("No obj_id passed."))
Beispiel #17
0
def retrieve_failed(args, user_dict, mongo_collection):
    '''
    Changes status of retrieval job matching args['obj_id'] and args['job_id']
      from 'processing' to 'failed', copies the current user's username into
      the retrieval subrecord and removes 'current_user' from the record.
    Takes:
      args (dict) with obj_id (str), job_id (str);
      user_dict (dict) not used;
      mongo_collection: MongoDB database.collection
    Returns:
      placeholder message (str) starting with job_id; the PBS error message
      is not retrieved yet.
    Raises:
      Exception if the number of records matching the _id is not exactly 1,
      if no single matching 'processing' retrieval is found (see
      get_retrievals_idx), if the current username cannot be determined
      (see get_current_username), or if the update is not acknowledged.

    Meta-data changed from:
    -"current_user": {
        "fname": "Research",
        "lname": "IT",
        "username": "******",
        "email": "*****@*****.**",
    },
    ...
    "retrievals": [{
        "job_id": "8649.ctarchive.jax.org",
        *"retrieval_status": "processing",
        "when_ready_for_pbs": "2020-01-02 07:34:38 EDT-0400",
        "when_retrieval_queued": "2020-01-02 07:34:39 EDT-0400",
        "when_retrieval_started": "2020-01-02 07:36:25 EDT-0400",
        "when_retrieval_completed": null,
    }]

    to:
    "retrievals": [{
        "job_id": "8649.ctarchive.jax.org",
        *"retrieval_status": "failed",
        "when_ready_for_pbs": "2020-01-02 07:34:38 EDT-0400",
        "when_retrieval_queued": "2020-03-30 12:32:14 EDT-0400",
        "when_retrieval_started": "2020-03-30 12:32:15 EDT-0400",
        "when_retrieval_completed": null,
        +"when_retrieval_failed": "2020-03-30 14:46:58 EDT-0400",
        +"username": "******",
    }]
    '''

    expected_status = 'processing'

    obj_id, job_id = get_args_objid_jobid(args)

    condition = {'_id': obj_id}
    cursor = mongo_collection.find(condition)
    ## bind the count to a name: the error message below reports it
    count = cursor.count()
    if count != 1:
        raise Exception(util.gen_msg(f"{count} records match {condition}.\n"))

    idx = get_retrievals_idx(job_id, expected_status, cursor[0])
    ## dotted path addressing one element of the 'retrievals' array
    prefix = 'retrievals.' + str(idx)

    ## read the username before 'current_user' is unset below
    username = get_current_username(cursor[0])

    result = mongo_collection.update_one({'_id': obj_id}, {
        '$set': {
            f'{prefix}.username': username,
            f'{prefix}.retrieval_status': 'failed',
            f'{prefix}.when_retrieval_failed': util.get_timestamp()
        },
        '$unset': {
            'current_user': ''
        }
    })

    if not result.acknowledged:
        raise Exception(
            util.gen_msg(
                f"MongoDB update on _id '{obj_id}' not acknowledged."))

    return f"{job_id}: need to update code to retrieve PBS error message here."
Beispiel #18
0
def retrieve_success(args, user_dict, mongo_collection):
    '''
    Changes status of retrieval job matching args['obj_id'] and args['job_id']
      from 'processing' to 'completed', copies the current user's username
      into the retrieval subrecord and removes 'current_user' from the record.
    Takes:
      args (dict) with obj_id (str), job_id (str);
      user_dict (dict) not used;
      mongo_collection: MongoDB database.collection
    Returns:
      job_id (str)
    Raises:
      Exception if the number of records matching the _id is not exactly 1,
      if no single matching 'processing' retrieval is found (see
      get_retrievals_idx), if the current username cannot be determined
      (see get_current_username), or if the update is not acknowledged.

    Meta-data updated from:
    "current_user": {
      "fname": "Research",
      "lname": "IT",
      "username": "******",
      "email": "*****@*****.**"
    },
    "retrievals": [{
      "job_id": "8649.ctarchive.jax.org",
      "retrieval_status": "processing",
      "when_ready_for_pbs": "2020-01-02 07:34:38 EDT-0400",
      "when_retrieval_queued": "2020-01-02 07:34:39 EDT-0400",
      "when_retrieval_started": "2020-01-02 07:36:25 EDT-0400",
      "when_retrieval_completed": null
    }]

    to:
    -"current_user": ...,
    "retrievals": [{
      +"username": "******",
      "job_id": "8649.ctarchive.jax.org",
      *"retrieval_status": "completed",
      "when_ready_for_pbs": "2020-01-02 07:34:38 EDT-0400",
      "when_retrieval_queued": "2020-01-02 07:34:39 EDT-0400",
      "when_retrieval_started": "2020-01-02 07:36:25 EDT-0400",
      *"when_retrieval_completed": "2020-01-02 13:42:53 EDT-0400",
    }]
    '''

    expected_status = 'processing'

    obj_id, job_id = get_args_objid_jobid(args)

    condition = {'_id': obj_id}
    cursor = mongo_collection.find(condition)
    ## bind the count to a name: the error message below reports it
    count = cursor.count()
    if count != 1:
        raise Exception(util.gen_msg(f"{count} records match {condition}.\n"))

    idx = get_retrievals_idx(job_id, expected_status, cursor[0])
    ## dotted path addressing one element of the 'retrievals' array
    prefix = 'retrievals.' + str(idx)

    ## read the username before 'current_user' is unset below
    username = get_current_username(cursor[0])

    result = mongo_collection.update_one({'_id': obj_id}, {
        '$set': {
            f'{prefix}.username': username,
            f'{prefix}.retrieval_status': 'completed',
            f'{prefix}.when_retrieval_completed': util.get_timestamp()
        },
        '$unset': {
            'current_user': ''
        }
    })

    if not result.acknowledged:
        raise Exception(
            util.gen_msg(
                f"MongoDB update on _id '{obj_id}' not acknowledged."))

    return job_id