Example No. 1
def FAILED_completed(backend_object, config):
    """Do the tasks for a FAILED request:
       unlock the original files
    """
    storage_id = StorageQuota.get_storage_index(backend_object.get_id())
    # these occur during a PUT or MIGRATE request
    fail_reqs = MigrationRequest.objects.filter(
        (Q(request_type=MigrationRequest.PUT)
        | Q(request_type=MigrationRequest.MIGRATE))
        & Q(stage=MigrationRequest.FAILED)
        & Q(migration__storage__storage=storage_id)
    )
    for fr in fail_reqs:
        try:
            # we want to use the locked FAILED requests to restore the original
            # permissions on the files
            if fr.locked:
                unlock_original_files(backend_object, fr, config)
                fr.unlock()
                # transition to FAILED_COMPLETED
                fr.stage = MigrationRequest.FAILED_COMPLETED
                fr.save()

                # log
                logging.info((
                    "Transition: request ID: {} external ID: {}: FAILED->FAILED_COMPLETED"
                ).format(fr.pk, fr.migration.external_id))
        except Exception as e:
            logging.error("FAILED: error in FAILED_completed {}".format(str(e)))

    # look for FAILED_COMPLETED requests
    storage_id = StorageQuota.get_storage_index(backend_object.get_id())
    fail_cmpl_reqs = MigrationRequest.objects.filter(
        Q(migration__storage__storage=storage_id)
        & Q(stage=MigrationRequest.FAILED_COMPLETED)
    )
    now = datetime.datetime.utcnow()
    num_days = datetime.timedelta(days=config["COMPLETED_REQUEST_DAYS"])
    #
    for fr in fail_cmpl_reqs:
        if not fr:
            continue
        try:
            # remove the request if the requisite time has elapsed
            if (now - fr.date).days > num_days.days:
                logging.info("FAILED_COMPLETED: deleting request {}".format(fr.pk))
                fr.delete()
            else:
                fr.unlock()

        except Exception as e:
            logging.error("FAILED_COMPLETED: error in FAILED_completed {}".format(str(e)))
Example No. 2
def get_completed_deletes(backend_object):
    """Get all the completed deletes for the ObjectStore"""
    # avoiding a circular dependency
    from jdma_control.models import MigrationRequest, Migration, StorageQuota
    # get the storage id
    storage_id = StorageQuota.get_storage_index("objectstore")
    # get the decrypt key
    key = AES_tools.AES_read_key(settings.ENCRYPT_KEY_FILE)

    # list of completed DELETEs to return
    completed_DELETEs = []
    # now loop over the PUT requests
    del_reqs = MigrationRequest.objects.filter(
        (Q(request_type=MigrationRequest.DELETE))
        & Q(stage=MigrationRequest.DELETING)
        & Q(migration__storage__storage=storage_id))
    for dr in del_reqs:
        # decrypt the credentials
        credentials = AES_tools.AES_decrypt_dict(key, dr.credentials)
        try:
            # create a connection to the object store
            s3c = boto3.client(
                "s3",
                endpoint_url=backend_object.OS_Settings["S3_ENDPOINT"],
                aws_access_key_id=credentials['access_key'],
                aws_secret_access_key=credentials['secret_key'])
            # if the bucket has been deleted then the deletion has completed
            # list_buckets returns metadata dicts, so compare against the names
            buckets = s3c.list_buckets()
            bucket_names = [b['Name'] for b in buckets.get('Buckets', [])]
            if dr.migration.external_id not in bucket_names:
                completed_DELETEs.append(dr.migration.external_id)
        except Exception as e:
            raise Exception(e)
    return completed_DELETEs
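Note: boto3's list_buckets() returns bucket metadata dictionaries rather than plain names, which is why the completion check above compares the batch's external_id against the 'Name' entries. A small standalone illustration of the same check (the endpoint and credentials are placeholders):

import boto3

def bucket_exists(s3_client, bucket_name):
    # list_buckets() -> {'Buckets': [{'Name': ..., 'CreationDate': ...}, ...]}
    names = [b['Name'] for b in s3_client.list_buckets().get('Buckets', [])]
    return bucket_name in names

s3c = boto3.client(
    "s3",
    endpoint_url="https://objectstore.example",  # placeholder endpoint
    aws_access_key_id="ACCESS_KEY",              # placeholder credential
    aws_secret_access_key="SECRET_KEY")          # placeholder credential
print(bucket_exists(s3c, "some-batch-id"))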
Example No. 3
def lock_put_migrations(backend_object, config):
    """Lock the directories that are going to be put to external storage.
       Also build the MigrationFiles entries from walking the directories
       This is to ensure that the user doesn't write any more data to them
       while the external storage write is ongoing.
    """
    # get the storage id for the backend
    storage_id = StorageQuota.get_storage_index(backend_object.get_id())
    # get the list of PUT requests
    pr = MigrationRequest.objects.filter(
        (Q(request_type=MigrationRequest.PUT)
        | Q(request_type=MigrationRequest.MIGRATE))
        & Q(locked=False)
        & Q(stage=MigrationRequest.PUT_START)
        & Q(migration__storage__storage=storage_id)
    ).first()

    # .first() returns None when no requests that match the filter are found
    if not pr:
        return
    # lock the Migration to prevent other processes acting upon it
    if not pr.lock():
        return
    # carry out the lock migration
    try:
        lock_put_migration(pr, config)
        pr.unlock()
    except Exception as e:
        pr.unlock()
        mark_migration_failed(pr, str(e), e, True)
Example No. 4
def monitor_put(completed_PUTs, backend_object):
    """Monitor the PUTs and MIGRATES and transition from PUTTING to
    VERIFY_PENDING (or FAILED)"""
    storage_id = StorageQuota.get_storage_index(backend_object.get_id())
    # now loop over the PUT requests
    pr_objs = MigrationRequest.objects.filter(
        (Q(request_type=MigrationRequest.PUT)
        | Q(request_type=MigrationRequest.MIGRATE))
#        & Q(locked=False)
        & Q(stage=MigrationRequest.PUTTING)
        & Q(migration__stage=Migration.PUTTING)
        & Q(migration__storage__storage=storage_id)
    )

    for pr in pr_objs:
        # This is the standard locking code.  See functions in "jdma_lock" for full
        # details
        if not pr:
            return
        # if not pr.lock():
        #     return
        pr.lock()
        ###

        # check whether it's in the completed_PUTs
        if pr.migration.external_id in completed_PUTs:
            # if it is then migrate to VERIFY_PENDING
            pr.stage = MigrationRequest.VERIFY_PENDING
            # reset the last_archive - needed for verify_get
            pr.last_archive = 0
            pr.save()
            logging.info((
                "Transition: request ID: {} external ID {} PUTTING->VERIFY_PENDING"
            ).format(pr.pk, pr.migration.external_id))
        pr.unlock()
Example No. 5
def monitor_get(completed_GETs, backend_object):
    """Monitor the GETs and transition from GETTING to ON_DISK (or FAILED)"""
    storage_id = StorageQuota.get_storage_index(backend_object.get_id())

    gr_objs = MigrationRequest.objects.filter(
        Q(request_type=MigrationRequest.GET)
#        & Q(locked=False)
        & Q(stage=MigrationRequest.GETTING)
        & Q(migration__storage__storage=storage_id)
    )

    for gr in gr_objs:
        # This is the standard locking code.  See functions in "jdma_lock" for full
        # details
        if not gr:
            return
        # if not gr.lock():
        #     return
        gr.lock()
        ###

        if gr.transfer_id in completed_GETs:
            # There may be multiple completed_GETs with external_id as Migrations
            # can be downloaded by multiple MigrationRequests
            # The only way to check is to make sure all the files in the
            # original migration are present in the target_dir
            gr.stage = MigrationRequest.GET_UNPACKING
            # reset the last archive counter
            gr.last_archive = 0
            gr.save()
            logging.info((
                "Transition: request ID: {} GETTING->GET_UNPACKING"
            ).format(gr.pk))
        gr.unlock()
Example No. 6
def monitor_delete(completed_DELETEs, backend_object):
    """Monitor the DELETEs and transition from DELETING to DELETE_TIDY"""
    storage_id = StorageQuota.get_storage_index(backend_object.get_id())
    dr_objs = MigrationRequest.objects.filter(
        Q(request_type=MigrationRequest.DELETE)
        # & Q(locked=False)
        & Q(stage=MigrationRequest.DELETING)
        & Q(migration__storage__storage=storage_id)
    )

    for dr in dr_objs:
        # This is the standard locking code.  See functions in "jdma_lock" for full
        # details
        if not dr:
            return
        # if not dr.lock():
        #     return
        dr.lock()
        ###

        if dr.migration.external_id in completed_DELETEs:
            dr.stage = MigrationRequest.DELETE_TIDY
            logging.info((
                "Transition: request ID: {} external ID: {} DELETING->DELETE_TIDY"
            ).format(dr.pk, dr.migration.external_id))
            # reset the last archive counter
            dr.last_archive = 0
            dr.save()
        dr.unlock()
Example No. 7
def monitor_verify(completed_GETs, backend_object):
    """Monitor the VERIFYs and transition from VERIFY_GETTING to VERIFYING"""
    storage_id = StorageQuota.get_storage_index(backend_object.get_id())

    vr_objs = MigrationRequest.objects.filter(
        (Q(request_type=MigrationRequest.PUT)
        | Q(request_type=MigrationRequest.MIGRATE))
#        & Q(locked=False)
        & Q(stage=MigrationRequest.VERIFY_GETTING)
        & Q(migration__storage__storage=storage_id)
    )
    for vr in vr_objs:
        # This is the standard locking code.  See functions in "jdma_lock" for full
        # details
        if not vr:
            return
        # if not vr.lock():
        #     return
        vr.lock()
        ###

        if vr.transfer_id in completed_GETs:
            vr.stage = MigrationRequest.VERIFYING
            logging.info((
                "Transition: request ID: {} external ID: {} VERIFY_GETTING->VERIFYING"
            ).format(vr.pk, vr.transfer_id))
            # reset the last archive counter
            vr.last_archive = 0
            vr.save()
        vr.unlock()
Example No. 8
def verify(backend_object, credentials, pr):
    """Start the verification process.  Transition from
    VERIFY_PENDING->VERIFY_GETTING and create the target directory.
    Download the batch from the backend storage to the target directory
    """
    try:
        # open a connection to the backend.  Creating the connection can account
        # for a significant portion of the run time.  So we only do it once!
        global connection_pool
        conn = connection_pool.find_or_create_connection(
            backend_object,
            mig_req=pr,
            credentials=credentials,
            mode="download",
            uid="VERIFY")
        # Transition
        pr.stage = MigrationRequest.VERIFY_GETTING
        pr.save()
        logging.info((
            "Transition: request ID: {} external ID: {} VERIFY_PENDING->VERIFY_GETTING"
        ).format(pr.pk, pr.migration.external_id))

        # get the name of the verification directory
        target_dir = get_verify_dir(backend_object, pr)
        # create the target directory if it doesn't exist
        os.makedirs(target_dir, exist_ok=True)

        # for verify, we want to get the whole batch
        # get the archive set
        archive_set = pr.migration.migrationarchive_set.order_by('pk')

        # add all the files in the archive to a file_list for downloading
        file_list = []
        for archive in archive_set:
            # add files in this archive to those already added
            if archive.packed:
                archive_files = [archive.get_archive_name()]
            else:
                archive_files = archive.get_file_names()['FILE']
            file_list.extend(archive_files)

        logging.debug(("Downloading files to verify: {} from {}").format(
            file_list, backend_object.get_name()))

        backend_object.download_files(conn,
                                      pr,
                                      file_list=file_list,
                                      target_dir=target_dir)
        connection_pool.close_connection(backend_object,
                                         pr,
                                         credentials,
                                         mode="download",
                                         uid="VERIFY")
    except Exception as e:
        storage_name = StorageQuota.get_storage_name(
            pr.migration.storage.storage)
        error_string = ("Failed to download for verify the migration: {} "
                        "on external storage: {}. Exception: {}").format(
                            pr.migration.pk, storage_name, str(e))
        raise Exception(error_string)
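Note: find_or_create_connection() is used above because opening a backend connection is expensive, so connections are cached and reused. The project's actual pool is not shown here; the following is a rough sketch of the idea, assuming connections are keyed on backend, request, mode and uid, and that the backend object knows how to open and close a raw connection (create_connection and close_connection are hypothetical method names):

class ConnectionPoolSketch:
    """Hypothetical sketch of a keyed connection cache (not the real pool)."""

    def __init__(self):
        self._connections = {}

    def find_or_create_connection(self, backend, mig_req, credentials,
                                  mode="upload", uid=""):
        # reuse an open connection for the same backend / request / mode / uid
        key = (backend.get_id(), mig_req.pk, mode, uid)
        if key not in self._connections:
            # create_connection is an assumed backend method name
            self._connections[key] = backend.create_connection(
                mig_req, credentials, mode)
        return self._connections[key]

    def close_connection(self, backend, mig_req, credentials,
                         mode="upload", uid=""):
        key = (backend.get_id(), mig_req.pk, mode, uid)
        conn = self._connections.pop(key, None)
        if conn is not None:
            # close_connection on the backend is likewise assumed
            backend.close_connection(conn)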
Example No. 9
def GET_completed(backend_object, config):
    """Do the tasks for a completed GET request:
       send a notification email
       delete the request
    """
    storage_id = StorageQuota.get_storage_index(backend_object.get_id())
    gr = MigrationRequest.objects.filter(
        Q(request_type=MigrationRequest.GET)
        & Q(locked=False)
        & Q(migration__storage__storage=storage_id)
        & Q(stage=MigrationRequest.GET_COMPLETED)
    ).first()
    now = datetime.datetime.utcnow()
    num_days = datetime.timedelta(days=config["COMPLETED_REQUEST_DAYS"])
    if not gr:
        return
    if not gr.lock():
        return

    try:
        # remove the request if the requisite time has elapsed
        if (now - gr.date).days > num_days.days:
            logging.info("GET: deleting GET request {}".format(gr.pk))
            gr.delete()
        else:
            gr.unlock()

    except Exception as e:
        logging.error("GET: error in GET_completed {}".format(str(e)))
Example No. 10
def GET_tidy(backend_object, config):
    """Do the clean up tasks for a completed GET request"""
    storage_id = StorageQuota.get_storage_index(backend_object.get_id())
    # these occur during a PUT or MIGRATE request
    gr = MigrationRequest.objects.filter(
        Q(request_type=MigrationRequest.GET)
        & Q(locked=False)
        & Q(migration__storage__storage=storage_id)
        & Q(stage=MigrationRequest.GET_TIDY)
    ).first()
    if not gr:
        return
    if not gr.lock():
        return
    try:
        # remove the temporary archive files (tarfiles)
        remove_archive_files(backend_object, gr)
        # update the request to GET_COMPLETED
        gr.stage = MigrationRequest.GET_COMPLETED
        logging.info((
            "Transition: request ID: {} external ID: {}: GET_TIDY->GET_COMPLETED"
        ).format(gr.pk, gr.migration.external_id))
        gr.migration.save()
        gr.save()
        # send a notification email that the gets have completed
        send_get_notification_email(backend_object, gr)
    except Exception as e:
        logging.error("GET: error in GET_tidy {}".format(str(e)))

    gr.unlock()
Example No. 11
def get_completed_puts(backend_object):
    """Get all the completed puts for the ObjectStore"""
    # avoiding a circular dependency
    from jdma_control.models import MigrationRequest, Migration, StorageQuota
    # get the storage id
    storage_id = StorageQuota.get_storage_index("objectstore")
    # get the decrypt key
    key = AES_tools.AES_read_key(settings.ENCRYPT_KEY_FILE)

    # list of completed PUTs to return
    completed_PUTs = []
    # now loop over the PUT requests
    put_reqs = MigrationRequest.objects.filter(
        (Q(request_type=MigrationRequest.PUT)
         | Q(request_type=MigrationRequest.MIGRATE))
        & Q(stage=MigrationRequest.PUTTING)
        & Q(migration__stage=Migration.PUTTING)
        & Q(migration__storage__storage=storage_id))
    for pr in put_reqs:
        # decrypt the credentials
        credentials = AES_tools.AES_decrypt_dict(key, pr.credentials)
        try:
            # create a connection to the object store
            s3c = boto3.client(
                "s3",
                endpoint_url=backend_object.OS_Settings["S3_ENDPOINT"],
                aws_access_key_id=credentials['access_key'],
                aws_secret_access_key=credentials['secret_key'])
            # loop over each archive in the migration
            archive_set = pr.migration.migrationarchive_set.order_by('pk')
            # counter for number of uploaded archives
            n_up_arch = 0
            for archive in archive_set:
                # get the list of files for this archive
                if archive.packed:
                    file_list = [archive.get_archive_name()]
                else:
                    file_list = archive.get_file_names()['FILE']
                n_files = 0
                for file_path in file_list:
                    # object name is the file_path, without any prefix
                    try:
                        if s3c.head_object(Bucket=pr.migration.external_id,
                                           Key=file_path):
                            n_files += 1
                    except Exception:
                        # head_object raises if the object does not exist yet
                        pass
                # check if all files uploaded and then inc archive
                if n_files == len(file_list):
                    n_up_arch += 1
            if n_up_arch == pr.migration.migrationarchive_set.count():
                completed_PUTs.append(pr.migration.external_id)

        except Exception as e:
            raise Exception(e)

    return completed_PUTs
Example No. 12
def get_completed_puts(backend_object):
    """Get all the completed puts for the FTP backend"""
    # avoiding a circular dependency
    from jdma_control.models import MigrationRequest, Migration, StorageQuota
    # get the storage id
    storage_id = StorageQuota.get_storage_index("ftp")
    # get the decrypt key
    key = AES_tools.AES_read_key(settings.ENCRYPT_KEY_FILE)

    # list of completed PUTs to return
    completed_PUTs = []
    # now loop over the PUT requests
    put_reqs = MigrationRequest.objects.filter(
        (Q(request_type=MigrationRequest.PUT)
         | Q(request_type=MigrationRequest.MIGRATE))
        & Q(stage=MigrationRequest.PUTTING)
        & Q(migration__stage=Migration.PUTTING)
        & Q(migration__storage__storage=storage_id))
    for pr in put_reqs:
        # decrypt the credentials
        credentials = AES_tools.AES_decrypt_dict(key, pr.credentials)
        try:
            ftp = ftplib.FTP(host=backend_object.FTP_Settings["FTP_ENDPOINT"],
                             user=credentials['username'],
                             passwd=credentials['password'])
            # loop over each archive in the migration
            archive_set = pr.migration.migrationarchive_set.order_by('pk')
            # counter for number of uploaded archives
            n_up_arch = 0
            for archive in archive_set:
                # get the list of files for this archive
                file_list = archive.get_file_names()['FILE']
                n_files = 0
                for file_path in file_list:
                    # object name is the file_path, without the gws prefix
                    object_name = (pr.migration.external_id + "/" + file_path)
                    # enforce switch to binary (images here, but that doesn't
                    # matter)
                    ftp.voidcmd('TYPE I')
                    try:
                        fsize = ftp.size(object_name)
                        if fsize is not None:
                            n_files += 1
                    except Exception:
                        # size() raises an error if the file is not present yet
                        pass
                # check if all files uploaded and then inc archive
                if n_files == len(file_list):
                    n_up_arch += 1

            if n_up_arch == pr.migration.migrationarchive_set.count():
                completed_PUTs.append(pr.migration.external_id)

            ftp.quit()
        except Exception as e:
            raise Exception(e)

    return completed_PUTs
Example No. 13
def get_completed_puts(backend_object):
    """Get all the completed puts for the Elastic Tape"""
    # avoiding a circular dependency
    from jdma_control.models import MigrationRequest, Migration, StorageQuota
    # get the storage id
    storage_id = StorageQuota.get_storage_index("elastictape")
    # list of completed PUTs to return
    completed_PUTs = []
    ET_Settings = backend_object.ET_Settings

    # now loop over the PUT requests
    put_reqs = MigrationRequest.objects.filter(
        (Q(request_type=MigrationRequest.PUT)
         | Q(request_type=MigrationRequest.MIGRATE))
        & Q(stage=MigrationRequest.PUTTING)
        & Q(migration__stage=Migration.PUTTING)
        & Q(migration__storage__storage=storage_id))
    for pr in put_reqs:
        if pr.migration.external_id is None:
            continue
        # form the url and get the response, parse the document using bs4
        holdings_url = "{}?batch={}".format(
            ET_Settings["ET_INPUT_BATCH_SUMMARY_URL"],
            pr.migration.external_id)
        sleep(0.1)  # 100 ms delay to avoid overloading the server
        r = requests.get(holdings_url)
        if r.status_code == 200:
            bs = BeautifulSoup(r.content, "xml")
        else:
            # log error rather than raising exception
            logging.error("Error in ET monitor:{} is unreachable".format(
                str(holdings_url)))
            continue

        # get the 2nd table - the 1st is just a heading table
        tables = bs.find_all("table")
        if len(tables) < 2:
            continue
        table = tables[1]

        # get the first data row (row 0 is the header row)
        rows = table.find_all("tr")
        if len(rows) < 2:
            continue
        row_1 = rows[1]

        # the status is the first column
        cols = row_1.find_all("td")
        if len(cols) < 3:
            continue
        transfer_id = cols[0].get_text()
        status = cols[0].get_text()
        # check for completion
        if status in ["SYNCED", "TAPED"]:
            completed_PUTs.append(pr.migration.external_id)

    return completed_PUTs
Example No. 14
def create_user_gws_quotas(data):
    # Create the User, GroupWorkspace and StorageQuota from each line of the
    # data
    storageid = StorageQuota.get_storage_index("objectstore")
    for line in data:
        if len(line) == 4:
            # create user entry
            new_gws = create_user_entry(line)
            # create the new storage quota
            create_quota_entry(storageid, new_gws, 32 * 10**12, 0)
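Note: create_quota_entry() itself is not shown in these examples; the call above gives each new group workspace a 32 * 10**12 byte (32 TB) object store quota with zero usage. A plausible sketch of such a helper, with the StorageQuota field names (storage, workspace, quota_size, quota_used) inferred from the other examples rather than taken from the real model:

def create_quota_entry(storage_id, new_gws, quota_size, quota_used):
    # hypothetical helper: persist one StorageQuota row for the workspace
    from jdma_control.models import StorageQuota
    quota = StorageQuota(
        storage=storage_id,     # backend index, e.g. the "objectstore" id
        workspace=new_gws,      # GroupWorkspace returned by create_user_entry
        quota_size=quota_size,  # total allowance in bytes (32 TB above)
        quota_used=quota_used,  # bytes already consumed
    )
    quota.save()
    return quota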
Example No. 15
def DELETE_completed(backend_object, config):
    """Do the tasks for a completed DELETE request:
       send a notification email
       delete the request
       delete the migration
       delete any associated requests (PUT, MIGRATE or GETs)
    """
    storage_id = StorageQuota.get_storage_index(backend_object.get_id())
    # these occur during a PUT or MIGRATE request
    dr = MigrationRequest.objects.filter(
        Q(request_type=MigrationRequest.DELETE)
        & Q(locked=False)
        & Q(migration__storage__storage=storage_id)
        & Q(stage=MigrationRequest.DELETE_COMPLETED)
    ).first()
    now = datetime.datetime.utcnow()
    num_days = datetime.timedelta(days=config["COMPLETED_REQUEST_DAYS"])
    if not dr:
        return
    if not dr.lock():
        return
    try:
        # remove the request if the requisite time has elapsed
        if (now - dr.date).days > num_days.days:
            # get the associated PUT or MIGRATE requests - there should only
            # be one
            other_reqs = MigrationRequest.objects.filter(
                (Q(request_type=MigrationRequest.PUT)
                | Q(request_type=MigrationRequest.MIGRATE)
                | Q(request_type=MigrationRequest.GET))
                & Q(migration=dr.migration)
                & Q(migration__storage__storage=storage_id)
            )

            for otr in other_reqs:
                logging.info((
                    "DELETE: deleting request {} associated with DELETE request {}."
                ).format(otr.pk, dr.pk))
                otr.delete()

            logging.info("DELETE: deleting DELETE request {}".format(dr.pk))
            dr.delete()
            # delete the migration
            logging.info((
                "DELETE: deleting migration {} associated with DELETE request {}."
            ).format(dr.migration.pk, dr.pk))
            dr.migration.delete()
            # we are done!
        else:
            dr.unlock()
    except Exception as e:
        logging.error("DELETE: error in DELETE_completed {}".format(str(e)))
Example No. 16
    def user_has_put_quota(self, conn):
        """Check the remaining quota for the user in the workspace.
        We just check the database here, i.e. check that we are not over
        quota.
        When jdma_lock calculates the file sizes we can check the quota again
        and flag the transfer as FAILED if it goes over the quota.
        """
        from jdma_control.models import StorageQuota
        # get the storage id
        storage_id = StorageQuota.get_storage_index("objectstore")
        storage_quota = StorageQuota.objects.filter(
            storage=storage_id, workspace__workspace=conn.jdma_workspace)[0]
        return storage_quota.quota_used < storage_quota.quota_size
Example No. 17
def get_transfers(backend_object, key):
    """Work through the state machine to download batches from the external
    storage"""
    # get the storage id for the backend object
    storage_id = StorageQuota.get_storage_index(backend_object.get_id())

    # get the GET requests which are queued (GET_PENDING) for this backend
    gr = MigrationRequest.objects.filter(
        Q(request_type=MigrationRequest.GET)
        & Q(locked=False)
        & Q(migration__storage__storage=storage_id)
        #& ~Q(user__name="n1280run")    # NRM 21/04/2022 - blocking this user name for now until disk cleared
        & Q(stage__in=[
            MigrationRequest.GET_PENDING,
            MigrationRequest.GETTING,
            MigrationRequest.GET_RESTORE,
        ])).first()

    # .first() returns None when no requests that match the filter are found
    if not gr:
        return
    # lock the Migration to prevent other processes acting upon it
    if not gr.lock():
        return
    # determine the credentials for the user - decrypt if necessary
    if gr.credentials != {}:
        credentials = AES_tools.AES_decrypt_dict(key, gr.credentials)
    else:
        credentials = {}

    if gr.stage == MigrationRequest.GET_PENDING:
        # we might have to do something here, like create a download batch
        # for elastic tape.  Also create the directory and transition the
        # state
        try:
            download(backend_object, credentials, gr)
        except Exception as e:
            # Something went wrong, set FAILED and failure_reason
            mark_migration_failed(gr, str(e), e, upload_mig=False)

    elif gr.stage == MigrationRequest.GETTING:
        pass

    elif gr.stage == MigrationRequest.GET_RESTORE:
        # restore the file permissions
        try:
            restore_owner_and_group_on_get(backend_object, gr)
        except Exception as e:
            mark_migration_failed(gr, str(e), e, upload_mig=False)
    gr.unlock()
Example No. 18
def put_transfers(backend_object, key):
    """Work through the state machine to upload batches to the external
    storage"""
    # get the storage id for the backend object
    storage_id = StorageQuota.get_storage_index(backend_object.get_id())
    # Get the first non-locked PUT request for this backend.
    # This involves resolving two foreign keys
    pr = MigrationRequest.objects.filter(
        (Q(request_type=MigrationRequest.PUT)
         | Q(request_type=MigrationRequest.MIGRATE))
        & Q(locked=False)
        & Q(migration__storage__storage=storage_id)
        & Q(stage__in=[
            MigrationRequest.PUT_PENDING,
            MigrationRequest.VERIFY_PENDING,
        ])).first()

    # .first() returns None when no requests that match the filter are found
    if not pr:
        return
    # lock the Migration to prevent other processes acting upon it
    if not pr.lock():
        return
    # determine the credentials for the user - decrypt if necessary
    if pr.credentials != {}:
        credentials = AES_tools.AES_decrypt_dict(key, pr.credentials)
    else:
        credentials = {}

    # Check whether data is being put to external storage
    if pr.stage == MigrationRequest.PUT_PENDING:
        # create the batch on this instance, next time the script is run
        # the archives will be created as tarfiles
        try:
            upload(backend_object, credentials, pr)
        except Exception as e:
            # Something went wrong, set FAILED and failure_reason
            mark_migration_failed(pr, str(e), e)
    # check if data is now on external storage and should be pulled
    # back for verification
    elif pr.stage == MigrationRequest.VERIFY_PENDING:
        # pull back the data from the external storage
        try:
            verify(backend_object, credentials, pr)
        except Exception as e:
            # Something went wrong, set FAILED and failure_reason
            mark_migration_failed(pr, str(e), e)
    # unlock
    pr.unlock()
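Note: put_transfers() and get_transfers(), like the monitor_* functions above, each advance the state machine by at most one request and then return. They would normally be driven repeatedly per backend by an outer loop or daemon; a hypothetical driver (not the project's actual scheduler) could look like this:

import time

def run_transfer_loop(backends, key, interval=30):
    # hypothetical polling driver: visit every backend, advance the state
    # machine by one request for each entry point, then wait and repeat
    while True:
        for backend_object in backends:
            put_transfers(backend_object, key)
            get_transfers(backend_object, key)
        time.sleep(interval)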
Example No. 19
def PUT_tidy(backend_object, config):
    """Do the clean up tasks for a completed PUT or MIGRATE request"""
    storage_id = StorageQuota.get_storage_index(backend_object.get_id())
    # these occur during a PUT or MIGRATE request
    pr = MigrationRequest.objects.filter(
        (Q(request_type=MigrationRequest.PUT)
        | Q(request_type=MigrationRequest.MIGRATE))
        & Q(locked=False)
        & Q(migration__storage__storage=storage_id)
        & Q(stage=MigrationRequest.PUT_TIDY)
    ).first()

    if not pr:
        return
    # check locked
    if not pr.lock():
        return
    try:
        # remove the temporary staged archive files
        remove_archive_files(backend_object, pr)
        # remove the verification files
        remove_verification_files(backend_object, pr)
        # only remove the original files for a MIGRATE
        if pr.request_type == MigrationRequest.MIGRATE:
            remove_original_files(backend_object, pr, config)
        else:
            # otherwise unlock them (restore uids, gids and permissions)
            unlock_original_files(backend_object, pr, config)
        # set to completed and last archive to 0
        # pr will be deleted next time jdma_tidy is invoked
        pr.stage = MigrationRequest.PUT_COMPLETED
        logging.info("Transition: deleting PUT request {}".format(pr.pk))
        pr.migration.stage = Migration.ON_STORAGE
        logging.info((
            "Transition: request ID: {} external ID: {}: PUT_TIDY->PUT_COMPLETED, PUTTING->ON_STORAGE"
        ).format(pr.pk, pr.migration.external_id))
        pr.last_archive = 0
        pr.migration.save()
        pr.save()
        # send a notification email that the puts have completed
        send_put_notification_email(backend_object, pr)
        # update the amount of quota the migration has used
        update_storage_quota(backend_object, pr.migration, update="add")

    except Exception as e:
        logging.error("Error in PUT_tidy {}".format(str(e)))
        raise Exception(e)

    pr.unlock()
Example No. 20
def create_user_gws_quotas(data, config):
    # Create the User, GroupWorkspace and StorageQuota from each line of the
    # data
    storageid = StorageQuota.get_storage_index("elastictape")
    for line in data:
        if len(line) == 4:
            # create the user entry using the above script
            new_gws = create_user_entry(line)
            # get the quota and quota used
            quota, quota_used = get_et_quota_used(config["ET_QUOTA_URL"],
                                                  line[0])
            # create the new storage quota and assign the workspace
            create_quota_entry(storageid, new_gws, int(line[2]), quota_used)
            # sleep for 100ms to prevent server getting overloaded
            sleep(0.1)
Example No. 21
def get_completed_deletes(backend_object):
    """Get all the completed deletes for the Elastic Tape"""
    # avoiding a circular dependency
    from jdma_control.models import MigrationRequest, Migration, StorageQuota
    # get the storage id
    storage_id = StorageQuota.get_storage_index("elastictape")
    ET_Settings = backend_object.ET_Settings

    # list of completed DELETEs to return
    completed_DELETEs = []
    # now loop over the PUT requests
    del_reqs = MigrationRequest.objects.filter(
        (Q(request_type=MigrationRequest.DELETE))
        & Q(stage=MigrationRequest.DELETING)
        & Q(migration__storage__storage=storage_id))
    for dr in del_reqs:
        # assume deleted
        deleted = True
        # get a list of synced batches for this workspace and user
        holdings_url = "{}?workspace={};caller={};level=batch".format(
            ET_Settings["ET_HOLDINGS_URL"], dr.migration.workspace.workspace,
            dr.migration.user.name)
        # use requests to fetch the URL
        sleep(0.1)  # 100 ms delay to avoid overloading the server
        r = requests.get(holdings_url)
        if r.status_code == 200:
            bs = BeautifulSoup(r.content, "xml")
        else:
            logging.error("Error in ET monitor:{} is unreachable".format(
                str(holdings_url)))
            continue

        # if the dr.migration.external_id is not in the list of batches
        # then the delete has completed
        batches = bs.select("batch")
        for b in batches:
            batch_id = b.find("batch_id").text.strip()
            if batch_id == dr.migration.external_id:
                deleted = False

        if deleted:
            # it's been deleted so add to the returned list of completed DELETEs
            completed_DELETEs.append(dr.migration.external_id)
    return completed_DELETEs
Example No. 22
def remove_verification_files(backend_object, pr):
    """Remove those temporary files that have been created in the verification
    step"""
    storage_id = StorageQuota.get_storage_index(backend_object.get_id())
    # get the directory that the temporary files are in
    batch_id = pr.migration.external_id
    # get the temporary directory
    verify_dir = get_verify_dir(backend_object, pr)
    # remove the directory
    if os.path.isdir(verify_dir):
        try:
            shutil.rmtree(verify_dir)
            logging.debug("Deleting verify directory " + verify_dir)
        except Exception as e:
            logging.error((
                "Could not delete verify directory {} : {}"
            ).format(verify_dir, str(e)))
    else:
        logging.debug("Cannot find verify directory " + verify_dir)
Example No. 23
def verify_files(backend_object, config):
    """Verify the files that have been uploaded to external storage and then
    downloaded back to a temporary directory."""
    # get the storage id for the backend object
    storage_id = StorageQuota.get_storage_index(backend_object.get_id())

    # these are part of a PUT request - get the list of PUT request
    pr = MigrationRequest.objects.filter(
        (Q(request_type=MigrationRequest.PUT)
         | Q(request_type=MigrationRequest.MIGRATE))
        & Q(locked=False)
        & Q(stage=MigrationRequest.VERIFYING)
        & Q(migration__storage__storage=storage_id)).first()
    if not pr:
        return
    # lock the Migration to prevent other processes acting upon it
    if not pr.lock():
        return
    verify(backend_object, pr, config)
    pr.unlock()
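Note: the verify() called here (a different function from the download step in Example No. 8) compares the files pulled back from external storage with the originals. Its body is not shown; one common approach, sketched under the assumption that each file's expected size and SHA-256 digest are recorded at upload time, is to re-hash the downloaded copy:

import hashlib
import os

def file_matches(downloaded_path, expected_size, expected_sha256):
    # hypothetical check: compare the size first (cheap), then a SHA-256
    # digest computed in 1 MB chunks to avoid loading large files at once
    if os.path.getsize(downloaded_path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(downloaded_path, "rb") as fh:
        for chunk in iter(lambda: fh.read(1024 * 1024), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_sha256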
Example No. 24
def lock_delete_migrations(backend_object):
    # get the storage id for the backend
    storage_id = StorageQuota.get_storage_index(backend_object.get_id())
    # get the list of GET requests
    dr = MigrationRequest.objects.filter(
        Q(request_type=MigrationRequest.DELETE)
        & Q(locked=False)
        & Q(stage=MigrationRequest.DELETE_START)
        & Q(migration__storage__storage=storage_id)
    ).first()
    if not dr:
        return
    if not dr.lock():
        return
    try:
        lock_delete_migration(backend_object, dr)
        dr.unlock()
    except Exception as e:
        dr.unlock()
        mark_migration_failed(dr, str(e), e, False)
Example No. 25
    def user_has_put_quota(self, conn):
        """Check the remaining quota for the user in the workspace.
        We just check the database here, i.e. check that we are not over
        quota.
        When jdma_lock calculates the file sizes we can check the quota again
        and flag the transfer as FAILED if it goes over the quota.
        """
        from jdma_control.models import StorageQuota
        # get the storage id
        storage_id = StorageQuota.get_storage_index("elastictape")
        storage_quota = StorageQuota.objects.filter(
            storage=storage_id, workspace__workspace=conn.jdma_workspace)[0]
        jdma_quota_remaining = storage_quota.quota_size - storage_quota.quota_used

        # get the quota from the elastic tape feed
        et_quota_remaining = workspace_quota_remaining(
            conn.jdma_user,
            conn.jdma_workspace.workspace,
            self.ET_Settings,
        )

        return (jdma_quota_remaining > 0) & (et_quota_remaining > 0)
Example No. 26
def get_unpacking(backend_object, config):
    """Unpack the ArchiveFiles from a TarFile to a target directory"""
    # get the storage id for the backend object
    storage_id = StorageQuota.get_storage_index(backend_object.get_id())
    # Get the GET requests for this backend which are in the GET_UNPACKING stage.
    gr = MigrationRequest.objects.filter(
        Q(request_type=MigrationRequest.GET)
        & Q(migration__storage__storage=storage_id)
        & Q(stage=MigrationRequest.GET_UNPACKING)).first()
    if not gr:
        return

    try:
        if not gr.lock():
            return
        unpack_request(gr, get_download_dir(backend_object, gr), config)
        gr.unlock()
    except Exception as e:
        error_string = ("Could not unpack request for batch: {}: {}").format(
            str(gr.migration.external_id), str(e))
        logging.error(error_string)
        mark_migration_failed(gr, error_string, e, upload_mig=True)
Example No. 27
def lock_delete_migration(backend_object, dr):
    # lock this migration request as well
    # find the associated PUT, MIGRATE and GET migration requests and lock
    # them
    storage_id = StorageQuota.get_storage_index(backend_object.get_id())
    other_reqs = MigrationRequest.objects.filter(
        (Q(request_type=MigrationRequest.PUT)
        | Q(request_type=MigrationRequest.MIGRATE)
        | Q(request_type=MigrationRequest.GET))
        & Q(migration=dr.migration)
        & Q(migration__storage__storage=storage_id)
    )
    # lock the associated migration(s)
    for otr in other_reqs:
        otr.lock()
    # transition to DELETE_PENDING
    dr.stage = MigrationRequest.DELETE_PENDING
    dr.save()
    logging.info("DELETE: Locked migration: {}".format(dr.migration.pk))
    logging.info((
        "Transition: request ID: {} DELETE_START->DELETE_PENDING"
    ).format(dr.pk))
Example No. 28
def get_completed_deletes(backend_object):
    """Get all the completed deletes for the ObjectStore"""
    # avoiding a circular dependency
    from jdma_control.models import MigrationRequest, Migration, StorageQuota
    # get the storage id
    storage_id = StorageQuota.get_storage_index("ftp")
    # get the decrypt key
    key = AES_tools.AES_read_key(settings.ENCRYPT_KEY_FILE)

    # list of completed DELETEs to return
    completed_DELETEs = []
    # now loop over the PUT requests
    del_reqs = MigrationRequest.objects.filter(
        (Q(request_type=MigrationRequest.DELETE))
        & Q(stage=MigrationRequest.DELETING)
        & Q(migration__storage__storage=storage_id))
    for dr in del_reqs:
        # decrypt the credentials
        credentials = AES_tools.AES_decrypt_dict(key, dr.credentials)
        try:
            # create a connection to the FTP server
            ftp = ftplib.FTP(host=backend_object.FTP_Settings["FTP_ENDPOINT"],
                             user=credentials['username'],
                             passwd=credentials['password'])
            # if the external_id directory has been deleted then the
            # deletion has completed
            dir_list = ftp.mlsd("/")
            found = False
            for d in dir_list:
                # check if directory and groupworkspace name is in directory
                if d[1]['type'] == 'dir' and dr.migration.external_id in d[0]:
                    found = True
                    break
            if not found:
                completed_DELETEs.append(dr.migration.external_id)

        except Exception as e:
            raise Exception(e)
    return completed_DELETEs
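Note: ftplib's mlsd() used above yields (name, facts) pairs, where facts is a dictionary of standard keys such as 'type'; hence the d[0] / d[1]['type'] indexing. A standalone illustration of the same existence check (host and credentials are placeholders):

import ftplib

def directory_exists(host, user, passwd, dirname):
    # mlsd() yields (name, facts) tuples; facts['type'] is 'dir' for directories
    ftp = ftplib.FTP(host=host, user=user, passwd=passwd)
    try:
        return any(name == dirname and facts.get('type') == 'dir'
                   for name, facts in ftp.mlsd("/"))
    finally:
        ftp.quit()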
Example No. 29
def lock_get_migrations(backend_object):
    """Lock the directories that the targets for recovering data from external
    storage.  This is to ensure that there aren't any filename conflicts.
    """
    # get the storage id for the backend
    storage_id = StorageQuota.get_storage_index(backend_object.get_id())
    # get the list of GET requests
    gr = MigrationRequest.objects.filter(
        Q(request_type=MigrationRequest.GET)
        & Q(locked=False)
        & Q(stage=MigrationRequest.GET_START)
        & Q(migration__storage__storage=storage_id)
    ).first()
    if not gr:
        return
    if not gr.lock():
        return
    try:
        lock_get_migration(gr)
        gr.unlock()
    except Exception as e:
        gr.unlock()
        mark_migration_failed(gr, str(e), e, False)
Example No. 30
def put_packing(backend_object, config):
    """Pack the ArchiveFiles into a TarFile in the ARCHIVE_STAGING_DIR
    for this backend"""
    # get the storage id for the backend object
    storage_id = StorageQuota.get_storage_index(backend_object.get_id())
    # Get the PUT requests for this backend which are in the PACKING stage
    pr = MigrationRequest.objects.filter(
        (Q(request_type=MigrationRequest.PUT)
         | Q(request_type=MigrationRequest.MIGRATE))
        & Q(locked=False)
        & Q(migration__storage__storage=storage_id)
        & Q(stage=MigrationRequest.PUT_PACKING)).first()
    if not pr:
        return
    try:
        if not pr.lock():
            return
        pack_request(pr, backend_object.ARCHIVE_STAGING_DIR, config)
        pr.unlock()
    except Exception as e:
        error_string = ("Could not pack archive for batch: {}: {}").format(
            pr.migration.get_id(), str(e))
        # mark the migration as failed
        mark_migration_failed(pr, error_string, e)
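Note: pack_request() is referenced but not shown; it stages each archive's files into a tar file under the backend's ARCHIVE_STAGING_DIR before upload. A minimal sketch of that packing step, assuming the archive object exposes get_archive_name() and get_file_names() as in the examples above:

import os
import tarfile

def pack_archive(archive, staging_dir):
    # hypothetical packing step: write this archive's files into one tar
    # file, named after the archive, inside the staging directory
    tar_path = os.path.join(staging_dir, archive.get_archive_name())
    os.makedirs(staging_dir, exist_ok=True)
    with tarfile.open(tar_path, "w") as tar:
        for file_path in archive.get_file_names()['FILE']:
            tar.add(file_path, arcname=os.path.basename(file_path))
    return tar_path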