Example no. 1
0
def create_remote_file_bundle(name):
    """Create a bundle whose data is three kinds of S3 links:
    a.) an unmanaged s3 path
    b.) a managed s3 path
    c.) a managed s3 path nested under a directory
    """
    # Stand up the (moto-mocked) test bucket.
    s3 = boto3.resource('s3', region_name='us-east-1')
    s3.create_bucket(Bucket=TEST_BUCKET)

    # Stage this source file in the bucket and remember its checksum.
    expected_md5 = md5_file(__file__)
    aws_s3.put_s3_file(__file__, TEST_BUCKET_URL)

    unmanaged_path = os.path.join(TEST_BUCKET_URL, os.path.basename(__file__))

    with api.Bundle(TEST_CONTEXT, name=name) as b:
        # (b) managed file path supplied by the bundle itself
        managed_path = b.get_remote_file('test_s3_file.txt')
        aws_s3.cp_local_to_s3_file(__file__, managed_path)
        # (c) managed path under a bundle-provided directory
        managed_dir_path = os.path.join(
            b.get_remote_directory('vince/klartho'), 'test_s3_file.txt')
        aws_s3.cp_local_to_s3_file(__file__, managed_dir_path)

        b.add_data([unmanaged_path, managed_path, managed_dir_path])
        b.add_tags({'info': 'added an s3 file'})

    bundle_uuid = b.uuid

    # Re-fetch the bundle, push it, and verify the first link round-trips.
    b = api.get(TEST_CONTEXT, None, uuid=bundle_uuid)
    b.commit()
    actual_md5 = md5_file(b.data[0])
    print(actual_md5)
    print(expected_md5)
    assert actual_md5 == expected_md5
Example no. 2
0
    def _write_hframe_remote(self, hfr):
        """Push a hyperframe's protobuf files to the remote object store.

        Copies every ``*.pb`` file in the hyperframe's local object
        directory up to the matching remote object directory.

        Args:
            hfr: hyperframe record; ``hfr.pb.uuid`` names its object dir.

        Returns:
            None

        Raises:
            Exception: if the hyperframe has no local object directory.
        """
        obj_dir = os.path.join(self.get_object_dir(), hfr.pb.uuid)
        if not os.path.exists(obj_dir):
            raise Exception(
                "Write HFrame to remote failed because hfr {} doesn't appear to be in local context"
                .format(hfr.pb.uuid))
        remote_dir = os.path.join(self.get_remote_object_dir(), hfr.pb.uuid)
        for pb_file in glob.glob(os.path.join(obj_dir, '*.pb')):
            aws_s3.put_s3_file(pb_file, remote_dir)

        return None
Example no. 3
0
def test_copy_in_s3_file_with_remote(run_test):
    """ Test copying in s3 file
    The file should be copied into the remote context

    With a remote attached, the bundle's data should remain an s3 link
    rather than being localized.
    """
    # Pin the region so bucket creation matches the sibling tests in this
    # suite (us-east-1 is the one region needing no LocationConstraint,
    # and newer boto3 requires an explicit region to be configured).
    s3_resource = boto3.resource('s3', region_name='us-east-1')
    s3_resource.create_bucket(Bucket=TEST_BUCKET)

    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)

    # Copy a local file to moto s3 bucket
    aws_s3.put_s3_file(__file__, TEST_BUCKET_URL)

    s3_file = os.path.join(TEST_BUCKET_URL, os.path.basename(__file__))

    with api.Bundle(TEST_CONTEXT, name=TEST_BUNDLE) as b:
        b.add_data(s3_file)
        b.add_tags({'info': 'added an s3 file'})
    saved_uuid = b.uuid

    # The data stays a remote s3 URL (not a localized file path).
    b = api.get(TEST_CONTEXT, None, uuid=saved_uuid)
    assert b.data.startswith("s3://")
Example no. 4
0
def test_copy_in_s3_file(run_test):
    """Test copying in an s3 file (no remote attached).

    The file should be copied into the local context, so reading the
    bundle's data back yields the same checksum as the source file.
    """
    conn = boto3.resource('s3', region_name="us-east-1")
    conn.create_bucket(Bucket=TEST_BUCKET)

    # Stage this source file in the moto bucket and record its md5.
    expected = md5_file(__file__)
    aws_s3.put_s3_file(__file__, TEST_BUCKET_URL)

    remote_path = os.path.join(TEST_BUCKET_URL, os.path.basename(__file__))

    with api.Bundle(TEST_CONTEXT, name=TEST_BUNDLE) as b:
        b.add_data(remote_path)
        b.add_tags({'info': 'added an s3 file'})
    bundle_uuid = b.uuid

    # Fetch the bundle back and compare checksums of the localized copy.
    b = api.get(TEST_CONTEXT, None, uuid=bundle_uuid)
    actual = md5_file(b.data)
    print(actual)
    print(expected)
    assert actual == expected
Example no. 5
0
    def copy_in_files(src_files, dst_dir):
        """
        Given a set of link URLs, copy them into the destination directory.

        The link URLs will have file:///, s3://, or vertica:// scheme's

        Do not call this to copy a set of links within an existing bundle.
        # move files from one managed path to another
        hfr = <get some hyperframe>
        new_managed_path = "s3://<some keys>"
        frs = [ fr for fr in hfr.get_frames() if fr.is_link_frame() ]
        fr = frs[0] # just look at one
        bundle_paths = fr.get_link_urls()
        src_paths   = convert_bundle_url_to_path(bundle_paths)
        new_paths = self.copy_in_files(src_paths, new_managed_path)
        fr = hyperframe.FrameRecord.make_link_frame(new_hfr_uuid, fr.pb.name, new_paths)

        Note: currently works for
        local fs : local fs dir
        local fs : s3 dir
        s3       : s3 dir
        s3       : local fs dir

        Args:
            src_files (:list:str):  A single file path (or luigi.LocalTarget)
                or a list of paths/targets
            dst_dir (str): destination directory URL (file or s3 scheme)

        Returns:
            file_set: set of new paths where files were copied.  Either one
                file (when a single path was passed in) or a list of files

        """
        file_set = []
        # Remember whether the caller passed a scalar so we can unwrap
        # the result at the end.
        return_one_file = False

        if isinstance(src_files, basestring) or isinstance(
                src_files, luigi.LocalTarget):
            return_one_file = True
            src_files = [src_files]

        dst_scheme = urlparse(dst_dir).scheme

        for src_path in src_files:
            try:
                # If this is a luigi LocalTarget and it's in a managed path
                # space, convert the target to a path name but do not
                # actually copy.
                if src_path.path.startswith(dst_dir):
                    file_set.append(urljoin('file:', src_path.path))
                    continue
                else:
                    src_path = src_path.path
            except AttributeError:
                # Plain string path -- there is no .path attribute.
                pass

            # TODO: Do something with Vertica links.
            if urlparse(src_path).scheme == 'vertica':
                file_set.append(src_path)
                continue

            dst_file = os.path.join(dst_dir, os.path.basename(src_path))

            # Record the destination URL first: local destinations get an
            # explicit file: scheme; s3/vertica keep their native URL.
            if dst_scheme != 's3' and dst_scheme != 'vertica':
                file_set.append(urljoin('file:', dst_file))
            else:
                file_set.append(dst_file)

            # Source already equals destination -- nothing to copy.
            if file_set[-1] == src_path:
                # This can happen if you re-push something already pushed that's not localized
                _logger.debug(
                    "DataContext: copy_in_files found src {} == dst {}".format(
                        src_path, file_set[-1]))
                # but it can also happen if you re-bind and push.  So check that file is present!
                if urlparse(
                        src_path
                ).scheme == 's3' and not aws_s3.s3_path_exists(src_path):
                    print(
                        "DataContext: copy_in_files found s3 link {} not present!"
                        .format(src_path))
                    print(
                        "It is likely that this bundle existed on another remote branch and "
                    )
                    print("was not localized before changing remotes.")
                    raise Exception(
                        "copy_in_files: bad localized bundle push.")
                continue

            try:
                # Dispatch on (source scheme, destination scheme).
                # Directories are skipped entirely (logged below).
                if not os.path.isdir(src_path):
                    o = urlparse(src_path)

                    if o.scheme == 's3':
                        # s3 to s3
                        if dst_scheme == 's3':
                            aws_s3.cp_s3_file(src_path, dst_dir)
                        elif dst_scheme != 'vertica':  # assume 'file'
                            # s3 to local
                            aws_s3.get_s3_file(src_path, dst_file)
                        else:
                            raise Exception(
                                "copy_in_files: copy s3 to unsupported scheme {}"
                                .format(dst_scheme))

                    elif o.scheme == 'vertica':
                        _logger.debug(
                            "Skipping an vertica db file on bundle add")

                    elif o.scheme == 'file':
                        if dst_scheme == 's3':
                            # local to s3
                            aws_s3.put_s3_file(o.path, dst_dir)
                        elif dst_scheme != 'vertica':  # assume 'file'
                            # local to local
                            shutil.copy(o.path, dst_dir)
                        else:
                            raise Exception(
                                "copy_in_files: copy local file to unsupported scheme {}"
                                .format(dst_scheme))

                    else:
                        raise Exception(
                            "DataContext copy-in-file found bad scheme: {}".
                            format(o.scheme))
                else:
                    _logger.info(
                        "DataContext copy-in-file: Not adding files in directory {}"
                        .format(src_path))
            except (IOError, os.error) as why:
                # Best-effort copy: log the failure and move on to the
                # next file rather than aborting the whole add.
                _logger.error("Disdat add error: {} {} {}".format(
                    src_path, dst_dir, str(why)))

        if return_one_file:
            return file_set[0]
        else:
            return file_set