Ejemplo n.º 1
0
def _abort_upload(temp_dir):
    """Remove the temporary tarball directory and exit with status 1."""
    rmtree(temp_dir)
    sys.exit(1)


def initialize_new_upload(data_config, access_token, description=None, source_dir='.'):
    """
    Prepare a brand-new data upload and record its state in the data config.

    Steps, in order:
      1. Reset the tarball path, data endpoint and resource id on
         ``data_config``.
      2. Compress ``source_dir`` into ``floydhub_data.tar.gz`` inside a fresh
         temporary directory and persist that path through
         ``DataConfigManager``.
      3. Create the data object through ``DataClient().create``.
      4. Fetch upload credentials through
         ``DataClient().new_tus_credentials``.
      5. Initialize the upload through ``TusDataClient().initialize_upload``
         and persist the returned endpoint.

    Args:
        data_config: config object providing ``namespace``, ``name``,
            ``family_id`` and the ``set_*`` methods used below.
        access_token: object whose ``username`` is used as the namespace when
            ``data_config.namespace`` is falsy.
        description: optional description forwarded to ``DataRequest``.
        source_dir: directory handed to ``create_tarfile`` for compression.

    Raises:
        SystemExit: with status 1 when the data object, the credentials or
            the upload endpoint come back falsy. The temporary directory is
            removed first.
    """
    # TODO: hit upload server to check for liveness before moving on
    data_config.set_tarball_path(None)
    data_config.set_data_endpoint(None)
    data_config.set_resource_id(None)

    namespace = data_config.namespace or access_token.username
    data_name = "{}/{}".format(namespace, data_config.name)

    # Create the tarball of the data in a private temporary directory
    # TODO: allow to the users to change directory for the compression
    temp_dir = tempfile.mkdtemp()
    tarball_path = os.path.join(temp_dir, "floydhub_data.tar.gz")

    floyd_logger.debug("Creating tarfile with contents of current directory: %s",
                       tarball_path)
    floyd_logger.info("Compressing data...")

    try:
        create_tarfile(source_dir=source_dir, filename=tarball_path)
    except BaseException:
        # Nothing references the tarball yet, so a failed or interrupted
        # compression (BaseException also covers Ctrl-C) must not leave the
        # temporary directory behind.
        rmtree(temp_dir)
        raise

    # If starting a new upload fails for some reason down the line, we don't
    # want to re-tar, so save off the tarball path now
    data_config.set_tarball_path(tarball_path)
    DataConfigManager.set_config(data_config)

    # Create data object using API
    data = DataRequest(name=data_name,
                       description=description,
                       family_id=data_config.family_id,
                       data_type='gzip')
    data_info = DataClient().create(data)
    if not data_info:
        # NOTE(review): the config saved above still records the tarball path
        # after the directory is removed; presumably the resume logic copes
        # with a missing file -- confirm.
        _abort_upload(temp_dir)

    data_config.set_data_id(data_info['id'])
    data_config.set_data_name(data_info['name'])
    DataConfigManager.set_config(data_config)

    # fetch auth token for upload server
    creds = DataClient().new_tus_credentials(data_info['id'])
    if not creds:
        # TODO: delete module from server?
        _abort_upload(temp_dir)

    # The first credential element doubles as the resource id and is sent as
    # the upload's "filename" metadata.
    data_resource_id = creds[0]
    data_endpoint = TusDataClient().initialize_upload(
        tarball_path,
        metadata={"filename": data_resource_id},
        auth=creds)
    if not data_endpoint:
        # TODO: delete module from server?
        floyd_logger.error("Failed to get upload URL from Floydhub!")
        _abort_upload(temp_dir)

    data_config.set_data_endpoint(data_endpoint)
    DataConfigManager.set_config(data_config)
Ejemplo n.º 2
0
    def create(self, data):
        """
        Compress the current directory and upload it as a new data source.

        The current directory is packed into ``data.tar.gz`` inside a
        temporary directory that is removed when the operation ends. The
        archive is then POSTed to ``self.url`` as a multipart form together
        with the JSON-encoded ``data.to_dict()``.

        Args:
            data: object exposing ``to_dict()``; its result is JSON-encoded
                into the "json" form field.

        Returns:
            The value stored under "id" in the JSON body of the response.
        """
        with tempfile.TemporaryDirectory() as temp_directory:
            floyd_logger.info("Compressing data ...")
            compressed_file_path = os.path.join(temp_directory, "data.tar.gz")

            # Create tarfile
            floyd_logger.debug("Creating tarfile with contents of current directory: {}".format(compressed_file_path))
            create_tarfile(source_dir='.', filename=compressed_file_path)

            total_file_size = os.path.getsize(compressed_file_path)
            floyd_logger.info("Creating data source. Total upload size: {}".format(sizeof_fmt(total_file_size)))
            floyd_logger.info("Uploading compressed data ...")

            # Keep the archive open only for the duration of the request so
            # the handle is closed before the temporary directory is cleaned
            # up, even if the upload raises.
            with open(compressed_file_path, 'rb') as tarball:
                # Add request data
                request_data = [
                    ("data", ('data.tar', tarball, 'text/plain')),
                    ("json", json.dumps(data.to_dict())),
                ]

                multipart_encoder = MultipartEncoder(
                    fields=request_data
                )

                # Attach progress bar
                progress_callback = create_progress_callback(multipart_encoder)
                multipart_encoder_monitor = MultipartEncoderMonitor(multipart_encoder, progress_callback)

                response = self.request("POST",
                                        self.url,
                                        data=multipart_encoder_monitor,
                                        headers={"Content-Type": multipart_encoder.content_type},
                                        timeout=3600)

            floyd_logger.info("Done")
            return response.json().get("id")