def run_uploader(args):
    '''
    Start the uploader process. This process will run indefinitely, polling
    the Conductor cloud app for files that need to be uploaded.
    '''
    # Convert the Namespace object to a dictionary
    args_dict = vars(args)

    # Set up logging
    log_level_name = args_dict.get("log_level") or CONFIG.get("log_level")
    log_level = loggeria.LEVEL_MAP.get(log_level_name)
    log_dirpath = args_dict.get("log_dir") or CONFIG.get("log_dir")
    set_logging(log_level, log_dirpath)

    logger.debug('Uploader parsed_args is %s', args_dict)

    resolved_args = resolve_args(args_dict)
    uploader = Uploader(resolved_args)
    uploader.main()
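# Usage sketch (illustrative only; in practice `args` comes from the CLI's
# argparse parser, and resolve_args()/Uploader() may expect additional fields
# beyond the two shown here):
#
#   import argparse
#   args = argparse.Namespace(log_level="DEBUG", log_dir=None)
#   run_uploader(args)  # blocks, polling the Conductor app for files to upload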
def read_conductor_credentials(use_api_key=False):
    '''
    Read the conductor credentials file, if it exists. It will contain a
    bearer token from either the user or the API key (if that's desired).
    If the credentials file doesn't exist, try to fetch a new one in the API
    key scenario or prompt the user to log in.

    Args:
        use_api_key: Whether or not to use the API key

    Returns:
        A bearer token on success, or None if a token could not be obtained.
    '''
    logger.debug("Reading conductor credentials...")

    # Fall back to user authentication if the API key config is incomplete
    if use_api_key and (not CONFIG.get('api_key')
                        or not CONFIG.get('api_key', {}).get('client_id')
                        or not CONFIG.get('api_key', {}).get('private_key')):
        use_api_key = False

    logger.debug("use_api_key = %s" % use_api_key)
    creds_file = get_creds_path(use_api_key)

    logger.debug("Creds file is %s" % creds_file)
    logger.debug("Auth url is %s" % CONFIG.get('auth_url'))

    if not os.path.exists(creds_file):
        if use_api_key:
            if not CONFIG.get('api_key'):
                logger.debug("Attempted to use API key, but no api key in config!")
                return None

            # Exchange the API key for a bearer token
            logger.debug("Attempting to get API key bearer token")
            get_api_key_bearer_token(creds_file)
        else:
            auth.run(creds_file, CONFIG.get('auth_url'))

    if not os.path.exists(creds_file):
        return None

    logger.debug("Reading credentials file...")
    with open(creds_file) as fp:
        file_contents = json.loads(fp.read())

    expiration = file_contents.get('expiration')
    if not expiration or expiration < int(time.time()):
        logger.debug("Credentials expired!")
        if use_api_key:
            logger.debug("Refreshing API key bearer token!")
            get_api_key_bearer_token(creds_file)
        else:
            logger.debug("Sending to auth page...")
            auth.run(creds_file, CONFIG.get('auth_url'))

        # Re-read the creds file, since it has been re-upped
        with open(creds_file) as fp:
            file_contents = json.loads(fp.read())

    return file_contents['access_token']
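# Usage sketch (illustrative only; the header construction below is an
# assumption for demonstration, not something this module does itself):
#
#   token = read_conductor_credentials(use_api_key=True)
#   if token:
#       headers = {"Authorization": "Bearer %s" % token}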
def run_submit(args):
    # Convert the Namespace object to a dictionary
    args_dict = vars(args)

    # Set up logging
    log_level_name = args_dict.get("log_level") or CONFIG.get("log_level")
    log_level = loggeria.LEVEL_MAP.get(log_level_name)
    log_dirpath = args_dict.get("log_dir") or CONFIG.get("log_dir")
    set_logging(log_level, log_dirpath)

    logger.debug('parsed_args is %s', args_dict)

    submitter = Submit(args_dict)
    response, response_code = submitter.main()
    logger.debug("Response Code: %s", response_code)
    logger.debug("Response: %s", response)
    if response_code in [201, 204]:
        logger.info("Submission Complete")
    else:
        logger.error("Submission Failure. Response code: %s", response_code)
        sys.exit(1)
def main(self):
    '''
    Submitting a job happens in a few stages:
        1. Gather dependencies and parameters for the job
        2. Upload dependencies to cloud storage (requires md5s of dependencies)
        3. Submit job to conductor (listing the dependencies and their
           corresponding md5s)

    In order to give flexibility to customers (because a customer may consist
    of a single user or a large team of users), there are two options
    available that dictate how these job submission stages are executed.

    In a simple single-user case, a user's machine can do all three of the
    job submission stages. We call this local_upload=True.

    However, when there are multiple users, there may be a desire to funnel
    the "heavier" job submission duties (such as md5 checking and dependency
    uploading) onto one dedicated machine (so as not to bog down the artist's
    machine). This is called local_upload=False. This results in stage 1
    being performed on the artist's machine (dependency gathering), while
    handing over stages 2 and 3 to an uploader daemon. This is achieved by
    the artist submitting a "partial" job, which only lists the dependencies
    that it requires (omitting the md5 hashes). In turn, the uploader daemon
    listens for these partial jobs and acts upon them by reading each job's
    listed dependencies (the filepaths that were recorded during the
    "partial" job submission). The uploader then md5 checks each dependency
    file from its local disk, uploads the file to cloud storage (if
    necessary), and finally "completes" the partial job submission by
    providing the full mapping dictionary of each dependency filepath and
    its corresponding md5 hash. Once the job submission is completed,
    conductor can start acting on the job (executing tasks, etc).
    '''
    # Get the list of file dependencies
    upload_files = self.get_upload_files()
    upload_size = 0

    upload_file_info = []
    # for path in upload_files:
    #     upload_file_info[path] = {"md5": None,
    #                               "source": path,
    #                               "destination": path}

    # Create a dictionary of upload_files with None as the values.
    upload_files = dict([(path, None) for path in upload_files])

    logger.debug("Upload files is %s" % upload_files)

    # If opting to upload locally (i.e. from this machine), then run the
    # uploader now. This will do all of the md5 hashing and upload files to
    # conductor (if necessary).
    if self.local_upload:
        api_client.read_conductor_credentials(use_api_key=True)
        uploader_args = {
            "location": self.location,
            "database_filepath": self.database_filepath,
            "thread_count": CONFIG.get("thread_count"),
            "md5_caching": self.md5_caching
        }
        uploader_ = uploader.Uploader(uploader_args)
        upload_error_message = uploader_.handle_upload_response(
            self.project, upload_files)
        if upload_error_message:
            raise Exception("Could not upload files:\n%s" % upload_error_message)

        # Get the resulting dictionary of the files and their corresponding
        # md5 hashes
        upload_md5s = uploader_.return_md5s()
        for path, md5 in upload_md5s.iteritems():
            upload_files[path] = md5

    # If NOT uploading locally (i.e. offloading the work to the uploader daemon)
    else:
        # Update the upload_files dictionary with md5s that should be
        # enforced. This will override the None values with actual md5 hashes.
        for filepath, md5 in self.enforced_md5s.iteritems():
            logger.debug("filepath is %s" % filepath)
            processed_filepaths = file_utils.process_upload_filepath(filepath)
            assert len(processed_filepaths) == 1, \
                "Did not get exactly one filepath: %s" % processed_filepaths
            upload_files[processed_filepaths[0]] = md5

    for upload_file in upload_files:
        logger.debug("doing stat of %s" % upload_file)
        filestat = os.stat(upload_file)
        upload_file_dict = {
            "md5": upload_files[upload_file],
            "destination": upload_file,
            "st_mode": filestat.st_mode,
            "st_ino": filestat.st_ino,
            "st_dev": filestat.st_dev,
            "st_nlink": filestat.st_nlink,
            "st_uid": filestat.st_uid,
            "st_gid": filestat.st_gid,
            "st_size": filestat.st_size,
            "st_atime": filestat.st_atime,
            "st_mtime": filestat.st_mtime,
            "st_ctime": filestat.st_ctime
        }
        upload_file_info.append(upload_file_dict)
        upload_size += filestat.st_size

    # Submit the job to conductor. upload_files may or may not have md5s
    # included in the dictionary. Any md5s that are included are expected to
    # be checked against if/when the uploader daemon goes to upload them. If
    # they do not match what is on disk, the uploader will fail the job.
    response, response_code = self.send_job(upload_file_info, upload_size)
    return json.loads(response), response_code
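# For reference, the shapes of the two structures built in main() above
# (paths and hash values below are hypothetical examples):
#
#   upload_files:     {"/path/to/texture.exr": "d41d8cd98f00b204e9800998ecf8427e",
#                      "/path/to/scene.ma": None}
#   upload_file_info: [{"md5": "d41d8cd98f00b204e9800998ecf8427e",
#                       "destination": "/path/to/texture.exr",
#                       "st_size": 1048576, ...}, ...]
#
# A None md5 marks a "partial" submission entry that the uploader daemon is
# expected to hash and upload later.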
def make_url(path):
    '''
    Build a fileio API url for the given path, using the configured api_url.
    '''
    url_base = CONFIG.get("api_url")
    url = "%s/api/v1/fileio/%s" % (url_base, path)
    return url
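# Example (hypothetical api_url value): with CONFIG["api_url"] set to
# "https://conductor.example.com", make_url("uploads") would return
# "https://conductor.example.com/api/v1/fileio/uploads".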