# NOTE(review): this chunk is whitespace-collapsed (many statements on one
# physical line) and begins mid-function -- the enclosing loop and `def` are
# not visible here (they appear to be the out-of-order chunk below that ends
# after the SQS connection).  Only comments are added; code is untouched.
#
# Visible behavior:
#  * On a message that could not be decoded: push its visibility timeout out
#    15 seconds so this pass skips it, read the next message (5 s wait), and
#    continue the loop.
#  * Treat a readable message's class_ad as the job-status update.  Copy an
#    S3 key into update_classad either from msg.s3_key or, failing that, by
#    grepping 'S3KeyID = "..."' out of the status classad -- but only when
#    the input ad carried no S3KeyID (ad_s3key == '').
#  * Forward any EC2HookArg / EC2LastFailureReason lines from the status
#    classad verbatim into update_classad.
#  * Detect 'EC2JobAttempted = true' in the status classad; the attempt
#    counter increment presumably follows in a chunk not visible here --
#    TODO confirm.
# grep() is assumed to be a project helper returning a tuple of regex groups
# or None -- verify against its definition elsewhere in the project.
q_msg.change_visibility(15) q_msg = sqs_queue.read(5) continue if q_msg != None: # This message will be the status update status_classad = msg.class_ad s3_key = msg.s3_key # Add the S3 Key ID if it isn't there already. This happens # in the case where nothing was needed to be transfered to # the execute node if s3_key != None and ad_s3key == '': update_classad += 'S3KeyID = "%s"\n' % s3_key else: key = grep('^S3KeyID\s*=\s*"(.*)"$', status_classad) if s3_key == None and ad_s3key == '' and \ key != None and key[0] != None: update_classad += 'S3KeyID = "%s"\n' % key[0] # Look for EC2 job parameters for param in ['EC2HookArg', 'EC2LastFailureReason']: value = grep('^(%s.*)' % param, status_classad) if value != None and value[0] != None: update_classad += '%s\n' % value[0] # If the message notifies of a run attempt, increment the counter # on the number of runs attempts run_try = grep('^EC2JobAttempted\s*=\s*(.*)$', status_classad) if run_try != None and run_try[0] != None and \ run_try[0].strip().lower() == 'true':
# NOTE(review): whitespace-collapsed chunk.  This main() is truncated in this
# view -- it ends immediately after the SQS connection attempt (the queue
# read/update loop appears to be the out-of-order fragment above), so only
# comments are added here.
#
# Visible behavior -- the "update job status" hook:
#  * Read 'attr = value' classad lines from stdin, stripping surrounding
#    double quotes from values, and capture:
#      amazonaccesskey / amazonsecretkey -> PATHS to AWS credential files,
#      ec2jobsuccessful, amazonfullsqsqueuename, ec2runattempts (int),
#      s3keyid, ec2region.
#  * Both credential paths must exist; the FIRST LINE of each file is taken
#    as the key value (readlines()[0] -- raises IndexError on an empty file,
#    worth hardening).
#  * Connect to SQS in the configured region via boto; on BotoServerError
#    (Python 2 'except E, e' syntax) write the reason/body to stderr and
#    return FAILURE (a module-level constant not visible in this chunk).
# NOTE(review): the 'ec2region' branch is the only attribute test without a
# trailing 'continue' -- harmless as written because it is the last test in
# the chain, but inconsistent with every other branch.
# NOTE(review): the credential files are opened without with/try-finally;
# an IOError between open() and close() would leak the handle.
def main(argv=None): if argv == None: argv = sys.argv status_classad = '' aws_key = '' aws_secret = '' queue_name = '' update_classad = '' ad_s3key = '' job_completed = 'FALSE' update_skip_attribs = ['jobstatus', 'imagesize', 'enteredcurrentstatus', 'jobstartdate'] attempts = 0 region = '' for line in sys.stdin: match = grep('^([^=]*)\s*=\s*(.*)$', line.strip()) if match != None and match[0] != None and match[1] != None: attribute = match[0].strip().lower() val_match = grep('^"(.*)"$', match[1].strip()) if val_match != None and val_match[0] != None: value = val_match[0].strip() else: value = match[1].strip() if attribute == 'amazonaccesskey': aws_key = value continue if attribute == 'amazonsecretkey': aws_secret = value continue if attribute == 'ec2jobsuccessful': job_completed = value continue if attribute == 'amazonfullsqsqueuename': queue_name = value continue if attribute == 'ec2runattempts': attempts = int(value) continue if attribute == 's3keyid': ad_s3key = value continue if attribute == 'ec2region': region = value # Get the specified Amazon key information if os.path.exists(aws_key) == False or os.path.exists(aws_secret) == False: sys.stderr.write('Error: Unable to read AWS key files') return(FAILURE) else: key_file = open(aws_key, 'r') aws_key_val = key_file.readlines()[0].strip() key_file.close() key_file = open(aws_secret, 'r') aws_secret_val = key_file.readlines()[0].strip() key_file.close() # Look for an update try: r_obj = AWSRegion.get_sqs_region(region) sqs_con = SQSConnection(aws_key_val, aws_secret_val, region=r_obj) except BotoServerError, error: sys.stderr.write('Error: Unable to connect to SQS: %s, %s\n' % (error.reason, error.body)) return(FAILURE)
# NOTE(review): whitespace-collapsed chunk.  This main() -- the finalize/exit
# hook -- is truncated mid-`if` at the end of this span, so only comments are
# added.
#
# Visible behavior:
#  * Read the SOURCE job classad from stdin up to the '------' delimiter,
#    capturing iwd, out/err (unless '/dev/null'), clusterid, procid.
#  * Then read the ROUTED classad from the rest of stdin, capturing
#    s3bucketid, s3keyid, AWS credential file paths, ec2jobsuccessful,
#    amazonfullsqsqueuename, ec2region.
# NOTE(review): the first parse uses '^([^=]*)\s+=\s+(.*)$' (whitespace
# around '=' REQUIRED) while the second uses '\s*=\s*' (optional) -- an ad
# line written as 'Attr=val' would be matched by the second loop but silently
# skipped by the first.  Confirm which form the hook protocol guarantees.
def main(argv=None): if argv == None: argv = sys.argv aws_key = '' aws_secret = '' s3_bucket_obj = '' stdout = '' stderr = '' ec2_success = "false" ret_val = SUCCESS cluster = 0 proc = 0 done_classad = '' s3_key = '' remove_attrs = ['hookkeyword'] region = '' # Read the source class ad from stdin and store it as well as the # job status. The end of the source job is noted by '------' for line in sys.stdin: if line.strip() == '------': break match = grep('^([^=]*)\s+=\s+(.*)$', line.strip()) if match != None and match[0] != None and match[1] != None: attribute = match[0].strip().lower() val_match = grep('^"(.*)"$', match[1].strip()) if val_match != None and val_match[0] != None: value = val_match[0].strip() else: value = match[1].strip() if attribute == 'iwd': iwd = value continue if attribute == 'out' and value.lower() != '/dev/null': stdout = value continue if attribute == 'err' and value.lower() != '/dev/null': stderr = value continue if attribute == 'clusterid': cluster = value continue if attribute == 'procid': proc = value continue # Read the routed class ad from stdin and store the S3 information and # the job status for line in sys.stdin: match = grep('^([^=]*)\s*=\s*(.*)$', line.strip()) if match != None and match[0] != None and match[1] != None: attribute = match[0].strip().lower() val_match = grep('^"(.*)"$', match[1].strip()) if val_match != None and val_match[0] != None: value = val_match[0].strip() else: value = match[1].strip() if attribute == 's3bucketid': bucket = value continue if attribute == 's3keyid': s3_key = value continue if attribute == 'amazonaccesskey': aws_key = value continue if attribute == 'amazonsecretkey': aws_secret = value continue if attribute == 'ec2jobsuccessful': ec2_success = value continue if attribute == 'amazonfullsqsqueuename': queue_name = value continue if attribute == 'ec2region': region = value # If the source job is not in the completed state, but the routed job is # then there was a failure running the AMI. 
# NOTE(review): the comment below says "exit with status 2"; FAILURE is
# presumably defined as 2 at module level -- confirm outside this chunk.
#  * If ec2_success is "false", report and return FAILURE to force re-route.
#  * Read the first line of each AWS credential file as the key values
#    (files opened without with/try-finally; empty file raises IndexError).
#  * Connect to S3 and fetch the bucket, retrying on BotoServerError with a
#    5 second sleep.
# NOTE(review): range(1,5) yields 4 attempts, not 5 -- possible off-by-one
# if five tries were intended.
Exit with status 2 so the # job will be re-routed. if ec2_success.lower() == "false": sys.stderr.write('Job %d.%d did not complete. Forcing the job to be routed again\n' % (int(cluster), int(proc))) return(FAILURE) # Pull the specific keys out of the files if os.path.exists(aws_key) == False or os.path.exists(aws_secret) == False: sys.stderr.write('Error: Unable to read AWS key files') return(FAILURE) else: key_file = open(aws_key, 'r') aws_key_val = key_file.readlines()[0].strip() key_file.close() key_file = open(aws_secret, 'r') aws_secret_val = key_file.readlines()[0].strip() key_file.close() # Connect to S3 failed = 1 for attempt in range(1,5): try: s3_con = S3Connection(aws_key_val, aws_secret_val) s3_bucket_obj = s3_con.get_bucket(bucket) failed = 0 break except BotoServerError, error: sys.stderr.write('Error accessing S3: %s, %s\n' % (error.reason, error.body)) time.sleep(5) pass
#  * Drain the SQS queue looking for the completion update: skip messages
#    that fail to unpickle by pushing their visibility out 15 s and reading
#    the next; a message whose class_ad contains 'JobStatus = 4' (presumably
#    HTCondor "Completed" -- confirm) is the source-job update.
# SECURITY NOTE(review): pickle.loads() on SQS message bodies executes
# arbitrary code if the queue can be written by an untrusted party.
# NOTE(review): the JobStatus regex captures a single character '(.)' --
# fine for single-digit statuses only.
#  * Finally, strip remove_attrs lines from done_classad (loop is truncated
#    mid-`if` at the end of this chunk).
q_msg = sqs_queue.read(5) while q_msg != None: try: msg = pickle.loads(q_msg.get_body()) except: # Likely bad message in the queue so skip it by setting the # visibility timer far enough in the future that we're unlikely # to hit it again this pass but not so far that it won't be seen # for a long time, and then move on to the next message q_msg.change_visibility(15) q_msg = sqs_queue.read(5) continue # Check the job status to see if this message notifies of # job completion job_status = grep('^JobStatus\s*=\s*(.)$', msg.class_ad) if job_status != None and job_status[0] != None and \ int(job_status[0].strip()) == 4: # We found the update that indicates the job completed. This # message is the update to the source job done_classad = msg.class_ad break else: q_msg = sqs_queue.read(5) if done_classad != '': # Remove attributes that shouldn't be updated for rm_attr in remove_attrs: for line in done_classad.split('\n'): match = grep('^([^=]*)\s*=\s*(.*)$', line.strip()) if match != None and match[0] != None and match[1] != None:
# NOTE(review): whitespace-collapsed chunk.  This main() -- the translate
# hook that converts a vanilla job into an EC2 grid job -- is truncated after
# the credential-file reads, so only comments are added.
#
# Setup: attribute lists controlling which classad attributes are skipped,
# reset to 0/0.0, or treated as transferable files when building the routed
# ad and the SQS payload (sqs_data.class_ad).
def main(argv=None): if argv == None: argv = sys.argv max_key_id = 1000000000 sqs_data = SQSEntry() grid_classad = '' iwd = '' create_sandbox = 'no' transfer_exe = 'true' skip_attribs = ['clusterid', 'procid', 'bufferblocksize', 'buffersize', 'condorplatform', 'condorversion', 'coresize', 'qdate', 'remotewallclocktime', 'servertime', 'autoclusterid', 'autoclusterattrs', 'currenthosts', 'routedtojobid', 'managed', 'managedmanager', 'periodichold', 'periodicremove', 'periodicrelease'] int_reset_attribs = ['exitstatus', 'completiondate', 'localsyscpu', 'localusercpu', 'numckpts', 'numrestarts', 'numsystemholds', 'committedtime', 'totalsuspensions', 'lastsuspensiontime','cumulativesuspensiontime'] float_reset_attribs = ['remoteusercpu', 'remotesyscpu'] transfer_attribs = ['cmd', 'command', 'in', 'transferinput'] delim = '------' aws_key = '' aws_secret = '' aws_public_key = '' aws_private_key = '' bucket_id = '' rsa_public_key = '' proc_id = '' cluster_id = '' qdate = '' delay = '' s3_key = '' route_name = '' ami = '' instance = '' resource_url = 'https://ec2.amazonaws.com/' region = '' # Parse the route information from stdin. 
# Route parsing: the first stdin line is '[ key = value; ... ]'; each
# 'set_*' entry provides route-level overrides (keys, bucket, AMI, instance
# type, shutdown delay, region) that take precedence over the job's own
# values in the loop below.
route = grep('^\[\s*(.*)\s*\]$', sys.stdin.readline())[0] for line in route.split(';'): match = grep('^([^=]*)\s*=\s*(.*)$', line.strip()) if match != None and match[0] != None and match[1] != None: attribute = match[0].strip().lower() val_match = grep('^"(.*)"$', match[1].strip()) if val_match != None and val_match[0] != None: value = val_match[0].strip() else: value = match[1].strip() if attribute == 'name': route_name = value continue if attribute == 'set_amazonpublickey': aws_public_key = value continue if attribute == 'set_amazonprivatekey': aws_private_key = value continue if attribute == 'set_amazonaccesskey': aws_key = value continue if attribute == 'set_amazonsecretkey': aws_secret = value continue if attribute == 'set_amazons3bucketname': bucket_id = value continue if attribute == 'set_rsapublickey': rsa_public_key = value continue if attribute == 'set_amazonamishutdowndelay': delay = value continue if attribute == 'set_amazonamiid': ami = value continue if attribute == 'set_amazoninstancetype': instance = value continue if attribute == 'set_ec2region': region = value continue # Read the original class ad from stdin and store it for submission # to SQS. 
# Job-ad loop: every non-skipped line is appended verbatim to the SQS
# payload, while grid_classad gets the EC2-ified version (JobUniverse=9,
# counters reset, JobStatus=1, file transfer disabled, Cmd rewritten to an
# "EC2: route: cmd" placeholder).
# NOTE(review): the 'globaljobid' test sits AFTER the line is appended to
# sqs_data.class_ad -- so GlobalJobId reaches the SQS payload but is dropped
# from grid_classad.  Confirm that asymmetry is intended.
# NOTE(review): an 'if' is split across the chunk boundary below
# ("transfer_exe = value.lower() if" / "lower_attr == 'cmd'...") -- artifact
# of the collapsed formatting in this copy, not necessarily of the original.
Additionally, convert it to an EC2 classad for output for line in sys.stdin: if line.strip() == delim: continue match = grep('^([^=]*)\s*=\s*(.*)$', line) if match != None and match[0] != None and match[1] != None: attribute = match[0].strip() lower_attr = attribute.lower() val_match = grep('^"(.*)"$', match[1].strip()) if val_match != None and val_match[0] != None: value = val_match[0].strip() else: value = match[1].strip() if lower_attr == 'iwd': # Remove the IWD from the class ad so the execute directory # will be used iwd = value if lower_attr == 'amazonpublickey': if aws_public_key == '': user_aws_public_key = value continue if lower_attr == 'amazonprivatekey': if aws_private_key == '': user_aws_private_key = value continue if lower_attr == 'amazonaccesskey': if aws_key == '': user_aws_key = value continue if lower_attr == 'amazonsecretkey': if aws_secret == '': user_aws_secret = value continue if lower_attr == 'rsapublickey': if rsa_public_key == '': user_rsa_public_key = value continue if lower_attr == 'clusterid': cluster_id = value continue if lower_attr == 'procid': proc_id = value continue if lower_attr == 'qdate': qdate = value continue if lower_attr in skip_attribs: continue sqs_data.class_ad += str(line) if lower_attr == 'globaljobid': continue if lower_attr == 'ec2region': if region == '': region = value if lower_attr == 'jobuniverse': grid_classad += 'JobUniverse = 9\n' grid_classad += 'Remote_JobUniverse = ' + str(value) + '\n' continue if lower_attr in int_reset_attribs: grid_classad += attribute + ' = 0\n' continue if lower_attr in float_reset_attribs: grid_classad += attribute + ' = 0.0\n' continue if lower_attr == 'jobstatus': grid_classad += attribute + ' = 1\n' continue if lower_attr == 'exitbysignal': grid_classad += attribute + ' = FALSE\n' continue if lower_attr == 'shouldtransferfiles': create_sandbox = value.lower() grid_classad += attribute + ' = "NO"\n' continue if lower_attr == 'transferexecutable': transfer_exe = value.lower() if 
# Queue naming: job_queue is the bare concatenation of cluster, proc and
# qdate (no separators) -- unique per submission, opaque to read.
# Sandbox rewriting: for transfer attributes when ShouldTransferFiles was
# 'yes', directory components are stripped so every file lands in the
# temporary execute directory on the remote side; the original full paths
# are collected in `files` (presumably for a later S3 upload -- the code
# that consumes `files` is not visible in this chunk).
lower_attr == 'cmd' or lower_attr == 'command': executable = value grid_classad += 'Cmd = "EC2: %s: %s"\n' % (route_name, value) continue grid_classad += str(line) job_queue = '%s%s%s' % (cluster_id, proc_id, qdate) sqs_data.class_ad += 'AmazonFullSQSQueueName = "%s"\n' % job_queue if delay != '': sqs_data.class_ad += 'amazonamishutdowndelay = %s\n' % delay sqs_data.class_ad += 'WantAWS = False\n' grid_classad += 'AmazonFullSQSQueueName = "%s"\n' % job_queue # Search through the class ad and make modifications to the files/paths # as necessary new_ad = '' files = [] for line in sqs_data.class_ad.split('\n'): match = grep('^([^=]*)\s*=\s*(.*)$', line) if match != None and match[0] != None and match[1] != None: attribute = match[0].strip() attr_lower = attribute.lower() value = match[1].strip() # Ignore files in /dev (like /dev/null) if grep('^"/dev/.*"', value) != None: continue # Remove quotes if they exist. This is a string, so try to split on # the '/' character. If a / exists, it's a file with a full path match = grep('^"(.*)"$', value) if match != None and match[0] != None: split_val = os.path.split(match[0]) # Replace these attributes in the job class ad or the AMI instance # will fail. Need to remove any reference to directories so all # files will be put in the temporary execute directory on the # machine executing the job if attr_lower in transfer_attribs and create_sandbox == 'yes': # Don't mess with the command if it won't be transfered to # the remote system. 
# Credential resolution: route-level keys win; otherwise fall back to the
# job-supplied user_* values, erroring out if neither is present.
# NOTE(review): the RSA-public-key branch reuses the error string
# 'Error: No Secret Key defined by the job or the route' -- almost certainly
# a copy/paste slip; it should presumably name the RSA public key.
This likely means the exe already exists # where the job will be executed if attr_lower == 'cmd' and transfer_exe == 'false': new_ad += line + '\n' continue if split_val[0] == '': files.append(iwd + '/' + match[0].strip() + '\n') elif os.path.exists(split_val[0]) == False: files.append(iwd + '/' + split_val[1] + '\n') else: files.append(match[0].strip() + '\n') new_ad += attribute + ' = "' + split_val[1].strip() + '"\n' continue # Set stdout/stderr files to be created in the sandox so they will # be transfered back if they are actual files. if (attr_lower == 'err' or attr_lower == 'out') and \ value != '/dev/null': new_ad += attribute + ' = "' + os.path.basename(split_val[1]) + '"\n' continue new_ad += line + '\n' sqs_data.class_ad = new_ad # Add user EC2 specific information to the grid class ad if not set by the # route if aws_public_key == '': if user_aws_public_key != '': grid_classad += 'AmazonPublicKey = "%s"\n' % str(user_aws_public_key) else: sys.stderr.write('Error: No Public Key defined by the job or the route') return(FAILURE) if aws_private_key == '': if user_aws_private_key != '': grid_classad += 'AmazonPrivateKey = "%s"\n' % str(user_aws_private_key) else: sys.stderr.write('Error: No Private Key defined by the job or the route') return(FAILURE) if aws_key == '': if user_aws_key != '': grid_classad += 'AmazonAccessKey = "%s"\n' % str(user_aws_key) aws_key_file = user_aws_key else: sys.stderr.write('Error: No Access Key defined by the job or the route') return(FAILURE) else: aws_key_file = aws_key if aws_secret == '': if user_aws_secret != '': grid_classad += 'AmazonSecretKey = "%s"\n' % str(user_aws_secret) aws_secret_file = user_aws_secret else: sys.stderr.write('Error: No Secret Key defined by the job or the route') return(FAILURE) else: aws_secret_file = aws_secret if rsa_public_key == '': if user_rsa_public_key != '': rsa_public_key_file = user_rsa_public_key else: sys.stderr.write('Error: No Secret Key defined by the job or the route') 
# Key-file reads: first line of each AWS credential file becomes the key
# value; the RSA public key file is opened and immediately closed --
# presumably just a readability check (TODO confirm; its contents are never
# read in this chunk).  Function continues beyond this view.
return(FAILURE) else: rsa_public_key_file = rsa_public_key sqs_data.class_ad += 'AmazonAccessKey = "%s"\n' % str(aws_key_file) sqs_data.class_ad += 'AmazonSecretKey = "%s"\n' % str(aws_secret_file) # Pull the specific keys out of the files if os.path.exists(rsa_public_key_file) == False: sys.stderr.write('Error: Unable to read RSA public key file') return(FAILURE) elif os.path.exists(aws_key_file) == False or \ os.path.exists(aws_secret_file) == False: sys.stderr.write('Error: Unable to read AWS key files') return(FAILURE) else: try: key_file = open(aws_key_file, 'r') aws_key_val = key_file.readlines()[0].strip() key_file.close() key_file = open(aws_secret_file, 'r') aws_secret_val = key_file.readlines()[0].strip() key_file.close() key_file = open(rsa_public_key_file, 'r') key_file.close() except IOError, e: sys.stderr.write("Error: Unable to open file") sys.stderr.write(str(e)) return(FAILURE)
# NOTE(review): whitespace-collapsed chunk.  This main() -- the cleanup hook
# that removes a finished job's SQS messages and S3 data -- is incomplete in
# this view: text is missing between the two pieces below (the second piece
# starts with 'return(FAILURE) continue' inside a message-draining loop whose
# head is not visible), and it ends mid-statement.  Only comments are added.
#
# Visible behavior:
#  * Read 'attr = value' classad lines from stdin, capturing s3bucketid,
#    s3keyid, AWS credential file paths, amazonfullsqsqueuename, ec2region.
#  * Both credential paths must exist; the first line of each file is the
#    key value (no with/try-finally; empty file raises IndexError).
#  * Build full_queue_name as '<access-key-value>-<queue_name>' and connect
#    to SQS in the configured region (Python 2 'except E, e' syntax).
def main(argv=None): if argv == None: argv = sys.argv bucket = '' key = '' queue_name = '' region = '' ret_val = SUCCESS # Read the class ad from stdin and store the S3 information for line in sys.stdin: match = grep('^([^=]*)\s*=\s*(.*)$', line.strip()) if match != None and match[0] != None and match[1] != None: attribute = match[0].strip().lower() val_match = grep('^"(.*)"$', match[1].strip()) if val_match != None and val_match[0] != None: value = val_match[0].strip() else: value = match[1].strip() if attribute == 's3bucketid': bucket = value continue if attribute == 's3keyid': key = value continue if attribute == 'amazonaccesskey': aws_key = value continue if attribute == 'amazonsecretkey': aws_secret = value continue if attribute == 'amazonfullsqsqueuename': queue_name = value continue if attribute == 'ec2region': region = value # Pull the specific keys out of the files if os.path.exists(aws_key) == False or \ os.path.exists(aws_secret) == False: sys.stderr.write('Error: Unable to read AWS key files') return(FAILURE) else: key_file = open(aws_key, 'r') aws_key_val = key_file.readlines()[0].strip() key_file.close() key_file = open(aws_secret, 'r') aws_secret_val = key_file.readlines()[0].strip() key_file.close() # Remove messages from SQS work_queue = None results_queue = None full_queue_name = '%s-%s' % (str(aws_key_val), queue_name) try: r_obj = AWSRegion.get_sqs_region(region) sqs_con = SQSConnection(aws_key_val, aws_secret_val, region=r_obj) except BotoServerError, error: sys.stderr.write('Error: Unable to connect to SQS: %s, %s\n' % (error.reason, error.body)) return(FAILURE)
# Resumed fragment (loop head missing from this view): while draining the
# queue, fall back to the message's own s3_bucket / s3_key when the input
# classad did not supply them -- trying 'S3KeyID' then lowercase 's3keyid'
# in the message classad -- then delete each message and keep reading.
# Finally connect to S3 (the statement is cut off at the end of this chunk).
return(FAILURE) continue # Grab the S3 bucket if it wasn't in the input classad if bucket == '': try: bucket = q_msg.s3_bucket except: # Message had no s3_bucket for some reason. sys.stderr.write('Error: Message has no S3 bucket\n') return(FAILURE) # Grab the S3 key if it wasn't defined already if key == '': if msg.s3_key == None: s3_key = grep('^S3KeyID\s*=\s*"(.+)"$', msg.class_ad) if s3_key == None or s3_key[0] == None: s3_key = grep('^s3keyid\s*=\s*"(.+)"$', msg.class_ad) if s3_key != None and s3_key[0] != None: key = s3_key[0] else: key = msg.s3_key # Delete the message. There may be more so keep processing queue.delete_message(q_msg) q_msg = queue.read() # Access S3 try: s3_con = S3Connection(aws_key_val, aws_secret_val)