def add_instance_record(cfg, conn):
    """Insert an ``instance_records`` row for the job described by ``cfg``.

    The record is stored on ``cfg['instance_record']`` before the insert is
    attempted, so it is available afterwards whether or not the insert worked.

    Args:
        cfg: Job configuration mapping. Reads ``proc_node_type`` and ``id``;
            writes ``instance_record``.
        conn: Database connection handed to ``query_database``.

    A failed insert is logged with its traceback and otherwise ignored.
    """
    log.debug('Adding instance record')

    if cfg['proc_node_type'] == 'CLOUD':
        record = {
            'instance_id': get_instance_id(),
            'instance_type': get_instance_type(),
        }
    else:
        # Every non-cloud node is identified by its hostname.
        record = {
            'instance_id': socket.gethostname(),
            'instance_type': 'on-prem',
        }
    record['local_queue_id'] = cfg['id']
    cfg['instance_record'] = record

    insert_sql = '''
        insert into instance_records
        (instance_id, local_queue_id, start_time, instance_type)
        values
        (%(instance_id)s, %(local_queue_id)s, current_timestamp, %(instance_type)s)
    '''
    try:
        query_database(conn=conn, query=insert_sql, params=record, commit=True)
    except Exception:
        log.exception("Instance record could not be inserted")
    else:
        log.info(
            "Instance record of instance %s and job %s inserted",
            record['instance_id'],
            record['local_queue_id'],
        )
def hyp3_process(cfg, n):
    """Run the hello-world ``proc_ci`` job described by ``cfg``.

    Unless ``cfg['skip_processing']`` is set, ``proc_ci --hello-world`` is run
    via ``hyp3proclib.process`` from inside ``cfg['workdir']``. The job is then
    marked successful and a ``PRODUCT`` directory is expected in the working
    directory. Any exception is logged and reported through
    ``hyp3proclib.failure``; the working directory is cleaned up either way.

    Args:
        cfg: Job configuration mapping. Reads ``skip_processing`` and
            ``workdir``; writes ``success`` and, when processing is skipped,
            appends to ``log``.
        n: Not used by this function.
    """
    try:
        log.info('Processing hello_world')
        if not cfg['skip_processing']:
            log.info(f'Process starting at {datetime.now()}')
            launch_dir = os.getcwd()
            os.chdir(cfg['workdir'])
            try:
                hyp3proclib.process(cfg, 'proc_ci', ["--hello-world"])
            finally:
                # Always leave the working directory again: it is removed
                # below, and a failed run must not strand the process inside
                # a directory that no longer exists.
                os.chdir(launch_dir)
        else:
            log.info('Processing skipped!')
            cfg['log'] += "(debug mode)"

        cfg['success'] = True
        hyp3proclib.update_completed_time(cfg)

        product_dir = os.path.join(cfg['workdir'], 'PRODUCT')
        if not os.path.isdir(product_dir):
            log.info(f'PRODUCT directory not found: {product_dir}')
            log.error('Processing failed')
            raise Exception('Processing failed: PRODUCT directory not found')

        # TODO: final product cleanup and upload to HyP3 DB

    except Exception as e:
        log.exception('ci processing failed!')
        # Plain info message: the traceback was already logged just above.
        log.info('Notifying user')
        hyp3proclib.failure(cfg, str(e))

    hyp3proclib.file_system.cleanup_workdir(cfg)

    log.info('ci done')
def send_email(
        to_address, subject, body, from_address="no-reply@asf-hyp3", retries=0,
        maximum_retries=0, mime_type="plain"):
    """Send an email through the local SMTP server, retrying on SMTP errors.

    The message is a ``multipart/related`` container holding a
    ``multipart/alternative`` part with a fixed plain-text placeholder and
    ``body`` labelled as ``text/html``. Addresses listed (comma separated) in
    the ``general``/``bcc`` configuration entry are added as extra recipients.

    Args:
        to_address: Recipient address.
        subject: Subject line.
        body: Message body; attached with content type ``text/html``.
        from_address: Sender address.
        retries: Number of attempts already made.
        maximum_retries: Highest value of ``retries`` that is still attempted.
        mime_type: Accepted for interface compatibility; the message layout
            described above is used regardless of its value.

    Returns:
        ``(True, None)`` when the message was handed to the SMTP server,
        ``(False, error_text)`` when the last allowed attempt failed, and
        ``(False, None)`` when called with ``retries`` already above
        ``maximum_retries``.
    """
    if retries > maximum_retries:
        log.critical(
            "Notification failed permanently (maximum retries reached)",
        )
        return False, None

    if retries == 0:
        log.info("Sending email")
    else:
        log.info("Retrying email")

    msg = MIMEMultipart('related')
    msg["Subject"] = subject
    msg["From"] = from_address
    msg["To"] = to_address
    msg.preamble = 'This is a multi-part message in MIME format.'

    alternatives = MIMEMultipart('alternative')
    msg.attach(alternatives)

    alternatives.attach(MIMEText('HyP3 product notification email'))

    html_part = MIMEText(body)
    html_part.replace_header('Content-Type', 'text/html')
    alternatives.attach(html_part)

    log.debug("Sending email from {0} to {1}".format(from_address, to_address))

    bcc_address = []
    bcc = get_config('general', 'bcc', default='')
    if len(bcc) > 0:
        bcc_address += bcc.split(',')
        log.debug("Bcc: " + str(bcc_address))

    error_text = None
    smtp = smtplib.SMTP("localhost")
    try:
        smtp.sendmail(from_address, [to_address] + bcc_address, msg.as_string())
    except smtplib.SMTPException as e:
        error_text = str(e)
        # NOTE(review): the statements between this log call and the
        # "maximum retries" check were garbled in the reviewed copy of the
        # file; they are reconstructed here and should be confirmed against
        # version control.
        log.error("Failed to notify user: " + error_text)
    else:
        smtp.quit()
    finally:
        # Release the socket on every path (failure included) so that a retry
        # does not pile a second connection on top of a dead one.
        smtp.close()

    if error_text is None:
        return True, None

    if retries >= maximum_retries:
        log.critical("Notification failed permanently (maximum retries reached)")
        return False, error_text

    return send_email(
        to_address, subject, body, from_address, retries + 1, maximum_retries,
        mime_type)
def check_stopfile(cfg, stopfile):
    """Shut the process down cleanly when a stop file is present.

    If ``stopfile`` exists as a regular file it is deleted, the lock file
    recorded in ``cfg`` is released through ``cleanup_lockfile`` and the
    process exits with status 0. Otherwise nothing happens.
    """
    stop_requested = os.path.isfile(stopfile)
    if not stop_requested:
        return

    log.info('Found stopfile: ' + stopfile)
    log.debug('Removing stopfile: ' + stopfile)
    os.remove(stopfile)

    log.info('Stopping')
    cleanup_lockfile(cfg)
    sys.exit(0)
def _process_all(self, total): for n in range(total): found = self._process_one(n) log.info('Processed {0}/{1} products.'.format(n + 1, total)) if self.sleep_time > 0: time.sleep(self.sleep_time) if not found and self.stop_if_none: break
def find_in_dir(dir_, all_strs, any_strs=("",)):
    """Return the first file path under ``dir_`` matching the given substrings.

    A path matches when it contains every string in ``all_strs`` and at least
    one string in ``any_strs``. The default ``any_strs`` holds the empty
    string, which every path contains. The substrings are tested against the
    joined path, not just the file name.

    Returns:
        The first matching path in ``os.walk`` order, or ``None`` when
        ``dir_`` is not a directory or nothing matches.
    """
    if not os.path.isdir(dir_):
        return None

    for root, _subdirs, names in os.walk(dir_):
        for name in names:
            candidate = os.path.join(root, name)
            if not any(token in candidate for token in any_strs):
                continue
            if not all(token in candidate for token in all_strs):
                continue
            log.info('Found: ' + candidate)
            return candidate

    return None
def cleanup_workdir(cfg):
    """Remove the job's working directory, then clean up the environment.

    When ``cfg`` has a ``workdir`` entry that is an existing directory, it is
    deleted recursively unless ``cfg['keep']`` is truthy. A ``workdir`` entry
    that does not point at a directory is reported with a warning.
    ``cleanup_env(cfg)`` is called in every case.

    Args:
        cfg: Job configuration mapping. Reads ``workdir`` (optional) and
            ``keep``.
    """
    if 'workdir' in cfg:
        workdir = cfg['workdir']
        if not os.path.isdir(workdir):
            # Logger.warn is a deprecated alias; use warning().
            log.warning('Could not clean the workdir, not found: ' + workdir)
        elif cfg['keep']:
            log.info('Not removing working directory: ' + workdir)
        else:
            log.info('Cleaning up working directory: ' + workdir)
            shutil.rmtree(workdir)

    cleanup_env(cfg)
def run(self):
    """Build the configuration and process the configured number of products.

    ``setup`` produces ``self.cfg``; the processing loop then runs inside the
    ``manage_instance_and_lockfile`` context manager.
    """
    self.cfg = setup(
        self.proc_name,
        cli_args=self.cli_args,
        sci_version=self.sci_version,
    )

    with manage_instance_and_lockfile(self.cfg):
        num_products = self.cfg['num_to_process']

        log.info('Starting')
        log.debug('Processing {0} products.'.format(num_products))

        self._process_all(num_products)

        log.info('Done')
def cleanup_lockfile(cfg):
    """Delete the lock file recorded in ``cfg['lock_file']``, if there is one.

    A missing ``lock_file`` key, an empty or ``None`` value, and a path that
    is not an existing file are each logged; none of them raises.

    Args:
        cfg: Job configuration mapping. Reads ``lock_file``.
    """
    if 'lock_file' not in cfg:
        log.warning('No lock_file set!')
        return

    lock_file = cfg['lock_file']
    if not lock_file:
        # Key is present but holds None or an empty value.
        log.info('Internal error: no lock file set.')
        return

    if os.path.isfile(lock_file):
        log.info('Removing lock file ' + lock_file)
        os.unlink(lock_file)
    else:
        log.warning('Lock file not found: ' + lock_file)
def log_instance_shutdown_in_hyp3_db(cfg):
    """Stamp this instance's row in ``instances`` with a shutdown time.

    Does nothing unless ``cfg['proc_node_type']`` is ``'CLOUD'``. Database
    errors are logged and otherwise ignored.
    """
    running_in_cloud = cfg['proc_node_type'] == 'CLOUD'
    if not running_in_cloud:
        return

    instance = get_instance_info(cfg)
    shutdown_sql = "update instances set shutdown_time = current_timestamp where id = (%(instance_id)s)"

    try:
        with get_db_connection('hyp3-db') as hyp3db_conn:
            query_database(hyp3db_conn, shutdown_sql, instance, commit=True)
    except Exception as err:
        log.error("Instance %s could not be updated with shutdown time", instance["instance_id"])
        log.error("Error was: %s", str(err))
    else:
        log.info("Instance %s was updated with shutdown time", instance["instance_id"])
def notify_user_failure(cfg, conn, msg):
    """Queue an email telling the user that processing failed.

    Nothing is queued when ``cfg['notify_fail']`` is ``False`` or when
    ``get_user_info`` reports that the user does not want email.

    Args:
        cfg: Job configuration mapping. Reads ``notify_fail``, ``sub_id``,
            ``subject_prefix``, ``other_granule_urls``, ``user_id``, ``id``
            and the optional ``granule_url`` and ``email_text``.
        conn: Database connection passed to ``get_user_info``,
            ``create_one_time_hash`` and ``queue_email``.
        msg: Captured error message; currently not included in the email.
    """
    if cfg['notify_fail'] is False:
        log.info('Notifications for failures not turned on.')
        return

    log.debug('Preparing to notify user of processing failure')

    username, email, wants_email, subscription_name, process_name = get_user_info(cfg, conn)

    if not wants_email:
        log.info("Email will not be sent to user {0} due to user preference".format(username))
        return

    log.debug('Notifying {0}...'.format(username))

    # The message is collected piecewise and joined once at the end.
    pieces = ["Hi, {0}\n\n".format(username)]

    if cfg['sub_id'] > 0:
        pieces.append(
            "Your subscription '{0}' attempted to process a product but failed.\n\n".format(subscription_name))
        subject = "[{0}] Failed processing for subscription '{1}'".format(cfg['subject_prefix'], subscription_name)
    else:
        pieces.append("Your one-time '{0}' processing request failed.\n\n".format(process_name))
        subject = "[{0}] Failed one-time processing for '{1}'".format(cfg['subject_prefix'], process_name)

    if 'granule_url' in cfg and len(cfg['granule_url']) > 0:
        pieces.append("You can download the original data here:<br>" + cfg['granule_url'] + "<br>")

        extra_urls = cfg['other_granule_urls']
        if extra_urls is not None:
            pieces.extend(url + "<br>" for url in extra_urls)

    if "email_text" in cfg and len(cfg["email_text"]) > 0:
        pieces.append("\n" + cfg["email_text"] + "\n\n")
    else:
        pieces.append("\n")

    if cfg['sub_id'] > 0:
        param = str(cfg['sub_id'])
        id_, hashval = create_one_time_hash(conn, 'disable_subscription', cfg['user_id'], param)
        pieces.append(
            "Disable this subscription:\n"
            "https://api.hyp3.asf.alaska.edu/onetime/disable_subscription?id="+str(id_)+"&key="+hashval+"\n\n")

    message = "".join(pieces)
    queue_email(conn, cfg['id'], email, subject, message)
def add_instance_to_hyp3_db(cfg):
    """Insert a row for this instance into the ``instances`` table.

    Does nothing unless ``cfg['proc_node_type']`` is ``'CLOUD'``. Database
    errors are logged and otherwise ignored.
    """
    running_in_cloud = cfg['proc_node_type'] == 'CLOUD'
    if not running_in_cloud:
        return

    instance = get_instance_info(cfg)
    insert_sql = 'insert into instances (id, start_time, process_id) values (%(instance_id)s, current_timestamp, %(process_id)s);'

    try:
        with get_db_connection('hyp3-db') as hyp3db_conn:
            query_database(conn=hyp3db_conn, query=insert_sql, params=instance, commit=True)
    except Exception as err:
        log.error("Instance %s could not be inserted into instances", instance['instance_id'])
        log.error("Error was: %s", str(err))
    else:
        log.info("Instance %s was inserted into instances", instance['instance_id'])
def get_db_connection(s, tries=0):
    """Open a psycopg2 connection using the settings of config section ``s``.

    Failed attempts are retried after a pause of ``30 * (attempt + 1)``
    seconds. Once the attempt numbered above 4 has failed, the error is logged
    with its traceback and re-raised.

    Args:
        s: Name of the configuration section. Its ``host``, ``db``, ``user``
            and ``pass`` entries are read with ``get_config``.
        tries: Number of attempts already made; callers normally leave this
            at 0.

    Returns:
        The connection object returned by ``psycopg2.connect``.

    Raises:
        Exception: whatever ``psycopg2.connect`` raised on the final attempt.
    """
    host = get_config(s, 'host')
    # Keyword arguments let psycopg2 do the quoting, so credentials that
    # contain quotes or backslashes cannot corrupt the connection string.
    connect_args = {
        'host': host,
        'dbname': get_config(s, 'db'),
        'user': get_config(s, 'user'),
        'password': get_config(s, 'pass'),
    }

    attempt = tries
    while True:
        try:
            conn = psycopg2.connect(**connect_args)
        except Exception as e:
            if attempt > 4:
                log.exception('DB connection problem: ' + str(e))
                raise

            delay = 30 * (attempt + 1)
            log.warning("Problem connecting to DB: " + str(e))
            log.info("Retrying in {0} seconds...".format(delay))
            time.sleep(delay)
            attempt += 1
        else:
            # Only report success once a connection actually exists.
            log.info("Connected to db: {0}".format(host))
            return conn
def update_instance_with_specific_gamma_id(cfg):
    """Update this instance's ``process_id`` in the ``instances`` table.

    Does nothing unless ``cfg['proc_node_type']`` is ``'CLOUD'``. Database
    errors are logged and otherwise ignored.
    """
    running_in_cloud = cfg['proc_node_type'] == 'CLOUD'
    if not running_in_cloud:
        return

    instance = get_instance_info(cfg)
    update_sql = 'update instances set process_id = %(process_id)s where id = %(instance_id)s'

    try:
        with get_db_connection('hyp3-db') as hyp3db_conn:
            query_database(conn=hyp3db_conn, query=update_sql, params=instance, commit=True)
    except Exception as err:
        log.error(
            "any_gamma instance %s could not be updated with specific gamma process id, %s",
            instance['instance_id'],
            cfg['proc_name'],
        )
        log.error("Error was: %s", str(err))
    else:
        log.info(
            "any_gamma instance %s was update with specific gamma process id, %s",
            instance['instance_id'],
            cfg['proc_name'],
        )
def update_instance_record(cfg, conn):
    """Set ``end_time`` on the instance record stored in ``cfg``.

    The record is the mapping kept under ``cfg['instance_record']``. When that
    key is absent only a debug message is logged. A failed update is logged
    with its traceback and otherwise ignored.

    Args:
        cfg: Job configuration mapping. Reads ``instance_record``.
        conn: Database connection handed to ``query_database``.
    """
    if 'instance_record' not in cfg:
        log.debug('No instance record found to update')
        return

    record = cfg['instance_record']
    update_sql = 'update instance_records set end_time=current_timestamp where (instance_id=%(instance_id)s and local_queue_id=%(local_queue_id)s);'

    try:
        query_database(conn=conn, query=update_sql, params=record, commit=True)
    except Exception:
        log.exception(
            "Instance record for instance %s and job %s could not be updated with job completion time",
            record['instance_id'],
            record['local_queue_id'],
        )
    else:
        log.info(
            "Instance record for instance %s and job %s had end_time updated with job completion time",
            record['instance_id'],
            record['local_queue_id'],
        )
def setup_workdir(cfg):
    """Choose (and normally create) the working directory, then ``chdir`` to it.

    If ``cfg['user_workdir']`` is truthy and ``cfg['workdir']`` is non-empty,
    that directory is used as is. Otherwise a fresh directory named
    ``<proc_name>_<pid>_<random>`` is created under ``cfg['workdir']`` and
    ``cfg['workdir']`` is updated to point at it. Processes whose name
    contains ``notify`` get no directory and the function returns early.

    Args:
        cfg: Job configuration mapping. Reads ``user_workdir``, ``workdir``
            and ``proc_name``; may overwrite ``workdir``.
    """
    if cfg['user_workdir'] and len(cfg['workdir']) > 0:
        wd = cfg['workdir']
        log.info('Using previous working directory (will not process)')
        log.info('Directory is: ' + wd)
    else:
        # Hack to avoid creating a bunch of pointless directories.
        # Only the process name is tested, so the random suffix of the
        # directory name can never trigger this by accident.
        if 'notify' in cfg['proc_name']:
            return

        dir_name = cfg['proc_name'] + '_' + str(os.getpid()) + '_' + random_string(6)
        wd = os.path.join(cfg['workdir'], dir_name)
        cfg['workdir'] = wd
        log.debug('Workdir is: ' + wd)

        if os.path.isdir(wd):
            log.warning('Working directory already exists! Removing...')
            shutil.rmtree(wd)

        log.info('Creating work directory: ' + wd)
        os.mkdir(wd)

    # Some of the processes are location dependent!
    os.chdir(wd)
def send_queued_emails():
    """Send every email whose ``email_queue`` status is ``QUEUED``.

    Each usable row is sent with ``send_email``. The row is then updated with
    status ``SENT`` or ``FAILED``, the message returned by ``send_email`` and
    the processing time. Rows without an id or recipients are skipped.
    """
    select_sql = '''
        select id, local_queue_id, recipients, subject, message,
               attachment_filename, attachment, mime_type
        from email_queue
        where status = 'QUEUED'
    '''
    update_sql = "update email_queue set status = %(status)s, system_message = %(msg)s, processed_time = current_timestamp where id = %(id)s"

    with get_db_connection('hyp3-db') as conn:
        recs = query_database(conn, select_sql)
        if len(recs) == 0:
            log.info('No emails to send')

        for row in recs:
            sendable = row and row[0] and row[2] and len(row[2]) > 0
            if not sendable:
                continue

            id_ = int(row[0])
            lqid = int(row[1]) if row[1] is not None else None
            to, subject, body = row[2], row[3], row[4]

            # A missing MIME type and the value "text" both mean "plain".
            mime_type = row[7] if row[7] is not None else "plain"
            if mime_type == "text":
                mime_type = "plain"

            log.info('Emailing ' + to + ' for lqid: ' + str(lqid))
            log.debug('Subject: ' + subject)

            ok, msg = send_email(to, subject, body, mime_type=mime_type)
            status = 'SENT' if ok else 'FAILED'

            log.debug('Updating status to ' + status)
            query_database(conn, update_sql, {'status': status, 'msg': msg, 'id': id_}, commit=True)
def check_lockfile_pid(lock_file):
    """Exit with status 0 unless ``lock_file`` holds this process's PID.

    The whole file content is compared against ``str(os.getpid())``. On a
    mismatch both values are logged and the process exits.
    """
    own_pid = str(os.getpid())

    with open(lock_file, 'r') as handle:
        recorded_pid = handle.read()

    if str(recorded_pid) == str(own_pid):
        return

    log.info('Lock file does not contain process PID')
    log.info('Process PID: "{}" File PID: "{}"'.format(own_pid, recorded_pid))
    log.info('Exiting without cleaning')
    sys.exit(0)
def check_lockfile(cfg):
    """Acquire the per-process lock file or exit.

    The lock path ``<lock_dir>/<proc_name>.lock`` is stored in
    ``cfg['lock_file']``. If the file already exists the process exits with
    status 0. Otherwise it is created exclusively, this process's PID is
    written to it, and the content is read back as a check. Failure to create
    or verify the file exits with status 1.

    Args:
        cfg: Job configuration mapping. Reads ``lock_dir`` and ``proc_name``;
            writes ``lock_file``.
    """
    lock_dir = cfg['lock_dir']
    lock_file = os.path.join(lock_dir, cfg['proc_name'] + '.lock')
    cfg['lock_file'] = lock_file

    if os.path.isfile(lock_file):
        log.info('Lock file exists: ' + lock_file)
        log.info('Exiting -- already running.')
        sys.exit(0)

    # O_CREAT together with O_EXCL closes the gap between the check above and
    # the creation here: when two processes race, only one open succeeds.
    handle = None
    try:
        descriptor = os.open(lock_file, os.O_WRONLY | os.O_CREAT | os.O_EXCL)
        handle = os.fdopen(descriptor, 'w')
    except Exception as err:
        log.warning('Failed to open lock file: ' + str(err))

    if not handle:
        log.error('Could not open lock file: ' + lock_file)
        sys.exit(1)

    pid = str(os.getpid())
    handle.write(pid)
    handle.close()

    # Now check the file just in case...
    with open(lock_file, 'r') as check:
        written = check.read()

    if written == pid:
        log.info('Acquired lock file, PID is ' + pid)
    else:
        log.error('Failed to correctly initialize lock file')
        sys.exit(1)
def process_insar(cfg, n):
    """Process one ISCE InSAR granule pair and publish the zipped product.

    The pair is ``cfg['granule']`` and ``cfg['other_granules'][0]``, ordered
    by ``earlier_granule_first``. After the ISCE script has run, the
    ``PRODUCT`` directory in the working directory is renamed to the output
    name, decorated, zipped, recorded and uploaded. Any exception is logged
    and reported through ``failure``; the working directory is cleaned up in
    every case.

    Args:
        cfg: Job configuration mapping. Reads, among others, ``sub_name``,
            ``username``, ``granule``, ``other_granules``, ``workdir`` and
            ``suffix``; writes ``ifm``, ``email_text``, ``out_path``,
            ``attachment``, ``final_product_size`` and
            ``original_product_size``.
        n: Not used by this function.
    """
    try:
        log.info('Processing ISCE InSAR pair "{0}" for "{1}"'.format(cfg['sub_name'], cfg['username']))

        g1, g2 = earlier_granule_first(cfg['granule'], cfg['other_granules'][0])

        list_file = 'list.csv'
        write_list_file(os.path.join(cfg['workdir'], list_file), g1, g2)

        # Characters 17..24 of each granule name are parsed as a YYYYMMDD date.
        d1, d2 = g1[17:25], g2[17:25]
        end_date = datetime.datetime.strptime(d2, '%Y%m%d')
        start_date = datetime.datetime.strptime(d1, '%Y%m%d')
        delta = (end_date - start_date).days

        ifm_dir = d1 + '_' + d2
        cfg['ifm'] = ifm_dir
        log.debug('IFM dir is: ' + ifm_dir)

        sd1 = '-'.join((d1[0:4], d1[4:6], d1[6:8]))
        sd2 = '-'.join((d2[0:4], d2[4:6], d2[6:8]))
        cfg["email_text"] = "This is a {0}-day InSAR pair from {1} to {2}.".format(delta, sd1, sd2)

        subswath = get_extra_arg(cfg, "subswath", "0")
        if subswath == "0":
            process(cfg, 'procAllS1StackISCE.py', ["-90", "90", "-180", "180", "-f", list_file, "-d"])
        else:
            process(cfg, 'procS1StackISCE.py', ["-f", list_file, "-d", "-s", subswath])

        subdir = os.path.join(cfg['workdir'], 'PRODUCT')
        if not os.path.isdir(subdir):
            log.info('PRODUCT directory not found: ' + subdir)
            log.error('Processing failed')
            raise Exception("Processing failed: PRODUCT directory not found")

        looks = get_looks(subdir)
        out_name = build_output_name_pair(g1, g2, cfg['workdir'], looks + "-iw" + subswath + cfg['suffix'])
        log.info('Output name: ' + out_name)

        out_path = os.path.join(cfg['workdir'], out_name)
        zip_file = out_path + '.zip'

        # Clear anything already sitting at the target paths.
        if os.path.isdir(out_path):
            shutil.rmtree(out_path)
        if os.path.isfile(zip_file):
            os.unlink(zip_file)
        cfg['out_path'] = out_path

        log.debug('Renaming '+subdir+' to '+out_path)
        os.rename(subdir, out_path)

        find_browses(cfg, out_path)
        cfg['attachment'] = find_phase_png(out_path)
        add_esa_citation(g1, out_path)
        zip_dir(out_path, zip_file)

        cfg['final_product_size'] = [os.stat(zip_file).st_size]
        cfg['original_product_size'] = 0

        with get_db_connection('hyp3-db') as conn:
            record_metrics(cfg, conn)
            upload_product(zip_file, cfg, conn)
            success(conn, cfg)

    except Exception as err:
        log.exception('Processing failed')
        log.info('Notifying user')
        failure(cfg, str(err))

    cleanup_workdir(cfg)

    log.info('Done')
# notify_user(product_url, queue_id, cfg, conn)
#
# Builds an HTML notification email for a finished product and hands it to
# queue_email() when get_user_info() reports that the user wants email.
# When the 'general'/'bcc' config entry is non-empty, its first address is
# selected as an extra recipient for a separate queued copy.
#
# - Title and subject wording depend on whether cfg['sub_id'] > 0
#   (subscription) or not (one-time request).
# - When cfg['sub_id'] > 0, a one-time 'disable_subscription' link is created
#   with create_one_time_hash() and appended to the message.
# - queue_id is not referenced in the body; cfg['id'] is what gets passed to
#   queue_email().
#
# NOTE(review): the docstring says the function returns True/False, but no
# return statement is visible in the body -- confirm what callers expect.
# NOTE(review): the statement after "log.debug('Queueing email for BCC user: "
# is garbled ('******') in this copy of the file; restore it from version
# control before relying on the BCC path.
def notify_user(product_url, queue_id, cfg, conn): """Email a user notifying them of a finished product. Takes the name of a finished product, download link for the finished product, subscription ID for the product, the configuration parameters, and a database connection, and emails the user to notify them that the product has been finished and provide them with the download links for both the finished product and the original granule. This function return True upon success and False upon failure. """ username, email, wants_email, subscription_name, process_name = get_user_info(cfg, conn) if cfg['sub_id'] > 0: title = "A new '{0}' product for your subscription '{1}' is ready.".format(process_name, subscription_name) subject = "[{0}] New product for subscription '{1}'".format(cfg['subject_prefix'], subscription_name) else: title = "A new product for your '{0}' one-time processing request has been generated.".format(process_name) subject = "[{0}] New {1} product available".format(cfg['subject_prefix'], process_name) message = get_email_header(title) message += "<p>Hello HyP3-User!" 
message += "<p>" + title + "\n" if 'description' in cfg and cfg['description'] and len(cfg['description']) > 0: message += "<p>" + escape(cfg['description'], quote=False).replace('\n', '<br>') + "<br>\n" if process_name != "Notify Only": message += '<p>You can download it here:<br><a href="{0}">{1}</a><br><br>\n'.format(product_url, cfg['filename']) if 'browse_url' in cfg and cfg['browse_url'] is not None and len(cfg['browse_url']) > 0: message += '<center><a href="{0}"><img src="{1}" width="80%" border="0"/></a></center><br>\n'.format(cfg['browse_url'], cfg['browse_url']) if 'final_product_size' in cfg: sz = cfg['final_product_size'][0] mb = float(sz)/1024.0/1024.0 message += "<p>Size: %.2f MB<br><br>\n" % mb message += "You can find all of your products at the HyP3 website:<br>{0}/products<br>\n".format(cfg['hyp3_product_url']) if 'granule_url' in cfg and len(str(cfg['granule_url'])) > 0 and 'Subscription: ' not in str(cfg['granule_url']): message += "<p>You can download the original data from the ASF datapool here:<br>" + urlify(cfg['granule_url']) + "<br>\n" if 'other_granule_urls' in cfg and cfg['other_granule_urls'] is not None: for url in cfg['other_granule_urls']: message += urlify(url) + "<br>\n" if 'SLC' in cfg['granule']: message += '<p>View this stack in the ASF baseline tool:<br>' message += 'http://baseline.asf.alaska.edu/#baseline?granule={0}\n'.format(cfg['granule']) else: message += "<p>You can download it here:<br>" + urlify(product_url) + "<br>" if "email_text" in cfg and len(cfg["email_text"]) > 0: message += "<p>" + cfg["email_text"] + "<br>" if 'process_time' in cfg: message += process_name + " processing time: " + str(datetime.timedelta(seconds=int(cfg['process_time']))) + "<br>\n" if cfg['sub_id'] > 0: param = str(cfg['sub_id']) id_, hashval = create_one_time_hash(conn, 'disable_subscription', cfg['user_id'], param) message += "<p>Done with this subscription? 
Disable it with this link:<br>" \ "https://api.hyp3.asf.alaska.edu/onetime/disable_subscription?id="+str(id_)+"&key="+hashval+"<br><br>\n" message += get_email_footer() # message += "Hostname: " + socket.gethostname() + "\n" if wants_email: log.info('Emailing: ' + email) queue_email(conn, cfg['id'], email, subject, usr(message, username)) else: log.info("Email will not be sent to user {0} due to user preference".format(username)) bcc = get_config('general', 'bcc', default='') if len(bcc) > 0: # We only have to do the first one, the rest will be bcc'ed :) addr = bcc.split(',')[0] log.debug('Queueing email for BCC user: '******'id'], addr, subject, usr(message, username))
def find_rtc_zip(dir_, orbit):
    """Locate the RTC zip file for ``orbit`` somewhere under ``dir_``.

    The search is delegated to ``find_in_dir``, looking for a path that
    contains ``".zip"``, ``"AP_"`` and ``orbit``.

    Args:
        dir_: Directory to search.
        orbit: Orbit identifier (a string) that must appear in the path.

    Returns:
        The path reported by ``find_in_dir``, or ``None`` when it found
        nothing.
    """
    log.debug('Orbit: ' + orbit)

    rtc_zip = find_in_dir(dir_, [".zip", "AP_", orbit])
    if rtc_zip is None:
        # Concatenating None into the "Found" message would raise TypeError
        # and hide the real problem, so report the miss explicitly.
        log.warning("RTC zip not found in %s for orbit %s", dir_, orbit)
        return None

    log.info("Found RTC zip: " + rtc_zip)
    return rtc_zip
def check_lockfile_exists(lock_file):
    """Exit with status 0 when ``lock_file`` is not an existing file.

    Returns normally (with ``None``) when the lock file is present.
    """
    if os.path.isfile(lock_file):
        return

    log.info('Lock file does not exist')
    log.info('Stopping')
    sys.exit(0)