def set_active_dar(self, scid, dar_id):
    # Also used for concurrency control.  There should be only one
    # active DAR per scenario.  If it is not empty and we are trying
    # to set another one, we return False.  If it was empty, there is
    # no active DAR underway, so if we are trying to clear it again
    # we also return False.
    self._lock_db.acquire()
    try:
        ss = models.ScenarioStatus.objects.get(scenario_id=scid)
        old_dar = ss.active_dar
        if dar_id and old_dar:
            raise IngestionError(
                "A DAR is already active for scenario " + repr(scid))
        if not dar_id and not old_dar:
            raise StopRequest('')
        ss.active_dar = dar_id
        ss.save()
    except StopRequest:
        return False
    except IngestionError as e:
        self._logger.error(repr(e))
        return False
    except Exception as e:
        self._logger.error(repr(e))
        # the DAR was not set, so report failure
        return False
    finally:
        self._lock_db.release()
    return True

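# Usage sketch (illustrative, not called from this module; `wfm` stands for
# the object exposing set_active_dar): a worker claims the active-DAR slot
# before submitting a DAR and always clears it afterwards, so at most one
# DAR runs per scenario.
#
#   if not wfm.set_active_dar(scid, dar_id):
#       raise IngestionError("scenario %s already has an active DAR" % scid)
#   try:
#       wait_for_download(scid, dar_url, dar_id, ncn_id)
#   finally:
#       wfm.set_active_dar(scid, None)
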
def run_scripts(self, sc_id, ncn_id, scripts_args):
    if not scripts_args:
        return 0
    n_errors = 0
    for script_arg in scripts_args:
        if check_status_stopping(sc_id):
            raise StopRequest("Stop Request")
        self._logger.info("Running script: %s" % script_arg[0])
        r = subprocess.call(script_arg)
        if 0 != r:
            n_errors += 1
            self._logger.error(
                repr(ncn_id) + ": script returned status: " + repr(r))
    return n_errors

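# Illustrative sketch (the paths are made-up assumptions): scripts_args is
# a sequence of argv-style lists, one per script invocation, each suitable
# for passing directly to subprocess.call.
#
#   scripts_args = [
#       ["/srv/ie/scripts/default_script.sh", "/data/dl/p0001/manifest.xml"],
#       ["/srv/ie/scripts/catreg.sh",         "/data/dl/p0001/manifest.xml"],
#   ]
#   n_errors = self.run_scripts(sc_id, ncn_id, scripts_args)
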
def ingestion_logic(scid, scenario_data):
    root_dl_dir = DownloadManagerController.Instance()._download_dir
    custom = scenario_data['extraconditions']
    eoids = scenario_data['dssids']

    if scenario_data['dsrc_type'] != DSRC_EOWCS_CHOICE:
        logger.warning('Data source type ' + scenario_data['dsrc_type'] +
                       ' is not implemented')

    if not os.access(root_dl_dir, os.R_OK | os.W_OK):
        raise IngestionError("Cannot write/read " + root_dl_dir)

    if 0 != DEBUG_MAX_DEOCS_URLS:
        logger.info(" DEBUG_MAX_DEOCS_URLS = " + repr(DEBUG_MAX_DEOCS_URLS))
    if 0 != DEBUG_MAX_GETCOV_URLS:
        logger.info(" DEBUG_MAX_GETCOV_URLS = " + repr(DEBUG_MAX_GETCOV_URLS))

    nreqs = 0
    # retval fields: (dl_errors, dl_dir, dar_url, dar_id, status, failed_dirs)
    retval = (0, None, None, None, "", None)
    scenario_data["sc_id"] = scid
    scenario_data["custom"] = custom
    ncn_id = scenario_data["ncn_id"]

    dl_requests = get_download_URLs(scenario_data, eoids)
    if not dl_requests:
        logger.warning(repr(ncn_id) + ": no GetCoverage requests generated")
        retval = (0, None, None, None, "NO_ACTION", None)
    else:
        if check_status_stopping(scid):
            raise StopRequest("Stop Request")
        nreqs = len(dl_requests)
        logger.info(repr(ncn_id) + ": Submitting " + repr(nreqs) +
                    " URLs to the Download Manager")
        dl_dir, dar_url, dar_id = request_download(
            scenario_data["ncn_id"], scid, dl_requests)
        dl_errors, failed_dirs, failed_urls = wait_for_download(
            scid, dar_url, dar_id, ncn_id)
        if len(failed_urls) > 0:
            logger.warning("Failed downloads for " + repr(ncn_id) + ":\n" +
                           '\n'.join(failed_urls))
        logger.info("Products for scenario " + ncn_id +
                    " downloaded to " + dl_dir)
        retval = (dl_errors, dl_dir, dar_url, dar_id, "OK", failed_dirs)

    return retval

def getDssList(scid, eo_dss_list, aoi_toi, wcs_type):
    # Get the list of datasets that overlap the bbox and time period.
    id_list = []
    req_bb, req_time = aoi_toi
    for dss in eo_dss_list:
        if scid and check_status_stopping(scid):
            raise StopRequest("Stop Request")
        timeperiod = extract_TimePeriod(dss)
        if timeperiod is None:
            logger.warning("Failed to extract time range from " + repr(dss))
            continue
        if not req_time.overlaps(timeperiod):
            continue
        bb1 = extract_WGS84bbox(dss)
        if bb1 is None:
            logger.warning("Failed to extract bb from " + repr(dss))
            continue
        if bb1.overlaps(req_bb):
            id_list.append(extract_Id(dss, wcs_type))
    return id_list

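# Minimal sketch (the real bbox class with its ll/ur corners lives in a
# helper module; this example function is an assumption, never called here).
# It illustrates the overlap test getDssList relies on: interval
# intersection on each axis.
def _example_bbox_overlaps(a_ll, a_ur, b_ll, b_ur):
    # Corners are (lon, lat) pairs: lower-left and upper-right.
    return (a_ll[0] <= b_ur[0] and b_ll[0] <= a_ur[0] and
            a_ll[1] <= b_ur[1] and b_ll[1] <= a_ur[1])
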
def urls_from_EOWCS(params, eoids):
    caps = get_caps_from_pf(params['dsrc'], True)
    if check_status_stopping(params["sc_id"]):
        raise StopRequest("Stop Request")
    if caps is None:
        raise IngestionError(
            "cannot get Capabilities from '" + params['dsrc'] + "'")
    service_version = extract_ServiceTypeVersion(caps).strip()
    wcs_type = determine_wcs_type(caps)
    aoi_toi = build_aoi_toi(
        params["aoi_bbox"], params['from_date'], params['to_date'])
    if len(eoids) > 0:
        # use only the dssids specified; don't look for more
        id_list = eoids
    else:
        # find all datasets that match the bbox and ToI
        wcseo_dss = extract_DatasetSeriesSummaries(caps, wcs_type)
        id_list = getDssList(params["sc_id"], wcseo_dss, aoi_toi, wcs_type)
    if IE_DEBUG > 0:
        logger.debug("wcs_type = " + repr(wcs_type))
    caps = None  # no longer needed
    md_urls = generate_MD_urls(params, service_version, id_list)
    if IE_DEBUG > 1:
        logger.debug("Qualified " + repr(len(md_urls)) + " md_urls")
    dl_requests = process_csDescriptions(
        params, aoi_toi, service_version, wcs_type, md_urls)
    return dl_requests

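# For orientation, the URL-generation pipeline implemented by the functions
# in this module (and its helpers) is roughly:
#
#   urls_from_EOWCS(params, eoids)
#       get_caps_from_pf          -- WCS GetCapabilities
#       getDssList                -- filter dataset series by AOI/ToI
#       generate_MD_urls          -- DescribeEOCoverageSet URLs
#       process_csDescriptions
#           gen_dl_urls           -- per-coverage GetCoverage URLs
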
def ingest_func(self, parameters):
    if IE_DEBUG > 0:
        self._logger.info(
            "wfm: executing INGEST_SCENARIO, id=" +
            repr(parameters["scenario_id"]))
    percent = 1
    sc_id = parameters["scenario_id"]
    ncn_id = None
    final_status = "OK"
    self._wfm.set_scenario_status(
        self._id, sc_id, 0, "GENERATING URLS", percent)
    try:
        scenario = models.Scenario.objects.get(id=sc_id)
        ncn_id = scenario.ncn_id.encode('ascii', 'ignore')
        cat_reg = scenario.cat_registration

        # ingestion_logic blocks until the DM has finished downloading
        self._wfm.set_ingestion_pid(sc_id, os.getpid())
        dl_errors, dl_dir, dar_url, dar_id, status, failed_dirs = \
            ingestion_logic(sc_id, models.scenario_dict(scenario))

        if check_status_stopping(sc_id):
            raise StopRequest("Stop Request")

        n_errors = 0
        if status == "NO_ACTION":
            final_status = "NOTHING INGESTED"
        else:
            if dar_id is None:
                raise IngestionError("No DAR generated")
            s2pre = scenario.s2_preprocess
            if s2pre != 'NO':
                # s2pre is functional only for local ingestion
                self._logger.error(
                    "S2 Preprocessor is not implemented for data from"
                    " the product facility. Hint: use local ingestion"
                    " instead.")
                s2pre = 'NO'
            n_errors = self.post_download_actions(
                sc_id, ncn_id, dl_dir, parameters["scripts"],
                cat_reg, s2pre, scenario.tar_result, failed_dirs)
        n_errors += dl_errors
        if n_errors > 0:
            raise IngestionError(
                repr(ncn_id) + ": ingestion encountered " +
                repr(n_errors) + " errors")

        # Finished
        if "OK" == final_status:
            d_str = time.strftime('%Y-%m-%d %H:%M', time.gmtime())
            final_status += ' ' + d_str
        self._wfm.set_scenario_status(self._id, sc_id, 1, final_status, 0)
        self._logger.info(repr(ncn_id) + ": ingestion completed.")
    except StopRequest:
        self._logger.info(
            repr(ncn_id) + ": Stop request from user: Ingestion Stopped")
        self._wfm.set_scenario_status(self._id, sc_id, 1, "STOPPED, IDLE", 0)
    except Exception as e:
        self._logger.error(
            repr(ncn_id) + " Error while ingesting: " + repr(e))
        self._wfm.set_scenario_status(self._id, sc_id, 1, "INGEST ERROR", 0)
        if IE_DEBUG > 0:
            traceback.print_exc(12, sys.stdout)
    finally:
        self._wfm.set_ingestion_pid(sc_id, 0)

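# Illustrative sketch (the values are assumptions; the keys are the ones
# ingest_func actually reads, and `worker` stands for the instance owning
# this method):
#
#   parameters = {
#       "scenario_id": 42,
#       "scripts": ["/srv/ie/scripts/default_script.sh"],
#   }
#   worker.ingest_func(parameters)
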
def post_download_actions(self,
                          scid, ncn_id, dl_dir, scripts,
                          cat_reg, s2pre, tar_result, failed_dirs):
    # For each product that was downloaded into its separate
    # directory, generate a product manifest for the ODA server,
    # and also split each downloaded product into its parts.
    # Then run the post-ingestion scripts.
    # TODO: the splitting could be done by the EO-WCS DM plugin
    #       instead of doing it here.
    dir_list = os.listdir(dl_dir)
    n_dirs = len(dir_list)
    n_errors = 0
    i = 1
    for d in dir_list:
        process = True
        for f in failed_dirs:
            if d in f:
                self._logger.info(
                    "Not processing dir (download had failed): " + f)
                process = False
                n_errors += 1
                break
        if not process:
            continue

        self._logger.info("Processing dir " + d)
        percent = 100 * (float(i) / float(n_dirs))
        # keep percent > 0 to ensure webpage updates
        if percent < 1.0:
            percent = 1
        self._wfm.set_scenario_status(
            self._id, scid, 0, "RUNNING SCRIPTS", percent)
        try:
            mf_name, metafiles = split_and_create_mf(
                dl_dir, d, ncn_id, self._logger)
        except Exception as e:
            self._logger.info("Exception: " + repr(e))
            mf_name = None
        if not mf_name:
            self._logger.info(
                "Error processing download directory " + repr(d))
            n_errors += 1
            continue

        # archive products that were downloaded
        for m in metafiles:
            archive_metadata(scid, m)

        scripts_args = self.mk_scripts_args(scripts, mf_name, cat_reg)
        n_errors += self.run_scripts(scid, ncn_id, scripts_args)
        i += 1

    # run the tar script if requested
    if tar_result:
        if check_status_stopping(scid):
            raise StopRequest("Stop Request")
        tar_script = os.path.join(IE_SCRIPTS_DIR, IE_TAR_RESULT_SCRIPT)
        script_arg = [tar_script, dl_dir]
        if cat_reg:
            script_arg.append(self.mk_catreg_arg())
        self._logger.info(repr(ncn_id) + ": running " + repr(script_arg))
        r = subprocess.call(script_arg)
        if 0 != r:
            n_errors += 1
            self._logger.error(
                repr(ncn_id) + ": tar script returned status: " + repr(r))
        else:
            self._logger.info(
                repr(ncn_id) + ": tar file is ready: " +
                dl_dir + IE_TAR_FILE_SUFFIX)
    return n_errors

def wait_for_download(scid, dar_url, dar_id, ncn_id, max_wait=None):
    """ scid may be None.
        Blocks until the DM reports that the DAR with this dar_url has
        completed all constituent individual product downloads.
    """
    if ncn_id is None:
        ncn_id = "(None)"
    set_status(scid, "Downloading", 1)
    request = get_dar_status(dar_url)
    if check_status_stopping(scid):
        stop_download(scid, request)
        raise StopRequest("Stop Request")
    if request is None:
        # wait and try again
        time.sleep(DAR_STATUS_INTERVAL)
        request = get_dar_status(dar_url)
    if request is None:
        time.sleep(1)
        request = get_dar_status(dar_url)
    if check_status_stopping(scid):
        stop_download(scid, request)
        raise StopRequest("Stop Request")
    if request is None:
        time.sleep(1)
        request = get_dar_status(dar_url)
    if request is None:
        if scid is not None:
            wfm_clear_dar(scid)
        raise DMError(
            "Bad DAR status from DM; no 'dataAccessRequests' found.")

    product_list = request["productList"]
    n_products = len(product_list)
    total_percent = n_products * 100
    all_done = False
    n_done = 0
    total_size = 0
    n_errors = 0
    failed_urls = []
    failed_dirs = []
    try:
        ts = time.time()
        tdiff = 0
        last_status = {}
        last_st_message = ""
        while not all_done:
            tdiff = time.time() - ts
            all_done = True
            if max_wait is not None and tdiff > max_wait:
                n_errors += 1
                logger.warning("Time-out waiting for download")
                break
            part_percent = 0
            n_done = 0
            n_errors = 0
            for product in product_list:
                if "productProgress" not in product:
                    continue
                dl_dir = product["downloadDirectory"]
                progress = product["productProgress"]
                dl_status = progress["status"]
                if dl_status == "IN_ERROR":
                    msg = progress.get("message", "(none)")
                    uuid = product.get("uuid", "(unknown)")
                    url = product.get("productAccessUrl", "(unknown)")
                    # count every product currently in error, but log
                    # each failure only the first time it is seen
                    n_errors += 1
                    n_done += 1
                    if url not in failed_urls:
                        failed_urls.append(url)
                        failed_dirs.append(dl_dir)
                        logger.info(
                            "Dl Manager reports 'IN_ERROR' for uuid " +
                            uuid + ", message: " + msg + "\n url: " + url)
                        logger.info("Dl Manager status: \n" +
                                    simplejson.dumps(product, indent=2))
                elif dl_status == "COMPLETED":
                    n_done += 1
                else:
                    all_done = False
                if IE_DEBUG > 0:
                    prod_uuid = product.get('uuid', 'unknown')
                    if not (prod_uuid in last_status and
                            last_status[prod_uuid] == dl_status):
                        logger.debug("Status from DM: " + repr(dl_status) +
                                     ", prod. uuid=" + repr(prod_uuid))
                    last_status[prod_uuid] = dl_status
                if "progressPercentage" not in progress:
                    part_percent += 100
                else:
                    part_percent += progress["progressPercentage"]
                if "downloadedSize" in progress:
                    total_size += progress["downloadedSize"]

            percent_done = int(
                (float(part_percent) / float(total_percent)) * 100)
            if percent_done < 1:
                percent_done = 1

            if all_done:
                if n_errors > 0:
                    set_status(scid,
                               repr(n_errors) + " errors during Dl.",
                               percent_done)
                else:
                    set_status(scid,
                               "Finished Dl. (" + repr(n_products) + ")",
                               percent_done)
                if total_size < 102400:
                    size_str = repr(total_size) + ' bytes'
                else:
                    size_str = repr(total_size / 1024) + ' kb'
                logger.info("Dl Manager reports downloaded " + size_str +
                            " in " + repr(n_products) + ' products')
                break
            elif check_status_stopping(scid):
                stop_download(scid, request)
                raise StopRequest("Stop Request")
            else:
                status_message = ("Downloading (" + repr(n_done) + '/' +
                                  repr(n_products) + ")")
                set_status(scid, status_message, percent_done)
                new_st_message = (ncn_id + " Status: " + status_message +
                                  " done: " + repr(percent_done) + "%")
                if new_st_message != last_st_message:
                    last_st_message = new_st_message
                    logger.info(new_st_message)
                if check_status_stopping(scid):
                    stop_download(scid, request)
                    raise StopRequest("Stop Request")
                # back off: poll less frequently the longer we wait
                sleep_time = DAR_STATUS_INTERVAL
                if tdiff > (32 * DAR_STATUS_INTERVAL):
                    sleep_time = 5 * DAR_STATUS_INTERVAL
                elif tdiff > (6 * DAR_STATUS_INTERVAL):
                    sleep_time = 2 * DAR_STATUS_INTERVAL
                time.sleep(sleep_time)
                request = get_dar_status(dar_url)
                if check_status_stopping(scid):
                    stop_download(scid, request)
                    raise StopRequest("Stop Request")
                product_list = request["productList"]

        # all done
        last_status = None
        if n_errors > 0:
            logger.info("Completed download with " + repr(n_errors) +
                        " errors")
    except StopRequest:
        logger.info("StopRequest while waiting for download")
        raise
    except Exception as e:
        logger.warning(
            "Unexpected exception in wait_for_download: " + repr(e))
        if IE_DEBUG > 0:
            traceback.print_exc(12, sys.stdout)
        raise
    finally:
        if scid is not None:
            wfm_clear_dar(scid)
    return n_errors, failed_dirs, failed_urls

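# Usage sketch (see ingestion_logic above for the real call site; the
# max_wait value here is an assumption, in seconds):
#
#   dl_dir, dar_url, dar_id = request_download(ncn_id, scid, dl_requests)
#   dl_errors, failed_dirs, failed_urls = wait_for_download(
#       scid, dar_url, dar_id, ncn_id, max_wait=3600)
#
# Note the polling back-off: the DM is polled every DAR_STATUS_INTERVAL at
# first, then at 2x and finally 5x that interval as the download drags on.
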
def process_csDescriptions(
        params, aoi_toi, service_version, wcs_type, md_urls):
    """ Input:
          md_urls is a tuple where each element is a pair containing a
            MetaData URL and its EOID: (MetaData_URL, EOID).
          aoi_toi is a tuple containing the Area-of-Interest bounding
            box and the Time-of-Interest time range.
        Each md_url is accessed in turn to get the metadata from the
        product facility.  The MD is expected to contain a
        wcseo:EOCoverageSetDescription with a number of
        coverageSetDescriptions.
    """
    logger.info("Processing " + repr(len(md_urls)) +
                " EOCoverageSetDescription urls.")
    base_url = params['dsrc'] + \
        "?" + SERVICE_WCS + \
        '&version=' + service_version + \
        "&" + WCS_GET_COVERAGE + \
        "&" + WCS_IMAGE_FORMAT
    if params['download_subset']:
        bbox = aoi_toi[0]
        if WCS_TYPE_DRAFT201 == wcs_type:
            base_url += \
                "&subset=Lat," + EPSG_4326 + \
                "(" + repr(bbox.ll[1]) + "," + repr(bbox.ur[1]) + ")" + \
                "&subset=Long," + EPSG_4326 + \
                "(" + repr(bbox.ll[0]) + "," + repr(bbox.ur[0]) + ")"
        else:
            base_url += \
                "&subset=lat(" + repr(bbox.ll[1]) + "," + \
                repr(bbox.ur[1]) + ")" + \
                "&subset=lon(" + repr(bbox.ll[0]) + "," + \
                repr(bbox.ur[0]) + ")" + \
                "&subsettingcrs=" + EPSG_4326

    dl_requests = []
    ndeocs = 0  # number of DescribeEOCoverageSet urls processed
    toteocs = float(len(md_urls))
    coastcache = None
    if should_check_coastline(params):
        shpfile = IE_30KM_SHPFILE
        prjfile = None
        try:
            coastcache = coastline_cache_from_aoi(
                shpfile, prjfile, aoi_toi[0])
        except Exception as e:
            logger.error(
                "NOT checking coastline due to error initialising"
                " coastline:\n" + repr(e))

    for md_url, eoid in md_urls:
        if check_status_stopping(params["sc_id"]):
            raise StopRequest("Stop Request")
        # make sure percent_done is > 0 to ensure webpage updates
        percent_done = (float(ndeocs) / toteocs) * 100.0
        if percent_done < 0.5:
            percent_done = 1.0
        set_status(params["sc_id"], "Create DAR: get MD", percent_done)
        logger.info("Processing MD for EOID " + repr(eoid))
        if 0 != DEBUG_MAX_DEOCS_URLS:
            if ndeocs > DEBUG_MAX_DEOCS_URLS:
                break
        ndeocs += 1
        dl_requests += gen_dl_urls(
            params, aoi_toi, base_url, md_url, eoid, coastcache, wcs_type)
        if 0 != DEBUG_MAX_GETCOV_URLS and dl_requests:
            dl_requests = dl_requests[:DEBUG_MAX_GETCOV_URLS]

    coastcache = None
    set_status(params["sc_id"], "Create DAR: get MD", 100)
    return dl_requests

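# Illustrative example (the expansions of the WCS_* and EPSG_4326 constants
# and the endpoint are assumptions): for a subset AOI of lon 10..11,
# lat 45..46 against a WCS 2.0 endpoint, base_url comes out roughly as
#
#   http://pf.example.com/wcs?service=wcs&version=2.0.1&request=GetCoverage
#       &format=image/tiff&subset=lat(45.0,46.0)&subset=lon(10.0,11.0)
#       &subsettingcrs=<EPSG 4326 CRS URI>
#
# and gen_dl_urls later appends "&CoverageId=<coverage id>" per coverage.
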
def gen_dl_urls(params, aoi_toi, base_url, md_url, eoid, ccache, wcs_type):
    """ params is the dictionary of input parameters.
        aoi_toi is a tuple containing the Area-of-Interest bounding box
          and the Time-of-Interest time range.
        md_url is the metadata url.
        This function generates download URLs:
          1. GetCoverage requests based on metadata from the
             DescribeEOCoverageSet request, depending on whether the
             metadata matches the scenario params, and
          2. other URL refs found in the metadata's product and mask
             elements:
               //eop:product//eop:fileName/ows:ServiceReference[@xlink:href]
               //eop:mask//eop:fileName/ows:ServiceReference[@xlink:href]
    """
    if IE_DEBUG > 0:
        logger.info("Generating DL-URLs from DescribeEOCoverageSet: '" +
                    md_url + "'")
    ret = []
    scid = params['sc_id']
    # cd_tree: coverage description tree extracted from the
    # metadata XML file
    (fp, cd_tree) = getXmlTree(md_url, EOCS_DESCRIPTION_TAG)
    if check_status_stopping(scid):
        if fp is not None:
            fp.close()
        raise StopRequest("Stop Request")
    if cd_tree is None:
        if fp is not None:
            fp.close()
        return ret

    if IE_DEBUG > 1:
        try:
            nreturned = cd_tree.attrib['numberReturned']
            nmatched = cd_tree.attrib['numberMatched']
            logger.info(" MD reports nreturned = " + repr(nreturned) +
                        ", nmatched = " + repr(nmatched))
        except KeyError:
            pass

    cds = get_coverageDescriptions(cd_tree)
    if len(cds) < 1:
        logger.warning("No CoverageDescriptions found in '" + md_url + "'")

    should_check_archived = True
    if 'check_arch' in params:
        should_check_archived = params['check_arch']

    failed = set()
    passed = 0
    for cd in cds:
        if check_status_stopping(scid):
            if fp is not None:
                fp.close()
            raise StopRequest("Stop Request")
        coverage_id = extract_CoverageId(cd)
        if coverage_id is None:
            logger.error("EOID " + repr(eoid) +
                         " Cannot find CoverageId in '" + md_url + "'")
            continue
        if IE_DEBUG > 2:
            logger.debug("  coverage_id=" + coverage_id)
        if should_check_archived and check_archived(scid, coverage_id):
            if IE_DEBUG > 0:
                logger.debug("  coverage_id='" + coverage_id +
                             "' is archived, not downloading.")
            continue
        if not check_bbox(cd, aoi_toi[0]):
            if IE_DEBUG > 2:
                logger.debug("  bbox check failed.")
            if IE_DEBUG > 0:
                failed.add('bbox')
            continue
        if not check_timePeriod(cd, aoi_toi[1], md_url, wcs_type):
            if IE_DEBUG > 2:
                logger.debug("  TimePeriod check failed.")
            failed.add('TimePeriod')
            continue
        if not check_text_condition(cd, params, 'sensor_type',
                                    xpaths_sensor(wcs_type)):
            if IE_DEBUG > 2:
                logger.debug("  sensor type check failed.")
            if IE_DEBUG > 0:
                failed.add('sensor_type')
            continue
        if not check_float_max(cd, params, 'view_angle',
                               xpaths_incidenceangle(wcs_type), True):
            if IE_DEBUG > 2:
                logger.debug("  incidence angle check failed.")
            if IE_DEBUG > 0:
                failed.add('view_angle')
            continue
        if not check_float_max(cd, params, 'cloud_cover',
                               xpaths_cloudcover(wcs_type)):
            if IE_DEBUG > 2:
                logger.debug("  cloud cover check failed.")
            if IE_DEBUG > 0:
                failed.add('cloud_cover')
            continue
        if not check_coastline(cd, coverage_id, params, ccache, wcs_type):
            if IE_DEBUG > 2:
                logger.debug("  coastline check failed.")
            if IE_DEBUG > 0:
                failed.add('coastline check')
            continue
        if not check_custom_conditions(cd, params):
            if IE_DEBUG > 2:
                logger.debug("  custom conds check failed.")
            if IE_DEBUG > 0:
                failed.add('custom conditions')
            continue
        passed += 1
        ret.append(base_url + "&CoverageId=" + coverage_id)
        # disabled, not supported by the ODA server for now:
        # ret.extend(extract_prods_and_masks(cd, True))

    if fp is not None:
        fp.close()
    cd_tree = None
    if IE_DEBUG > 0:
        logger.info("EOID " + repr(eoid) + " cov descriptions passed: " +
                    repr(passed) + " / " + repr(len(cds)))
    if IE_DEBUG > 0 and IE_DEBUG < 3:
        logger.info("EOID " + repr(eoid) +
                    " summary of conditions failed: " +
                    repr(list(failed)))
    if IE_DEBUG > 1:
        dbg_gen_urls = "\n  ".join(ret)
        logger.info("EOID " + repr(eoid) + " generated URLs:\n  " +
                    dbg_gen_urls)
    return ret

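# Illustrative sketch of the scenario dict consumed above (the values are
# assumptions; the keys are the ones read by urls_from_EOWCS,
# ingestion_logic, process_csDescriptions and gen_dl_urls):
#
#   params = {
#       "sc_id": 42,
#       "ncn_id": "scenario_0042",
#       "dsrc": "http://pf.example.com/wcs",
#       "dsrc_type": DSRC_EOWCS_CHOICE,
#       "aoi_bbox": ...,            # AOI bounding box
#       "from_date": ..., "to_date": ...,
#       "download_subset": True,
#       "dssids": [],               # explicit EOIDs; empty = discover
#       "extraconditions": [],      # custom metadata conditions
#       "sensor_type": ..., "view_angle": ..., "cloud_cover": ...,
#       "check_arch": True,
#   }
#   dl_requests = urls_from_EOWCS(params, eoids=[])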