    def set_active_dar(self, scid, dar_id):
        # Also used for concurrency control: there must be at most one
        # active DAR per scenario. If a DAR is already active and we try
        # to set another one, we return False. Conversely, if no DAR is
        # active and we try to clear it, we also return False.
        #
        self._lock_db.acquire()
        try:
            ss = models.ScenarioStatus.objects.get(scenario_id=scid)
            old_dar = ss.active_dar
            if dar_id and old_dar:
                raise IngestionError(
                    "A DAR is already active for scenario " + repr(scid))
            if not dar_id and not old_dar:
                raise StopRequest('')
            ss.active_dar = dar_id
            ss.save()

        except StopRequest:
            return False

        except IngestionError as e:
            self._logger.error(repr(e))
            return False

        except Exception as e:
            self._logger.error(repr(e))
            # the update did not happen, so do not report success
            return False

        finally:
            self._lock_db.release()
        return True
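
# A minimal sketch (not from the original source) of the guard protocol that
# set_active_dar implements: at most one active DAR per scenario, and
# clearing an already-empty slot is rejected too. The in-memory dict below
# stands in for the Django ScenarioStatus model; all names are hypothetical.
import threading

_guard_lock = threading.Lock()
_active = {}  # scenario id -> currently active dar id, or None

def set_active_dar_sketch(scid, dar_id):
    """Return False on a set or clear conflict, True otherwise."""
    with _guard_lock:
        old = _active.get(scid)
        if dar_id and old:
            return False  # another DAR is already active
        if not dar_id and not old:
            return False  # nothing to clear
        _active[scid] = dar_id
    return True
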
    def run_scripts(self, sc_id, ncn_id, scripts_args):
        if not scripts_args: return 0

        n_errors = 0
        for script_arg in scripts_args:
            if check_status_stopping(sc_id):
                raise StopRequest("Stop Request")

            self._logger.info("Running script: %s" % script_arg[0])
            r = subprocess.call(script_arg)
            if 0 != r:
                n_errors += 1
                self._logger.error(
                    repr(ncn_id) + ": script returned status: " + repr(r))
        return n_errors
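
# Illustrative usage (not from the original source): run_scripts expects
# scripts_args to be a list of argv-style lists, one per post-ingestion
# script with the executable first. subprocess.call returns each script's
# exit status, and every non-zero status counts as one error. The paths
# below are made up:
#
#   scripts_args = [
#       ["/path/to/register.sh", "/data/downloads/manifest.txt"],
#       ["/path/to/notify.sh",   "/data/downloads/manifest.txt"],
#   ]
#   n_errors = self.run_scripts(sc_id, ncn_id, scripts_args)
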
def ingestion_logic(scid, scenario_data):
    root_dl_dir = DownloadManagerController.Instance()._download_dir
    custom = scenario_data['extraconditions']

    eoids = scenario_data['dssids']
    if scenario_data['dsrc_type'] != DSRC_EOWCS_CHOICE:
        logger.warning('Data source type ' + scenario_data['dsrc_type'] +
                       ' is not implemented')

    if not os.access(root_dl_dir, os.R_OK | os.W_OK):
        raise IngestionError("Cannot write/read " + root_dl_dir)

    if 0 != DEBUG_MAX_DEOCS_URLS:
        logger.info(" DEBUG_MAX_DEOCS_URLS  = " + repr(DEBUG_MAX_DEOCS_URLS))
    if 0 != DEBUG_MAX_GETCOV_URLS:
        logger.info(" DEBUG_MAX_GETCOV_URLS = " + repr(DEBUG_MAX_GETCOV_URLS))

    nreqs = 0
    retval = (0, None, None, None, "", None)
    scenario_data["sc_id"] = scid
    scenario_data["custom"] = custom
    ncn_id = scenario_data["ncn_id"]
    dl_requests = get_download_URLs(scenario_data, eoids)
    if not dl_requests:
        logger.warning(repr(ncn_id) + ": no GetCoverage requests generated")
        retval = (0, None, None, None, "NO_ACTION", None)
    else:
        if check_status_stopping(scid):
            raise StopRequest("Stop Request")

        nreqs = len(dl_requests)
        logger.info(repr(ncn_id) + ": Submitting " + repr(nreqs) +
                    " URLs to the Download Manager")
        dl_dir, dar_url, dar_id = \
            request_download(scenario_data["ncn_id"], scid, dl_requests)
        dl_errors, failed_dirs, failed_urls = wait_for_download(
            scid, dar_url, dar_id, ncn_id)
        if len(failed_urls) > 0:
            logger.warning("Failed downloads for "+`ncn_id`+":\n" +\
                                 '\n'.join(failed_urls))

        logger.info("Products for scenario " + ncn_id + " downloaded to " +
                    dl_dir)
        retval = (dl_errors, dl_dir, dar_url, dar_id, "OK", failed_dirs)

    return retval
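
# Illustrative note (not from the original source): ingestion_logic returns
# a six-element tuple. A namedtuple with our own (hypothetical) field names
# makes the positions easier to read:
from collections import namedtuple

IngestionResult = namedtuple(
    "IngestionResult",
    ["dl_errors", "dl_dir", "dar_url", "dar_id", "status", "failed_dirs"])

# e.g. the "nothing to do" case above corresponds to
# IngestionResult(0, None, None, None, "NO_ACTION", None)
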
def getDssList(scid, eo_dss_list, aoi_toi, wcs_type):
    # get list of datasets that overlap bbox and timeperiod
    id_list = []
    req_bb, req_time = aoi_toi
    for dss in eo_dss_list:
        if scid and check_status_stopping(scid):
            raise StopRequest("Stop Request")

        timeperiod = extract_TimePeriod(dss)
        if None == timeperiod:
            logger.warning("Failed to extract time range from " + ` dss `)
            continue
        if not req_time.overlaps(timeperiod):
            continue

        bb1 = extract_WGS84bbox(dss)
        if None == bb1:
            logger.warning("Failed to extract bb from " + ` dss `)
            continue
        if bb1.overlaps(req_bb):
            id_list.append(extract_Id(dss, wcs_type))

    return id_list
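
# A self-contained sketch (not from the original source) of the two overlap
# tests getDssList relies on, modelling time periods as closed intervals and
# bounding boxes as (min_lon, min_lat, max_lon, max_lat) tuples; the objects
# in the source expose the same tests as an overlaps() method.

def intervals_overlap(a_start, a_end, b_start, b_end):
    # two closed intervals overlap iff each one starts before the other ends
    return a_start <= b_end and b_start <= a_end

def bboxes_overlap(a, b):
    # boxes overlap iff they overlap on both the lon and the lat axes
    return (intervals_overlap(a[0], a[2], b[0], b[2]) and
            intervals_overlap(a[1], a[3], b[1], b[3]))
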
def urls_from_EOWCS(params, eoids):

    caps = get_caps_from_pf(params['dsrc'], True)

    if check_status_stopping(params["sc_id"]):
        raise StopRequest("Stop Request")

    if None == caps:
        raise IngestionError("cannot get Capabilities from '" +
                             params['dsrc'] + "'")

    service_version = extract_ServiceTypeVersion(caps).strip()
    wcs_type = determine_wcs_type(caps)

    aoi_toi = build_aoi_toi(params["aoi_bbox"], params['from_date'],
                            params['to_date'])

    if len(eoids) > 0:
        # use only the dssids specified, don't look for more.
        id_list = eoids
    else:
        # find all datasets that match the bbox and Toi
        wcseo_dss = extract_DatasetSeriesSummaries(caps, wcs_type)
        id_list = getDssList(params["sc_id"], wcseo_dss, aoi_toi, wcs_type)

    if IE_DEBUG > 0:
        logger.debug("wcs_type = " + ` wcs_type `)

    caps = None  # no longer needed

    md_urls = generate_MD_urls(params, service_version, id_list)
    if IE_DEBUG > 1:
        logger.debug("Qualified " + ` len(md_urls) ` + " md_urls")
    dl_requests = process_csDescriptions(params, aoi_toi, service_version,
                                         wcs_type, md_urls)
    return dl_requests
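
# Illustrative note (not from the original source): urls_from_EOWCS is a
# pipeline, capabilities -> (service version, wcs type) -> dataset ids ->
# metadata URLs -> GetCoverage URLs, where the dataset ids are either taken
# verbatim from the request or discovered from the capabilities. That
# choice in isolation (names are ours, not the project's):

def choose_dataset_ids(eoids, discover):
    """discover is a zero-argument callable, consulted only when no
    explicit ids were requested."""
    return eoids if len(eoids) > 0 else discover()
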
    def ingest_func(self, parameters):
        if IE_DEBUG > 0:
            self._logger.info(
                "wfm: executing INGEST_SCENARIO, id=" +
                repr(parameters["scenario_id"]))

        percent = 1
        sc_id = parameters["scenario_id"]
        ncn_id = None
        final_status = "OK"
        self._wfm.set_scenario_status(
            self._id, sc_id, 0, "GENERATING URLS", percent)
        try:
            scenario = models.Scenario.objects.get(id=sc_id)
            ncn_id   = scenario.ncn_id.encode('ascii','ignore')
            cat_reg  = scenario.cat_registration

            # ingestion_logic blocks until DM is finished downloading
            self._wfm.set_ingestion_pid(sc_id, os.getpid())
            dl_errors, dl_dir, dar_url, dar_id, status, failed_dirs = \
                ingestion_logic(sc_id, models.scenario_dict(scenario))

            if check_status_stopping(sc_id):
                raise StopRequest("Stop Request")

            n_errors = 0
            if status == "NO_ACTION":
                final_status = "NOTHING INGESTED"
            else:
                if None == dar_id:
                    raise IngestionError("No DAR generated")

                s2pre = scenario.s2_preprocess
                if s2pre != 'NO':
                    # s2pre is functional only for local ingestion
                    self._logger.error(
                        "S2 Preprocessor is not implemented for data from product facility."+
                        " Hint: use local ingestion instead")
                    s2pre = 'NO'

                n_errors = self.post_download_actions(
                    sc_id,
                    ncn_id,
                    dl_dir,
                    parameters["scripts"],
                    cat_reg,
                    s2pre,
                    scenario.tar_result,
                    failed_dirs)

            n_errors += dl_errors
            if n_errors > 0:
                raise IngestionError(
                    repr(ncn_id) + ": ingestion encountered " +
                    repr(n_errors) + " errors")

            # Finished
            if "OK" == final_status:
                d_str = time.strftime('%Y-%m-%d %H:%M', time.gmtime())
                final_status += ' ' + d_str
            self._wfm.set_scenario_status(self._id, sc_id, 1, final_status, 0)
            self._logger.info(`ncn_id`+": ingestion completed.")

        except StopRequest:
            self._logger.info(
                repr(ncn_id) + ": Stop request from user: Ingestion Stopped")
            self._wfm.set_scenario_status(self._id, sc_id, 1, "STOPPED, IDLE", 0)

        except Exception as e:
            self._logger.error(repr(ncn_id) + " Error while ingesting: " + repr(e))
            self._wfm.set_scenario_status(self._id, sc_id, 1, "INGEST ERROR", 0)
            if IE_DEBUG > 0:
                traceback.print_exc(12, sys.stdout)

        finally:
            self._wfm.set_ingestion_pid(sc_id, 0)
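
# A bare sketch (not from the original source) of the bracketing pattern
# ingest_func uses: record the worker pid so the scenario can be stopped
# from outside, and guarantee the pid is cleared again whether the work
# ends in success, a stop request, or an error. All names are hypothetical.
import os

def with_recorded_pid(record_pid, clear_pid, work):
    """record_pid, clear_pid and work are callables supplied by the caller."""
    record_pid(os.getpid())
    try:
        return work()
    finally:
        clear_pid()  # runs on success and on any exception
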
    def post_download_actions(self,
                              scid,
                              ncn_id,
                              dl_dir,
                              scripts,
                              cat_reg,
                              s2pre,
                              tar_result,
                              failed_dirs):
        # For each product that was downloaded into its separate
        # directory, generate a product manifest for the ODA server,
        # and also split each downloaded product into its parts.
        # Then run the post-ingestion scripts.
        # TODO: the splitting could be done by the EO-WCS DM plugin
        #       instead of doing it here
        dir_list = os.listdir(dl_dir)
        n_dirs = len(dir_list)
        n_errors = 0
        i = 1
        for d in dir_list:
            process = True
            for f in failed_dirs:
                if d in f:
                    self._logger.info("Not proceesing dir (download had failed): " + f)
                    process = False
                    n_errors += 1
                    break

            if not process:
                continue
            self._logger.info("Processing dir " + d)
            
            percent  = 100 * (float(i) / float(n_dirs))
            # keep percent > 0 to ensure webpage updates
            if percent < 1.0: percent = 1
            self._wfm.set_scenario_status(self._id, scid, 0, "RUNNING SCRIPTS", percent)

            try:
                mf_name, metafiles = split_and_create_mf(
                    dl_dir, d, ncn_id, self._logger)
            except Exception as e:
                self._logger.info("Exception" + `e`)
                mf_name = None
            if not mf_name:
                self._logger.info("Error processing download directory " + `d`)
                n_errors += 1
                continue

            # archive products that were downloaded
            for m in metafiles:
                archive_metadata(scid, m)

            scripts_args = self.mk_scripts_args(
                scripts, mf_name, cat_reg)
            n_errors += self.run_scripts(scid, ncn_id, scripts_args)

            i += 1

        # run the tar script if requested
        if tar_result:
            if check_status_stopping(scid):
                raise StopRequest("Stop Request")

            tar_script = os.path.join(IE_SCRIPTS_DIR, IE_TAR_RESULT_SCRIPT)
            script_arg = [tar_script, dl_dir]
            if cat_reg:
                script_arg.append(self.mk_catreg_arg())
            self._logger.info(`ncn_id`+": running " + `script_arg`)
            r = subprocess.call(script_arg)
            if 0 != r:
                n_errors += 1
                self._logger.error(`ncn_id`+": tar script returned status:"+`r`)
            else:
                self._logger.info(`ncn_id`+": tar file is ready: " +
                                  dl_dir + IE_TAR_FILE_SUFFIX);

        return n_errors
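
# Illustrative sketch (not from the original source): post_download_actions
# clamps its progress figure to at least 1 percent so the status web page
# keeps refreshing even while the true fraction still rounds down to zero:

def progress_percent(i, n_dirs):
    percent = 100.0 * i / n_dirs
    return 1 if percent < 1.0 else percent
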
def wait_for_download(scid, dar_url, dar_id, ncn_id, max_wait=None):
    """
    scid may be None

    blocks until the DM reports that the DAR with this dar_url
    has completed all constituent individual product downloads
    """

    if None == ncn_id: ncn_id = "(None)"

    set_status(scid, "Downloading", 1)

    request = get_dar_status(dar_url)

    if check_status_stopping(scid):
        stop_download(scid, request)
        raise StopRequest("Stop Request")

    if None == request:
        # wait and try again
        time.sleep(DAR_STATUS_INTERVAL)
        request = get_dar_status(dar_url)
        if None == request:
            time.sleep(1)
            request = get_dar_status(dar_url)
            if check_status_stopping(scid):
                stop_download(scid, request)
                raise StopRequest("Stop Request")
        if None == request:
            time.sleep(1)
            request = get_dar_status(dar_url)
        if None == request:
            if None != scid: wfm_clear_dar(scid)
            raise DMError(
                "Bad DAR status from DM; no 'dataAccessRequests' found.")

    product_list = request["productList"]
    n_products = len(product_list)
    total_percent = n_products * 100
    all_done = False
    n_done = 0
    total_size = 0
    n_errors = 0
    failed_urls = []
    failed_dirs = []
    try:
        ts = time.time()
        tdiff = 0
        last_status = {}
        last_st_message = ""
        while not all_done:
            tdiff = time.time() - ts
            all_done = True
            if None != max_wait and tdiff > max_wait:
                n_errors += 1
                logger.warning("Time-out waiting for download")
                break
            part_percent = 0
            n_done = 0
            n_errors = 0
            for product in product_list:
                if "productProgress" not in product:
                    continue

                dl_dir = product["downloadDirectory"]
                progress = product["productProgress"]
                dl_status = progress["status"]

                if dl_status == "IN_ERROR":
                    if "message" in progress: msg = progress["message"]
                    else: msg = "(none)"
                    if "uuid" in product: uuid = product["uuid"]
                    else: uuid = "(unknown)"
                    if "productAccessUrl" in product:
                        url = product["productAccessUrl"]
                    else:
                        url = "(unknown)"

                    if url not in failed_urls:
                        n_errors += 1
                        n_done += 1
                        failed_urls.append(url)
                        failed_dirs.append(dl_dir)
                        logger.info("Dl Manager reports 'IN_ERROR' for uuid " +
                                    uuid + ", message: " + msg + "\n url: " +
                                    url)
                        dl_report = simplejson.dumps(product, indent=2)
                        logger.info("Dl Manager status: \n" + dl_report)

                elif dl_status == "COMPLETED":
                    n_done += 1

                else:
                    all_done = False

                if IE_DEBUG > 0:
                    prod_uuid = None
                    if 'uuid' in product:
                        prod_uuid = product['uuid']
                    else:
                        prod_uuid = 'unknown'
                    if not (prod_uuid in last_status and \
                            last_status[prod_uuid] == dl_status):
                        logger.debug("Status from DM: " + ` dl_status ` +
                                     ", prod. uuid=" + ` prod_uuid `)
                        last_status[prod_uuid] = dl_status

                if "progressPercentage" not in progress:
                    part_percent += 100
                else:
                    part_percent += progress["progressPercentage"]

                if "downloadedSize" in progress:
                    total_size += progress["downloadedSize"]

            percent_done = int(
                (float(part_percent) / float(total_percent)) * 100)
            if percent_done < 1: percent_done = 1
            if all_done:
                if n_errors > 0:
                    set_status(scid, repr(n_errors) + " errors during Dl.",
                               percent_done)
                else:
                    set_status(scid, "Finished Dl. (" + ` n_products ` + ")",
                               percent_done)
                if total_size < 102400:
                    size_str = repr(total_size) + ' bytes'
                else:
                    size_str = repr(total_size / 1024) + ' kb'
                logger.info("Dl Manager reports downloaded " + size_str +
                            " in " + repr(n_products) + ' products')
                break
            elif check_status_stopping(scid):
                stop_download(scid, request)
                raise StopRequest("Stop Request")
            else:
                status_message = "Downloading (" + ` n_done ` + '/' + ` n_products ` + ")"
                set_status(scid, status_message, percent_done)
                new_st_message = ncn_id + " Status: " + status_message + " done: " + ` percent_done ` + "%"
                if new_st_message != last_st_message:
                    last_st_message = new_st_message
                    logger.info(new_st_message)

            if check_status_stopping(scid):
                stop_download(scid, request)
                raise StopRequest("Stop Request")

            sleep_time = DAR_STATUS_INTERVAL
            if tdiff > (32 * DAR_STATUS_INTERVAL):
                sleep_time = 5 * DAR_STATUS_INTERVAL
            elif tdiff > (6 * DAR_STATUS_INTERVAL):
                sleep_time = 2 * DAR_STATUS_INTERVAL
            time.sleep(sleep_time)
            request = get_dar_status(dar_url)

            if check_status_stopping(scid):
                stop_download(scid, request)
                raise StopRequest("Stop Request")

            product_list = request["productList"]

        # all done

        last_status = None

        if n_errors > 0:
            logger.info("Completed download with " + ` n_errors ` + " errors")

    except StopRequest:
        logger.info("StopRequest while waiting for download")
        raise

    except Exception as e:
        logger.warning("Unexpected exception in wait_for_download: " + repr(e))
        if IE_DEBUG > 0:
            traceback.print_exc(12, sys.stdout)
            raise  # bare raise preserves the original traceback

    finally:
        if None != scid: wfm_clear_dar(scid)

    return n_errors, failed_dirs, failed_urls
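
# A sketch (not from the original source) of the polling backoff used in
# wait_for_download: the base interval at first, twice the base once six
# base intervals have elapsed, and five times the base after thirty-two.
# The interval value below is a placeholder, not the project's setting.

EXAMPLE_DAR_STATUS_INTERVAL = 10  # seconds

def poll_interval(tdiff, base=EXAMPLE_DAR_STATUS_INTERVAL):
    if tdiff > 32 * base:
        return 5 * base
    if tdiff > 6 * base:
        return 2 * base
    return base
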
def process_csDescriptions(params, aoi_toi, service_version, wcs_type,
                           md_urls):
    """ Input: md_urls is a tuple, where each element is a pair containg
                   the MetaData URL and its EOID :  (MetaData_URL, EOID)
               aoi_toi is a tuple containing Area-of-interest
               Bounding-Box and the Time of Interest time range
        Each md_url is accessed in turn to get the metatada from the
        product facility.
        The MD is expected to contain a wcseo:EOCoverageSetDescription,
          with a number of coverageSetDescriptions
    """
    logger.info("Processing " + ` len(md_urls) ` +
                " EOCoverageSetDescription urls.")

    base_url = params['dsrc'] + \
        "?" + SERVICE_WCS + \
        '&version=' + service_version + \
        "&" + WCS_GET_COVERAGE +\
        "&" + WCS_IMAGE_FORMAT

    if params['download_subset']:
        bb = aoi_toi[0]
        if WCS_TYPE_DRAFT201 == wcs_type:
            base_url += (
                "&subset=Lat," + EPSG_4326 +
                "(" + repr(bb.ll[1]) + "," + repr(bb.ur[1]) + ")" +
                "&subset=Long," + EPSG_4326 +
                "(" + repr(bb.ll[0]) + "," + repr(bb.ur[0]) + ")")
        else:
            base_url += (
                "&subset=lat(" + repr(bb.ll[1]) + "," + repr(bb.ur[1]) + ")" +
                "&subset=lon(" + repr(bb.ll[0]) + "," + repr(bb.ur[0]) + ")" +
                "&subsettingcrs=" + EPSG_4326)

    dl_requests = []
    ndeocs = 0  # number of DescribeEOCoverageSet urls processed

    toteocs = float(len(md_urls))

    coastcache = None
    if should_check_coastline(params):
        shpfile = IE_30KM_SHPFILE
        prjfile = None
        coastcache = None
        try:
            coastcache = coastline_cache_from_aoi(shpfile, prjfile, aoi_toi[0])
        except Exception as e:
            logger.error(
                "NOT checking coastline due to Error initialising coastline:\n"
                + repr(e))

    for md_url, eoid in md_urls:
        if check_status_stopping(params["sc_id"]):
            raise StopRequest("Stop Request")

        # make sure percent_done is > 0
        percent_done = (float(ndeocs) / toteocs) * 100.0
        if percent_done < 0.5: percent_done = 1.0
        set_status(params["sc_id"], "Create DAR: get MD", percent_done)

        logger.info("Processing MD for EOID " + ` eoid `)
        if 0 != DEBUG_MAX_DEOCS_URLS:
            if ndeocs > DEBUG_MAX_DEOCS_URLS: break
            ndeocs += 1

        dl_requests += gen_dl_urls(params, aoi_toi, base_url, md_url, eoid,
                                   coastcache, wcs_type)

        if 0 != DEBUG_MAX_GETCOV_URLS and dl_requests:
            dl_requests = dl_requests[:DEBUG_MAX_GETCOV_URLS]

    coastcache = None

    set_status(params["sc_id"], "Create DAR: get MD", 100)
    return dl_requests
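
# A self-contained sketch (not from the original source) of the two subset
# syntaxes process_csDescriptions appends to the GetCoverage base URL: the
# 2.0 draft protocol names the axes Lat/Long and repeats the CRS in each
# clause, while the final syntax uses lat/lon plus a separate subsettingcrs
# parameter. The CRS URI stands in for the document's EPSG_4326 constant.

EXAMPLE_EPSG_4326 = "http://www.opengis.net/def/crs/EPSG/0/4326"

def subset_params(ll, ur, draft201, crs=EXAMPLE_EPSG_4326):
    """ll and ur are (lon, lat) corner tuples of the area of interest."""
    if draft201:
        return ("&subset=Lat," + crs + "(%s,%s)" % (ll[1], ur[1]) +
                "&subset=Long," + crs + "(%s,%s)" % (ll[0], ur[0]))
    return ("&subset=lat(%s,%s)" % (ll[1], ur[1]) +
            "&subset=lon(%s,%s)" % (ll[0], ur[0]) +
            "&subsettingcrs=" + crs)
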
def gen_dl_urls(params, aoi_toi, base_url, md_url, eoid, ccache, wcs_type):
    """ params is the dictionary of input parameters
        aoi_toi is a tuple containing Area-of-interest Bounding-Box and
                the Time of Interest time range
        md_url  is the metadata url

        This function generates Download URLs:
           1. GetCoverage requests based on metadata from the
              DescribeEOCoverageSet request - depending on whether the
              metadata matches the scenario params, and
           2. Other URL refs found in the metadata's product and mask
              elements:
                //eop:product//eop:fileName/ows:ServiceReference[@xlink:href]
                //eop:mask//eop:fileName/ows:ServiceReference[@xlink:href]
    """

    if IE_DEBUG > 0:
        logger.info("Generating DL-URLS from DescribeEOCoverageSet: '" +
                    md_url + "'")

    ret = []
    scid = params['sc_id']

    # cd_tree: coverage description tree extracted from the
    #          metadata XML file
    (fp, cd_tree) = getXmlTree(md_url, EOCS_DESCRIPTION_TAG)

    if check_status_stopping(scid):
        if None != fp: fp.close()
        raise StopRequest("Stop Request")

    if None == cd_tree:
        if None != fp: fp.close()
        return ret

    if IE_DEBUG > 1:
        try:
            nreturned = cd_tree.attrib['numberReturned']
            nmatched = cd_tree.attrib['numberMatched']
            logger.info("    MD reports nreturned = "+`nreturned`+\
                            ", nmatched =  "+`nmatched`)
        except KeyError:
            pass

    cds = get_coverageDescriptions(cd_tree)
    if len(cds) < 1:
        logger.warning("No CoverageDescriptions found in '" + md_url + "'")

    should_check_archived = True
    if 'check_arch' in params:
        should_check_archived = params['check_arch']
    failed = set()
    passed = 0
    for cd in cds:

        if check_status_stopping(scid):
            if None != fp: fp.close()
            raise StopRequest("Stop Request")

        coverage_id = extract_CoverageId(cd)
        if None == coverage_id:
            logger.error("EOID " + ` eoid ` + " Cannot find CoverageId in '" +
                         md_url + "'")
            continue
        if IE_DEBUG > 2:
            logger.debug("  coverage_id=" + coverage_id)

        if should_check_archived and check_archived(scid, coverage_id):
            if IE_DEBUG > 0:
                logger.debug("  coverage_id='" + coverage_id +
                             "' is achived, not downloading.")
            continue

        if not check_bbox(cd, aoi_toi[0]):
            if IE_DEBUG > 2: logger.debug("  bbox check failed.")
            if IE_DEBUG > 0: failed.add('bbox')
            continue

        if not check_timePeriod(cd, aoi_toi[1], md_url, wcs_type):
            if IE_DEBUG > 2: logger.debug("  TimePeriod check failed.")
            if IE_DEBUG > 0: failed.add('TimePeriod')
            continue

        if not check_text_condition(cd, params, 'sensor_type',
                                    xpaths_sensor(wcs_type)):
            if IE_DEBUG > 2: logger.debug("  sensor type check failed.")
            if IE_DEBUG > 0: failed.add('sensor_type')
            continue

        if not check_float_max(cd, params, 'view_angle',
                               xpaths_incidenceangle(wcs_type), True):
            if IE_DEBUG > 2: logger.debug("  incidence angle check failed.")
            if IE_DEBUG > 0: failed.add('view_angle')
            continue

        if not check_float_max(cd, params, 'cloud_cover',
                               xpaths_cloudcover(wcs_type)):
            if IE_DEBUG > 2: logger.debug("  cloud cover check failed.")
            if IE_DEBUG > 0: failed.add('cloud_cover')
            continue

        if not check_coastline(cd, coverage_id, params, ccache, wcs_type):
            if IE_DEBUG > 2: logger.debug("  coastline check failed.")
            if IE_DEBUG > 0: failed.add('coastline check')
            continue

        if not check_custom_conditions(cd, params):
            if IE_DEBUG > 2: logger.debug("  custom conds check failed.")
            if IE_DEBUG > 0: failed.add('custom conditions')
            continue

        passed = passed + 1
        ret.append(base_url + "&CoverageId=" + coverage_id)

        #  disabled, not supported by ODA server for now:
        #ret.extend( extract_prods_and_masks(cd, True) )

    if None != fp: fp.close()
    cd_tree = None
    if IE_DEBUG > 0:
        logger.info("EOID " + ` eoid ` + " cov descriptions passed: " +
                    ` passed ` + " / " + ` len(cds) `)
    if IE_DEBUG > 0 and IE_DEBUG < 3:
        logger.info("EOID " + ` eoid ` + " summary of conditions failed: " +
                    ` [f for f in failed] `)

    if IE_DEBUG > 1:
        dbg_gen_urls = "\n    ".join(ret)
        logger.info("EOID " + ` eoid ` + " generated URLs:\n    " +
                    dbg_gen_urls)
    return ret
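
# A generalized sketch (not from the original source) of the filter chain in
# gen_dl_urls: each coverage description runs through a series of independent
# predicates and is skipped on the first failure, with the failing condition's
# name recorded for the summary log. Names below are ours, not the project's.

def first_failing_check(item, checks):
    """checks is an iterable of (name, predicate) pairs; return the name of
    the first predicate that rejects item, or None if all pass."""
    for name, predicate in checks:
        if not predicate(item):
            return name
    return None

# e.g. first_failing_check(3, [("positive", lambda x: x > 0),
#                              ("even",     lambda x: x % 2 == 0)])
# returns "even"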