Exemple #1
0
class ProductListManager:
    """Build the list of new products returned by the SciHub search endpoint.

    The manager queries ``SEARCH_URL_BASE`` for products ingested since the
    last known ingestion date whose footprint intersects the configured
    search polygon, converts every Atom ``entry`` of the response pages into
    a product dictionary, removes duplicates and products the catalog
    already knows, and writes the resulting list as JSON.
    """

    # Example search polygon (WKT); the polygon actually used is read from
    # the configuration (see __get_search_url).
    # POLYGON = 'POLYGON ((-6.604981356192942 49.438680703689379,-10.186858447403869 60.557572594302513,0.518974191882126 61.368840444480654,2.668100446608686 53.215944284612512,1.235349610124312 50.589462482174554,-6.604981356192942 49.438680703689379))'
    SEARCH_URL_BASE = 'https://scihub.copernicus.eu/apihub/search'

    # Number of results requested per search page ('rows' query parameter).
    _PAGE_SIZE = 100

    # XML namespaces (in ElementTree '{uri}' form) used by the search feed.
    _ATOM_NS = '{http://www.w3.org/2005/Atom}'
    _OPENSEARCH_NS = '{http://a9.com/-/spec/opensearch/1.1/}'

    def __init__(self, debug):
        """Load ``app.cfg`` and remember whether debug logging is wanted.

        Args:
            debug: When truthy, the generated search URLs are logged.
        """
        self.config = ConfigManager("app.cfg")
        self.debug = debug
        self.logger = logging.getLogger('luigi-interface')

    def __get_last_ingestion_date(self, productList):
        """Return the most recent ingestion date in ``productList``.

        Args:
            productList: Mapping whose ``"products"`` entry is an iterable of
                product dictionaries carrying an ``"ingestionDate"`` string.

        Returns:
            The latest ingestion date, or ``None`` when there are no
            products.
        """
        dates = [
            parser.parse(product["ingestionDate"]).date()
            for product in productList["products"]
        ]
        return max(dates) if dates else None

    def __get_search_url(self, lastIngestionDate, page):
        """Build the search URL for one page of results.

        Args:
            lastIngestionDate: Lower bound of the ingestion date range; only
                its year, month and day are used.
            page: Zero-based page number.

        Returns:
            The complete, URL-encoded search URL.
        """
        ingestionDateString = lastIngestionDate.strftime(
            '%Y-%m-%d') + 'T00:00:00.000Z'

        q = 'ingestiondate:[%s TO NOW] AND footprint:"Intersects(%s)"' % (
            ingestionDateString, self.config.get_search_polygon())

        extraCriteria = self.config.get_esa_searchCriteria()
        if extraCriteria is not None:
            q = '%s AND %s' % (q, extraCriteria)

        # 'start' is the index of the first result to return, not a page
        # number, so it has to advance by a full page of rows each time.
        criteria = {
            'start': page * self._PAGE_SIZE,
            'rows': self._PAGE_SIZE,
            'q': q
        }

        url = ProductListManager.SEARCH_URL_BASE + '?' + urlencode(criteria)

        if self.debug:
            self.logger.info("search url %s", url)

        return url

    def __get_xml_data(self, url, esaCredentials):
        """Fetch ``url`` with curl and return the response body as text.

        A transfer error is logged rather than raised so that the caller can
        carry on with whatever it has collected so far.

        Args:
            url: URL to request.
            esaCredentials: Value handed to curl's ``USERPWD`` option.

        Returns:
            The response body decoded as ISO-8859-1, or ``None`` when the
            transfer failed.
        """
        buffer = BytesIO()
        c = None

        try:
            c = pycurl.Curl()
            c.setopt(c.URL, str(url))
            c.setopt(c.USERPWD, esaCredentials)
            c.setopt(c.FOLLOWLOCATION, True)
            # NOTE(review): certificate verification is disabled although
            # credentials are sent on this connection - confirm that this is
            # still required.
            c.setopt(c.SSL_VERIFYPEER, False)
            c.setopt(c.WRITEFUNCTION, buffer.write)
            c.perform()
        except pycurl.error as e:
            msg = "Available product search failed  with error: %s" % (
                e.args[0], )
            self.logger.error(msg)
            # fail the search without an exception we want to continue
            return None
        finally:
            # Release the handle on the error path as well.
            if c is not None:
                c.close()

        # NOTE(review): the body is decoded as ISO-8859-1 regardless of the
        # encoding the server declares - confirm this matches the feed.
        return buffer.getvalue().decode('iso-8859-1')

    def __get_xml_element_tree(self, data):
        """Parse ``data`` as XML and return its root element."""
        return eTree.fromstring(data)

    def __getGeometry(self, footprintText):
        """Convert a footprint into GeoJSON footprint and centroid geometries.

        The text is first read as a GeoJSON feature; if that raises
        ``ValueError`` it is read as WKT instead.

        Args:
            footprintText: Footprint as GeoJSON feature text or WKT.

        Returns:
            A dict with the keys ``"footprint"`` and ``"centroid"``, each a
            GeoJSON geometry carrying an EPSG:4326 ``crs`` member (an
            existing ``crs`` on the footprint is kept).
        """
        crs = {"type": "name", "properties": {"name": "EPSG:4326"}}

        try:
            footprint = geojson.loads(footprintText).geometry
            # Hand shape() the geometry itself rather than the Feature
            # wrapper; a geometry mapping is its documented input.
            geom = shape(footprint)
        except ValueError:
            # probably failed because footprintText is wkt
            geom = shapely.wkt.loads(footprintText)
            footprint = geojson.Feature(geometry=geom).geometry

        if "crs" not in footprint:
            footprint["crs"] = crs

        centroid = geojson.Feature(geometry=geom.centroid).geometry
        centroid["crs"] = {
            "type": "name",
            "properties": {
                "name": "EPSG:4326"
            }
        }

        return {"footprint": footprint, "centroid": centroid}

    @staticmethod
    def _named_values(entry, tag):
        """Map the ``name`` attribute to the text of each ``tag`` under entry.

        Elements without a ``name`` attribute are ignored; when a name occurs
        more than once the last occurrence wins.
        """
        return {
            element.attrib['name']: element.text
            for element in entry.iter(tag) if 'name' in element.attrib
        }

    def __add_products_to_list(self, rawProductsData, productList):
        """Append one product dictionary per feed entry to ``productList``.

        Every optional field defaults to an empty string when the entry does
        not provide it, so a sparse entry can neither fail nor inherit a
        value from the entry processed before it.

        Args:
            rawProductsData: XML text of one search result page.
            productList: Mapping whose ``"products"`` list is extended.
        """
        atom = self._ATOM_NS
        root = self.__get_xml_element_tree(rawProductsData)

        for entry in root.iter(atom + 'entry'):
            uniqueId = entry.find(atom + 'id').text
            title = entry.find(atom + 'title').text

            strings = self._named_values(entry, atom + 'str')
            dates = self._named_values(entry, atom + 'date')
            ints = self._named_values(entry, atom + 'int')

            geom = self.__getGeometry(strings.get('footprint', ''))

            product = {
                "uniqueId": uniqueId,
                "title": title,
                "footprint": geom["footprint"],
                "centroid": geom["centroid"],
                "productType": strings.get('producttype', ''),
                "beginPosition": dates.get('beginposition', ''),
                "endPosition": dates.get('endposition', ''),
                "ingestionDate": dates.get('ingestiondate', ''),
                "platform": strings.get('platformname', ''),
                "orbitDirection": strings.get('orbitdirection', ''),
                "orbitNo": ints.get('orbitnumber', ''),
                "relOrbitNo": ints.get('relativeorbitnumber', '')
            }

            productList["products"].append(product)

    def __get_pages(self, rawProductsData):
        """Return how many result pages the search produced (at least 1).

        Args:
            rawProductsData: XML text of a search result page containing an
                OpenSearch ``totalResults`` element.
        """
        root = self.__get_xml_element_tree(rawProductsData)

        totalResults = int(
            root.find(self._OPENSEARCH_NS + 'totalResults').text)

        # Integer ceiling division: independent of how '/' treats integers.
        pages = (totalResults + self._PAGE_SIZE - 1) // self._PAGE_SIZE

        return max(1, pages)

    def create_list(self, runDate, productList, outputListFile, seedDate,
                    esaCredentials, dbConnectionString):
        """Collect newly ingested products and write them as JSON.

        Args:
            runDate: Date of this run; compared with the last ingestion date.
            productList: Mapping with a ``"products"`` list. It supplies the
                last ingestion date when no seed date is given, is extended
                with the search results and is then filtered in place.
            outputListFile: Writable file-like object receiving the JSON.
            seedDate: Start date of the search, or
                ``constants.DEFAULT_DATE`` to derive it from ``productList``.
            esaCredentials: Credentials passed on to the search requests.
            dbConnectionString: Connection string for ``CatalogManager``.

        Raises:
            Exception: If no last ingestion date can be determined, or if a
                date derived from ``productList`` is more than 3 days older
                than ``runDate``.
        """
        derivedFromList = seedDate == constants.DEFAULT_DATE

        if derivedFromList:
            lastIngestionDate = self.__get_last_ingestion_date(productList)
        else:
            lastIngestionDate = seedDate

        if lastIngestionDate is None:
            raise Exception("Unable to determine last ingestion date")

        # If latest record is older than 3 days, fail
        if derivedFromList and (runDate - lastIngestionDate).days > 3:
            raise Exception("Last ingestion date older then 3 days")

        page = 0
        pages = 1

        while page < pages:
            searchUrl = self.__get_search_url(lastIngestionDate, page)
            rawProductsData = self.__get_xml_data(searchUrl, esaCredentials)
            if rawProductsData is None:
                # The request failed and was logged; keep what we have.
                break

            if page == 0:
                # The first page tells us how many pages there are in total.
                pages = self.__get_pages(rawProductsData)

            self.__add_products_to_list(rawProductsData, productList)
            page = page + 1

        # remove duplicate products
        # remove products that are already in the catalog
        with CatalogManager(dbConnectionString) as cat:
            productList["products"] = (
                seq(productList["products"])
                .distinct_by(lambda x: x["uniqueId"])
                .filter(lambda x: not cat.exists(x["uniqueId"]))
            ).to_list()

        outputListFile.write(json.dumps(productList))