Example #1
    def eval_source(self, coordinates, coordinates_index, out, i, source=None):
        if source is None:
            source = self.source
            # Make a copy to prevent any possibility of memory corruption
            source = Node.from_definition(source.definition)

        success = False
        o = None
        while not success:
            if self.check_worker_available():
                try:
                    o = source.eval(coordinates, output=out)
                    success = True
                except self.async_exception:
                    # This exception is fine and constitutes a success
                    o = None
                    success = True
                except self.no_worker_exception as e:
                    response = e.response
                    if not (response and response.get("Error", {}).get("Code")
                            == "TooManyRequestsException"):
                        raise e  # Not the retryable throttling error; re-raise
                    _log.debug("Worker {} exception {}".format(i, e))
                    success = False
                    time.sleep(self.sleep_time)
            else:
                _log.debug("Worker unavailable for {}".format(i, e))
                time.sleep(self.sleep_time)
        _log.info("Submitting source {}".format(i))
        return (o, coordinates_index)
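
The heart of this variant is the retry loop: a throttling error ("TooManyRequestsException") is logged and retried after a sleep, while any other error propagates. The same structure, reduced to a self-contained sketch with hypothetical names:

import logging
import time

_log = logging.getLogger(__name__)


def call_with_retry(fn, is_retryable, sleep_time=1.0):
    # Keep calling fn() until it succeeds; sleep between retryable failures
    while True:
        try:
            return fn()
        except Exception as e:
            if not is_retryable(e):
                raise  # Not the throttling case; propagate, as in Example #1
            _log.debug("Retryable error: {}".format(e))
            time.sleep(sleep_time)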
Example #2
    def eval_source(self, coordinates, coordinates_index, out, i, source=None):
        if source is None:
            source = self.source
            # Make a copy to prevent any possibility of memory corruption
            source = Node.from_definition(source.definition)

        _log.info("Submitting source {}".format(i))
        return (source.eval(coordinates, output=out), coordinates_index)
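
Both variants rely on the same defensive-copy idiom: round-tripping a node through its definition with Node.from_definition produces an independent copy, so concurrent evaluations cannot share mutable state. A minimal sketch of the idiom on its own (assumes podpac is installed; the Array node is illustrative):

import podpac
from podpac.core.node import Node

original = podpac.data.Array(
    source=[10, 20, 30],
    coordinates=podpac.Coordinates([[0, 1, 2]], dims=["lat"]),
)

# Serialize to the definition and rebuild: equal, but a distinct object
copy = Node.from_definition(original.definition)
assert copy is not original and copy == original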
Example #3
    def test_definition_inputs_dict(self):
        global MyNodeWithDictInput

        class MyNodeWithDictInput(Node):
            my_dict = tl.Dict().tag(attr=True)

        node1 = MyNodeWithDictInput(my_dict={"a": podpac.algorithm.Arange()})
        node2 = Node.from_definition(node1.definition)
        assert node2 is not node1 and node2 == node1
Example #4
    def test_definition_inputs_array(self):
        global MyNodeWithArrayInput

        class MyNodeWithArrayInput(Node):
            my_array = ArrayTrait().tag(attr=True)

        node1 = MyNodeWithArrayInput(my_array=[podpac.algorithm.Arange()])
        node2 = Node.from_definition(node1.definition)
        assert node2 is not node1 and node2 == node1
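
Examples #3 and #4 confirm that node-valued attrs (here a dict and a list of nodes) survive the definition round trip. To see what actually gets serialized, the definition can be inspected directly; per Example #6 it is an OrderedDict keyed by node reference (a sketch, assuming podpac is installed):

import podpac
from podpac.core.node import Node

node = podpac.data.Array(
    source=[10, 20, 30],
    coordinates=podpac.Coordinates([[0, 1, 2]], dims=["lat"]),
)

d = node.definition
print(list(d.keys()))    # node references, including node.base_ref (see Example #7)
print(d[node.base_ref])  # the serialized attrs for this node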
Example #5
    def eval_source(self, coordinates, coordinates_index, out, i, source=None):
        if source is None:
            source = self.source

        if self.skip_existing:  # This section allows previously computed chunks to be skipped
            dk = self.zarr_data_key
            if isinstance(dk, list):
                dk = dk[0]
            try:
                exists = self.zarr_node.chunk_exists(coordinates_index,
                                                     data_key=dk,
                                                     list_dir=self._list_dir,
                                                     chunks=self._chunks)
            except ValueError:  # Poor internet connections can cause read errors; treat the chunk as missing
                exists = False
            if exists:
                _log.info("Skipping {} (already exists)".format(i))
                return out, coordinates_index

        # Make a copy to prevent any possibility of memory corruption
        source = Node.from_definition(source.definition)
        _log.debug("Creating output format.")
        output = dict(
            format="zarr_part",
            format_kwargs=dict(
                part=[[s.start, min(s.stop, self._shape[j]), s.step]
                      for j, s in enumerate(coordinates_index)],
                source=self.zarr_file,
                mode="a",
            ),
        )
        _log.debug("Finished creating output format.")

        if source.has_trait("output_format"):
            source.set_trait("output_format", output)
        _log.debug("output: {}, coordinates.shape: {}".format(
            output, coordinates.shape))
        _log.debug("Evaluating node.")

        o, slc = super(ZarrOutputMixin,
                       self).eval_source(coordinates, coordinates_index, out,
                                         i, source)

        if not source.has_trait("output_format"):
            o.to_format(output["format"], **output["format_kwargs"])
        return o, slc
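
The part computation above clamps each chunk's index slices to the full array shape, telling the zarr writer where the chunk belongs even when the final chunk overruns the array. Reduced to plain Python with illustrative values:

# Hypothetical 2-D array shape and chunk slices; the second slice overruns
shape = (10, 10)
coordinates_index = (slice(0, 5, 1), slice(5, 12, 1))

part = [[s.start, min(s.stop, shape[j]), s.step]
        for j, s in enumerate(coordinates_index)]
print(part)  # [[0, 5, 1], [5, 10, 1]] -- the overrun is clamped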
Example #6
    def test_definition(self):
        # definition
        d = self.node.definition
        assert isinstance(d, OrderedDict)
        assert len(d) == 5

        # from_definition
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", "Insecure evaluation.*")
            node = Node.from_definition(d)

        assert node is not self.node
        assert node == self.node
        assert isinstance(node, podpac.algorithm.Arithmetic)
        assert isinstance(node.inputs["A"], podpac.algorithm.Arange)
        assert isinstance(node.inputs["B"], podpac.data.Array)
        assert isinstance(node.inputs["C"],
                          podpac.compositor.OrderedCompositor)
Example #7
    def test_style(self):
        node = podpac.data.Array(
            source=[10, 20, 30],
            coordinates=podpac.Coordinates([[0, 1, 2]], dims=["lat"]),
            style=Style(name="test", units="m"),
        )

        d = node.definition
        assert "style" in d[node.base_ref]

        node2 = Node.from_definition(d)
        assert node2 is not node
        assert isinstance(node2, podpac.data.Array)
        assert node2.style is not node.style
        assert node2.style == node.style
        assert node2.style.name == "test"
        assert node2.style.units == "m"

        # default style
        node = podpac.data.Array(source=[10, 20, 30],
                                 coordinates=podpac.Coordinates([[0, 1, 2]],
                                                                dims=["lat"]))
        d = node.definition
        assert "style" not in d[node.base_ref]
Example #8
# Module-level imports used by the handler
import json
import os
import subprocess
import sys

import boto3

def handler(event, context):
    """Lambda function handler
    
    Parameters
    ----------
    event : dict
        Description
    context : TYPE
        Description
    get_deps : bool, optional
        Description
    ret_pipeline : bool, optional
        Description
    """
    print(event)

    # Add /tmp/ path to handle python path for dependencies
    sys.path.append("/tmp/")

    # handle triggers
    trigger = get_trigger(event)

    # parse event
    pipeline = parse_event(trigger, event)

    # bail if we can't parse
    if pipeline is None:
        return

    # -----
    # TODO: remove when layers is configured
    # get configured bucket to download dependencies
    # If the bucket is specified in the environment variables it cannot be overwritten;
    # otherwise it must be specified in the settings.
    bucket = os.environ.get("S3_BUCKET_NAME", pipeline["settings"].get("S3_BUCKET_NAME"))

    # get dependencies path
    if "FUNCTION_DEPENDENCIES_KEY" in pipeline["settings"] or "FUNCTION_DEPENDENCIES_KEY" in os.environ:
        dependencies = os.environ.get(
            "FUNCTION_DEPENDENCIES_KEY", pipeline["settings"].get("FUNCTION_DEPENDENCIES_KEY")
        )
    else:
        dependencies = "podpac_deps_{}.zip".format(
            os.environ.get("PODPAC_VERSION", pipeline["settings"].get("PODPAC_VERSION"))
        ) 
        if 'None' in dependencies:
            dependencies = 'podpac_deps.zip'  # Development version of podpac
        # this should be equivalent to version.semver()

    # Check to see if this function is "hot", in which case the dependencies have already been downloaded and are
    # available for use right away.
    if os.path.exists("/tmp/scipy"):
        print(
            "Scipy has been detected in the /tmp/ directory. Assuming this function is hot, dependencies will"
            " not be downloaded."
        )
    else:
        # Download dependencies from specific bucket/object
        print ("Downloading and extracting dependencies from {} {}".format(bucket, dependencies))
        s3 = boto3.client("s3")
        s3.download_file(bucket, dependencies, "/tmp/" + dependencies)
        subprocess.call(["unzip", "/tmp/" + dependencies, "-d", "/tmp"])
        sys.path.append("/tmp/")
        subprocess.call(["rm", "/tmp/" + dependencies])
    # -----

    # Load PODPAC

    # Need to set matplotlib backend to 'Agg' before importing it elsewhere
    import matplotlib

    matplotlib.use("agg")
    from podpac import settings
    from podpac.core.node import Node
    from podpac.core.coordinates import Coordinates
    from podpac.core.utils import JSONEncoder, _get_query_params_from_url
    import podpac.datalib

    # update podpac settings with inputs from the trigger
    settings.update(json.loads(os.environ.get("SETTINGS", "{}")))
    settings.update(pipeline["settings"])
  
    # build the Node and Coordinates
    if trigger in ("eval", "S3"):
        node = Node.from_definition(pipeline["pipeline"])
        coords = Coordinates.from_json(json.dumps(pipeline["coordinates"], indent=4, cls=JSONEncoder))

    # TODO: handle API Gateway better - is this always going to be WCS?
    elif trigger == "APIGateway":
        node = Node.from_url(pipeline["url"])
        coords = Coordinates.from_url(pipeline["url"])

    # make sure pipeline is allowed to be run
    if "PODPAC_RESTRICT_PIPELINES" in os.environ:
        whitelist = json.loads(os.environ["PODPAC_RESTRICT_PIPELINES"])
        if node.hash not in whitelist:
            raise ValueError("Node hash is not in the whitelist for this function")

    # run analysis
    output = node.eval(coords)

    # convert to output format
    body = output.to_format(pipeline["output"]["format"], **pipeline["output"]["format_kwargs"])

    # Response
    if trigger == "eval":
        return body

    elif trigger == "S3":
        s3.put_object(Bucket=settings["S3_BUCKET_NAME"], Key=pipeline["output"]["filename"], Body=body)

    elif trigger == "APIGateway":

        # TODO: can we handle the serialization better?
        try:
            json.dumps(body)
        except Exception:
            print("Output body is not JSON-serializable; attempting to decode.")
            body = body.decode()

        return {
            "statusCode": 200,
            "headers": {"Content-Type": pipeline["output"]["format"]},
            "isBase64Encoded": pipeline["output"]["format_kwargs"]["return_base64"],
            "body": body,
        }
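
Stripped of the AWS plumbing, the handler's "eval" path is: rebuild the Node from its definition, rebuild the Coordinates, evaluate, and format the result. The same flow can be exercised locally (a sketch; assumes podpac is installed, and the node and coordinates stand in for a parsed event):

import podpac
from podpac.core.node import Node

# Stand-in for pipeline["pipeline"]: any serialized node definition
definition = podpac.data.Array(
    source=[10, 20, 30],
    coordinates=podpac.Coordinates([[0, 1, 2]], dims=["lat"]),
).definition

node = Node.from_definition(definition)                 # build the Node
coords = podpac.Coordinates([[0, 1, 2]], dims=["lat"])  # build the Coordinates
output = node.eval(coords)                              # run the analysis
# The handler would then call output.to_format(...) with the requested format
print(output)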