Exemple #1
0
def main(argv: Optional[List[str]] = None) -> int:
    """Run the query stored in a file through an AltimeterNeptuneClient and print CSV.

    Args:
        argv: Command-line arguments without the program name. When None,
            ``sys.argv[1:]`` is used.

    Returns:
        0 after the results have been printed.
    """
    if argv is None:
        argv = sys.argv[1:]
    parser = argparse.ArgumentParser()
    parser.add_argument("query_file", type=str)
    parser.add_argument("--graph_names", type=str, default=["alti"], nargs="+")
    parser.add_argument("--max_age_min", type=int, default=1440)
    parser.add_argument("--raw", default=False, action="store_true")
    args_ns = parser.parse_args(argv)

    with open(args_ns.query_file, "r") as query_fp:
        query = query_fp.read()

    endpoint = discover_neptune_endpoint()
    client = AltimeterNeptuneClient(
        max_age_min=args_ns.max_age_min, neptune_endpoint=endpoint
    )

    if args_ns.raw:
        # Raw mode sends the query text as-is; --graph_names is not consulted.
        results = client.run_raw_query(query=query)
    else:
        # Pass a set, as the other run_query callers do, so a graph name
        # repeated on the command line is only handed over once.
        results = client.run_query(
            graph_names=set(args_ns.graph_names), query=query
        )
    # end="" because the CSV text is emitted exactly as produced by to_csv().
    print(results.to_csv(), end="")
    return 0
Exemple #2
0
def lambda_handler(event, context):
    """Run a query against the requested graphs and store the CSV results in S3.

    Args:
        event: Lambda event. Must provide ``graph_names`` (list), ``query``
            (str) and ``max_age_min`` (int).
        context: Lambda context object; not used.

    Returns:
        A dict with ``results_bucket``, ``results_key`` and ``num_results``.

    Raises:
        ValueError: If ``graph_names``, ``query`` or ``max_age_min`` has the
            wrong type.
    """
    graph_names_list = get_required_lambda_event_var(event, "graph_names")
    if not isinstance(graph_names_list, list):
        raise ValueError(f"Value for graph_names should be a list. Is {type(graph_names_list)}")
    graph_names = set(graph_names_list)
    query = get_required_lambda_event_var(event, "query")
    if not isinstance(query, str):
        raise ValueError(f"Value for query should be a str. Is {type(query)}")
    max_age_min = get_required_lambda_event_var(event, "max_age_min")
    if not isinstance(max_age_min, int):
        raise ValueError(f"Value for max_age_min should be an int. Is {type(max_age_min)}")

    host = get_required_lambda_env_var("NEPTUNE_HOST")
    # NOTE(review): the port is forwarded exactly as the env-var helper returns
    # it — confirm NeptuneEndpoint accepts that type.
    port = get_required_lambda_env_var("NEPTUNE_PORT")
    region = get_required_lambda_env_var("NEPTUNE_REGION")
    results_bucket = get_required_lambda_env_var("RESULTS_BUCKET")

    endpoint = NeptuneEndpoint(host=host, port=port, region=region)
    client = AltimeterNeptuneClient(max_age_min=max_age_min, neptune_endpoint=endpoint)
    query_result = client.run_query(graph_names=graph_names, query=query)

    csv_results = query_result.to_csv()

    # Key layout: <graph names joined by "-">/<sha256 of query>/<unix time>.csv
    query_hash = hashlib.sha256(query.encode()).hexdigest()
    now_str = str(int(time.time()))
    # Sort the names: iteration order of a set of strings is not stable across
    # processes, which would scatter results for the same graphs across
    # different key prefixes.
    graph_names_prefix = "-".join(sorted(graph_names))
    results_key = "/".join((graph_names_prefix, query_hash, f"{now_str}.csv"))
    s3_client = boto3.Session().client("s3")
    s3_client.put_object(Bucket=results_bucket, Key=results_key, Body=csv_results)

    return {
        "results_bucket": results_bucket,
        "results_key": results_key,
        "num_results": query_result.get_length(),
    }
Exemple #3
0
def run_query(job: schemas.Job, config: QueryConfig) -> QueryResult:
    """Execute the job's query against its graphs and return the QueryResult.

    The Neptune connection details come from ``config``; the query text, the
    graph names and the maximum graph age come from ``job``.
    """
    neptune_endpoint = NeptuneEndpoint(
        host=config.neptune_host,
        port=config.neptune_port,
        region=config.neptune_region,
    )
    # The job stores the age limit in seconds while the client takes minutes;
    # int() drops any fractional minute.
    max_age_minutes = int(job.max_graph_age_sec / 60.0)
    client = AltimeterNeptuneClient(
        max_age_min=max_age_minutes,
        neptune_endpoint=neptune_endpoint,
    )
    requested_graphs = set(job.graph_spec.graph_names)
    return client.run_query(graph_names=requested_graphs, query=job.query)
Exemple #4
0
def run_query(job: schemas.Job, config: QueryConfig) -> QueryResult:
    """Execute the job's query, raw or graph-scoped, and return a QueryResult.

    When ``job.raw_query`` is falsy the query runs against the job's graph
    names. Otherwise the query is run as a raw query and the result is paired
    with a uri -> end_time mapping built from all graph metadatas.
    """
    neptune_endpoint = NeptuneEndpoint(
        host=config.neptune_host,
        port=config.neptune_port,
        region=config.neptune_region,
    )
    # The job stores the age limit in seconds while the client takes minutes;
    # int() drops any fractional minute.
    max_age_minutes = int(job.max_graph_age_sec / 60.0)
    client = AltimeterNeptuneClient(
        max_age_min=max_age_minutes,
        neptune_endpoint=neptune_endpoint,
    )

    if not job.raw_query:
        return client.run_query(
            graph_names=set(job.graph_spec.graph_names),
            query=job.query,
        )

    # Raw path: collect the load times first, then run the query itself.
    load_times_by_uri: Dict[str, int] = {}
    for metadata in client.get_all_graph_metadatas():
        uri, loaded_at = metadata.uri, metadata.end_time
        load_times_by_uri[uri] = loaded_at
    raw_result_set = client.run_raw_query(query=job.query)
    return QueryResult(load_times_by_uri, raw_result_set)
def main(argv: Optional[List[str]] = None) -> int:
    """Run the query stored in a file through an AltimeterNeptuneClient and print CSV.

    The endpoint is taken from ``--neptune_endpoint`` (``host:port:region``)
    when given; otherwise it is obtained from ``discover_neptune_endpoint()``.

    Args:
        argv: Command-line arguments without the program name. When None,
            ``sys.argv[1:]`` is used.

    Returns:
        0 after the results have been printed, 1 when ``--neptune_endpoint``
        is not of the form ``host:port:region`` with an integer port.
    """
    if argv is None:
        argv = sys.argv[1:]
    parser = argparse.ArgumentParser()
    parser.add_argument("query_file", type=str)
    parser.add_argument("--graph_names", type=str, default=["alti"], nargs="+")
    parser.add_argument("--max_age_min", type=int, default=1440)
    parser.add_argument("--raw", default=False, action="store_true")
    parser.add_argument("--neptune_endpoint",
                        help="Neptune endpoint specified as host:port:region")
    args_ns = parser.parse_args(argv)

    with open(args_ns.query_file, "r") as query_fp:
        query = query_fp.read()

    if args_ns.neptune_endpoint is not None:
        try:
            # A wrong number of ":"-separated parts and a non-numeric port
            # both surface as ValueError.
            host, port_str, region = args_ns.neptune_endpoint.split(":")
            port: int = int(port_str)
        except ValueError:
            # stdout carries the CSV payload, so diagnostics go to stderr.
            print(
                "neptune_endpoint should be a string formatted as host:port:region",
                file=sys.stderr,
            )
            return 1
        endpoint = NeptuneEndpoint(host=host, port=port, region=region)
    else:
        endpoint = discover_neptune_endpoint()
    client = AltimeterNeptuneClient(
        max_age_min=args_ns.max_age_min, neptune_endpoint=endpoint
    )

    if args_ns.raw:
        # Raw mode sends the query text as-is; --graph_names is not consulted.
        results = client.run_raw_query(query=query)
    else:
        # Pass a set, as the other run_query callers do, so a graph name
        # repeated on the command line is only handed over once.
        results = client.run_query(
            graph_names=set(args_ns.graph_names), query=query
        )
    # end="" because the CSV text is emitted exactly as produced by to_csv().
    print(results.to_csv(), end="")
    return 0