def lambda_handler(event, context):
    # Read the Elasticsearch endpoint for AWS Config data from the environment.
    logger.info("lambda_function: aws_config_es_endpoint: " +
                os.environ['aws_config_es_endpoint'])
    destination = os.environ['aws_config_es_endpoint']

    iso_now_time = datetime.datetime.now().isoformat()
    logger.info("lambda_function: Snapshot Time: " + str(iso_now_time))

    # json.dumps already returns a str in Python 3; no unicode()/encode needed.
    event_str = json.dumps(event)
    logger.info("lambda_function: event_str: " + event_str)

    bucket = event['Records'][0]['s3']['bucket']['name']
    logger.info("lambda_function: bucket: " + bucket)

    # S3 object keys arrive URL-encoded in the event notification.
    snapshot_file_path = event['Records'][0]['s3']['object']['key']
    snapshot_file_path_unquote = unquote(snapshot_file_path)
    logger.info("lambda_function: snapshot_file_path_unquote: " +
                snapshot_file_path_unquote)

    s3conn = boto3.resource('s3')
    s3conn.meta.client.download_file(bucket, snapshot_file_path_unquote,
                                     DOWNLOADED_SNAPSHOT_FILE_NAME)

    es = elastic.ElasticSearch(connections=destination, log=None)
    es.set_not_analyzed_template()

    if "_ConfigSnapshot_" in snapshot_file_path_unquote:
        logger.info("lambda_function: processing ConfigSnapshot: " +
                    snapshot_file_path_unquote)

        # Snapshots may arrive gzipped or as plain JSON; try the compressed
        # form first and fall back to an uncompressed read.
        try:
            with gzip.open(DOWNLOADED_SNAPSHOT_FILE_NAME, 'rb') as data_file:
                data = json.load(data_file)
        except (OSError, ValueError) as e:
            logger.info("lambda_function: not gzipped, retrying as "
                        "uncompressed: " + str(e))
            with open(DOWNLOADED_SNAPSHOT_FILE_NAME) as data_file:
                data = json.load(data_file)

        load_data_into_es(data, iso_now_time, es)
    else:
        logger.info("lambda_function: Not a Config Snapshot file!")

    return
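
A quick way to exercise the handler locally is to pass it a hand-built S3 event. This is only a sketch: the endpoint, bucket, and key are placeholders, it assumes the module-level names (logger, DOWNLOADED_SNAPSHOT_FILE_NAME, load_data_into_es) from the full source are in scope, and the download step still needs the object to exist (or boto3 to be stubbed).

import os

os.environ['aws_config_es_endpoint'] = 'http://localhost:9200'  # placeholder endpoint

fake_event = {  # minimal shape of an S3 put-notification record
    "Records": [{
        "s3": {
            "bucket": {"name": "my-config-bucket"},  # placeholder bucket
            "object": {"key": "123_ConfigSnapshot_20150101.json.gz"}  # placeholder key
        }
    }]
}
lambda_handler(fake_event, context=None)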
Example #2
def ingest_to_elastic(query_results, index="bing", keys=KEYS):
    es = elastic.ElasticSearch()
    es.create_indices(mappings=elastic.BING_MAPPINGS)
    for record in query_results["value"]:
        # Zip each Bing result's fields onto the index's document keys.
        body = dict(zip(keys,
                        [record["description"], record["url"], record["name"],
                         record["provider"][0]["name"], record["datePublished"],
                         record["description"] + record["name"]]))
        # add_to_index is a bound method, so the instance is not passed again;
        # the index name doubles as the document type here.
        es.add_to_index(index, index, body)
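
A sketch of calling ingest_to_elastic, assuming query_results follows the shape the loop expects (a Bing-style response with a "value" list of result records); every field value below is invented for illustration.

sample_results = {
    "value": [{
        "name": "Arson statistics overview",
        "url": "https://example.com/arson",
        "description": "Yearly arson counts by state.",
        "provider": [{"name": "Example News"}],
        "datePublished": "2015-01-01T00:00:00",
    }]
}
ingest_to_elastic(sample_results, index="bing")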
Example #3
def ingest_facts_into_es():
    # Initialize the Spark session (with Hive support) and read the data in
    spark = SparkSession.builder.enableHiveSupport().getOrCreate()
    es = elastic.ElasticSearch()
    es.create_indices()

    population_df = spark.read.csv(POPULATION_PATH, header=True)
    # Look up the stripped state name; unmatched states fall back to "0".
    state_rename_udf = functions.udf(
        lambda x: STATES.get(x.strip(), "0"), "string")

    # For each year's arson CSV, compute summary stats and per-capita density.
    for year in YEARS_AVAILABLE:
        df = spark.read.csv(os.path.join(ARSON_PATH,
                                         FILE_PREFIX + year + ".csv"),
                            header=True)
        calculate_stats_with_spark(df, year, es)
        calculate_arson_density(population_df, df, year, state_rename_udf, es)
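
For reference, a standalone sketch of how a lookup UDF like state_rename_udf behaves, with a stand-in STATES mapping (the real mapping lives in lookup_tables and is not shown in this snippet):

from pyspark.sql import SparkSession, functions

spark = SparkSession.builder.getOrCreate()
STATES = {"California": "CA", "Texas": "TX"}  # stand-in for the real lookup table

rename = functions.udf(lambda x: STATES.get(x.strip(), "0"), "string")
df = spark.createDataFrame([(" California ",), ("Nowhere",)], ["state"])
df.select(rename("state").alias("code")).show()
# " California " strips to a known state -> "CA"; "Nowhere" falls back to "0".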
Example #4
    app_log = logging.getLogger("app")
    app_log.setLevel(level=logging.INFO)

    # Setup the verbose logger
    verbose_log = logging.getLogger("verbose")
    if args.verbose:
        verbose_log.setLevel(level=logging.INFO)
    else:
        verbose_log.setLevel(level=logging.FATAL)

    # Mute all other loggers. Note that logging.getLogger() with no name
    # returns the actual root logger; getLogger("root") would only create
    # an ordinary logger named "root".
    logging.getLogger().setLevel(level=logging.FATAL)
    logging.getLogger("botocore.credentials").setLevel(level=logging.FATAL)
    logging.getLogger(
        "botocore.vendored.requests.packages.urllib3.connectionpool").setLevel(
            level=logging.FATAL)
    logging.getLogger("boto3").setLevel(level=logging.FATAL)
    logging.getLogger("requests").setLevel(level=logging.FATAL)

    if args.destination is None:
        app_log.error(
            "You need to enter the IP of your Elasticsearch instance")
        exit()

    destination = "http://" + args.destination

    verbose_log.info("Setting up the elasticsearch instance")

    main(args, elastic.ElasticSearch(connections=destination, log=verbose_log))
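
This example starts mid-script, so args must already exist. A plausible argparse setup, with flag names inferred from the attributes used above (not confirmed by this snippet), would be:

import argparse

parser = argparse.ArgumentParser(
    description="Load AWS Config data into Elasticsearch")  # assumed description
parser.add_argument("--destination",
                    help="IP of the Elasticsearch instance")
parser.add_argument("--verbose", action="store_true",
                    help="enable verbose logging")
args = parser.parse_args()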
Example #5
File: mapper.py  Project: ahbuckfire/arson
import collections
import json

import pandas as pd

import elastic
from lookup_tables import STATE_CODES, STATES

es = elastic.ElasticSearch()

COLOR_SCALE = [[0.0, 'rgb(242,240,247)'], [0.2, 'rgb(218,218,235)'],
               [0.4, 'rgb(188,189,220)'], [0.6, 'rgb(158,154,200)'],
               [0.8, 'rgb(117,107,177)'], [1.0, 'rgb(84,39,143)']]
# A list, not map(): a map object would be exhausted after one pass in Python 3.
YEARS_AVAILABLE = [str(year) for year in range(2009, 2015)]


def build_density_df(hits):
    # Map each state's two-letter code to its population density.
    densities = {}
    for row in hits:
        source = row["_source"]
        densities[STATE_CODES[source["state"]]] = source["pop_density"]
    return pd.DataFrame(list(densities.items()),
                        columns=["state", "pop_density"])
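
COLOR_SCALE and the state/density frame strongly suggest a US choropleth. A hedged sketch of how the frame could feed one via plotly (plotly itself is an assumption; it is not imported in this snippet):

import plotly.graph_objects as go

def plot_density(df):
    # df carries "state" (two-letter codes) and "pop_density" columns.
    fig = go.Figure(go.Choropleth(
        locations=df["state"],
        z=df["pop_density"].astype(float),
        locationmode="USA-states",
        colorscale=COLOR_SCALE,
    ))
    fig.update_layout(geo_scope="usa")
    fig.show()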


def build_text_df(indices, year):
    # Accumulate per-state hover text, one "<index>: <value>" line per index.
    desc = collections.defaultdict(str)
    for index in indices:
        response = es.query_index(index, year, field="year")
        for row in response:
            source = row["_source"]
            desc[source["state"]] += index + ": " + str(source[index]) + "<br>"