Ejemplo n.º 1
0
def main(context):
    """Stream tweets matching 'CVE' and push each through the operator flow.

    Builds the run context, wires the pipeline
    (extract tweet -> extract CVEs -> save to bucket -> end), then listens
    to the Twitter stream forever, restarting 5 seconds after any error.

    NOTE(review): `api`, `tweepy`, `time`, `gva` and the operator classes
    are expected to be defined elsewhere in this module — confirm `api`
    is an authenticated tweepy handle.
    """
    # create the run context from the config and context passed to main
    # this would allow dates etc to be passed from something external
    context = build_context(**context)

    # operator pipeline: pull tweet details, extract CVE ids, persist, end
    extract_tweet = ExtractTweetDetailsOperator()
    extract_cves = ExtractCvesFromTweetOperator()
    save = SaveToBucketOperator(
        project=context['config'].get('target_project'),
        to_path=context['config'].get('target_path'),
        schema=Schema(context),
        compress=context['config'].get('compress'))
    end = EndOperator()

    flow = extract_tweet > extract_cves > save > end

    while True:
        try:
            listener = TwitterListener(api, flow)
            stream = tweepy.Stream(api.auth, listener, tweet_mode="extended")
            stream.filter(track=["CVE"], languages=["en"])
        except KeyboardInterrupt:
            print('Keyboard Interrupt')
            # raise SystemExit rather than quit(): quit() is injected by the
            # `site` module for interactive use and is not guaranteed to
            # exist when scripts run with -S or in embedded interpreters
            raise SystemExit()
        except Exception as err:
            # broad catch is deliberate here: any streaming failure should
            # be logged and the stream restarted, not crash the service
            print(
                F"Error {type(err).__name__} {err} - restarting in 5 seconds")
            print(gva.errors.RenderErrorStack())

        time.sleep(5)
Ejemplo n.º 2
0
def main(context: dict = None):
    """Run the QID_CVE_MAP daily job for each date in the requested range.

    Parameters:
        context: optional run settings; may carry 'start_date' and
            'end_date' (both default to today's date).
    """
    # avoid a mutable default argument — a `{}` default is a single shared
    # object reused across every call to main()
    if context is None:
        context = {}
    # create the run context from the config and context passed to main
    # this would allow dates etc to be passed from something external
    context = build_context(**context)

    # get or default the dates
    start_date = context.get('start_date', datetime.date.today())
    end_date = context.get('end_date', datetime.date.today())

    # for each date in the range - run the daily job
    for date in date_range(start_date, end_date):
        logger.debug(F'Starting QID_CVE_MAP job for {date}')
        # copy so the per-day 'date' key doesn't leak into later iterations
        my_context = context.copy()
        my_context['date'] = date
        execute_day_of_processing(context=my_context)
        logger.debug(F'Completed QID_CVE_MAP job for {date}')
Ejemplo n.º 3
0
def main(context: dict = None):
    """Run the MITRE_EDB_MAP ingest flow once.

    Parameters:
        context: optional run settings passed through to build_context.

    Returns:
        The literal string 'Finished Ingest' when the flow completes.
    """
    # the original used a mutable default ({}) AND wrote 'config_file' into
    # it, mutating the shared default object (and any caller-supplied dict);
    # use a None sentinel and copy so neither is ever mutated
    context = {} if context is None else dict(context)
    context['config_file'] = 'MITRE_EDB_MAP.metadata'
    # create the run context from the config and context passed to main
    # this would allow dates etc to be passed from something external
    context = build_context(**context)

    # create the flow
    flow = build_flow(context)
    flow.run(
        data={},
        context=context,
        trace_sample_rate=context['config'].get('sample_rate'))

    # finalize the operators
    summary = flow.finalize()
    logger.trace(summary)
    return 'Finished Ingest'
Ejemplo n.º 4
0
def main(context: dict = None):
    """Run the flow once for every distinct CVE produced by the readers.

    Parameters:
        context: optional run settings passed through to build_context.
    """
    # avoid a mutable default argument — `{}` is shared across calls
    if context is None:
        context = {}
    # create the run context from the config and context passed to main
    # this would allow dates etc to be passed from something external
    context = build_context(**context)
    flow = build_flow(context)

    # load the readers from the config
    reader = iterate_through_readers(context)
    # we're only interested in the CVE column
    reader = dictset.select_from(reader, columns=['CVE'])
    # we only want to see each CVE once
    reader = dictset.distinct(reader)

    # sample rate is loop-invariant — look it up once
    sample_rate = context['config'].get('sample_rate')
    for line in reader:
        flow.run(
                data=line,
                context=context,
                trace_sample_rate=sample_rate)
    # NOTE(review): unlike the sibling jobs in this file, this one never
    # calls flow.finalize() — confirm whether that is intentional
Ejemplo n.º 5
0
def main(context: dict = None):
    """Run the flow once per record produced by the configured data reader.

    Parameters:
        context: optional run settings passed through to build_context.
    """
    # avoid a mutable default argument — `{}` is shared across calls
    if context is None:
        context = {}
    # create the run context from the config and context passed to main
    # this would allow dates etc to be passed from something external
    context = build_context(**context)

    reader = create_data_reader(context)

    # create the flow
    flow = build_flow(context)

    # sample rate is loop-invariant — look it up once
    sample_rate = context['config'].get('sample_rate')
    for record in reader:
        flow.run(
            data=record,
            context=context,
            trace_sample_rate=sample_rate)

    # finalize the operators
    summary = flow.finalize()
    logger.trace(summary)
Ejemplo n.º 6
0
def main(context: dict = None):
    """Ingest the most recent available MFL, searching back one day at a time.

    Parameters:
        context: optional run settings passed through to build_context.

    Returns:
        The literal string 'Finished Ingest' when the flow completes.
    """
    # the original used a mutable default ({}) AND wrote 'config_file' into
    # it, mutating the shared default object (and any caller-supplied dict);
    # use a None sentinel and copy so neither is ever mutated
    context = {} if context is None else dict(context)
    context['config_file'] = 'MFL_LIST.metadata'
    # create the run context from the config and context passed to main
    # this would allow dates etc to be passed from something external
    context = build_context(**context)

    # the MFL isn't updated every day, so we probably need to look
    # back over previous days for the latest MFL
    # NOTE(review): this loop has no lower bound — if no MFL ever exists it
    # searches backwards forever; consider a maximum look-back
    found_mfl = False
    look_at_date = datetime.datetime.today()

    while not found_mfl:

        # the day to look for a copy of the MFL
        my_context = context.copy()
        my_context['look_at_date'] = look_at_date

        # create the data reader
        # convert to a list, the MFL is tiny (about 1Kb at time of writing)
        reader = list(create_data_reader(my_context))

        # if we found an MFL we can stop, otherwise look a day further in the past
        found_mfl = len(reader) > 0
        look_at_date = look_at_date - datetime.timedelta(1)

    # build the flow
    flow = build_flow(context)

    # sample rate is loop-invariant — look it up once
    sample_rate = context['config'].get('sample_rate')
    # execute the pipeline for each record in the reader
    for line in distinct(reader):
        flow.run(data=line,
                 context=context,
                 trace_sample_rate=sample_rate)

    # finalize the operators
    summary = flow.finalize()
    logger.trace(summary)
    return 'Finished Ingest'
Ejemplo n.º 7
0
def main(context: dict = None):
    """Ingest the NVD CVE list — one source file per year, 2018..current.

    Parameters:
        context: optional run settings passed through to build_context.

    Returns:
        The literal string 'Finished Ingest' when the flow completes.
    """
    # the original used a mutable default ({}) AND wrote 'config_file' into
    # it, mutating the shared default object (and any caller-supplied dict);
    # use a None sentinel and copy so neither is ever mutated
    context = {} if context is None else dict(context)
    context['config_file'] = 'NVD_CVE_LIST.metadata'
    # create the run context from the config and context passed to main
    # this would allow dates etc to be passed from something external
    context = build_context(**context)

    # create the flow
    flow = build_flow(context)

    # sample rate is loop-invariant — look it up once
    sample_rate = context['config'].get('sample_rate')
    # NVD files are separate files per year
    current_year = datetime.datetime.now().year
    for year in range(2018, current_year + 1):
        my_context = context.copy()
        my_context['year'] = year
        flow.run(
            data={},
            context=my_context,
            trace_sample_rate=sample_rate)

    # finalize the operators
    summary = flow.finalize()
    logger.trace(summary)
    return 'Finished Ingest'