Example #1
    def test_ingest_sprayday_hadoop_true(self, mock):
        """
        Test that ingest_sprayday actually sends the expected payload to Druid
        when using hadoop index tasks
        """
        file_url = "https://example.com/data.json"
        intervals = "2013-01-01/2013-01-02"

        with self.settings(DRUID_USE_INDEX_HADOOP=True):
            schema = get_sprayday_hadoop_schema()
            schema['spec']['dataSchema']['dataSource'] = sprayday_datasource
            schema['spec']['ioConfig']['inputSpec']['paths'] = file_url

            parse_spec = schema['spec']['dataSchema']['parser']['parseSpec']
            parse_spec['dimensionsSpec'] = dimensions_spec

            timestamp_column = settings.DRUID_TIMESTAMP_COLUMN
            parse_spec['timestampSpec']['column'] = timestamp_column

            schema['spec']['dataSchema']['granularitySpec']['intervals'] =\
                [intervals]
            schema_json = json.dumps(schema)

            ingest_sprayday(file_url, intervals)

            self.assertTrue(mock.called)
            args, kwargs = mock.call_args_list[0]
            self.assertEqual(args[0], schema_json)
            self.assertEqual(args[1], get_druid_indexer_url())
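
The mock argument in this test is injected by a @patch(...) decorator that the snippet does not show. Below is a minimal, self-contained sketch of that injection pattern; the Indexer class is a made-up stand-in for whatever helper the real test patches, not part of the project.

from unittest import TestCase
from unittest.mock import patch


class Indexer:
    """Stand-in for the helper that posts a task spec to the Druid indexer."""

    def post(self, payload, url):
        raise NotImplementedError  # the real helper would issue the HTTP request


class PatchInjectionExample(TestCase):
    # @patch.object swaps Indexer.post for a MagicMock and passes that mock to
    # the test as an extra argument -- the same mechanism that supplies "mock"
    # to test_ingest_sprayday_hadoop_true above.
    @patch.object(Indexer, "post")
    def test_mock_records_call_args(self, mock):
        Indexer().post('{"spec": {}}', "http://localhost:8090/druid/indexer/v1/task")

        self.assertTrue(mock.called)
        args, kwargs = mock.call_args_list[0]
        self.assertEqual(args[0], '{"spec": {}}')
        self.assertEqual(args[1], "http://localhost:8090/druid/indexer/v1/task")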
Example #2
def get_historical_data(day=None, month=None, year=None):
    """
    Gets and stores data based on one or all of day, month and year,
    then ingests it into Druid
    """
    if any([year, month, day]):
        path = "/".join([str(x) for x in [year, month, day] if x is not None])
        filename = "{datasource}/".format(
            datasource=settings.DRUID_SPRAYDAY_DATASOURCE) + path +\
            "/sprayday.json"

        queryset = SprayDay.objects.all()
        if day:
            queryset = queryset.filter(spray_date__day=day)
        if month:
            queryset = queryset.filter(spray_date__month=month)
        if year:
            queryset = queryset.filter(spray_date__year=year)

        if queryset:
            intervals = get_druid_intervals(queryset)
            path = create_sprayday_druid_json_file(queryset=queryset,
                                                   filename=filename)
            url = get_s3_url(path)
            return ingest_sprayday(url, intervals=intervals)
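
For illustration, any subset of the three date parts can be passed; the hypothetical calls below (dates are made up) show how the target filename is derived.

# Hypothetical calls -- filenames shown relative to settings.DRUID_SPRAYDAY_DATASOURCE
get_historical_data(year=2016)                   # -> "<datasource>/2016/sprayday.json"
get_historical_data(year=2016, month=9, day=15)  # -> "<datasource>/2016/9/15/sprayday.json"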
Example #3
def get_data(minutes=settings.DRUID_BATCH_PROCESS_TIME_INTERVAL):
    """
    Gets data submitted in the last x minutes, stores it and
    ingests it into Druid
    """
    queryset = get_sprayday_queryset_from_x_minutes(minutes)
    if queryset:
        # get intervals
        first = queryset.first().data['_submission_time']
        last = queryset.last().data['_submission_time']
        intervals = get_druid_intervals(queryset, use_timestamp=True)
        filename = "{datasource}/minutes".format(
            datasource=settings.DRUID_SPRAYDAY_DATASOURCE) + \
            "/sprayday-{start_time}-{end_time}.json".format(start_time=first,
                                                            end_time=last)

        path = create_sprayday_druid_json_file(queryset=queryset,
                                               filename=filename)
        url = get_s3_url(path)
        return ingest_sprayday(url, intervals=intervals)
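
A hypothetical invocation: the call below would pick up everything submitted in the last 30 minutes and write it to a file named after the oldest and newest _submission_time values in the queryset.

# Hypothetical call; the file lands at
# "<datasource>/minutes/sprayday-<first>-<last>.json" before being ingested.
get_data(minutes=30)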
Example #4
    def handle(self, *args, **options):
        path = options['path']
        ingest_sprayday(path)
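
For context, this handle method would normally live inside a Django management command. A minimal sketch of the surrounding Command class follows, assuming the command takes the file path (or URL) as a single positional argument; the import location of ingest_sprayday is a guess, and the command name comes from the module's file name, which the snippet does not show.

from django.core.management.base import BaseCommand

# Assumed import location; adjust to wherever ingest_sprayday is defined.
from mspray.apps.warehouse.tasks import ingest_sprayday


class Command(BaseCommand):
    help = "Ingest a spray-day JSON file into Druid"

    def add_arguments(self, parser):
        # Path (or URL) of the file to hand to ingest_sprayday
        parser.add_argument('path')

    def handle(self, *args, **options):
        path = options['path']
        ingest_sprayday(path)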