Example #1
def submitJob(job_id):
    '''
    Whenever a job's status is set to active in the database, the
    signal attached to the model fires this Celery task, which
    submits the job to the tool for processing.
    '''
    from NMTK_server import models
    job=models.Job.objects.get(pk=job_id)
    # Get a logger to log status for this task.
    logger=submitJob.get_logger()
    logger.debug('Submitting job %s to tool %s for processing', job.pk,
                 job.tool)
    
    configuration={'analysis settings': job.config }
    configuration['job']= {'tool_server_id': "%s" % (job.tool.tool_server.tool_server_id,),
                           'job_id': str(job.job_id),
                           'timestamp': timezone.now().isoformat() }

    config_data=json.dumps(configuration, use_decimal=True) #cls=DjangoJSONEncoder)
    digest_maker =hmac.new(str(job.tool.tool_server.auth_token), 
                           config_data, 
                           hashlib.sha1)
    digest=digest_maker.hexdigest()
    
    files= {'config': ('config', config_data) }
    for jobfile in job.jobfile_set.all():
        if jobfile.datafile.processed_file:
            files[jobfile.namespace]=(jobfile.datafile.processed_file.name, jobfile.datafile.processed_file)
        else:
            files[jobfile.namespace]=(jobfile.datafile.file.name, jobfile.datafile.file)
    logger.debug('Files for job are %s', files)
    r=requests.post(job.tool.analyze_url, files=files,
                    headers={'Authorization': digest })
    logger.debug("Submitted job to %s tool, response was %s (%s)", 
                 job.tool, r.text, r.status_code)
    if r.status_code != 200:
        job.status=job.TOOL_FAILED
        js=models.JobStatus(job=job,
                            message=('Tool failed to accept ' + 
                                     'job (return code %s)') % (r.status_code,))
        js.save()
        job.save()
    else:
        status_m=models.JobStatus(
            message='Submitted job to {0} tool, response was {1} ({2})'.format(
                job.tool, r.text, r.status_code),
            timestamp=timezone.now(),
            job=job)
        status_m.save()
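The Authorization header here is an HMAC-SHA1 digest of the posted config body, keyed with the tool server's auth_token; the receiving tool server is expected to recompute and compare it. A minimal sketch of that check (the helper name and arguments are illustrative, not part of NMTK):

import hashlib
import hmac

def verify_signature(raw_config, auth_token, received_digest):
    # Recompute the SHA-1 HMAC over the exact bytes posted as the 'config'
    # file and compare it with the Authorization header value.
    expected = hmac.new(str(auth_token), raw_config, hashlib.sha1).hexdigest()
    # hmac.compare_digest (Python 2.7.7+) avoids timing side channels.
    return hmac.compare_digest(expected, received_digest)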
Example #2
def updateStatus(request):
    '''
    Update the status of a job in the database.  We expect three keys
    in the JSON payload:
     - timestamp - a timestamp indicating when the status update was
                   generated by the tool (ISO 8601 formatted)
     - status - a status message (max length 1024 bytes) used to
                update the job status.
     - category - a category for the message; valid values are
                  Debug, Message, Warning, or Error (default is Message)
    '''
    logger.debug('Updating status for job id %s', request.NMTK_JOB.pk)
    data = request.FILES['config'].read()
    json_data = json.loads(data)
    logger.debug('Read updated status of %s', json_data)
    try:
        # Convert the client supplied category to one of our choice values.
        if 'category' in json_data:
            logger.debug('Key is there!')
            for k, v in models.JobStatus.CATEGORY_CHOICES:
                logger.debug('Check for match! %s, %s', v.lower(),
                             json_data['category'].lower())
                if v.lower() == json_data['category'].lower():
                    json_data['category'] = k
                    break
            else:
                logger.debug('Did not find matching status! using info')
                del json_data['category']
        # Parse the timestamp provided to a regular datetime value.
        if 'timestamp' in json_data:
            try:
                json_data['timestamp'] = dateutil.parser.parse(
                    json_data['timestamp'])
            except Exception as e:
                logger.error(
                    'Tool server passed in invalid timestamp data: %s',
                    json_data['timestamp'])
                del json_data['timestamp']
    except Exception:
        # Fall back to treating the raw upload as the status message.
        json_data = {'status': data}
    # Some defaults to use for missing data.
    status_data = {
        'timestamp': timezone.now(),
        'category': models.JobStatus.CATEGORY_STATUS
    }
    if ('category' in json_data
            and json_data['category'] in (models.JobStatus.CATEGORY_SYSTEM, )):
        logger.info('Tool tried to set category to a system category!')
        status_data['category'] = models.JobStatus.CATEGORY_ERROR
    status_data.update(json_data)
    logger.debug('Final updated status is %s', status_data)

    status_m = models.JobStatus(message=status_data['status'],
                                timestamp=status_data['timestamp'],
                                job=request.NMTK_JOB,
                                category=status_data['category'])
    status_m.save()
    return HttpResponse(json.dumps(
        {'status': 'Status added with key of %s' % (status_m.pk)}),
                        content_type='application/json')
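Based on the docstring above, a status update posted by a tool might look like the following; the endpoint URL is illustrative and the request would normally also carry the tool server's authentication (omitted here):

import json
import requests

# Illustrative endpoint; the real URL is provided by the NMTK server.
update_url = 'http://nmtk.example/job/status_update'
payload = {
    'timestamp': '2014-05-01T12:34:56+00:00',  # ISO 8601, parsed via dateutil
    'status': 'Completed step 2 of 5',         # becomes the JobStatus message
    'category': 'Message',                     # Debug, Message, Warning, or Error
}
requests.post(update_url, files={'config': ('config', json.dumps(payload))})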
Example #3
def updateStatus(request):
    '''
    Update the status of a job in the database.  We expect two keys
    in the JSON payload:
     - timestamp - a timestamp indicating when the status update was
                   generated by the tool
     - status - a status message (max length 1024 bytes) used to
                update the job status.
    '''
    logger.debug('Updating status for job id %s', request.NMTK_JOB.pk)
    data = request.FILES['data'].read()
    logger.debug('Read updated status of %s', data)

    status_m = models.JobStatus(message=data,
                                timestamp=timezone.now(),
                                job=request.NMTK_JOB)
    status_m.save()
    return HttpResponse(json.dumps(
        {'status': 'Status added with key of %s' % (status_m.pk)}),
                        content_type='application/json')
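This older variant stores the raw upload from the 'data' file as the status message, so a client-side post is simpler (again, the URL is illustrative and authentication is omitted):

import requests

# Illustrative endpoint; the real URL is provided by the NMTK server.
update_url = 'http://nmtk.example/job/status_update'
requests.post(update_url, files={'data': ('data', 'Completed step 2 of 5')})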
Example #4
def processResults(request):
    '''
    This URL receives results from the tool server.  Although it was
    originally meant to handle only successful results, it now accepts
    both success and failure payloads, distinguished by the 'status'
    value in the uploaded config document.
    '''
    config = json.loads(request.FILES['config'].read())

    base_description = "Results from '{0}'".format(
        request.NMTK_JOB.description)
    if config['status'] == 'results':
        models.JobStatus(message='Received results from Tool Server',
                         timestamp=timezone.now(),
                         job=request.NMTK_JOB).save()
        if ('results' not in config or 'field' not in config['results']
                or 'file' not in config['results']):
            logger.error(
                'Results received with no valid results key ' +
                'in config (old API?) (%s)', config)
            models.JobStatus(
                message='Unable to authenticate request from tool server.',
                timestamp=timezone.now(),
                job=request.NMTK_JOB).save()
            return HttpResponseServerError('Invalid result format')
        result_field = config['results'].get('field', None)
        result_field_units = None
        field_units = {}
        # field units can be an object also, in which case it's really
        # a set of fields and their units:
        if 'units' in config['results']:
            if result_field and isinstance(
                    config['results'].get('units', None), (str, unicode)):
                result_field_units = config['results']['units']
                field_units[result_field] = result_field_units
            elif 'units' in config['results']:
                try:
                    result_field_units = config['results']['units'].get(
                        result_field, None)
                    field_units = config['results']['units']
                except Exception as e:
                    logger.debug(
                        'Failed to parse field units' +
                        ', expected an object: %s', e)

        # the optional ordered list of fields, we require a list
        # of field names, and use a default of nothing if such a list isn't
        # provided.
        field_order = config['results'].get('field_order', None)
        if field_order is None:
            field_order = []
        elif not isinstance(
                field_order,
            (list, tuple),
        ):
            logger.error(
                'Result field_order should be a list or ' +
                'tuple, not %s: %s', type(field_order), str(field_order))
            field_order = []
        logger.debug('Default field order is %s', field_order)
        # Now we have the field order provided by the tool itself
        # which we need to (eventually) augment with the fields from
        # the job itself.

        result_file = config['results']['file']

        if config['results']['file'] not in request.FILES:
            logger.error('Specified file for results was not uploaded')
            models.JobStatus(
                message='Tool server failed to upload required results file.',
                timestamp=timezone.now(),
                job=request.NMTK_JOB).save()
            return HttpResponseServerError('Invalid result file specified')
        total = len(request.FILES) - 1

        i = 0
        timedata = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
        for namespace in request.FILES.keys():
            if namespace == 'config':
                continue
            i += 1
            if (total > 1):
                description = base_description + \
                    ' ({0} of {1} files)'.format(i, total)
            else:
                description = base_description
            kwargs = {}
            if namespace == result_file:
                primary = True
                field = result_field
            else:
                field = None
                primary = False
            result = models.DataFile(
                user=request.NMTK_JOB.user,
                #                                      name="job_{0}_results".format(
                # request.NMTK_JOB.pk),
                name="{0}_{1}".format(namespace, timedata),
                # name=os.path.basename(request.FILES[result_file].name),
                description=description,
                content_type=request.FILES[namespace].content_type,
                type=models.DataFile.JOB_RESULT,
                fields=field_order,
                units=field_units,
                result_field=field,
                result_field_units=result_field_units)
            filename = os.path.basename(request.FILES[namespace].name)
            result.file.save(filename,
                             ContentFile(request.FILES[namespace].read()),
                             save=False)

            if namespace == result_file:
                # Pass in the job here so that the data file processor knows to
                # update the job when this is done (only happens with primary
                # file.)
                result.save(job=request.NMTK_JOB)
            else:
                result.save()
            # Save the linkage back to the job...
            rf = models.ResultsFile(job=request.NMTK_JOB,
                                    datafile=result,
                                    primary=primary)
            rf.save()

        request.NMTK_JOB.status = request.NMTK_JOB.POST_PROCESSING
        request.NMTK_JOB.save()

        models.JobStatus(message='Post processing results file(s)',
                         timestamp=timezone.now(),
                         job=request.NMTK_JOB).save()
    elif config['status'] == 'error':
        logger.debug('config is %s', config)
        models.JobStatus(message='\n'.join(config['errors']),
                         timestamp=timezone.now(),
                         job=request.NMTK_JOB).save()
        request.NMTK_JOB.status = request.NMTK_JOB.FAILED
        request.NMTK_JOB.save()

    return HttpResponse(json.dumps({'status': 'Results saved'}),
                        content_type='application/json')
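A sketch of the 'config' document this version of processResults expects for a successful run, inferred from the checks above; the field names and values are illustrative:

# Successful-run payload: the results file itself is uploaded in the same
# multipart request under the namespace named by results['file'].
config = {
    'status': 'results',
    'results': {
        'file': 'result',                    # namespace of the uploaded results file
        'field': 'walkability',              # primary result field (illustrative)
        'units': {'walkability': 'score'},   # per-field units, or a single string
        'field_order': ['nmtk_id', 'walkability'],  # optional ordered field list
    },
}
# On failure the tool posts {'status': 'error', 'errors': ['message', ...]} instead.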
Example #5
def processResults(request):
    '''
    This URL receives results from the tool server.  Although it was
    originally meant to handle only successful results, it now accepts
    both success and failure payloads, distinguished by the 'status'
    value in the uploaded config document.
    '''
    config = json.loads(request.FILES['config'].read())

    base_description = "Results from '{0}'".format(
        request.NMTK_JOB.description)
    if config['status'] == 'results':
        models.JobStatus(message='Received results from Tool Server',
                         timestamp=timezone.now(),
                         job=request.NMTK_JOB).save()
        if ('results' not in config
                or 'field' not in config['results']
                or 'file' not in config['results']):
            logger.error(
                'Results received with no valid results key ' +
                'in config (old API?) (%s)', config)
            models.JobStatus(
                message='Unable to authenticate request from tool server.',
                timestamp=timezone.now(),
                job=request.NMTK_JOB).save()
            return HttpResponseServerError('Invalid result format')
        result_field = config['results']['field']
        result_field_units = config['results'].get('units', None)

        result_file = config['results']['file']

        if config['results']['file'] not in request.FILES:
            logger.error('Specified file for results was not uploaded')
            models.JobStatus(
                message='Tool server failed to upload required results file.',
                timestamp=timezone.now(),
                job=request.NMTK_JOB).save()
            return HttpResponseServerError('Invalid result file specified')
        total = len(request.FILES) - 1

        i = 0
        for namespace in request.FILES.keys():
            if namespace == 'config':
                continue
            i += 1
            if (total > 1):
                description = base_description + \
                    ' ({0} of {1} files)'.format(i, total)
            else:
                description = base_description
            kwargs = {}
            if namespace == result_file:
                primary = True
                field = result_field
            else:
                field = None
                primary = False
            result = models.DataFile(
                user=request.NMTK_JOB.user,
                name="job_{0}_results".format(request.NMTK_JOB.pk),
                # name=os.path.basename(request.FILES[result_file].name),
                description=description,
                content_type=request.FILES[namespace].content_type,
                type=models.DataFile.JOB_RESULT,
                result_field=field,
                result_field_units=result_field_units)
            filename = os.path.basename(request.FILES[namespace].name)
            result.file.save(filename,
                             ContentFile(request.FILES[namespace].read()),
                             save=False)

            if namespace == result_file:
                # Pass in the job here so that the data file processor knows to
                # update the job when this is done (only happens with primary
                # file.)
                result.save(job=request.NMTK_JOB)
            else:
                result.save()
            # Save the linkage back to the job...
            rf = models.ResultsFile(job=request.NMTK_JOB,
                                    datafile=result,
                                    primary=primary)
            rf.save()

        request.NMTK_JOB.status = request.NMTK_JOB.POST_PROCESSING
        request.NMTK_JOB.save()

        models.JobStatus(message='Post processing results file(s)',
                         timestamp=timezone.now(),
                         job=request.NMTK_JOB).save()
    elif config['status'] == 'error':
        logger.debug('config is %s', config)
        models.JobStatus(message='\n'.join(config['errors']),
                         timestamp=timezone.now(),
                         job=request.NMTK_JOB).save()
        request.NMTK_JOB.status = request.NMTK_JOB.FAILED
        request.NMTK_JOB.save()

    return HttpResponse(json.dumps({'status': 'Results saved'}),
                        content_type='application/json')
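This older variant requires both 'field' and 'file' under 'results' and treats 'units' as a single value for the one result field; a matching payload (illustrative values) might be:

config = {
    'status': 'results',
    'results': {
        'file': 'result',        # namespace of the uploaded results file
        'field': 'walkability',  # result field name (illustrative)
        'units': 'score',        # optional; a single value in this API version
    },
}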
Example #6
def importDataFile(datafile, job_id=None):
    from NMTK_server import models
    datafile.status_message=None
    try:
        loader=NMTKDataLoader(datafile.file.path, 
                              srid=datafile.srid)
        if loader.is_spatial:
            datafile.srid=loader.info.srid
            datafile.srs=loader.info.srs
            datafile.geom_type=loader.info.type
            logger.debug('Loader extent is %s', loader.info.extent)
            extent=geos.Polygon.from_bbox(loader.info.extent)
            logger.debug("Extent is 'srid=%s;%s'::geometry", loader.info.srid, 
                         extent,)
            if datafile.srid:
                extent.srid=int(loader.info.srid)
                extent.transform(4326)
            logger.debug("Extent is 'srid=%s;%s'::geometry", 4326, 
                         extent,)
            datafile.extent=extent
        datafile.feature_count=loader.info.feature_count
        if loader.is_spatial and not datafile.srid:
            datafile.status=datafile.IMPORT_FAILED
            datafile.status_message='Please specify SRID for this file (unable to auto-identify SRID)'
        elif not job_id:
            datafile.status=datafile.IMPORTED
        else:
            datafile.status=datafile.IMPORT_RESULTS_COMPLETE
        datafile.fields=loader.info.fields
        # Create an empty file using ContentFile, then we can overwrite it 
        # with the desired GeoJSON data.
        if loader.is_spatial: 
            suffix='geojson'
        else: 
            suffix='json'
        if datafile.status in (datafile.IMPORTED, datafile.IMPORT_RESULTS_COMPLETE):
            datafile.processed_file.save('{0}.{1}'.format(datafile.pk, suffix), 
                                         ContentFile(''))
            loader.export_json(datafile.processed_file.path)
            generate_datamodel(datafile, loader)
            # Here we load the spatialite data using the model that was created
            # by generate_datamodel.  We need to use this to get the range
            # and type information for each field...
            try:
                field_attributes={}
                qs=getQuerySet(datafile)
                field_mappings=[(django_model_fields.IntegerField, 'integer',),
                                (django_model_fields.AutoField, 'integer',), # Required because nmtk_id is an autofield..
                                (django_model_fields.BooleanField, 'boolean',),
                                (django_model_fields.DecimalField, 'float',), # Special case holding FIPS
                                (django_model_fields.TextField, 'text',),
                                (django_model_fields.FloatField,'float'),
                                (django_model_fields.DateField, 'date',),
                                (django_model_fields.TimeField, 'time'),
                                (django_model_fields.DateTimeField, 'datetime')]
                if qs.count() > 0:
                    # Get a single row so that we can try to work with the fields.
                    sample_row=qs[0]
                    for field in sample_row._meta.fields:
                        field_name=field.name
                        db_column=field.db_column or field.name
                        # convert the django field type to a text string.
                        for ftype, field_type in field_mappings:
                            if isinstance(field, (ftype,)):
                                break
                        else:
                            logger.info('Unable to map field of type %s (this is expected for GIS fields)', type(field,))
                            continue
                        values_aggregates=qs.aggregate(Count(field_name, distinct=True))
                        field_attributes[db_column]={'type': field_type, 
                                                     'field_name': field_name,
                                                     'distinct': values_aggregates['{0}__count'.format(field_name)]}
                        if field_attributes[db_column]['distinct'] < 10:
                            distinct_values=list(qs.order_by().values_list(field_name, flat=True).distinct())
                            field_attributes[db_column]['values']=distinct_values
                        else:
                            logger.debug('There are more than 10 values for %s (%s), enumerating..', db_column, 
                                         field_attributes[db_column]['distinct'])
                            # formerly the aggregates happened above - with the count. However, Django doesn't
                            # allow those aggregates with boolean fields - so here we split it up to only do the
                            # aggregates in the cases where we have to (i.e., the distinct values is above the threshold.)
                            values_aggregates=qs.aggregate(Max(field_name), Min(field_name), )
                            field_attributes[db_column]['min']= values_aggregates['{0}__min'.format(field_name)]
                            field_attributes[db_column]['max']= values_aggregates['{0}__max'.format(field_name)]
                    datafile.field_attributes=field_attributes
            except Exception as e:
                logger.exception('Failed to get range for model %s',
                                 datafile.pk)
        if job_id:
            try:
                job=models.Job.objects.get(pk=job_id)
                # There might be multiple results files from this job, so we will only
                # mark the job as complete if all the results files are processed.
                if job.status != job.COMPLETE:
                    results_left=job.job_files.filter(status=models.DataFile.PROCESSING_RESULTS).count()
                    if results_left == 0:
                        job.status=job.COMPLETE
                        models.JobStatus(message='Job Completed',
                                         timestamp=timezone.now(),
                                         job=job).save()
                    elif results_left == 1:
                        # Handle the potential race condition here - do we really need this?
                        # sort of.  Since it's possible that two files finish post-processing
                        # at the same time.  In such cases, a second should be more than enough
                        # time to get both committed as complete.
                        time.sleep(1)
                        job=models.Job.objects.get(pk=job_id)
                        if job.status != job.COMPLETE:
                            results_left=job.job_files.filter(status=models.DataFile.PROCESSING_RESULTS).count()
                            if results_left == 0:
                                job.status=job.COMPLETE
                                models.JobStatus(message='Job Completed',
                                                 timestamp=timezone.now(),
                                                 job=job).save()
                    
                    
            except:
                logger.exception('Failed to update job status to complete?!!')
    except Exception as e:
        # NOTE (assumed): the excerpt omits the closing handler for the outer
        # try above; this minimal handler closes it and records the failure.
        logger.exception('Failed to import datafile %s', datafile.pk)
        datafile.status = datafile.IMPORT_FAILED
        datafile.status_message = '%s' % (e,)
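For reference, the field_attributes structure built above ends up shaped roughly like this (column names and values are illustrative): columns with fewer than 10 distinct values get a 'values' list, the rest get 'min'/'max':

# Illustrative shape of datafile.field_attributes after import:
field_attributes = {
    'county_fips': {             # fewer than 10 distinct values: enumerate them
        'type': 'text',
        'field_name': 'county_fips',
        'distinct': 3,
        'values': ['001', '003', '005'],
    },
    'population': {              # higher cardinality: store the range instead
        'type': 'integer',
        'field_name': 'population',
        'distinct': 1287,
        'min': 12,
        'max': 880341,
    },
}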
Example #7
def importDataFile(datafile, job_id=None):
    from NMTK_server import models
    logger = importDataFile.get_logger()
    datafile.status_message = None
    job = None
    try:
        loader = NMTKDataLoader(datafile.file.path,
                                srid=datafile.srid,
                                logger=logger)
        destination = None
        for import_file in loader.extract_files():
            # Figure out where these files need to go.
            if not destination:
                destination = os.path.dirname(datafile.file.path)
                # the first file we get (when destination is null,it's our first
                # loop) is the one that needs to be in the model, handle that
                # here...
                if datafile.file.path != import_file:
                    f = open(import_file)
                    datafile.file.save(os.path.basename(import_file), File(f))
            else:
                shutil.copyfile(
                    import_file,
                    os.path.join(destination, os.path.basename(import_file)))
            logger.debug('Created a new file for %s', import_file)
        logger.info('The file is spatial? %s', loader.is_spatial)
        if loader.is_spatial:
            datafile.srid = loader.info.srid
            datafile.srs = loader.info.srs
            datafile.geom_type = loader.info.type
            logger.debug('Loader extent is %s', loader.info.extent)
            extent = geos.Polygon.from_bbox(loader.info.extent)
            logger.debug(
                "Extent is 'srid=%s;%s'::geometry",
                loader.info.srid,
                extent,
            )
            if datafile.srid:
                extent.srid = int(loader.info.srid)
                extent.transform(4326)
            logger.debug(
                "Extent is 'srid=%s;%s'::geometry",
                4326,
                extent,
            )
            datafile.extent = extent
        datafile.feature_count = loader.info.feature_count
        if not datafile.description:
            datafile.description = loader.info.format
        future_status = datafile.status
        if loader.is_spatial and not datafile.srid:
            future_status = datafile.IMPORT_FAILED
            datafile.status_message = 'Please specify SRID for this file (unable to auto-identify SRID)'
        elif not job_id:
            future_status = datafile.IMPORTED
        else:
            future_status = datafile.IMPORT_RESULTS_COMPLETE

        # We need to merge these things..
        desired_field_order = datafile.fields or []
        # Now that we have a desired field order from the model, we can
        # go the next step of getting job data.
        if job_id:
            try:
                job = models.Job.objects.select_related('tool').get(pk=job_id)
            except Exception as e:
                logger.error('Failed to get job with id of %s',
                             job_id,
                             exc_info=True)

        # From the job data we can get the tool config:
        config_field_list = config_namespace = None
        # Get the list of field names, with the unique ones first...
        tool_config_field_units = {}
        job_config_field_units = datafile.units or {}
        if job:
            tool_config = job.tool.toolconfig.json_config
            # there might be multiple input files, but we'll use the first
            # one as the basis for format for the output, since we don't
            # really have a better option.  The tool developer ought to
            # specify a list of fields in the output if they don't like
            # this behaviour, since this is just a "default" for the order.
            for t in job.tool.toolconfig.json_config['input']:
                if t.get('type', '').lower() == 'file':
                    config_namespace = t.get('name', None)
                    if config_namespace:
                        config_field_list = [
                            f['name'] for f in t.get('elements', [])
                            if isinstance(f.get('name', None), (str, unicode))
                        ]
                        # If there are units, then we store the units
                        # here, so we can use that with the field data.
                        for f in t.get('elements', []):
                            if 'units' in f:
                                tool_config_field_units[f['name']] = f.get(
                                    'units', None)
                            elif 'description' in f:
                                tool_config_field_units[f['name']] = f.get(
                                    'description', None)

                    break
            # Now that we have a list of fields from the tool configuration,
            # get the input fields from the file for each of the tool fields,
            # since we want that to be the default order of output.
            if config_field_list:
                job_config = job.config[config_namespace]
                for f in config_field_list:
                    if f in job_config:
                        if job_config[f].get('type', None) == 'property':
                            if isinstance(job_config[f].get('value', None),
                                          (str, unicode)):
                                desired_field_order.append(
                                    job_config[f]['value'])
                            # Map the tool config field (f) to the selected data file field
                            # (job_config[f]['value'] so we can grab the units from the
                            # tool config.
                            if (datafile.units and f in datafile.units
                                    and 'value' in job_config[f]):
                                job_config_field_units[job_config[f][
                                    'value']] = datafile.units.get(f, '')
                            # If the tool didn't give us the units to use for fields
                            # we can fall back to the tool config to see what they
                            # ought to be.
                            elif (f in tool_config_field_units
                                  and 'value' in job_config[f]):
                                job_config_field_units[job_config[f][
                                    'value']] = tool_config_field_units.get(
                                        f, '')

        # Get the list of actual fields in the input datafile...
        available_fields = loader.info.fields
        # eliminate fields that are not in the list of output fields.
        logger.debug('Desired field order is: %s', desired_field_order)
        logger.debug('Loader provided field order is: %s', available_fields)
        ordered_fields = [
            field for field in desired_field_order if field in available_fields
        ]
        # Add in any fields using the order first, then following with
        # any fields not in the ordered list, but in the output list
        # of fields.
        datafile.fields = list(
            unique_everseen(ordered_fields + available_fields))

        logger.debug('Final field order is %s', datafile.fields)
        # Create an empty file using ContentFile, then we can overwrite it
        # with the desired GeoJSON data.
        if loader.is_spatial:
            suffix = 'geojson'
        else:
            suffix = 'json'
        if future_status in (datafile.IMPORTED,
                             datafile.IMPORT_RESULTS_COMPLETE):
            if datafile.geom_type == 99:
                field_attributes = {}
                # This is a raster...
                for pos, band in enumerate(loader.dl_instance.bands()):
                    field_attributes[pos + 1] = {
                        'type': band.type,
                        'field_name': 'pixel',
                        'min': band.min,
                        'max': band.max
                    }
                datafile.field_attributes = field_attributes
            elif datafile.feature_count:
                logger.error('Working on saving the model!')
                datafile.processed_file.save(
                    '{0}.{1}'.format(datafile.pk, suffix), ContentFile(''))
                loader.export_json(datafile.processed_file.path)
                try:
                    generate_datamodel(datafile, loader, logger)
                except Exception as e:
                    logger.error('Error generating data model: %s',
                                 e,
                                 exc_info=logger.isEnabledFor(logging.DEBUG))
                    raise e
                # Here we load the spatialite data using the model that was created
                # by generate_datamodel.  We need to use this to get the range
                # and type information for each field...
                try:
                    field_attributes = {}
                    qs = getQuerySet(datafile)
                    field_mappings = [
                        (django_model_fields.IntegerField, 'integer', int),
                        # Required because nmtk_id is an
                        # autofield..
                        (
                            django_model_fields.AutoField,
                            'integer',
                            int,
                        ),
                        (django_model_fields.BooleanField, 'boolean', bool),
                        # Special case holding FIPS
                        (django_model_fields.DecimalField, 'float', float),
                        (django_model_fields.TextField, 'text', None),
                        (django_model_fields.FloatField, 'float', float),
                        (
                            django_model_fields.DateField,
                            'date',
                            datetime.date.isoformat,
                        ),
                        (
                            django_model_fields.TimeField,
                            'time',
                            datetime.time.isoformat,
                        ),
                        (django_model_fields.DateTimeField, 'datetime',
                         datetime.datetime.isoformat)
                    ]
                    if qs.count() > 0:
                        # Get a single row so that we can try to work with the
                        # fields.
                        sample_row = qs[0]
                        for field in sample_row._meta.fields:
                            field_name = field.name
                            db_column = field.db_column or field.name
                            # convert the django field type to a text string.
                            for ftype, field_type, caster in field_mappings:
                                if isinstance(field, (ftype, )):
                                    break
                            else:
                                logger.info(
                                    'Unable to map field of type %s (this is expected for GIS fields)',
                                    type(field, ))
                                continue
                            values_aggregates = qs.aggregate(
                                Count(field_name, distinct=True))
                            field_attributes[db_column] = {
                                'type': field_type,
                                'field_name': field_name,
                                'distinct': values_aggregates[
                                    '{0}__count'.format(field_name)]
                            }
                            # Add the units from the config to the data.
                            if db_column in job_config_field_units:
                                field_attributes[db_column]['units'] = \
                                    job_config_field_units[db_column]
                            if field_attributes[db_column]['distinct'] < 10:
                                distinct_values = [
                                    v for v in qs.order_by().values_list(
                                        field_name, flat=True).distinct()
                                    if v is not None
                                ]
                                if not caster:
                                    field_attributes[db_column][
                                        'values'] = distinct_values
                                else:
                                    logger.info(
                                        'Attempting to cast values: %s',
                                        distinct_values)
                                    field_attributes[db_column][
                                        'values'] = map(
                                            caster, distinct_values)
                            else:
                                logger.debug(
                                    'There are more than 10 values for %s (%s), enumerating..',
                                    db_column,
                                    field_attributes[db_column]['distinct'])
                                # formerly the aggregates happened above - with the count. However, Django doesn't
                                # allow those aggregates with boolean fields - so here we split it up to only do the
                                # aggregates in the cases where we have to (i.e.,
                                # the distinct values is above the threshold.)
                                values_aggregates = qs.aggregate(
                                    Max(field_name),
                                    Min(field_name),
                                )
                                field_attributes[db_column][
                                    'min'] = values_aggregates[
                                        '{0}__min'.format(field_name)]
                                field_attributes[db_column][
                                    'max'] = values_aggregates[
                                        '{0}__max'.format(field_name)]
                                if caster:
                                    field_attributes[db_column][
                                        'min'] = caster(
                                            field_attributes[db_column]['min'])
                                    field_attributes[db_column][
                                        'max'] = caster(
                                            field_attributes[db_column]['max'])
                        datafile.field_attributes = field_attributes
                        datafile.units = job_config_field_units
                except Exception as e:
                    logger.exception('Failed to get range for model %s',
                                     datafile.pk)
        if job:
            try:
                # There might be multiple results files from this job, so we will only
                # mark the job as complete if all the results files are
                # processed.
                if job.status != job.COMPLETE:
                    results_left = job.job_files.filter(
                        status=models.DataFile.PROCESSING_RESULTS).count()
                    if results_left == 0:
                        job.status = job.COMPLETE
                        models.JobStatus(
                            message='Job Completed',
                            timestamp=timezone.now(),
                            job=job,
                            category=models.JobStatus.CATEGORY_SYSTEM).save()
                    elif results_left == 1:
                        # Handle the potential race condition here - do we really need this?
                        # sort of.  Since it's possible that two files finish post-processing
                        # at the same time.  In such cases, a second should be more than enough
                        # time to get both committed as complete.
                        time.sleep(1)
                        job = models.Job.objects.get(pk=job_id)
                        if job.status != job.COMPLETE:
                            results_left = job.job_files.filter(
                                status=models.DataFile.PROCESSING_RESULTS
                            ).count()
                            if results_left == 0:
                                job.status = job.COMPLETE
                                models.JobStatus(
                                    message='Job Completed',
                                    timestamp=timezone.now(),
                                    job=job,
                                    category=models.JobStatus.CATEGORY_SYSTEM
                                ).save()

            except:
                logger.exception('Failed to update job status to complete?!!')
        datafile.status = future_status
    except Exception as e:
        logger.error('Failed import process!', exc_info=True)
        datafile.processed_file = None
        if not job_id:
            datafile.status = datafile.IMPORT_FAILED
        else:
            datafile.status = datafile.IMPORT_RESULTS_FAILED
        datafile.status_message = "%s" % (e, )
        if job_id:
            try:
                if not job:
                    job = models.Job.objects.get(pk=job_id)
                job.status = job.POST_PROCESSING_FAILED
                logger.info('Set post processing to failed for job %s', job.pk)
            except:
                logger.error('Failed to update job status to failed?!!',
                             exc_info=True)

    if job:
        job.save()
    datafile.save()
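The field-order merge above depends on unique_everseen (the well-known itertools recipe, also available in more_itertools); a minimal equivalent and its effect on illustrative field lists:

def unique_everseen(iterable):
    # Minimal stand-in for the itertools-recipe helper: yield each element
    # once, preserving first-seen order.
    seen = set()
    for item in iterable:
        if item not in seen:
            seen.add(item)
            yield item

# Fields requested by the tool/job config first, then whatever else the loader found.
ordered_fields = ['walkability', 'nmtk_id']
available_fields = ['nmtk_id', 'geometry_src', 'walkability', 'pop_density']
print(list(unique_everseen(ordered_fields + available_fields)))
# -> ['walkability', 'nmtk_id', 'geometry_src', 'pop_density']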