Example #1
0
    def test_form_validation3(self):
        # (3) Valid fixture data plus unexpected keys: the form must still
        # validate and none of the unexpected keys may show up in cleaned_data.
        # (Plain comments are used instead of a docstring so that unittest's
        # verbose output keeps printing the method name.)

        msgt(
            '(3) Test valid data with extra fields.  Are the extra fields removed?'
        )

        # Keys the test expects the form to drop, in the order they are checked
        extra_fields = (
            ('map_layer', 'extra field 1'),
            ('comment', 'extra field 2'),
            ('token_check', 'extra field 3'),
        )

        # Work on a copy so the shared fixture in self.test_data stays intact
        tdata = self.test_data.copy()
        tdata.update(dict(extra_fields))

        validation_form = DataverseInfoValidationForm(tdata)
        msg('is valid: %s' % validation_form.is_valid())
        err_msgs = validation_form.errors.values()
        msg('errs: %s' % err_msgs)

        self.assertEqual(validation_form.is_valid(), True)

        # ``in`` / assertNotIn replaces dict.has_key(), which no longer
        # exists in Python 3 and gives no context when the check fails.
        cleaned_data = validation_form.cleaned_data
        for field_name, _ in extra_fields:
            self.assertNotIn(field_name, cleaned_data)

        msg('Yes, strips out the extra fields?')
Example #2
0
    def test_form_validation1(self):
        # (1) The unmodified fixture held in self.test_data is expected to
        # pass DataverseInfoValidationForm validation.
        msgt('(1) Test valid data')

        form = DataverseInfoValidationForm(self.test_data)
        form_is_valid = form.is_valid()

        self.assertEqual(form_is_valid, True)
    def test_form_validation3(self):
        # (3) Valid fixture data plus unexpected keys: the form must still
        # validate and none of the unexpected keys may show up in cleaned_data.
        # (Plain comments are used instead of a docstring so that unittest's
        # verbose output keeps printing the method name.)

        msgt('(3) Test valid data with extra fields.  Are the extra fields removed?')

        # Keys the test expects the form to drop, in the order they are checked
        extra_fields = (
            ('map_layer', 'extra field 1'),
            ('comment', 'extra field 2'),
            ('token_check', 'extra field 3'),
        )

        # Work on a copy so the shared fixture in self.test_data stays intact
        tdata = self.test_data.copy()
        tdata.update(dict(extra_fields))

        validation_form = DataverseInfoValidationForm(tdata)
        msg ('is valid: %s' % validation_form.is_valid())
        err_msgs = validation_form.errors.values()
        msg('errs: %s' % err_msgs)

        self.assertEqual(validation_form.is_valid(), True)

        # ``in`` / assertNotIn replaces dict.has_key(), which no longer
        # exists in Python 3 and gives no context when the check fails.
        cleaned_data = validation_form.cleaned_data
        for field_name, _ in extra_fields:
            self.assertNotIn(field_name, cleaned_data)

        msg('Yes, strips out the extra fields?')
        #msg(cleaned_data)

        # Disabled follow-up check, kept from the original for reference:
        #dvinfo_obj = validation_form.save(commit=False)
        #self.assertEqual(type(dvinfo_obj), DataverseInfo)
        #msg('yes, converts into a DataverseInfo object (minus the map_layer)')
        
 def test_form_validation1(self):
     # (1) The unmodified fixture held in self.test_data is expected to
     # pass DataverseInfoValidationForm validation.
     msgt('(1) Test valid data')

     form = DataverseInfoValidationForm(self.test_data)
     form_is_valid = form.is_valid()

     self.assertEqual(form_is_valid, True)
def get_dataverse_info_dict(gis_data_file):
    """
    Reduce a GISDataFile (or ShapefileInfo) object to a dict of DataverseInfo attributes.

    The object's ``__dict__`` is fed to DataverseInfoValidationForm and the
    form's ``cleaned_data`` is returned.  GISDataFile and ShapefileInfo objects
    are expected to always pass this form.

    Raises AssertionError when gis_data_file is not a GISDataFile, and a plain
    Exception when the form does not validate.
    """
    wrong_type_msg = "gis_data_file must be a GISDataFile object. (Note: A ShapefileInfo is also a GISDataFile object)"
    assert isinstance(gis_data_file, GISDataFile), wrong_type_msg

    validation_form = DataverseInfoValidationForm(gis_data_file.__dict__)

    # Guard clause: bail out first, so the happy path is the last statement
    if not validation_form.is_valid():
        raise Exception('Dataverse Info is not valid')

    return validation_form.cleaned_data
Example #6
0
def get_dataverse_info_dict(gis_data_file):
    """
    Reduce a GISDataFile (or ShapefileInfo) object to a dict of DataverseInfo attributes.

    The object's ``__dict__`` is fed to DataverseInfoValidationForm and the
    form's ``cleaned_data`` is returned.  GISDataFile and ShapefileInfo objects
    are expected to always pass this form.

    Raises AssertionError when gis_data_file is not a GISDataFile, and a plain
    Exception when the form does not validate.
    """
    wrong_type_msg = "gis_data_file must be a GISDataFile object. (Note: A ShapefileInfo is also a GISDataFile object)"
    assert isinstance(gis_data_file, GISDataFile), wrong_type_msg

    validation_form = DataverseInfoValidationForm(gis_data_file.__dict__)

    # Guard clause: bail out first, so the happy path is the last statement
    if not validation_form.is_valid():
        raise Exception('Dataverse Info is not valid')

    return validation_form.cleaned_data
Example #7
0
    def test_form_validation2(self):
        # (2) Two fixture values are replaced with bad input; the form must
        # reject the data and report an error for each of the two fields.
        # (Plain comments are used instead of a docstring so that unittest's
        # verbose output keeps printing the method name.)

        msgt('(2) Test invalid data')

        # Work on a copy so the shared fixture in self.test_data stays intact
        tdata = self.test_data.copy()
        tdata['dataset_id'] = '11z'
        tdata['return_to_dataverse_url'] = 'ha'

        validation_form = DataverseInfoValidationForm(tdata)
        self.assertFalse(validation_form.is_valid())

        # assertIn reports both the missing member and the container on
        # failure, instead of a bare "False != True".
        msg('check for attributes in error')
        err_keys = validation_form.errors.keys()
        msg(err_keys)
        self.assertIn('return_to_dataverse_url', err_keys)
        self.assertIn('dataset_id', err_keys)

        msg('check for err messages')
        err_msgs = validation_form.errors.values()
        msg(err_msgs)
        self.assertIn([u'Enter a valid URL.'], err_msgs)
        self.assertIn([u'Enter a whole number.'], err_msgs)
    def test_form_validation2(self):
        # (2) Two fixture values are replaced with bad input; the form must
        # reject the data and report an error for each of the two fields.
        # (Plain comments are used instead of a docstring so that unittest's
        # verbose output keeps printing the method name.)

        msgt('(2) Test invalid data')

        # Work on a copy so the shared fixture in self.test_data stays intact
        tdata = self.test_data.copy()
        tdata['dataset_id'] = '11z'
        tdata['return_to_dataverse_url'] = 'ha'

        validation_form = DataverseInfoValidationForm(tdata)
        self.assertFalse(validation_form.is_valid())

        # assertIn reports both the missing member and the container on
        # failure, instead of a bare "False != True".
        msg('check for attributes in error')
        err_keys = validation_form.errors.keys()
        msg(err_keys)
        self.assertIn('return_to_dataverse_url', err_keys)
        self.assertIn('dataset_id', err_keys)

        msg('check for err messages')
        err_msgs = validation_form.errors.values()
        msg(err_msgs)
        self.assertIn([u'Enter a valid URL.'], err_msgs)
        self.assertIn([u'Enter a whole number.'], err_msgs)
    def run_test01_datafile_metadata(self):
        """Exercise the Dataverse WorldMap datafile API end to end.

        Steps, in order:
          (1a-1c) POST to the metadata endpoint with no params, with an empty
                  token and with a random token; expect 400, 400 and 401.
          (1d-1f) POST with the dict from get_worldmap_token_dict(); expect
                  200 and status 'OK', then validate the returned 'data'
                  with DataverseInfoValidationForm.
          (2a,2d) GET the file at 'datafile_download_url' without a key and
                  with GEOCONNECT_TOKEN_VALUE; expect 200 and a body whose
                  byte size equals 'datafile_filesize'.

        If the very first request raises, the error is reported through msg()
        and the method returns without asserting anything.  Failures of later
        requests are handed to msgx().
        """

        #-----------------------------------------------------------
        msgt("--- Retrieve metadata ---")
        #-----------------------------------------------------------
        api_url = '%s/api/worldmap/datafile/' % (self.dataverse_server)

        #-----------------------------------------------------------
        msgn("(1a) Try with no json params")
        #-----------------------------------------------------------
        msg('api_url: %s' % api_url)
        try:
            r = requests.post(api_url)
        except requests.exceptions.ConnectionError as e:
            # Formatting the exception itself works on Python 2 and 3;
            # the ``message`` attribute does not exist on Python 3.
            msg('error: %s' % e)
            return
        except Exception:
            msg('error: %s' % sys.exc_info()[0])
            #msgx("Unexpected error: %s" % sys.exc_info()[0])
            return

        msg(r.status_code)
        self.assertEqual(r.status_code, 400, "Try with no json params")

        #-----------------------------------------------------------
        msgn("(1b) Try with empty string token")
        #-----------------------------------------------------------
        msg('api_url: %s' % api_url)
        try:
            r = requests.post(api_url, data=json.dumps({ self.wm_token_name: ''} ))
        except requests.exceptions.ConnectionError as e:
            msgx('Connection error: %s' % e)
        except Exception:
            msgx("Unexpected error: %s" % sys.exc_info()[0])
        msg(r.status_code)
        self.assertEqual(r.status_code, 400, "Try without a token")

        #-----------------------------------------------------------
        msgn("(1c) Try a random token")
        #-----------------------------------------------------------
        msg('api_url: %s' % api_url)
        try:
            r = requests.post(api_url, data=json.dumps({ self.wm_token_name: self.get_random_token() } ))
        except requests.exceptions.ConnectionError as e:
            msgx('Connection error: %s' % e)
        except Exception:
            msgx("Unexpected error: %s" % sys.exc_info()[0])
        msg(r.status_code)
        self.assertEqual(r.status_code, 401, "Try with a random token")


        #-----------------------------------------------------------
        msgn("(1d) Retrieve metadata")
        #-----------------------------------------------------------
        params = self.get_worldmap_token_dict()

        msg('api_url: %s' % api_url)
        msg('params: %s' % params)
        try:
            r = requests.post(api_url, data=json.dumps(params))
        except requests.exceptions.ConnectionError as e:
            msgx('Connection error: %s' % e)
        except Exception:
            msgx("Unexpected error: %s" % sys.exc_info()[0])

        #-----------------------------------------------------------
        msgn("(1e) Check metadata")
        #-----------------------------------------------------------
        msg(r.text)
        self.assertEqual(r.status_code, 200, "API call successful, with a 200 response?")

        json_resp = r.json()
        self.assertEqual(json_resp.get('status'), 'OK', "status is 'OK'")

        metadata_json = json_resp.get('data', None)
        # The previous check, ``type(metadata_json) is not None``, could never
        # fail because type() never returns None.
        self.assertIsInstance(metadata_json, dict, "Check that metadata_json is a dict")

        #-----------------------------------------------------------
        msgn("(1f) Check metadata with DataverseInfoValidationForm")
        #-----------------------------------------------------------
        # Metadata validation form (used directly by GeoConnect and WorldMap)
        #
        f = DataverseInfoValidationForm(metadata_json)
        msg('metadata valid? %s' % f.is_valid())
        if not f.is_valid():
            msg(f.errors)
        self.assertTrue(f.is_valid(), "Check Metadata in validation form.  Errors:\n%s" % f.errors)

        # ``in`` / assertIn instead of dict.has_key(), which Python 3 removed
        self.assertIn('datafile_download_url', metadata_json, "Check that metadata_json has 'datafile_download_url'")
        self.assertIn('datafile_filesize', metadata_json, "Check that metadata_json has 'datafile_filesize'")

        #-----------------------------------------------------------
        msgt("(2) Retrieve file")
        #-----------------------------------------------------------
        #msgn("(2a) Try without token--should be unauthorized")
        msgn("(2a) Try without token--should be ok b/c dataset is published")
        #-----------------------------------------------------------
        download_api_url = metadata_json['datafile_download_url']
        msg('download_api_url: %s' % download_api_url)
        try:
            r = requests.get(download_api_url)
        except requests.exceptions.ConnectionError as e:
            msgx('Connection error: %s' % e)
        except Exception:
            msgx("Unexpected error: %s" % sys.exc_info()[0])
        msg(r.status_code)
        #self.assertEqual(r.status_code, 401, "API call should be forbidden--no token")
        self.assertEqual(r.status_code, 200, "API call should be ok b/c dataset is published")

        # Steps (2b) and (2c) are disabled; the string below is never executed.
        """
        #-----------------------------------------------------------
        msgn("(2b) Try with bad token, not WorldMap token length--should be forbidden.")
        #-----------------------------------------------------------
        random_non_worldmap_token = self.get_random_token(36)
        download_api_url = '%s?key=%s' % (metadata_json['datafile_download_url'], random_non_worldmap_token)
        msg('download_api_url: %s' % download_api_url)
        try:
            r = requests.get(download_api_url)
        except requests.exceptions.ConnectionError as e:
            msgx('Connection error: %s' % e.message)
        except:
            msgx("Unexpected error: %s" % sys.exc_info()[0])
        msg(r.status_code)
        self.assertEqual(r.status_code, 403, "API call should be forbidden--bad token")
        
        #-----------------------------------------------------------
        msgn("(2c) Try with bad token, WorldMap token length, but random")
        #-----------------------------------------------------------
        random_worldmap_token = self.get_random_token()
        download_api_url = '%s?key=%s' % (metadata_json['datafile_download_url'], random_worldmap_token)
        msg('download_api_url: %s' % download_api_url)
        try:
            r = requests.get(download_api_url)
        except requests.exceptions.ConnectionError as e:
            msgx('Connection error: %s' % e.message)
        except:
            msgx("Unexpected error: %s" % sys.exc_info()[0])
        msg(r.status_code)
        self.assertEqual(r.status_code, 403, "API call should be forbidden--no token")
        """

        #-----------------------------------------------------------
        msgn("(2d) Legit request with real token (takes a couple of seconds to get file)")
        #-----------------------------------------------------------
        download_api_url = '%s?key=%s' % (metadata_json['datafile_download_url'], GEOCONNECT_TOKEN_VALUE)
        msg('download_api_url: %s' % download_api_url)
        try:
            r = requests.get(download_api_url)
        except requests.exceptions.ConnectionError as e:
            msgx('Connection error: %s' % e)
        except Exception:
            msgx("Unexpected error: %s" % sys.exc_info()[0])

        # Compare against the raw body: r.text is the *decoded* text, so its
        # length counts characters and can differ from the size in bytes.
        # NOTE(review): assumes 'datafile_filesize' is a byte count -- confirm.
        downloaded_size = len(r.content)

        msg(r.status_code)
        msg('downloaded file size: %s' % downloaded_size)
        msg('expected file size: %s' % metadata_json['datafile_filesize'])

        self.assertEqual(r.status_code, 200, "API call successful to file download")
        self.assertEqual(metadata_json['datafile_filesize'], downloaded_size, "Actual file size matches size in metadata")
Example #10
0
def get_tabular_file_from_dv_api_info(dv_session_token, dataverse_info_dict):
    """Using Dataverse API information, create a :model:`gis_tabular.TabularFileInfo` object.
    This function should only return successful responses.

    Steps:
      (1) validate dataverse_info_dict with DataverseInfoValidationForm
      (2) look up the registered Dataverse for 'return_to_dataverse_url'
      (3) find TabularFileInfo rows with the same 'datafile_id' and
          'dataverse_installation_name'
      (4) when several rows exist, keep only the last one (ordered by 'created')
      (5) if a row exists, update it and return its md5 when
          is_dv_file_available() is true, otherwise delete it; then create a
          new TabularFileInfo, download the file from 'datafile_download_url'
          (with ``?key=<dv_session_token>`` appended) and attach it

    Note: once steps (1)-(3) pass, dataverse_info_dict is modified in place:
    'dv_session_token' and 'registered_dataverse' are added, plus 'name' when
    a new object is created.

    return True/False, shp_md5 or ErrResultMsg

    Examples:  True, md5 from TabularFileInfo
               False,  ErrResultMsg
    """
    assert dv_session_token is not None, "dv_session_token cannot be None"
    # isinstance also accepts dict subclasses, which behave the same below
    assert isinstance(dataverse_info_dict, dict),\
        "dataverse_info_dict must be type 'dict'"

    msgt('dataverse_info_dict: {0}'.format(dataverse_info_dict))
    #------------------------------
    # (1) Validate the data (DataverseInfoValidationForm)
    #------------------------------
    #dataverse_info_dict.update({'datafile_id':None})   # for testing
    validation_form = DataverseInfoValidationForm(dataverse_info_dict)
    if not validation_form.is_valid():
        errs = ['%s: %s' % (k, v) for k, v in validation_form.errors.items()]
        LOGGER.debug('errors: %s', errs)
        form_errs = '\n'.join(errs)
        return False, ErrResultMsg(None, form_errs)

    #-------------------------------------------------
    # (2) Check if this is a Registered Dataverse
    #-------------------------------------------------
    registered_dataverse = find_registered_dataverse(
        dataverse_info_dict['return_to_dataverse_url'])
    if registered_dataverse is None:
        return False, ErrResultMsg(
            FAILED_NOT_A_REGISTERED_DATAVERSE,
            "This dataverse url was not recognized: %s" %
            dataverse_info_dict['return_to_dataverse_url'])

    #-------------------------------------------------
    # (3b) Look for existing Dataverse files in the database
    #    ShapefileInfo and TabularFileInfo objects are routinely
    #    deleted, but if file is already here, use it
    #-------------------------------------------------
    params_for_existing_check = dict(
        datafile_id=dataverse_info_dict.get('datafile_id', -1),
        dataverse_installation_name=dataverse_info_dict.get(
            'dataverse_installation_name', -1),
    )

    existing_sets = TabularFileInfo.objects.filter(
        **params_for_existing_check
    ).values_list('id', flat=True).order_by('created')

    existing_tabular_info_ids = list(existing_sets)
    msgt('existing_tabular_info_ids: %s' % existing_tabular_info_ids)

    #-------------------------------------------------
    # add dv_session_token and registered_dataverse to dataverse_info_dict
    #-------------------------------------------------
    dataverse_info_dict['dv_session_token'] = dv_session_token
    dataverse_info_dict['registered_dataverse'] = registered_dataverse

    #------------------------------
    # (4) More than one existing TabularFileInfo found:
    #   keep the last id in 'created' order, delete the others
    #------------------------------
    if len(existing_tabular_info_ids) > 1:
        older_ids = existing_tabular_info_ids[:-1]
        TabularFileInfo.objects.filter(id__in=older_ids).delete()

    #------------------------------
    # (5) Get or create a new TabularFileInfo object
    #------------------------------
    msgt('(5) Get or create a new TabularFileInfo object')
    try:
        # Existing TabularFileInfo:
        #   (1) Assume file is already saved
        #   (2) update the data
        #
        tabular_info = TabularFileInfo.objects.get(**params_for_existing_check)

        # .items() works on both Python 2 and 3 (iteritems() is Python 2 only)
        for key, value in dataverse_info_dict.items():
            if key == 'column_names':
                tabular_info.add_column_names(value)
            else:
                setattr(tabular_info, key, value)

        # Save
        tabular_info.save()
        msg('tabular_info info saved')

        # If the file is still available, return it
        if tabular_info.is_dv_file_available():
            add_worldmap_layerinfo_if_exists(tabular_info)
            return True, tabular_info.md5
        else:
            # But the file isn't there!!  Delete TabularFileInfo and make a new one
            tabular_info.delete()

    except TabularFileInfo.DoesNotExist:
        pass
    #except:
    #    msg('Failed to retrieve an existing ShapefileInfo object -- so create a new one')
    #    #return False, ErrResultMsg(None, 'Failed to retrieve an existing ShapefileInfo object')

    msg('new file')

    #------------------------------
    # New tabular_info, create object and attach file
    #------------------------------

    # Add name parameter
    dataverse_info_dict['name'] = dataverse_info_dict.get(
        'datafile_label', '(no datafile_label found)')
    tabular_info = TabularFileInfo(**dataverse_info_dict)
    tabular_info.save()

    #------------------------------
    # Download and attach file
    #------------------------------
    datafile_download_url = dataverse_info_dict.get('datafile_download_url',
                                                    '')

    # Add session token.  Gives permission to download/retrieve the file
    #   - http://localhost:8080/api/access/datafile/FILEID?key=YOURAPIKEY
    #
    datafile_download_url = '%s?key=%s' % (datafile_download_url,
                                           dv_session_token)
    # NOTE(review): the URL printed here contains the session token
    msg('datafile_download_url: %s' % datafile_download_url)
    datafile_filename = dataverse_info_dict.get('datafile_label', '')

    # The ``with`` block closes (and thereby removes) the temporary file on
    # every exit path, including the early return on a failed download.
    with NamedTemporaryFile(delete=True) as img_temp:
        try:
            response = urllib2.urlopen(datafile_download_url)
            try:
                img_temp.write(response.read())
            finally:
                response.close()
        except urllib2.HTTPError as e:
            tabular_info.delete()  # clear tabular info
            err_msg = 'Failed to download tabular file. HTTPError: %s \n\nurl: %s' % (
                str(e), datafile_download_url)
            return False, ErrResultMsg(None, err_msg)
        img_temp.flush()

        tabular_info.dv_file.save(datafile_filename, File(img_temp))
        tabular_info.save()
        add_worldmap_layerinfo_if_exists(tabular_info)

        return True, tabular_info.md5
Example #11
0
def get_shapefile_from_dv_api_info(dv_session_token, dv_info_dict):
    """Using Dataverse API information, create a "ShapefileInfo" object.
    This function should only result in successful responses.

    Steps:
      (1) validate dv_info_dict with DataverseInfoValidationForm
      (2) look up the registered Dataverse for 'return_to_dataverse_url'
      (3) find ShapefileInfo rows with the same 'datafile_id' and
          'dataverse_installation_name'
      (4) when several rows exist, keep only the last one (ordered by 'created')
      (5) if a row exists, update it and return its md5 when
          is_dv_file_available() is true, otherwise delete it; then create a
          new ShapefileInfo, download the file from 'datafile_download_url'
          (with ``?key=<dv_session_token>`` appended) and attach it

    Note: once steps (1)-(3) pass, dv_info_dict is modified in place:
    'dv_session_token' and 'registered_dataverse' are added.

    return True/False, shp_md5 or ErrResultMsg

    Examples:  True, md5 from ShapefileInfo
               False,  ErrResultMsg

    To do: Make this into a separate class
    """
    assert dv_session_token is not None, "dv_session_token cannot be None"
    # isinstance also accepts dict subclasses, which behave the same below
    assert isinstance(dv_info_dict, dict), "dv_info_dict must be type 'dict'"

    #------------------------------
    # (1) Validate the data (DataverseInfoValidationForm)
    #------------------------------
    validation_form = DataverseInfoValidationForm(dv_info_dict)
    if not validation_form.is_valid():
        errs = ['%s: %s' % (k, v) for k, v in validation_form.errors.items()]
        LOGGER.debug(errs)
        form_errs = '\n'.join(errs)
        return False, ErrResultMsg(None, form_errs)

    #-------------------------------------------------
    # (2) Check if this is a Registered Dataverse
    #-------------------------------------------------
    registered_dataverse = find_registered_dataverse(
        dv_info_dict['return_to_dataverse_url'])
    if registered_dataverse is None:
        return False, ErrResultMsg(
            FAILED_NOT_A_REGISTERED_DATAVERSE,
            "This dataverse url was not recognized: %s" % dv_info_dict['return_to_dataverse_url'])

    #-------------------------------------------------
    # (3) Look for existing ShapefileInfo objects in the database
    #    ShapefileInfo objects are routinely deleted, but if file is already here, use it
    #       * todo: check for staleness, if the data is old delete it
    #-------------------------------------------------
    LOGGER.debug('(3) Look for existing ShapefileInfo objects in the database')
    params_for_existing_check = dict(
        datafile_id=dv_info_dict.get('datafile_id', -1),
        dataverse_installation_name=dv_info_dict.get(
            'dataverse_installation_name', -1),
    )

    existing_sets = ShapefileInfo.objects.filter(
        **params_for_existing_check
    ).values_list('id', flat=True).order_by('created')

    existing_shapefile_info_ids = list(existing_sets)

    #-------------------------------------------------
    # add dv_session_token and registered_dataverse to dv_info_dict
    #-------------------------------------------------
    dv_info_dict['dv_session_token'] = dv_session_token
    dv_info_dict['registered_dataverse'] = registered_dataverse

    #------------------------------
    # (4) More than one existing ShapefileInfo found:
    #   keep the last id in 'created' order, delete the others
    #------------------------------
    if len(existing_shapefile_info_ids) > 1:
        older_ids = existing_shapefile_info_ids[:-1]
        ShapefileInfo.objects.filter(id__in=older_ids).delete()

    #------------------------------
    # (5) Get or create a new ShapefileInfo object
    #------------------------------
    LOGGER.debug('(5) Get or create a new ShapefileInfo object')
    try:
        # Existing ShapefileInfo:
        #   (1) Assume file is already saved
        #   (2) update the data
        #
        shapefile_info = ShapefileInfo.objects.get(**params_for_existing_check)

        # .items() works on both Python 2 and 3 (iteritems() is Python 2 only)
        for key, value in dv_info_dict.items():
            setattr(shapefile_info, key, value)

        # Save
        shapefile_info.save()
        LOGGER.debug('shapefile info updated')
        LOGGER.debug('shapefile_info: %s', shapefile_info.id)

        # If the file is still available, return it
        if shapefile_info.is_dv_file_available():
            return True, shapefile_info.md5
        else:
            # But the file isn't there!!  Delete ShapefileInfo and make a new one
            shapefile_info.delete()

    except ShapefileInfo.DoesNotExist:
        pass
    #except:
    #    msg('Failed to retrieve an existing ShapefileInfo object -- so create a new one')
    #    #return False, ErrResultMsg(None, 'Failed to retrieve an existing ShapefileInfo object')

    LOGGER.debug('Create a new file')

    #------------------------------
    # New shapefile info, create object and attach file
    #------------------------------

    shapefile_info = ShapefileInfo(**dv_info_dict)
    shapefile_info.save()

    #------------------------------
    # Download and attach file
    #------------------------------
    datafile_download_url = dv_info_dict.get('datafile_download_url', '')

    # Add session token.  Gives permission to download/retrieve the file
    #   - http://localhost:8080/api/access/datafile/FILEID?key=YOURAPIKEY
    #
    datafile_download_url = '%s?key=%s' % (datafile_download_url,
                                           dv_session_token)
    # NOTE(review): the URL logged here contains the session token
    LOGGER.debug('datafile_download_url: %s', datafile_download_url)
    datafile_filename = dv_info_dict.get('datafile_label', '')

    # The ``with`` block closes (and thereby removes) the temporary file on
    # every exit path, including the early return on a failed download.
    with NamedTemporaryFile(delete=True) as tmp_shapefile:
        try:
            response = urllib2.urlopen(datafile_download_url)
            try:
                tmp_shapefile.write(response.read())
            finally:
                response.close()
        except urllib2.HTTPError as e:
            shapefile_info.delete()  # clear shapefile
            err_msg = 'Failed to download shapefile. HTTPError: %s \n\nurl: %s' % (
                str(e), datafile_download_url)
            return False, ErrResultMsg(None, err_msg)

        tmp_shapefile.flush()

        shapefile_info.dv_file.save(datafile_filename, File(tmp_shapefile))
        shapefile_info.save()

        return True, shapefile_info.md5
Example #12
0
def get_tabular_file_from_dv_api_info(dv_session_token, dataverse_info_dict):
    """Using Dataverse API information, create a :model:`gis_tabular.TabularFileInfo` object.
    This function should only return successful responses.

    Steps:
      (1) validate dataverse_info_dict with DataverseInfoValidationForm
      (2) look up the registered Dataverse for 'return_to_dataverse_url'
      (3) find TabularFileInfo rows with the same 'datafile_id' and
          'dataverse_installation_name'
      (4) when several rows exist, keep only the last one (ordered by 'created')
      (5) if a row exists, update it and return its md5 when
          is_dv_file_available() is true, otherwise delete it; then create a
          new TabularFileInfo, download the file from 'datafile_download_url'
          (with ``?key=<dv_session_token>`` appended) and attach it

    Note: once steps (1)-(3) pass, dataverse_info_dict is modified in place:
    'dv_session_token' and 'registered_dataverse' are added, plus 'name' when
    a new object is created.

    return True/False, shp_md5 or ErrResultMsg

    Examples:  True, md5 from TabularFileInfo
               False,  ErrResultMsg
    """
    assert dv_session_token is not None, "dv_session_token cannot be None"
    # isinstance also accepts dict subclasses, which behave the same below
    assert isinstance(dataverse_info_dict, dict),\
        "dataverse_info_dict must be type 'dict'"

    msgt('dataverse_info_dict: {0}'.format(dataverse_info_dict))
    #------------------------------
    # (1) Validate the data (DataverseInfoValidationForm)
    #------------------------------
    #dataverse_info_dict.update({'datafile_id':None})   # for testing
    validation_form = DataverseInfoValidationForm(dataverse_info_dict)
    if not validation_form.is_valid():
        errs = ['%s: %s' % (k, v) for k, v in validation_form.errors.items()]
        LOGGER.debug('errors: %s', errs)
        form_errs = '\n'.join(errs)
        return False, ErrResultMsg(None, form_errs)

    #-------------------------------------------------
    # (2) Check if this is a Registered Dataverse
    #-------------------------------------------------
    registered_dataverse = find_registered_dataverse(dataverse_info_dict['return_to_dataverse_url'])
    if registered_dataverse is None:
        return False, ErrResultMsg(
            FAILED_NOT_A_REGISTERED_DATAVERSE,
            "This dataverse url was not recognized: %s" %
            dataverse_info_dict['return_to_dataverse_url'])

    #-------------------------------------------------
    # (3b) Look for existing Dataverse files in the database
    #    ShapefileInfo and TabularFileInfo objects are routinely
    #    deleted, but if file is already here, use it
    #-------------------------------------------------
    params_for_existing_check = dict(
        datafile_id=dataverse_info_dict.get('datafile_id', -1),
        dataverse_installation_name=dataverse_info_dict.get(
            'dataverse_installation_name', -1),
    )

    existing_sets = TabularFileInfo.objects.filter(
        **params_for_existing_check
    ).values_list('id', flat=True).order_by('created')

    existing_tabular_info_ids = list(existing_sets)
    msgt('existing_tabular_info_ids: %s' % existing_tabular_info_ids)

    #-------------------------------------------------
    # add dv_session_token and registered_dataverse to dataverse_info_dict
    #-------------------------------------------------
    dataverse_info_dict['dv_session_token'] = dv_session_token
    dataverse_info_dict['registered_dataverse'] = registered_dataverse

    #------------------------------
    # (4) More than one existing TabularFileInfo found:
    #   keep the last id in 'created' order, delete the others
    #------------------------------
    if len(existing_tabular_info_ids) > 1:
        older_ids = existing_tabular_info_ids[:-1]
        TabularFileInfo.objects.filter(id__in=older_ids).delete()

    #------------------------------
    # (5) Get or create a new TabularFileInfo object
    #------------------------------
    msgt('(5) Get or create a new TabularFileInfo object')
    try:
        # Existing TabularFileInfo:
        #   (1) Assume file is already saved
        #   (2) update the data
        #
        tabular_info = TabularFileInfo.objects.get(**params_for_existing_check)

        # .items() works on both Python 2 and 3 (iteritems() is Python 2 only)
        for key, value in dataverse_info_dict.items():
            if key == 'column_names':
                tabular_info.add_column_names(value)
            else:
                setattr(tabular_info, key, value)

        # Save
        tabular_info.save()
        msg('tabular_info info saved')

        # If the file is still available, return it
        if tabular_info.is_dv_file_available():
            add_worldmap_layerinfo_if_exists(tabular_info)
            return True, tabular_info.md5
        else:
            # But the file isn't there!!  Delete TabularFileInfo and make a new one
            tabular_info.delete()

    except TabularFileInfo.DoesNotExist:
        pass
    #except:
    #    msg('Failed to retrieve an existing ShapefileInfo object -- so create a new one')
    #    #return False, ErrResultMsg(None, 'Failed to retrieve an existing ShapefileInfo object')

    msg('new file')

    #------------------------------
    # New tabular_info, create object and attach file
    #------------------------------

    # Add name parameter
    dataverse_info_dict['name'] = dataverse_info_dict.get('datafile_label', '(no datafile_label found)')
    tabular_info = TabularFileInfo(**dataverse_info_dict)
    tabular_info.save()

    #------------------------------
    # Download and attach file
    #------------------------------
    datafile_download_url = dataverse_info_dict.get('datafile_download_url', '')

    # Add session token.  Gives permission to download/retrieve the file
    #   - http://localhost:8080/api/access/datafile/FILEID?key=YOURAPIKEY
    #
    datafile_download_url = '%s?key=%s' % (datafile_download_url, dv_session_token)
    # NOTE(review): the URL printed here contains the session token
    msg('datafile_download_url: %s' % datafile_download_url)
    datafile_filename = dataverse_info_dict.get('datafile_label', '')

    # The ``with`` block closes (and thereby removes) the temporary file on
    # every exit path, including the early return on a failed download.
    with NamedTemporaryFile(delete=True) as img_temp:
        try:
            response = urllib2.urlopen(datafile_download_url)
            try:
                img_temp.write(response.read())
            finally:
                response.close()
        except urllib2.HTTPError as e:
            tabular_info.delete() # clear tabular info
            err_msg = 'Failed to download tabular file. HTTPError: %s \n\nurl: %s' % (str(e), datafile_download_url)
            return False, ErrResultMsg(None, err_msg)
        img_temp.flush()

        tabular_info.dv_file.save(datafile_filename, File(img_temp))
        tabular_info.save()
        add_worldmap_layerinfo_if_exists(tabular_info)

        return True, tabular_info.md5
Example #13
0
def get_shapefile_from_dv_api_info(dv_session_token, dv_info_dict):
    """Using Dataverse API information, get or create a "ShapefileInfo" object.

    Steps:
      (1) Validate dv_info_dict with DataverseInfoValidationForm
      (2) Check that 'return_to_dataverse_url' maps to a registered Dataverse
      (3) Look for existing ShapefileInfo objects with the same
          'datafile_id' and 'dataverse_installation_name'
      (4) If several exist, keep the most recently created one, delete the rest
      (5) Update the existing ShapefileInfo and return its md5 if its file is
          still available; otherwise create a new ShapefileInfo, download
          the file from 'datafile_download_url' and attach it

    Side effect: the keys 'dv_session_token' and 'registered_dataverse'
    are added to the caller's dv_info_dict.

    :param dv_session_token: token appended to the download url as "?key=..."
    :param dv_info_dict: dict of Dataverse file information
    :raises AssertionError: if dv_session_token is None or dv_info_dict
        is not a dict

    return True/False, shp_md5 or ErrResultMsg

    Examples:  True, md5 from ShapefileInfo
               False,  ErrResultMsg

    To do: Make this into a separate class
    """
    assert dv_session_token is not None, "dv_session_token cannot be None"
    assert isinstance(dv_info_dict, dict), "dv_info_dict must be type 'dict'"

    #------------------------------
    # (1) Validate the data (DataverseInfoValidationForm)
    #------------------------------
    validation_form = DataverseInfoValidationForm(dv_info_dict)
    if not validation_form.is_valid():
        errs = ['%s: %s' % (k, v) for k, v in validation_form.errors.items()]
        LOGGER.debug(errs)
        form_errs = '\n'.join(errs)
        return False, ErrResultMsg(None, form_errs)

    #-------------------------------------------------
    # (2) Check if this is a Registered Dataverse
    #-------------------------------------------------
    registered_dataverse = find_registered_dataverse(dv_info_dict['return_to_dataverse_url'])
    if registered_dataverse is None:
        return False, ErrResultMsg(
            FAILED_NOT_A_REGISTERED_DATAVERSE,
            "This dataverse url was not recognized: %s" % dv_info_dict['return_to_dataverse_url'])

    #-------------------------------------------------
    # (3) Look for existing ShapefileInfo objects in the database
    #    ShapefileInfo objects are routinely deleted, but if file is already here, use it
    #       * todo: check for staleness, if the data is old delete it
    #-------------------------------------------------
    LOGGER.debug('(3) Look for existing ShapefileInfo objects in the database')
    params_for_existing_check = dict(
        datafile_id=dv_info_dict.get('datafile_id', -1),
        dataverse_installation_name=dv_info_dict.get('dataverse_installation_name', -1),
    )

    # ids ordered oldest -> newest
    existing_shapefile_info_ids = list(
        ShapefileInfo.objects.filter(**params_for_existing_check)
                             .values_list('id', flat=True)
                             .order_by('created'))

    #-------------------------------------------------
    # add dv_session_token and registered_dataverse to dv_info_dict
    #-------------------------------------------------
    dv_info_dict['dv_session_token'] = dv_session_token
    dv_info_dict['registered_dataverse'] = registered_dataverse

    #------------------------------
    # (4) Several existing ShapefileInfo(s) found:
    #   Keep the last (newest) id and delete the older ones, so that
    #   the .get() in step (5) matches at most one object
    #------------------------------
    if len(existing_shapefile_info_ids) > 1:
        older_ids = existing_shapefile_info_ids[:-1]
        ShapefileInfo.objects.filter(id__in=older_ids).delete()

    #------------------------------
    # (5) Get or create a new ShapefileInfo object
    #------------------------------
    LOGGER.debug('(5) Get or create a new ShapefileInfo object')
    try:
        # Existing ShapefileInfo:
        #   (1) Assume file is already saved
        #   (2) update the data
        #
        shapefile_info = ShapefileInfo.objects.get(**params_for_existing_check)

        for key, value in dv_info_dict.iteritems():
            setattr(shapefile_info, key, value)

        # Save
        shapefile_info.save()
        LOGGER.debug('shapefile info updated')
        LOGGER.debug('shapefile_info: %s', shapefile_info.id)

        # If the file is still available, return it
        if shapefile_info.is_dv_file_available():
            return True, shapefile_info.md5

        # But the file isn't there!!  Delete ShapefileInfo and make a new one
        shapefile_info.delete()

    except ShapefileInfo.DoesNotExist:
        pass

    LOGGER.debug('Create a new file')

    #------------------------------
    # New shapefile info, create object and attach file
    #------------------------------
    shapefile_info = ShapefileInfo(**dv_info_dict)
    shapefile_info.save()

    #------------------------------
    # Download and attach file
    #------------------------------
    datafile_download_url = dv_info_dict.get('datafile_download_url', '')

    # Add session token.  Gives permission to download/retrieve the file
    #   - http://localhost:8080/api/access/datafile/FILEID?key=YOURAPIKEY
    #
    datafile_download_url = '%s?key=%s' % (datafile_download_url, dv_session_token)
    LOGGER.debug('datafile_download_url: %s', datafile_download_url)
    datafile_filename = dv_info_dict.get('datafile_label', '')

    tmp_shapefile = NamedTemporaryFile(delete=True)
    try:
        try:
            response = urllib2.urlopen(datafile_download_url)
            try:
                tmp_shapefile.write(response.read())
            finally:
                # Always release the HTTP connection
                response.close()
        except urllib2.URLError as e:
            # URLError is the base class of HTTPError, so this also covers
            # connection-level failures.  Either way, remove the
            # ShapefileInfo saved above: it has no file attached.
            shapefile_info.delete()     # clear shapefile
            err_msg = 'Failed to download shapefile. %s: %s \n\nurl: %s' % (
                e.__class__.__name__, str(e), datafile_download_url)
            return False, ErrResultMsg(None, err_msg)

        tmp_shapefile.flush()

        shapefile_info.dv_file.save(datafile_filename, File(tmp_shapefile))
        shapefile_info.save()

        return True, shapefile_info.md5
    finally:
        # Closing the NamedTemporaryFile also deletes it (delete=True);
        # runs on both the success and the error return paths
        tmp_shapefile.close()