Exemple #1
0
    def remove_stale_data(self, stale_age_in_seconds=None):
        """Run the complete stale data removal process.

        If ``stale_age_in_seconds`` is not supplied, the value of
        ``settings.STALE_DATA_SECONDS_TO_EXPIRATION`` is used instead.
        The checked/removed counters are reset, each removal step is
        run in turn, and the final counts are added to the message lines.
        """
        if stale_age_in_seconds is None:
            stale_age_in_seconds = settings.STALE_DATA_SECONDS_TO_EXPIRATION

        assert isinstance(stale_age_in_seconds, int),\
            'stale_age_in_seconds must be an int'

        msgt("Remove stale WorldMap data\n(older than %s seconds)"
             % stale_age_in_seconds)

        # Every run starts with fresh counters
        self.num_objects_checked = 0
        self.num_objects_removed = 0

        # Step: Geoconnect objects
        self.remove_geoconnect_objects(stale_age_in_seconds)

        # The remaining steps receive a number that continues on from the
        # count of Geoconnect object types (presumably a step number --
        # confirm against the callee signatures)
        num_geoconnect_types = len(GEOCONNECT_OBJECTS_TO_CHECK)

        # Step: S3 data
        self.remove_s3_data(stale_age_in_seconds, num_geoconnect_types + 1)

        # Step: older JoinTarget information retrieved from the WorldMap
        self.remove_old_join_target_information(num_geoconnect_types + 2)

        # Summarize the run in the message notes
        self.add_message_title_line(' -- Final counts  --')
        checked_note = "Count of objects Checked: %s" % self.num_objects_checked
        self.add_message_line(checked_note)
        removed_note = "Count of objects Removed: %s" % self.num_objects_removed
        self.add_message_line(removed_note)
Exemple #2
0
    def send_email_notice(self):
        """Send email notice to settings.ADMINS

        An introductory line is prepended and a closing title line is
        appended to the message lines before the recipients are checked.
        If settings.ADMINS is empty, nothing is sent.  The first admin
        address doubles as the "from" address.
        """
        msgt('Send email notice!')

        subject = 'GeoConnect: Clear stale data (%s)' % timezone.now()

        self.add_message_line(
            'This is an email notice from Geoconnect',
            prepend=True)
        self.add_message_title_line('(end of message)')

        # Single recipient check (the original repeated this test a second
        # time after building the address list, where it could never fire)
        if not settings.ADMINS:
            msg('No one to email! (no one in settings.ADMINS)')
            return

        # Index 1 of each ADMINS entry is used as the email address
        to_addresses = [admin[1] for admin in settings.ADMINS]

        from_email = to_addresses[0]
        email_msg = '\n'.join(self.message_lines)

        send_mail(subject, email_msg,
                  from_email, to_addresses,
                  fail_silently=False)

        msg('email sent to: %s' % to_addresses)
    def test_02_col_names_load(self):
        """Clear a tabular file's column info, re-create it, and check the results"""
        msgt(self.test_02_col_names_load.__doc__)

        # grab a tabular file obj
        # NOTE(review): this line was originally labelled "Election precinct
        # test", but the file attached below is the CBG measures file --
        # confirm what fixture pk=14 holds
        tab_file_info = TabularFileInfo.objects.get(pk=14)

        # --------------------------------------------
        #  Attach actual file -- path from fixture is not correct
        # --------------------------------------------
        cbg_filepath = join(dirname(__file__), 'input',
                            'CBG_Annual_and_Longitudinal_Measures.tab')
        # Use a context manager so the input file handle is always closed
        with open(cbg_filepath, 'r') as cbg_file:
            tab_file_info.dv_file.save(
                'CBG_Annual_and_Longitudinal_Measures',
                File(cbg_file),
                save=False)

        # clear out the column info
        tab_file_info.column_names = None
        tab_file_info.save()

        # re-run column info
        tab_file_stats = TabFileStats.create_tab_stats_from_tabular_info(
            tab_file_info)
        self.assertFalse(tab_file_stats.has_error())

        # Make sure num_rows and num_columns are the same
        self.assertEqual(tab_file_stats.tabular_info.num_rows, 554)
        self.assertEqual(tab_file_stats.tabular_info.num_columns, 49)

        # Is column_names a python list?
        self.assertEqual(type(tab_file_info.column_names), list)

        # Are the column_names correct?
        expected_colnames = [
            'BG_ID_10', 'DisSens_2010', 'PublicDenigration_2010',
            'PrivateNeglect_2010', 'Housing_2010', 'UncivilUse_2010',
            'BigBuild_2010', 'Trash_2010', 'Graffiti_2010', 'DisSens_2011',
            'PublicDenigration_2011', 'PrivateNeglect_2011', 'Housing_2011',
            'UncivilUse_2011', 'BigBuild_2011', 'Trash_2011', 'Graffiti_2011',
            'DisSens_2012', 'PublicDenigration_2012', 'PrivateNeglect_2012',
            'Housing_2012', 'UncivilUse_2012', 'BigBuild_2012', 'Trash_2012',
            'Graffiti_2012', 'DisSens_2013', 'PublicDenigration_2013',
            'PrivateNeglect_2013', 'Housing_2013', 'UncivilUse_2013',
            'BigBuild_2013', 'Trash_2013', 'Graffiti_2013', 'DisSens_2014',
            'PublicDenigration_2014', 'PrivateNeglect_2014', 'Housing_2014',
            'UncivilUse_2014', 'BigBuild_2014', 'Trash_2014', 'Graffiti_2014',
            'DisSens_long', 'PublicDenigration_long', 'PrivateNeglect_long',
            'Housing_long', 'UncivilUse_long', 'BigBuild_long', 'Trash_long',
            'Graffiti_long',
        ]
        self.assertEqual(tab_file_info.column_names, expected_colnames)
Exemple #4
0
    def test_target_read_functions(self):
        """Check expected format type functions including:
        - is_target_column_string()
        - requires_zero_padding()
        - get_zero_pad_length()
        - does_join_column_potentially_need_formatting()
        """
        msgt(self.test_target_read_functions.__doc__)

        join_target_record = JoinTargetInformation(
            name='test', target_info=self.join_targets_json)
        join_target_record.save()

        self.assertEqual(type(join_target_record.target_info), dict)

        # Only the three targets named below carry assertions; any other
        # entry in the 'data' list is passed over.
        for raw_target in join_target_record.target_info['data']:
            single_target = SingleJoinTargetInfo(raw_target)
            layer_name = single_target.target_layer_name

            # (a) census tracts layer
            if layer_name == 'geonode:census_tracts_2010_boston_6f6':
                msg('a) checking target: %s' % (layer_name))
                self.assertEqual(single_target.is_target_column_string(), True)
                self.assertEqual(
                    single_target.does_join_column_potentially_need_formatting(),
                    True)
                continue

            # (b) zip code layer
            if layer_name == 'geonode:us_zip_code_2015_boston_v3q':
                msg('b) checking target: %s' % (layer_name))
                self.assertEqual(single_target.is_target_column_string(), True)
                self.assertEqual(single_target.requires_zero_padding(), True)
                self.assertEqual(single_target.get_zero_pad_length(), 5)
                self.assertEqual(
                    single_target.does_join_column_potentially_need_formatting(),
                    True)
                continue

            # (c) roads target, matched on its name rather than layer name
            if single_target.name != 'Roads, Boston':
                continue

            msg('c) checking target: %s' % (layer_name))
            self.assertEqual(single_target.is_target_column_string(), False)
            self.assertEqual(single_target.requires_zero_padding(), False)
            self.assertEqual(single_target.get_zero_pad_length(), None)
            self.assertEqual(
                single_target.does_join_column_potentially_need_formatting(),
                False)
    def test_01_initial_join(self):
        """Using WorldMap successful JSON response to test "build_from_worldmap_json"""
        # Note: the docstring above is printed via msgt(), so its text is
        # left exactly as it was.
        msgt(self.test_01_initial_join.__doc__)

        tab_file_info = TabularFileInfo.objects.get(
            pk=15)  # Election precinct test

        # --------------------------------------------
        #  Attach actual file -- path from fixture is not correct
        # --------------------------------------------
        elect_filepath = join(dirname(__file__), 'input',
                              'election_precincts2.csv')
        # Use a context manager so the input file handle is always closed
        with open(elect_filepath, 'r') as elect_file:
            tab_file_info.dv_file.save(
                'election_precincts2.csv',
                File(elect_file),
                save=False)

        self.assertEqual(tab_file_info.id, 15)

        # ------------------------------------------
        # Fail by passing a string instead of JSON
        # ------------------------------------------
        tab_map_info = WorldMapTabularLayerInfo.build_from_worldmap_json(
            tab_file_info, self.json_join_data_string)
        self.assertEqual(tab_map_info, None)

        # ------------------------------------------
        # Load successful info
        # ------------------------------------------
        tab_map_info = WorldMapTabularLayerInfo.build_from_worldmap_json(
            tab_file_info,
            json.loads(self.json_join_data_string))
        self.assertTrue(tab_map_info.id is not None)

        # ------------------------------------------
        # Make sure data loading as expected
        # ------------------------------------------
        self.assertEqual(type(tab_map_info.core_data), dict)
        self.assertEqual(type(tab_map_info.attribute_data), list)
        self.assertEqual(type(tab_map_info.download_links), dict)
Exemple #6
0
    def test_target_read_functions(self):
        """Check expected format type functions including:
        - is_target_column_string()
        - requires_zero_padding()
        - get_zero_pad_length()
        - does_join_column_potentially_need_formatting()
        """
        msgt(self.test_target_read_functions.__doc__)

        saved_targets = JoinTargetInformation(
            name='test',
            target_info=self.join_targets_json)
        saved_targets.save()

        self.assertEqual(type(saved_targets.target_info), dict)

        # Walk every target; assertions exist only for the three
        # targets matched below, everything else is ignored.
        for target_dict in saved_targets.target_info['data']:
            tgt = SingleJoinTargetInfo(target_dict)
            layer = tgt.target_layer_name

            if layer == 'geonode:census_tracts_2010_boston_6f6':
                # census tracts: string column that may need formatting
                msg('a) checking target: %s' % (layer))
                self.assertEqual(tgt.is_target_column_string(), True)
                needs_format = tgt.does_join_column_potentially_need_formatting()
                self.assertEqual(needs_format, True)

            elif layer == 'geonode:us_zip_code_2015_boston_v3q':
                # zip codes: string column, zero padded to 5 characters
                msg('b) checking target: %s' % (layer))
                self.assertEqual(tgt.is_target_column_string(), True)
                self.assertEqual(tgt.requires_zero_padding(), True)
                self.assertEqual(tgt.get_zero_pad_length(), 5)
                needs_format = tgt.does_join_column_potentially_need_formatting()
                self.assertEqual(needs_format, True)

            elif tgt.name == 'Roads, Boston':
                # roads: non-string column, no padding, no formatting
                msg('c) checking target: %s' % (layer))
                self.assertEqual(tgt.is_target_column_string(), False)
                self.assertEqual(tgt.requires_zero_padding(), False)
                self.assertEqual(tgt.get_zero_pad_length(), None)
                needs_format = tgt.does_join_column_potentially_need_formatting()
                self.assertEqual(needs_format, False)
 def test_03_test_static_method(self):
     # Passes this method's own docstring to msgt(); no docstring is
     # defined here, so the value passed is None.
     # NOTE(review): the body appears to be cut off after this call and
     # the indentation differs from the neighbouring methods -- confirm
     # against the full file.
     msgt(self.test_03_test_static_method.__doc__)
Exemple #8
0
def view_delete_tabular_map(request):
    """
    Attempt to delete a dataverse-created WorldMap layer

    Steps:
      1. Validate the POSTed DeleteMapForm
      2. Delete the map layer from WorldMap
      3. Delete the map metadata from Dataverse
      4. Delete the local Geoconnect WorldMap info, if step 2 succeeded

    Every outcome renders 'worldmap_layers/view_delete_layer.html';
    flags in the template context describe what succeeded or failed.
    Raises Http404 for a request without POST data or when the WorldMap
    layer info can no longer be found.
    """
    if not request.POST:
        raise Http404('Delete Not Found.')

    template_name = 'worldmap_layers/view_delete_layer.html'

    d = get_common_lookup(request)
    d['WORLDMAP_SERVER_URL'] = settings.WORLDMAP_SERVER_URL
    d['DATAVERSE_SERVER_URL'] = settings.DATAVERSE_SERVER_URL

    d['page_title'] = PANEL_TITLE_DELETE_MAP
    d['IS_DELETE_PAGE'] = True

    # Check the delete request
    f = DeleteMapForm(request.POST)

    if not f.is_valid():
        d['ERROR_FOUND'] = True
        d['FAILED_TO_VALIDATE'] = True
        return render(request, template_name, d)

    # Form params look good
    worldmap_layer_info = f.get_worldmap_layer_info()
    if not worldmap_layer_info:
        raise Http404('WorldMap Layer info no longer available')

    # depending on the type: tabular_info, shapefile_info, etc
    #
    if worldmap_layer_info.is_shapefile_layer():
        d['is_shapefile_layer'] = True
    else:
        d['is_tabular_layer'] = True

    gis_data_info = worldmap_layer_info.get_gis_data_info()

    d['gis_data_info'] = gis_data_info

    # -----------------------------------
    # Delete map from WorldMap
    # -----------------------------------
    flag_delete_local_worldmap_info = False

    (success, err_msg_or_None) = delete_map_layer(gis_data_info,
                                                  worldmap_layer_info)
    if success is False:
        LOGGER.error("Failed to delete WORLDMAP layer: %s", err_msg_or_None)

        # A layer that WorldMap reports as already missing is not treated
        # as a failure; processing continues with the Dataverse metadata.
        # NOTE(review): in this case the local info is NOT flagged for
        # deletion below -- confirm that is intended.
        layer_already_gone = bool(err_msg_or_None) and \
            '"Existing layer not found."' in err_msg_or_None

        if not layer_already_gone:
            d['ERROR_FOUND'] = True
            d['WORLDMAP_DATA_DELETE_FAILURE'] = True
            d['ERR_MSG'] = err_msg_or_None
            return render(request, template_name, d)
    else:
        # At this point, the layer no longer exists on WorldMap,
        # set a flag to delete it from geoconnect, even if the Dataverse
        # delete fails
        flag_delete_local_worldmap_info = True

    # -----------------------------------
    # Delete metadata from dataverse
    # -----------------------------------
    (success2, err_msg_or_None2) = \
        MetadataUpdater.delete_dataverse_map_metadata(worldmap_layer_info)

    # Delete the Geoconnect WorldMap info -- regardless of
    # whether the data was removed from Dataverse
    if flag_delete_local_worldmap_info:
        msgt('Delete worldmap_layer_info: %s' % worldmap_layer_info)
        worldmap_layer_info.delete()

    if success2 is False:
        # Log the Dataverse error (previously the WorldMap error
        # message was logged here by mistake)
        LOGGER.error("Failed to delete Map Metadata from Dataverse: %s",
                     err_msg_or_None2)

        d['ERROR_FOUND'] = True
        d['DATAVERSE_DATA_DELETE_FAILURE'] = True
        d['ERR_MSG'] = err_msg_or_None2

        return render(request, template_name, d)

    d['DELETE_SUCCESS'] = True
    d['page_title'] = PANEL_TITLE_REMAP

    return render(request, template_name, d)
Exemple #9
0
def get_tabular_file_from_dv_api_info(dv_session_token, dataverse_info_dict):
    """Using Dataverse API information, create a :model:`gis_tabular.TabularFileInfo` object.
    This function should only return successful responses.

    Steps:
      (1) Validate dataverse_info_dict with DataverseInfoValidationForm
      (2) Check that the request comes from a registered Dataverse
      (3) Look for existing TabularFileInfo objects for this datafile
      (4) If several exist, keep the most recently created and delete the rest
      (5) Update and return the existing object if its file is available;
          otherwise create a new object and download the file

    Note: dataverse_info_dict is modified in place ('dv_session_token',
    'registered_dataverse' and, for a new object, 'name' are added).

    return True/False, md5 or ErrResultMsg

    Examples:  True, md5 from TabularFileInfo
               False,  ErrResultMsg
    """
    assert dv_session_token is not None, "dv_session_token cannot be None"
    assert type(dataverse_info_dict) is dict,\
        "dataverse_info_dict must be type 'dict'"

    msgt('dataverse_info_dict: {0}'.format(dataverse_info_dict))
    #------------------------------
    # (1) Validate the data (DataverseInfoValidationForm)
    #------------------------------
    validation_form = DataverseInfoValidationForm(dataverse_info_dict)
    if not validation_form.is_valid():
        errs = ['%s: %s' % (k, v) for k, v in validation_form.errors.items()]
        LOGGER.debug('errors: %s', errs)
        form_errs = '\n'.join(errs)
        return False, ErrResultMsg(None, form_errs)

    #-------------------------------------------------
    # (2) Check if this is a Registered Dataverse
    #-------------------------------------------------
    registered_dataverse = find_registered_dataverse(
        dataverse_info_dict['return_to_dataverse_url'])
    if registered_dataverse is None:
        return False, ErrResultMsg(
            FAILED_NOT_A_REGISTERED_DATAVERSE,
            "This dataverse url was not recognized: %s" %
            dataverse_info_dict['return_to_dataverse_url'])

    #-------------------------------------------------
    # (3) Look for existing Dataverse files in the database
    #    ShapefileInfo and TabularFileInfo objects are routinely
    #    deleted, but if file is already here, use it
    #-------------------------------------------------
    params_for_existing_check = dict(
        datafile_id=dataverse_info_dict.get('datafile_id', -1),
        dataverse_installation_name=dataverse_info_dict.get(
            'dataverse_installation_name', -1))

    # ids ordered oldest -> newest
    existing_tabular_info_ids = list(
        TabularFileInfo.objects.filter(**params_for_existing_check)
        .values_list('id', flat=True)
        .order_by('created'))
    msgt('existing_tabular_info_ids: %s' % existing_tabular_info_ids)

    #-------------------------------------------------
    # add dv_session_token and registered_dataverse to dataverse_info_dict
    #-------------------------------------------------
    dataverse_info_dict['dv_session_token'] = dv_session_token
    dataverse_info_dict['registered_dataverse'] = registered_dataverse

    #------------------------------
    # (4) Several existing TabularFileInfo objects found:
    #   keep the last one (most recently created), delete the older ones
    #   so that the .get() in step (5) matches a single object
    #------------------------------
    if len(existing_tabular_info_ids) > 1:
        older_ids = existing_tabular_info_ids[:-1]
        TabularFileInfo.objects.filter(id__in=older_ids).delete()

    #------------------------------
    # (5) Get or create a new TabularFileInfo object
    #------------------------------
    msgt('(5) Get or create a new TabularFileInfo object')
    try:
        # Existing TabularFileInfo:
        #   (1) Assume file is already saved
        #   (2) update the data
        #
        tabular_info = TabularFileInfo.objects.get(**params_for_existing_check)

        for key, value in dataverse_info_dict.iteritems():
            if key == 'column_names':
                tabular_info.add_column_names(value)
            else:
                setattr(tabular_info, key, value)

        # Save
        tabular_info.save()
        msg('tabular_info info saved')

        # If the file is still available, return it
        if tabular_info.is_dv_file_available():
            add_worldmap_layerinfo_if_exists(tabular_info)
            return True, tabular_info.md5

        # But the file isn't there!!  Delete TabularFileInfo and make a new one
        tabular_info.delete()

    except TabularFileInfo.DoesNotExist:
        # Nothing stored yet for this datafile; fall through and create it
        pass

    msg('new file')

    #------------------------------
    # New tabular_info, create object and attach file
    #------------------------------

    # Add name parameter
    dataverse_info_dict['name'] = dataverse_info_dict.get(
        'datafile_label', '(no datafile_label found)')
    tabular_info = TabularFileInfo(**dataverse_info_dict)
    tabular_info.save()

    #------------------------------
    # Download and attach file
    #------------------------------
    datafile_download_url = dataverse_info_dict.get('datafile_download_url',
                                                    '')

    # Add session token.  Gives permission to download/retrieve the file
    #   - http://localhost:8080/api/access/datafile/FILEID?key=YOURAPIKEY
    #
    # NOTE(review): assumes the download url has no query string yet
    datafile_download_url = '%s?key=%s' % (datafile_download_url,
                                           dv_session_token)
    msg('datafile_download_url: %s' % datafile_download_url)
    datafile_filename = dataverse_info_dict.get('datafile_label', '')

    img_temp = NamedTemporaryFile(delete=True)
    try:
        try:
            img_temp.write(urllib2.urlopen(datafile_download_url).read())
        except urllib2.HTTPError as e:
            # NOTE(review): other download failures (e.g. urllib2.URLError)
            # are not caught here and would leave this new object saved
            tabular_info.delete()  # clear tabular info
            err_msg = 'Failed to download tabular file. HTTPError: %s \n\nurl: %s' % (
                str(e), datafile_download_url)
            return False, ErrResultMsg(None, err_msg)
        img_temp.flush()

        tabular_info.dv_file.save(datafile_filename, File(img_temp))
        tabular_info.save()
        add_worldmap_layerinfo_if_exists(tabular_info)

        return True, tabular_info.md5
    finally:
        # Always release (and thereby remove) the temporary download file
        img_temp.close()
    def test_01_update_dataverse_metadata(self):
        """Test Dataverse "update metadata" url endpoint. Only testing
        fail conditions, e.g. can't contact server, etc."""
        # Note: the docstring above is printed via msgt(), so its text is
        # left exactly as it was.
        msgt(self.test_01_update_dataverse_metadata.__doc__)

        tab_file_info = TabularFileInfo.objects.get(
            pk=15)  # Election precinct test

        # --------------------------------------------
        #  Attach actual file -- path from fixture is not correct
        # --------------------------------------------
        elect_filepath = join(dirname(__file__), 'input',
                              'election_precincts2.csv')
        # Use a context manager so the input file handle is always closed
        with open(elect_filepath, 'r') as elect_file:
            tab_file_info.dv_file.save(
                'election_precincts2.csv',
                File(elect_file),
                save=False)

        self.assertEqual(tab_file_info.id, 15)

        # ------------------------------------------
        # Load successful info
        # ------------------------------------------
        tab_map_info = WorldMapTabularLayerInfo.build_from_worldmap_json(
            tab_file_info,
            json.loads(self.json_join_data_string))
        self.assertTrue(tab_map_info.id is not None)

        # ------------------------------------------
        # Make sure data loading as expected
        # ------------------------------------------
        self.assertEqual(type(tab_map_info.core_data), dict)
        self.assertEqual(type(tab_map_info.attribute_data), list)
        self.assertEqual(type(tab_map_info.download_links), dict)

        # ------------------------------------------
        # Send message to non-existent server
        # ------------------------------------------
        msgt('Send message to non-existent server')
        url_non_existent = 'https://nope.dataverse.harvard.edu'

        success, resp_dict = MetadataUpdater.update_dataverse_with_metadata(
            tab_map_info,
            url_non_existent)

        self.assertEqual(success, False)
        self.assertTrue(resp_dict['message'].startswith(
            ERROR_DV_NO_SERVER_RESPONSE))

        # ------------------------------------------
        # Send message to server without an endpoint
        # ------------------------------------------
        msgt('Send message to server without an endpoint')
        url_no_endpoint = 'http://www.harvard.edu'

        success, resp_dict = MetadataUpdater.update_dataverse_with_metadata(
            tab_map_info,
            url_no_endpoint)

        self.assertEqual(success, False)
        self.assertTrue(resp_dict['message'].startswith(
            ERROR_DV_PAGE_NOT_FOUND))

        # ------------------------------------------
        # No token in request to Dataverse
        # (this server does have the endpoint, so it gets its own name
        #  instead of re-using url_no_endpoint)
        # ------------------------------------------
        msgt(('No token in request to Dataverse'
              ' (requires working endpoint at https://dataverse.harvard.edu)'))
        url_working_endpoint = 'https://dataverse.harvard.edu'

        success, resp_dict = MetadataUpdater.update_dataverse_with_metadata(
            tab_map_info,
            url_working_endpoint)

        self.assertEqual(success, False)
        self.assertEqual(resp_dict['message'],
                         'Token not found in JSON request.')
Exemple #11
0
def get_tabular_file_from_dv_api_info(dv_session_token, dataverse_info_dict):
    """Using Dataverse API information, create a :model:`gis_tabular.TabularFileInfo` object.
    This function should only return successful responses.

    Steps:
      (1) Validate dataverse_info_dict with DataverseInfoValidationForm
      (2) Check that the request comes from a registered Dataverse
      (3) Look for existing TabularFileInfo objects for this datafile
      (4) If several exist, keep the most recently created and delete the rest
      (5) Update and return the existing object if its file is available;
          otherwise create a new object and download the file

    Note: dataverse_info_dict is modified in place ('dv_session_token',
    'registered_dataverse' and, for a new object, 'name' are added).

    return True/False, md5 or ErrResultMsg

    Examples:  True, md5 from TabularFileInfo
               False,  ErrResultMsg
    """
    assert dv_session_token is not None, "dv_session_token cannot be None"
    assert type(dataverse_info_dict) is dict,\
        "dataverse_info_dict must be type 'dict'"

    msgt('dataverse_info_dict: {0}'.format(dataverse_info_dict))
    #------------------------------
    # (1) Validate the data (DataverseInfoValidationForm)
    #------------------------------
    validation_form = DataverseInfoValidationForm(dataverse_info_dict)
    if not validation_form.is_valid():
        errs = ['%s: %s' % (k, v) for k, v in validation_form.errors.items()]
        LOGGER.debug('errors: %s', errs)
        form_errs = '\n'.join(errs)
        return False, ErrResultMsg(None, form_errs)

    #-------------------------------------------------
    # (2) Check if this is a Registered Dataverse
    #-------------------------------------------------
    registered_dataverse = find_registered_dataverse(
        dataverse_info_dict['return_to_dataverse_url'])
    if registered_dataverse is None:
        return False, ErrResultMsg(
            FAILED_NOT_A_REGISTERED_DATAVERSE,
            "This dataverse url was not recognized: %s" %
            dataverse_info_dict['return_to_dataverse_url'])

    #-------------------------------------------------
    # (3) Look for existing Dataverse files in the database
    #    ShapefileInfo and TabularFileInfo objects are routinely
    #    deleted, but if file is already here, use it
    #-------------------------------------------------
    params_for_existing_check = dict(
        datafile_id=dataverse_info_dict.get('datafile_id', -1),
        dataverse_installation_name=dataverse_info_dict.get(
            'dataverse_installation_name', -1))

    # ids ordered oldest -> newest
    existing_tabular_info_ids = list(
        TabularFileInfo.objects.filter(**params_for_existing_check)
        .values_list('id', flat=True)
        .order_by('created'))
    msgt('existing_tabular_info_ids: %s' % existing_tabular_info_ids)

    #-------------------------------------------------
    # add dv_session_token and registered_dataverse to dataverse_info_dict
    #-------------------------------------------------
    dataverse_info_dict['dv_session_token'] = dv_session_token
    dataverse_info_dict['registered_dataverse'] = registered_dataverse

    #------------------------------
    # (4) Several existing TabularFileInfo objects found:
    #   keep the last one (most recently created), delete the older ones
    #   so that the .get() in step (5) matches a single object
    #------------------------------
    if len(existing_tabular_info_ids) > 1:
        older_ids = existing_tabular_info_ids[:-1]
        TabularFileInfo.objects.filter(id__in=older_ids).delete()

    #------------------------------
    # (5) Get or create a new TabularFileInfo object
    #------------------------------
    msgt('(5) Get or create a new TabularFileInfo object')
    try:
        # Existing TabularFileInfo:
        #   (1) Assume file is already saved
        #   (2) update the data
        #
        tabular_info = TabularFileInfo.objects.get(**params_for_existing_check)

        for key, value in dataverse_info_dict.iteritems():
            if key == 'column_names':
                tabular_info.add_column_names(value)
            else:
                setattr(tabular_info, key, value)

        # Save
        tabular_info.save()
        msg('tabular_info info saved')

        # If the file is still available, return it
        if tabular_info.is_dv_file_available():
            add_worldmap_layerinfo_if_exists(tabular_info)
            return True, tabular_info.md5

        # But the file isn't there!!  Delete TabularFileInfo and make a new one
        tabular_info.delete()

    except TabularFileInfo.DoesNotExist:
        # Nothing stored yet for this datafile; fall through and create it
        pass

    msg('new file')

    #------------------------------
    # New tabular_info, create object and attach file
    #------------------------------

    # Add name parameter
    dataverse_info_dict['name'] = dataverse_info_dict.get(
        'datafile_label', '(no datafile_label found)')
    tabular_info = TabularFileInfo(**dataverse_info_dict)
    tabular_info.save()

    #------------------------------
    # Download and attach file
    #------------------------------
    datafile_download_url = dataverse_info_dict.get('datafile_download_url', '')

    # Add session token.  Gives permission to download/retrieve the file
    #   - http://localhost:8080/api/access/datafile/FILEID?key=YOURAPIKEY
    #
    # NOTE(review): assumes the download url has no query string yet
    datafile_download_url = '%s?key=%s' % (datafile_download_url, dv_session_token)
    msg('datafile_download_url: %s' % datafile_download_url)
    datafile_filename = dataverse_info_dict.get('datafile_label', '')

    img_temp = NamedTemporaryFile(delete=True)
    try:
        try:
            img_temp.write(urllib2.urlopen(datafile_download_url).read())
        except urllib2.HTTPError as e:
            # NOTE(review): other download failures (e.g. urllib2.URLError)
            # are not caught here and would leave this new object saved
            tabular_info.delete()  # clear tabular info
            err_msg = 'Failed to download tabular file. HTTPError: %s \n\nurl: %s' % (
                str(e), datafile_download_url)
            return False, ErrResultMsg(None, err_msg)
        img_temp.flush()

        tabular_info.dv_file.save(datafile_filename, File(img_temp))
        tabular_info.save()
        add_worldmap_layerinfo_if_exists(tabular_info)

        return True, tabular_info.md5
    finally:
        # Always release (and thereby remove) the temporary download file
        img_temp.close()
Exemple #12
0
def get_shapefile_from_dv_api_info(dv_session_token, dv_info_dict):
    """Using Dataverse API information, create a "ShapefileInfo" object.
    This function should only result in successful responses.

    Steps:
      (1) Validate ``dv_info_dict`` with DataverseInfoValidationForm
      (2) Check that 'return_to_dataverse_url' is a registered Dataverse
      (3) Look up existing ShapefileInfo ids for this datafile
      (4) If several exist, delete all but the most recently created
      (5) Update and re-use the existing object when its file is still
          available; otherwise create a new object, download the
          datafile and attach it

    Note: ``dv_info_dict`` is modified in place -- the keys
    'dv_session_token' and 'registered_dataverse' are added to it.

    return True/False, shp_md5 or ErrResultMsg

    Examples:  True, md5 from ShapefileInfo
               False,  ErrResultMsg

    Raises AssertionError if dv_session_token is None or dv_info_dict
    is not a dict.

    To do: Make this into a separate class
    """
    assert dv_session_token is not None, "dv_session_token cannot be None"
    assert type(dv_info_dict) is dict, "dv_info_dict must be type 'dict'"

    #------------------------------
    # (1) Validate the data (DataverseInfoValidationForm)
    #------------------------------
    validation_form = DataverseInfoValidationForm(dv_info_dict)
    if not validation_form.is_valid():
        errs = ['%s: %s' % (k, v) for k, v in validation_form.errors.items()]
        print(errs)
        form_errs = '\n'.join(errs)
        return False, ErrResultMsg(None, form_errs)

    #-------------------------------------------------
    # (2) Check if this is a Registered Dataverse
    #-------------------------------------------------
    registered_dataverse = find_registered_dataverse(
        dv_info_dict['return_to_dataverse_url'])
    if registered_dataverse is None:
        return False, ErrResultMsg(
            FAILED_NOT_A_REGISTERED_DATAVERSE,
            "This dataverse url was not recognized: %s"
            % dv_info_dict['return_to_dataverse_url'])

    #-------------------------------------------------
    # (3) Look for existing ShapefileInfo objects in the database
    #    ShapefileInfo objects are routinely deleted, but if file is already here, use it
    #       * todo: check for staleness, if the data is old delete it
    #-------------------------------------------------
    params_for_existing_check = dict(
        datafile_id=dv_info_dict.get('datafile_id', -1),
        dataverse_installation_name=dv_info_dict.get(
            'dataverse_installation_name', -1),
    )

    # Ordered oldest -> newest, so the last id is the most recently created
    existing_sets = ShapefileInfo.objects.filter(
        **params_for_existing_check
    ).values_list('id', flat=True).order_by('created')

    existing_shapefile_info_ids = list(existing_sets)
    msgt('existing_shapefile_info_ids: %s' % existing_shapefile_info_ids)

    #-------------------------------------------------
    # add dv_session_token and registered_dataverse to dv_info_dict
    #-------------------------------------------------
    dv_info_dict['dv_session_token'] = dv_session_token
    dv_info_dict['registered_dataverse'] = registered_dataverse

    #------------------------------
    # (4) Several ShapefileInfo(s) found:
    #     keep the most recently created, delete the older ones
    #------------------------------
    if len(existing_shapefile_info_ids) > 1:
        older_ids = existing_shapefile_info_ids[:-1]
        ShapefileInfo.objects.filter(id__in=older_ids).delete()

    #------------------------------
    # (5) Get or create a new ShapefileInfo object
    #------------------------------
    msgt('(5) Get or create a new ShapefileInfo object')
    try:
        # Existing ShapefileInfo:
        #   (1) Assume file is already saved
        #   (2) update the data
        #
        shapefile_info = ShapefileInfo.objects.get(**params_for_existing_check)

        # .items() works on both Python 2 and 3 (iteritems is Python 2 only)
        for key, value in dv_info_dict.items():
            setattr(shapefile_info, key, value)

        # Save
        shapefile_info.save()
        msg('shapefile info saved')

        # If the file is still available, return it
        if shapefile_info.is_dv_file_available():
            return True, shapefile_info.md5

        # But the file isn't there!!  Delete ShapefileInfo and make a new one
        shapefile_info.delete()

    except ShapefileInfo.DoesNotExist:
        pass

    msg('new file')

    #------------------------------
    # New shapefile info, create object and attach file
    #------------------------------
    shapefile_info = ShapefileInfo(**dv_info_dict)
    shapefile_info.save()

    #------------------------------
    # Download and attach file
    #------------------------------
    datafile_download_url = dv_info_dict.get('datafile_download_url', '')

    # Add session token.  Gives permission to download/retrieve the file
    #   - http://localhost:8080/api/access/datafile/FILEID?key=YOURAPIKEY
    #
    datafile_download_url = '%s?key=%s' % (datafile_download_url,
                                           dv_session_token)
    # NOTE(review): this url contains the session token; it is written to
    # the message output here and echoed in the error message below --
    # confirm that exposing the token in both places is acceptable.
    msg('datafile_download_url: %s' % datafile_download_url)
    datafile_filename = dv_info_dict.get('datafile_label', '')

    # The context manager closes (and thereby removes) the temporary file
    # on every exit path
    with NamedTemporaryFile(delete=True) as tmp_shapefile:
        try:
            response = urllib2.urlopen(datafile_download_url)
            try:
                tmp_shapefile.write(response.read())
            finally:
                response.close()
        except urllib2.URLError as err:
            # URLError is the base class of HTTPError, so this also covers
            # an unreachable server -- otherwise the ShapefileInfo saved
            # above would be left behind without a file.
            shapefile_info.delete()  # clear shapefile
            err_msg = 'Failed to download shapefile. %s: %s \n\nurl: %s' % (
                type(err).__name__, str(err), datafile_download_url)
            return False, ErrResultMsg(None, err_msg)

        tmp_shapefile.flush()

        shapefile_info.dv_file.save(datafile_filename, File(tmp_shapefile))
        shapefile_info.save()

    return True, shapefile_info.md5
Exemple #13
0
def view_delete_tabular_map(request):
    """
    Attempt to delete a dataverse-created WorldMap layer

    The delete happens in two steps:
      (1) remove the layer from WorldMap (delete_map_layer)
      (2) remove the map metadata from Dataverse
          (MetadataUpdater.delete_dataverse_map_metadata)

    Every outcome other than the Http404 cases renders
    'worldmap_layers/view_delete_layer.html'; flags in the template
    context (ERROR_FOUND, DELETE_SUCCESS, etc.) describe what happened.

    Raises Http404 if request.POST is empty or if the validated form
    cannot supply the WorldMap layer info.
    """
    if not request.POST:
        raise Http404('Delete Not Found.')

    template_name = 'worldmap_layers/view_delete_layer.html'

    d = get_common_lookup(request)
    d['WORLDMAP_SERVER_URL'] = settings.WORLDMAP_SERVER_URL
    d['DATAVERSE_SERVER_URL'] = settings.DATAVERSE_SERVER_URL

    d['page_title'] = PANEL_TITLE_DELETE_MAP
    d['IS_DELETE_PAGE'] = True

    # Check the delete request
    f = DeleteMapForm(request.POST)

    if not f.is_valid():
        d['ERROR_FOUND'] = True
        d['FAILED_TO_VALIDATE'] = True
        return render(request, template_name, d)

    # Form params look good
    worldmap_layer_info = f.get_worldmap_layer_info()
    if not worldmap_layer_info:
        raise Http404('WorldMap Layer info no longer available')

    # depending on the type: tabular_info, shapefile_info, etc
    #
    if worldmap_layer_info.is_shapefile_layer():
        d['is_shapefile_layer'] = True
    else:
        d['is_tabular_layer'] = True

    gis_data_info = worldmap_layer_info.get_gis_data_info()

    d['gis_data_info'] = gis_data_info

    # -----------------------------------
    # Delete map from WorldMap
    # -----------------------------------
    flag_delete_local_worldmap_info = False

    (success, err_msg_or_None) = delete_map_layer(gis_data_info, worldmap_layer_info)
    if success is False:
        LOGGER.error("Failed to delete WORLDMAP layer: %s", err_msg_or_None)

        # A "not found" answer from WorldMap is tolerated: continue on to
        # the Dataverse metadata delete instead of reporting an error.
        # NOTE(review): on this path the flag stays False, so the local
        # worldmap_layer_info is not deleted -- confirm this is intended.
        layer_not_found = (err_msg_or_None
                           and '"Existing layer not found."' in err_msg_or_None)
        if not layer_not_found:
            d['ERROR_FOUND'] = True
            d['WORLDMAP_DATA_DELETE_FAILURE'] = True
            d['ERR_MSG'] = err_msg_or_None
            return render(request, template_name, d)
    else:
        # At this point, the layer no longer exists on WorldMap,
        # set a flag to delete it from geoconnect, even if the Dataverse
        # delete fails
        flag_delete_local_worldmap_info = True

    # -----------------------------------
    # Delete metadata from dataverse
    # -----------------------------------
    (success2, err_msg_or_None2) = MetadataUpdater.delete_dataverse_map_metadata(worldmap_layer_info)

    # Delete the Geoconnect WorldMap info -- regardless of
    # whether the data was removed from Dataverse
    if flag_delete_local_worldmap_info:
        msgt('Delete worldmap_layer_info: %s' % worldmap_layer_info)
        worldmap_layer_info.delete()

    if success2 is False:
        # Log the Dataverse error message (err_msg_or_None2), not the
        # message from the earlier WorldMap delete
        LOGGER.error("Failed to delete Map Metadata from Dataverse: %s", err_msg_or_None2)

        d['ERROR_FOUND'] = True
        d['DATAVERSE_DATA_DELETE_FAILURE'] = True
        d['ERR_MSG'] = err_msg_or_None2

        return render(request, template_name, d)

    d['DELETE_SUCCESS'] = True
    d['page_title'] = PANEL_TITLE_REMAP

    return render(request, template_name, d)
    def test_01_update_dataverse_metadata(self):
        """Test Dataverse "update metadata" url endpoint. Only testing
        fail conditions, e.g. can't contact server, etc."""
        # The docstring above is printed at runtime by the msgt call below
        msgt(self.test_01_update_dataverse_metadata.__doc__)

        tab_file_info = TabularFileInfo.objects.get(pk=15) # Election precinct test

        # --------------------------------------------
        #  Attach actual file -- path from fixture is not correct
        # --------------------------------------------
        elect_filepath = join(dirname(__file__),
                            'input',
                            'election_precincts2.csv')
        # Open the input file in a "with" block so the handle is closed
        # once its content has been saved to the file field
        with open(elect_filepath, 'r') as elect_file:
            tab_file_info.dv_file.save(\
                            'election_precincts2.csv',
                            File(elect_file),
                            save=False)


        self.assertEqual(tab_file_info.id, 15)

        # ------------------------------------------
        # Load successful info
        # ------------------------------------------
        tab_map_info = WorldMapTabularLayerInfo.build_from_worldmap_json(\
                            tab_file_info,\
                            json.loads(self.json_join_data_string))
        self.assertTrue(tab_map_info.id is not None)

        # ------------------------------------------
        # Make sure data loading as expected
        # ------------------------------------------
        self.assertEqual(type(tab_map_info.core_data), dict)
        self.assertEqual(type(tab_map_info.attribute_data), list)
        self.assertEqual(type(tab_map_info.download_links), dict)


        # ------------------------------------------
        # Send message to non-existent server
        # ------------------------------------------
        msgt('Send message to non-existent server')
        url_non_existent = 'https://nope.dataverse.harvard.edu'

        success, resp_dict = MetadataUpdater.update_dataverse_with_metadata(\
                                    tab_map_info,
                                    url_non_existent)

        self.assertEqual(success, False)
        self.assertTrue(resp_dict['message'].startswith(\
                        ERROR_DV_NO_SERVER_RESPONSE))

        # ------------------------------------------
        # Send message to server without an endpoint
        # ------------------------------------------
        msgt('Send message to server without an endpoint')
        url_no_endpoint = 'http://www.harvard.edu'

        success, resp_dict = MetadataUpdater.update_dataverse_with_metadata(\
                                    tab_map_info,
                                    url_no_endpoint)

        self.assertEqual(success, False)
        self.assertTrue(resp_dict['message'].startswith(\
                        ERROR_DV_PAGE_NOT_FOUND))

        # ------------------------------------------
        # No token in request to Dataverse
        # ------------------------------------------
        msgt(('No token in request to Dataverse'
             ' (requires working endpoint at https://dataverse.harvard.edu)'))
        # The variable name is re-used from the previous case; this url is
        # the one the message above describes as having a working endpoint
        url_no_endpoint = 'https://dataverse.harvard.edu'

        success, resp_dict = MetadataUpdater.update_dataverse_with_metadata(\
                                    tab_map_info,
                                    url_no_endpoint)

        self.assertEqual(success, False)
        self.assertEqual(resp_dict['message'], 'Token not found in JSON request.')