Example #1
def aip_delete(request, uuid):
    """Delete an AIP from disk and remove its entries from the Elasticsearch index."""
    try:
        aip = elasticSearchFunctions.connect_and_get_aip_data(uuid)
        aip_filepath = aip['filePath']
        os.remove(aip_filepath)
        elasticSearchFunctions.delete_aip(uuid)
        elasticSearchFunctions.connect_and_delete_aip_files(uuid)
        return HttpResponseRedirect(reverse('components.archival_storage.views.overview'))
    except Exception:
        raise Http404
Example #2
def aip_delete(request, uuid):
    """Delete an AIP from disk and remove its entries from the Elasticsearch index."""
    try:
        aip = elasticSearchFunctions.connect_and_get_aip_data(uuid)
        aip_filepath = aip['filePath']
        os.remove(aip_filepath)
        elasticSearchFunctions.delete_aip(uuid)
        elasticSearchFunctions.connect_and_delete_aip_files(uuid)
        return HttpResponseRedirect(
            reverse('components.archival_storage.views.overview'))
    except Exception:
        raise Http404
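Examples #1 and #2 show the same Django view: it removes the AIP package from disk, deletes the AIP document from the index, and then calls connect_and_delete_aip_files() to remove the per-file documents. As a rough illustration only, a hypothetical entry in the old-style URLconf implied by the dotted path in reverse() might look like the sketch below; the URL pattern and name are assumptions, not taken from Archivematica.

# Hypothetical URLconf entry for the delete view above. Only the dotted
# module path comes from the reverse() call; the pattern is an assumption.
from django.conf.urls import patterns, url

urlpatterns = patterns(
    'components.archival_storage.views',
    url(r'^delete/(?P<uuid>[\w-]{36})/$', 'aip_delete', name='aip_delete'),
)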
Example #3
def test_delete_aip_files(self):
    # Verify the AIP's file documents exist
    results = self.conn.search(
        index='aips',
        doc_type='aipfile',
        body={'query': {'term': {'AIPUUID': self.aip_uuid}}},
        fields='AIPUUID,FILEUUID',
        sort='FILEUUID:desc',
    )
    assert results['hits']['total'] == 3
    assert results['hits']['hits'][0]['fields']['AIPUUID'] == [self.aip_uuid]
    assert results['hits']['hits'][0]['fields']['FILEUUID'] == ['b8bd3cdd-f224-4237-b0d7-99c217ff8e67']
    assert results['hits']['hits'][1]['fields']['AIPUUID'] == [self.aip_uuid]
    assert results['hits']['hits'][1]['fields']['FILEUUID'] == ['68babd3e-7e6b-40e5-99f6-00ea724d4ce8']
    assert results['hits']['hits'][2]['fields']['AIPUUID'] == [self.aip_uuid]
    assert results['hits']['hits'][2]['fields']['FILEUUID'] == ['547bbd92-d8a0-4624-a9d3-69ba706eacee']
    # Delete the AIP's file documents
    success = elasticSearchFunctions.connect_and_delete_aip_files(self.aip_uuid)
    # Verify the file documents are gone
    assert success is True
    results = self.conn.search(
        index='aips',
        doc_type='aipfile',
        body={'query': {'term': {'AIPUUID': self.aip_uuid}}},
        fields='AIPUUID,FILEUUID',
    )
    assert results['hits']['total'] == 0
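The test above assumes three aipfile documents have already been indexed for self.aip_uuid. A minimal sketch of the kind of setUp that would provide them is shown below; the index name, doc type, field names, and FILEUUIDs come from the test itself, while the class name, the client construction, and the AIP UUID value are assumptions made purely for illustration.

import unittest

import elasticsearch


class TestElasticSearchFunctions(unittest.TestCase):
    # Hypothetical fixture: index one aipfile document per FILEUUID used in
    # the assertions above, then refresh so the search can see them.
    def setUp(self):
        self.aip_uuid = '00000000-0000-0000-0000-000000000000'  # placeholder
        self.conn = elasticsearch.Elasticsearch(hosts='127.0.0.1:9200')
        for file_uuid in (
            'b8bd3cdd-f224-4237-b0d7-99c217ff8e67',
            '68babd3e-7e6b-40e5-99f6-00ea724d4ce8',
            '547bbd92-d8a0-4624-a9d3-69ba706eacee',
        ):
            self.conn.index(
                index='aips',
                doc_type='aipfile',
                body={'AIPUUID': self.aip_uuid, 'FILEUUID': file_uuid},
            )
        self.conn.indices.refresh(index='aips')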
Example #4
def index_aip():
    """ Write AIP information to ElasticSearch. """
    sip_uuid = sys.argv[1]  # %SIPUUID%
    sip_name = sys.argv[2]  # %SIPName%
    sip_path = sys.argv[3]  # %SIPDirectory%
    sip_type = sys.argv[4]  # %SIPType%

    # Check if ElasticSearch is enabled
    client_config_path = '/etc/archivematica/MCPClient/clientConfig.conf'
    config = ConfigParser.SafeConfigParser()
    config.read(client_config_path)
    elastic_search_disabled = False
    try:
        elastic_search_disabled = config.getboolean(
            'MCPClient', "disableElasticsearchIndexing")
    except ConfigParser.NoOptionError:
        pass
    if elastic_search_disabled:
        print('Skipping indexing: indexing is currently disabled in', client_config_path)
        return 0

    print('SIP UUID:', sip_uuid)
    aip_info = storage_service.get_file_info(uuid=sip_uuid)
    print('AIP info:', aip_info)
    aip_info = aip_info[0]

    mets_name = 'METS.{}.xml'.format(sip_uuid)
    mets_path = os.path.join(sip_path, mets_name)

    mods_paths = list_mods(sip_path)
    identifiers = []
    for mods in mods_paths:
        identifiers.extend(extract_identifiers_from_mods(mods))

    # If this is an AIC, find the number of AIPs stored in it and index that count
    aips_in_aic = None
    if sip_type == "AIC":
        try:
            uv = UnitVariable.objects.get(unittype="SIP",
                                          unituuid=sip_uuid,
                                          variable="AIPsinAIC")
            aips_in_aic = uv.variablevalue
        except UnitVariable.DoesNotExist:
            pass

    print('Indexing AIP info')
    # When reingesting, delete the existing AIP and AIP file documents before indexing new ones
    if 'REIN' in sip_type:
        print('Deleting outdated entry for AIP and AIP files with UUID', sip_uuid, 'from archival storage')
        elasticSearchFunctions.delete_aip(sip_uuid)
        elasticSearchFunctions.connect_and_delete_aip_files(sip_uuid)

    # Index AIP
    elasticSearchFunctions.connect_and_index_aip(
        sip_uuid,
        sip_name,
        aip_info['current_full_path'],
        mets_path,
        size=aip_info['size'],
        aips_in_aic=aips_in_aic,
        identifiers=identifiers)

    # Index AIP files
    print('Indexing AIP files')
    # Even though we treat MODS identifiers as SIP-level, we need to index them
    # here because the archival storage tab actually searches on the
    # aips/aipfile index.
    exitCode = elasticSearchFunctions.connect_and_index_files(
        index='aips',
        type='aipfile',
        uuid=sip_uuid,
        pathToArchive=sip_path,
        identifiers=identifiers,
        sipName=sip_name,
    )
    if exitCode == 1:
        print('Error indexing AIP files', file=sys.stderr)
        return 1

    return 0
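index_aip() reads its arguments from sys.argv and returns a shell-style exit code, so the client script presumably finishes with a small entry point along the lines of the sketch below (an assumption; the guard is not shown in this example, though Example #5 uses the same pattern).

# Hypothetical entry point for the script above: propagate the function's
# return value as the process exit code.
if __name__ == '__main__':
    sys.exit(index_aip())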
Example #5
# @package Archivematica
# @subpackage archivematicaClientScript
# @author Joseph Perry <*****@*****.**>
import os
import subprocess
import shlex
import sys
import MySQLdb
sys.path.append("/usr/lib/archivematica/archivematicaCommon")
import databaseInterface, elasticSearchFunctions
from executeOrRunSubProcess import executeOrRun
from fileOperations import renameAsSudo

if __name__ == '__main__':
    AIPUUID = sys.argv[1]
    print 'Removing indexed files for AIP ' + AIPUUID + '...'
    elasticSearchFunctions.connect_and_delete_aip_files(AIPUUID)
    print 'Done.'
Example #6
def list_display(request):
    current_page_number = int(request.GET.get('page', 1))
    logger.debug('Current page: %s', current_page_number)

    # get count of AIP files
    aip_indexed_file_count = aip_file_count()

    # get AIPs
    order_by = request.GET.get('order_by', 'name_unanalyzed')
    sort_by  = request.GET.get('sort_by', 'up')

    if sort_by == 'down':
        sort_direction = 'desc'
    else:
        sort_direction = 'asc'

    sort_specification = order_by + ':' + sort_direction
    sort_params = 'order_by=' + order_by + '&sort_by=' + sort_by

    conn = elasticSearchFunctions.connect_and_create_index('aips')

    # get list of UUIDs of AIPs that are deleted or pending deletion
    aips_deleted_or_pending_deletion = []
    should_haves = [
        {'match': {'status': 'DEL_REQ'}},
        {'match': {'status': 'DELETED'}},
    ]
    query = {
        "query": {
            "bool": {
                "should": should_haves
            }
        }
    }
    deleted_aip_results = conn.search(
        body=query,
        index='aips',
        doc_type='aip',
        fields='uuid,status'
    )
    for deleted_aip in deleted_aip_results['hits']['hits']:
        aips_deleted_or_pending_deletion.append(deleted_aip['fields']['uuid'][0])

    # Fetch results and paginate
    def es_pager(page, page_size):
        """
        Fetch one page of normalized entries from Elasticsearch.

        :param page: 1-indexed page to fetch
        :param page_size: Number of entries on a page
        :return: List of dicts for each entry, where keys and values have been cleaned up
        """
        start = (page - 1) * page_size
        results = conn.search(
            index='aips',
            doc_type='aip',
            body=elasticSearchFunctions.MATCH_ALL_QUERY,
            fields='origin,uuid,filePath,created,name,size',
            sort=sort_specification,
            size=page_size,
            from_=start,
        )
        # normalize results - each of the fields contains a single value,
        # but is returned from the ES API as a single-length array
        # e.g. {"fields": {"uuid": ["abcd"], "name": ["aip"] ...}}
        return [elasticSearchFunctions.normalize_results_dict(d) for d in results['hits']['hits']]

    items_per_page = 10
    count = conn.count(index='aips', doc_type='aip', body=elasticSearchFunctions.MATCH_ALL_QUERY)['count']
    results = LazyPagedSequence(es_pager, page_size=items_per_page, length=count)

    # Paginate
    page = helpers.pager(
        results,
        items_per_page,
        current_page_number
    )

    # process deletion, etc., and format results
    aips = []
    for aip in page.object_list:
        # If the AIP was deleted or is pending deletion, check whether its status has changed
        if aip['uuid'] in aips_deleted_or_pending_deletion:
            # check with storage server to see current status
            api_results = storage_service.get_file_info(uuid=aip['uuid'])
            try:
                aip_status = api_results[0]['status']
            except IndexError:
                # Storage service does not know about this AIP
                # TODO what should happen here?
                logger.info("AIP not found in storage service: {}".format(aip))
                continue

            # delete AIP metadata in ElasticSearch if AIP has been deleted from the
            # storage server
            # TODO: handle this asynchronously
            if aip_status == 'DELETED':
                elasticSearchFunctions.delete_aip(aip['uuid'])
                elasticSearchFunctions.connect_and_delete_aip_files(aip['uuid'])
            elif aip_status != 'DEL_REQ':
                # update the status in ElasticSearch for this AIP
                elasticSearchFunctions.connect_and_mark_stored(aip['uuid'])
        else:
            aip_status = 'UPLOADED'

        # Tweak AIP presentation and add to display array
        if aip_status != 'DELETED':
            aip['status'] = AIP_STATUS_DESCRIPTIONS[aip_status]

            try:
                size = '{0:.2f} MB'.format(float(aip['size']))
            except (TypeError, ValueError):
                size = 'Removed'

            aip['size'] = size

            aip['href'] = aip['filePath'].replace(AIPSTOREPATH + '/', "AIPsStore/")
            aip['date'] = aip['created']

            aips.append(aip)

    total_size = total_size_of_aips(conn)

    return render(request, 'archival_storage/archival_storage.html',
        {
            'total_size': total_size,
            'aip_indexed_file_count': aip_indexed_file_count,
            'aips': aips,
            'page': page,
            'search_params': sort_params,
        }
    )
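The comment inside es_pager() notes that Elasticsearch returns every requested field as a single-element list. A sketch of what normalize_results_dict() presumably does with one hit follows; this is an assumption based only on that comment, not the actual Archivematica implementation.

# Illustrative sketch (assumed behaviour): flatten the single-element
# field lists in one ES hit, e.g. {"uuid": ["abcd"]} -> {"uuid": "abcd"}.
def normalize_results_dict(hit):
    return {field: values[0] for field, values in hit['fields'].items()}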