def dump_oai_registry(self, dumpRecords=True, dumpSets=True, dumpMetadataFormats=True):
    """Load the OAI registry fixture and, optionally, the dependent fixtures.

    Asserts that no ``OaiRegistry`` document exists yet, calls
    ``restoreDump`` with the ``oai_registry.bson`` file and the
    ``oai_registry`` name, asserts that at least one registry now exists and
    then always calls ``dump_oai_identify``.

    :param dumpRecords: when true, also call ``dump_oai_record``.
    :param dumpSets: when true, also call ``dump_oai_set``.
    :param dumpMetadataFormats: when true, also call
        ``dump_oai_metadata_format``.
    """
    # assertEqual / assertGreater replace the deprecated assertEquals alias
    # and report the compared values when the check fails.
    self.assertEqual(len(OaiRegistry.objects()), 0)
    self.restoreDump(join(DUMP_OAI_PMH_TEST_PATH, 'oai_registry.bson'), 'oai_registry')
    self.assertGreater(len(OaiRegistry.objects()), 0)
    self.dump_oai_identify()
    # Optional fixtures, loaded in the same order as before:
    # metadata formats, then sets, then records.
    if dumpMetadataFormats:
        self.dump_oai_metadata_format()
    if dumpSets:
        self.dump_oai_set()
    if dumpRecords:
        self.dump_oai_record()
Beispiel #2
0
 def dump_oai_registry(self,
                       dumpRecords=True,
                       dumpSets=True,
                       dumpMetadataFormats=True):
     """Load the OAI registry fixture and, optionally, the dependent fixtures.

     Asserts that no ``OaiRegistry`` document exists yet, calls
     ``restoreDump`` with the ``oai_registry.bson`` file and the
     ``oai_registry`` name, asserts that at least one registry now exists
     and then always calls ``dump_oai_identify``.

     :param dumpRecords: when true, also call ``dump_oai_record``.
     :param dumpSets: when true, also call ``dump_oai_set``.
     :param dumpMetadataFormats: when true, also call
         ``dump_oai_metadata_format``.
     """
     # assertEqual / assertGreater replace the deprecated assertEquals alias
     # and report the compared values when the check fails.
     self.assertEqual(len(OaiRegistry.objects()), 0)
     self.restoreDump(join(DUMP_OAI_PMH_TEST_PATH, 'oai_registry.bson'),
                      'oai_registry')
     self.assertGreater(len(OaiRegistry.objects()), 0)
     self.dump_oai_identify()
     # Optional fixtures, loaded in the same order as before:
     # metadata formats, then sets, then records.
     if dumpMetadataFormats:
         self.dump_oai_metadata_format()
     if dumpSets:
         self.dump_oai_set()
     if dumpRecords:
         self.dump_oai_record()
Beispiel #3
0
def get_metadata_formats_detail(request):
    """Render the detail view for a selection of OAI-PMH metadata formats.

    The ``metadataFormats`` GET parameter is parsed as JSON; its ``oai-pmh``
    entry gives the metadata format ids and its optional ``local`` entry is
    passed to the template as ``local``. A missing or malformed parameter
    falls back to an empty selection and no local template.

    :param request: the incoming HTTP request.
    :return: an ``HttpResponse`` (content type ``application/javascript``)
        whose body is the JSON-encoded rendered template.
    """
    template = loader.get_template('oai_pmh/explore/explore_metadata_formats_detail.html')
    try:
        # Get metadata formats
        infos = json.loads(request.GET['metadataFormats'])
        metadataFormats = infos['oai-pmh']
        localTemplate = infos['local'] if 'local' in infos else None
    except (KeyError, TypeError, ValueError):
        # KeyError: the parameter or its 'oai-pmh' entry is missing;
        # ValueError: the parameter is not valid JSON;
        # TypeError: the decoded JSON value is not an object.
        # A bare except would also have swallowed SystemExit/KeyboardInterrupt.
        metadataFormats = []
        localTemplate = None

    list_metadata_formats_info = []
    for metadataFormat in OaiMetadataFormat.objects(pk__in=metadataFormats).all():
        # Only active registries are searched, so .get() raises for a format
        # whose registry is deactivated (unchanged from the original).
        registry = OaiRegistry.objects(isDeactivated=False).only('name').get(pk=metadataFormat.registry)
        list_metadata_formats_info.append({
            'registry': registry.name,
            'metadataPrefix': metadataFormat.metadataPrefix,
            'schema': metadataFormat.schema,
        })

    context = RequestContext(request, {
        'list_metadata_formats_info': list_metadata_formats_info,
        'local': localTemplate
    })

    return HttpResponse(json.dumps(template.render(context)), content_type='application/javascript')
Beispiel #4
0
def _get_metadata_formats_id(schemas, user_schemas, registries):
    """Return the distinct ids of the metadata formats matching the filters.

    Template ids are gathered from two sources: templates whose title is in
    ``user_schemas`` (taken as-is) and, for titles in ``schemas``, every
    version listed by the related non-deleted ``TemplateVersion`` documents
    minus their ``deletedVersions``.

    :param schemas: template titles whose versions are all included.
    :param user_schemas: titles of user-defined templates.
    :param registries: registry ids to restrict to; when empty, every
        registry that is not deactivated is considered.
    :return: result of ``distinct(field="id")`` on the matching
        ``OaiMetadataFormat`` query.
    """
    # User-defined schemas: the template ids are used directly, as strings.
    user_template_ids = [
        str(template_id)
        for template_id in Template.objects(title__in=user_schemas).distinct(field="id")
    ]

    # Common schemas: go through the template-version documents.
    version_ids = Template.objects(title__in=schemas).distinct(field="templateVersion")
    # Every template id listed in those versions ...
    listed_ids = TemplateVersion.objects(
        pk__in=version_ids, isDeleted=False).distinct(field="versions")
    # ... except the ones recorded as deleted versions.
    deleted_ids = TemplateVersion.objects(
        pk__in=version_ids, isDeleted=False).distinct(field="deletedVersions")
    common_template_ids = list(set(listed_ids) - set(deleted_ids))

    template_ids = user_template_ids + common_template_ids

    if len(registries) == 0:
        # No refinement given: exclude the metadata formats of deactivated
        # registries.
        inactive_registry_ids = [
            str(registry.id)
            for registry in OaiRegistry.objects(isDeactivated=True).order_by('id')
        ]
        registry_filter = {'registry__not__in': inactive_registry_ids}
    else:
        # Registries come from the refinement.
        registry_filter = {'registry__in': registries}

    return OaiMetadataFormat.objects(
        template__in=template_ids, **registry_filter).distinct(field="id")
def get_metadata_formats_detail(request):
    """Render the detail view for a selection of OAI-PMH metadata formats.

    The ``metadataFormats`` GET parameter is parsed as JSON; its ``oai-pmh``
    entry gives the metadata format ids and its optional ``local`` entry is
    passed to the template as ``local``. A missing or malformed parameter
    falls back to an empty selection and no local template.

    :param request: the incoming HTTP request.
    :return: an ``HttpResponse`` (content type ``application/javascript``)
        whose body is the JSON-encoded rendered template.
    """
    template = loader.get_template('oai_pmh/explore/explore_metadata_formats_detail.html')
    try:
        # Get metadata formats
        infos = json.loads(request.GET['metadataFormats'])
        metadataFormats = infos['oai-pmh']
        localTemplate = infos['local'] if 'local' in infos else None
    except (KeyError, TypeError, ValueError):
        # Missing parameter/entry (KeyError), invalid JSON (ValueError) or a
        # JSON value that is not an object (TypeError). The previous bare
        # except also hid SystemExit and KeyboardInterrupt.
        metadataFormats = []
        localTemplate = None

    list_metadata_formats = OaiMetadataFormat.objects(pk__in=metadataFormats).all()
    # One entry per metadata format; the registry name is looked up among
    # active registries only, so .get() raises if the registry is deactivated.
    list_metadata_formats_info = [
        {
            'registry': OaiRegistry.objects(isDeactivated=False).only('name').get(pk=metadataFormat.registry).name,
            'metadataPrefix': metadataFormat.metadataPrefix,
            'schema': metadataFormat.schema,
        }
        for metadataFormat in list_metadata_formats
    ]

    context = RequestContext(request, {
        'list_metadata_formats_info': list_metadata_formats_info,
        'local': localTemplate
    })

    return HttpResponse(json.dumps(template.render(context)), content_type='application/javascript')
def get_metadata_formats_detail(request):
    """Render the detail view for a selection of OAI-PMH metadata formats.

    The ``metadataFormats`` GET parameter is parsed as JSON; its ``oai-pmh``
    entry gives the metadata format ids and its optional ``local`` entry is
    passed to the template as ``local``. A missing or malformed parameter
    falls back to an empty selection and no local template.

    :param request: the incoming HTTP request.
    :return: an ``HttpResponse`` (content type ``application/javascript``)
        whose body is the JSON-encoded rendered template.
    """
    template = loader.get_template("oai_pmh/explore/explore_metadata_formats_detail.html")
    try:
        # Get metadata formats
        infos = json.loads(request.GET["metadataFormats"])
        metadataFormats = infos["oai-pmh"]
        localTemplate = infos["local"] if "local" in infos else None
    except (KeyError, TypeError, ValueError):
        # KeyError: parameter or "oai-pmh" entry missing; ValueError: invalid
        # JSON; TypeError: decoded value is not a JSON object. Narrower than
        # the former bare except, which also caught SystemExit and
        # KeyboardInterrupt.
        metadataFormats = []
        localTemplate = None

    list_metadata_formats_info = []
    for metadataFormat in OaiMetadataFormat.objects(pk__in=metadataFormats).all():
        # Looked up among active registries only; .get() raises when the
        # format's registry is deactivated (behaviour kept as-is).
        registry = OaiRegistry.objects(isDeactivated=False).only("name").get(pk=metadataFormat.registry)
        list_metadata_formats_info.append(
            {
                "registry": registry.name,
                "metadataPrefix": metadataFormat.metadataPrefix,
                "schema": metadataFormat.schema,
            }
        )

    context = RequestContext(
        request, {"list_metadata_formats_info": list_metadata_formats_info, "local": localTemplate}
    )

    return HttpResponse(json.dumps(template.render(context)), content_type="application/javascript")
 def test_oai_pmh_admin_with_data(self):
     """Check the OAI-PMH admin page once registry fixtures are loaded.

     Loads the registry dumps, requests ``/oai_pmh/admin/oai-pmh`` through
     the logged-in admin client and checks that the response is OK, has
     content, exposes a ``registry_form`` and lists as many ``registries``
     as there are ``OaiRegistry`` documents.
     """
     self.dump_oai_registry()
     url = '/oai_pmh/admin/oai-pmh'
     r = self.doRequestGetAdminClientLogged(url=url)
     self.isStatusOK(r.status_code)
     self.assertIsNotNone(r.content)
     # Both checks below read the same context dictionary; look it up once.
     page_context = r.context[0].dicts[1]
     self.assertIsNotNone(page_context.get('registry_form'))
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(len(page_context.get('registries')), len(OaiRegistry.objects()))
 def __init__(self):
     """Initialise the form and fill the choices of its three select fields.

     ``metadataprefix`` and ``set`` receive the choice list while it only
     holds the ``('0', 'Pick one')`` placeholder; ``dataProvider`` receives
     it after one ``("<id>|<url>", name)`` entry per active registry has
     been appended.
     """
     super(RequestForm, self).__init__()
     self.dataproviders = [('0', 'Pick one')]
     # NOTE(review): the same list object is extended below; whether these
     # two fields later see the registry entries depends on whether the
     # choices setter copies the list - confirm.
     for placeholder_only_field in ('metadataprefix', 'set'):
         self.fields[placeholder_only_field].choices = self.dataproviders
     # One choice per registry that is not deactivated.
     self.dataproviders.extend(
         (str(registry.id) + '|' + registry.url, str(registry.name))
         for registry in OaiRegistry.objects(isDeactivated=False).all())
     self.fields['dataProvider'].choices = self.dataproviders
Beispiel #9
0
 def test_oai_pmh_admin_with_data(self):
     """Check the OAI-PMH admin page once registry fixtures are loaded.

     Loads the registry dumps, requests ``/oai_pmh/admin/oai-pmh`` through
     the logged-in admin client and checks that the response is OK, has
     content, exposes a ``registry_form`` and lists as many ``registries``
     as there are ``OaiRegistry`` documents.
     """
     self.dump_oai_registry()
     url = '/oai_pmh/admin/oai-pmh'
     r = self.doRequestGetAdminClientLogged(url=url)
     self.isStatusOK(r.status_code)
     self.assertIsNotNone(r.content)
     # Both checks below read the same context dictionary; look it up once.
     page_context = r.context[0].dicts[1]
     self.assertIsNotNone(page_context.get('registry_form'))
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(len(page_context.get('registries')),
                      len(OaiRegistry.objects()))
Beispiel #10
0
 def __init__(self):
     """Initialise the form and fill the choices of its three select fields.

     ``metadataprefix`` and ``set`` receive the choice list while it only
     holds the ``('0', 'Pick one')`` placeholder; ``dataProvider`` receives
     it after one ``("<id>|<url>", name)`` entry per active registry has
     been appended.
     """
     super(RequestForm, self).__init__()
     self.dataproviders = [('0', 'Pick one')]
     # NOTE(review): the same list object is extended below; whether these
     # two fields later see the registry entries depends on whether the
     # choices setter copies the list - confirm.
     for placeholder_only_field in ('metadataprefix', 'set'):
         self.fields[placeholder_only_field].choices = self.dataproviders
     # One choice per registry that is not deactivated.
     self.dataproviders.extend(
         (str(registry.id) + '|' + registry.url, str(registry.name))
         for registry in OaiRegistry.objects(isDeactivated=False).all())
     self.fields['dataProvider'].choices = self.dataproviders
Beispiel #11
0
def init_harvest():
    """Reset the harvest bookkeeping and start the periodic watcher task.

    Purges all tasks, clears the ``isQueued`` flag of every registry that is
    not deactivated, then schedules ``watch_harvest_task``.
    """
    # Kill all tasks.
    purge_all_tasks()

    # A server reboot after an issue may have left registries flagged as
    # queued; clear the flag so they can be picked up again.
    for active_registry in OaiRegistry.objects(isDeactivated=False).all():
        active_registry.isQueued = False
        active_registry.save()

    # Start the task that regularly checks whether a registry needs harvesting.
    watch_harvest_task.apply_async()
    def __init__(self, userId=""):
        """Initialise the form with one ``my_registries`` choice per registry.

        :param userId: accepted for callers but not read by this method.
        """
        self.SCHEMAS_OPTIONS = []
        # All active registries (data providers), ordered by name, as
        # (id, name) choices.
        self.REGISTRIES_OPTIONS = [
            (data_provider.id, data_provider.name)
            for data_provider in OaiRegistry.objects(isDeactivated=False).order_by('name')
        ]

        super(KeywordForm, self).__init__()

        registries_field = self.fields['my_registries']
        # Reset the choices, then install the registry options.
        registries_field.choices = []
        registries_field.choices = self.REGISTRIES_OPTIONS
        self.my_registries_nb = len(self.REGISTRIES_OPTIONS)
Beispiel #13
0
def watch_harvest_task():
    """Queue a harvest task for each registry that needs one, then reschedule.

    Every registry that is not deactivated, has ``harvest`` set and is not
    already queued gets a ``harvest_task`` and is flagged ``isQueued``. The
    function then schedules itself again with a 10 second countdown.

    :return: one sentence per registry that was queued, or a "nothing to
        do" message when none was. Previously the "nothing to do" text was
        always the prefix of the message, even when registries were queued.
    """
    queued_messages = []
    # We launch the background task for each registry
    for registry in OaiRegistry.objects(isDeactivated=False).all():
        # If we need to harvest and a task doesn't already exist for this registry
        if registry.harvest and not registry.isQueued:
            queued_messages.append(
                "Registry {!s} need to be updated and harvested.".format(registry.name.encode("utf-8")))
            harvest_task.apply_async((str(registry.id),))
            registry.isQueued = True
            registry.save()

    # Periodic call: run this task again in 10 seconds
    watch_harvest_task.apply_async(countdown=10)
    if not queued_messages:
        return "No new registries need to be updated and harvested"
    return " ".join(queued_messages)
    def __init__(self, listRegistriesId=None):
        """Build the ``my_schemas`` choices from the given registries' formats.

        The metadata formats of the registries are grouped by ``hash``; each
        group becomes one choice whose value is a JSON object listing the
        format ids under ``oai-pmh`` (plus the linked template title under
        ``local`` when there is one) and whose label is the metadata prefix
        followed by the number of formats in the group.

        :param listRegistriesId: ids of the registries to consider; ``None``
            (the default) is treated as an empty list.
        :raises: whatever ``OaiRegistry.objects(pk=...).get()`` raises when
            an id does not match a registry.
        """
        # None replaces the former mutable [] default argument.
        if listRegistriesId is None:
            listRegistriesId = []

        self.SCHEMAS_OPTIONS = []
        # Retrieve registries name.
        # NOTE(review): registriesName is never read afterwards; the lookup
        # is kept because .get() fails for an unknown id - confirm whether
        # callers rely on that before removing it.
        registriesName = {}
        for registryId in listRegistriesId:
            obj = OaiRegistry.objects(pk=registryId).get()
            registriesName[str(registryId)] = obj.name

        # We retrieve all common schemas
        schemas = OaiMetadataFormat.objects(registry__in=listRegistriesId).order_by('metadataPrefix')
        # NOTE(review): groupby only merges adjacent items and the query is
        # ordered by metadataPrefix, not by hash - confirm that equal hashes
        # always end up next to each other.
        groups = [list(members) for _, members in groupby(schemas, lambda x: x.hash)]

        for group in groups:
            first = group[0]
            listValues = [str(elt.id) for elt in group]
            # Provide information about the number of registries using this
            # metadata format. The prefix is passed as a format_html argument
            # so that it is escaped and braces in it cannot break formatting.
            if len(listValues) == 1:
                name = format_html("{0}<br> (in 1 Registry)", first.metadataPrefix)
            else:
                name = format_html("{0}<br> (in {1} Registries)", first.metadataPrefix, len(listValues))
            # If it's linked to a template
            template = first.template
            if template is not None:
                name += format_html(" <text class='local'> + Local </text>")
                template = Template.objects.only('id', 'title').get(pk=template.id)
                t = json.dumps({'oai-pmh': listValues, 'local': template.title})
            else:
                t = json.dumps({'oai-pmh': listValues})

            self.SCHEMAS_OPTIONS.append((t, name))

        super(MetadataFormatsForm, self).__init__()
        self.fields['my_schemas'].choices = self.SCHEMAS_OPTIONS

        self.my_schemas_nb = len(self.SCHEMAS_OPTIONS)
def get_results_by_instance_keyword(request):
    """Run a keyword search over OAI-PMH records and build results or suggestions.

    Reads ``keyword``, ``schemas[]``, ``userSchemas[]``, ``refinements[]`` and
    the optional ``onlySuggestions`` flag from ``request.GET``, resolves the
    matching metadata format ids and calls ``OaiRecord.executeFullTextQuery``.
    When ``onlySuggestions`` is false every hit is rendered into
    ``resultString`` (and summarised in ``results``); otherwise keyword
    suggestions are collected in ``resultsByKeyword``.

    Side effect: stores the JSON description of a "Local" ``Instance`` in
    ``request.session['instancesExplore']``.

    NOTE(review): no ``return`` statement is visible in this part of the
    function, so how the accumulated values are handed back cannot be told
    from here.
    """
    print 'BEGIN def getResultsKeyword(request)'
    resultsByKeyword = []
    results = []
    resultString = ""

    # Describe the local instance and remember it in the session.
    json_instances = []
    if 'HTTPS' in request.META['SERVER_PROTOCOL']:
        protocol = "https"
    else:
        protocol = "http"
    instance = Instance(name="Local", protocol=protocol, address=request.META['REMOTE_ADDR'], port=request.META['SERVER_PORT'], access_token="token", refresh_token="token")
    json_instances.append(instance.to_json())
    request.session['instancesExplore'] = json_instances
    # NOTE(review): sessionName is not read in the visible part of this function.
    sessionName = "resultsExploreOaiPMh" + instance['name']


    # Read the search parameters from the query string.
    try:
        keyword = request.GET['keyword']
        schemas = request.GET.getlist('schemas[]')
        userSchemas = request.GET.getlist('userSchemas[]')
        refinements = refinements_to_mongo(request.GET.getlist('refinements[]'))
        if 'onlySuggestions' in request.GET:
            onlySuggestions = json.loads(request.GET['onlySuggestions'])
        else:
            onlySuggestions = False
    # NOTE(review): bare except - any failure above (including inside
    # refinements_to_mongo) silently switches to suggestion mode with an
    # empty keyword and no filters.
    except:
        keyword = ''
        schemas = []
        userSchemas = []
        refinements = {}
        onlySuggestions = True
    # We get all template versions for the given schemas
    # First, we take care of user defined schema
    templatesIDUser = Template.objects(title__in=userSchemas).distinct(field="id")
    templatesIDUser = [str(x) for x in templatesIDUser]

    # Take care of the rest, with versions
    templatesVersions = Template.objects(title__in=schemas).distinct(field="templateVersion")

    # We get all templates ID, for all versions
    allTemplatesIDCommon = TemplateVersion.objects(pk__in=templatesVersions, isDeleted=False).distinct(field="versions")
    # We remove the deleted versions
    allTemplatesIDCommonRemoved = TemplateVersion.objects(pk__in=templatesVersions, isDeleted=False).distinct(field="deletedVersions")
    templatesIDCommon = list(set(allTemplatesIDCommon) - set(allTemplatesIDCommonRemoved))

    templatesID = templatesIDUser + templatesIDCommon
    # We retrieve deactivated registries so as not to get their metadata formats
    deactivatedRegistries = [str(x.id) for x in OaiRegistry.objects(isDeactivated=True).order_by('id')]
    metadataFormatsID = OaiMetadataFormat.objects(template__in=templatesID, registry__not__in=deactivatedRegistries).distinct(field="id")


    instanceResults = OaiRecord.executeFullTextQuery(keyword, metadataFormatsID, refinements)
    if len(instanceResults) > 0:
        if not onlySuggestions:
            # Default XSLT and result template, only needed when rendering.
            xsltPath = os.path.join(settings.SITE_ROOT, 'static/resources/xsl/xml2html.xsl')
            xslt = etree.parse(xsltPath)
            transform = etree.XSLT(xslt)
            template = loader.get_template('oai_pmh/explore/explore_result_keyword.html')

        # Retrieve schemas and registries once, to avoid fetching them for each result
        registriesName = {}
        objMetadataFormats = {}
        listRegistriesID = set([x['registry'] for x in instanceResults])
        for registryId in listRegistriesID:
            obj = OaiRegistry.objects(pk=registryId).get()
            registriesName[str(registryId)] = obj.name
        listSchemaId = set([x['metadataformat'] for x in instanceResults])
        for schemaId in listSchemaId:
            obj = OaiMetadataFormat.objects(pk=schemaId).get()
            objMetadataFormats[str(schemaId)] = obj

        # NOTE(review): listItems is not read in the visible part of this function.
        listItems = []
        # Local aliases for the callables used inside the result loop.
        xmltodictunparse = xmltodict.unparse
        appendResult = results.append
        toXML = etree.XML
        parse = etree.parse
        XSLT = etree.XSLT
        if not onlySuggestions:
            for instanceResult in instanceResults:
                custom_xslt = False
                appendResult({'title':instanceResult['identifier'], 'content':xmltodictunparse(instanceResult['metadata']),'id':str(instanceResult['_id'])})
                dom = toXML(str(xmltodictunparse(instanceResult['metadata']).encode('utf-8')))
                # Check if a custom list result XSLT has to be used
                try:
                    metadataFormat = objMetadataFormats[str(instanceResult['metadataformat'])]
                    if metadataFormat.template.ResultXsltList:
                        listXslt = parse(BytesIO(metadataFormat.template.ResultXsltList.content.encode('utf-8')))
                        listTransform = XSLT(listXslt)
                        newdom = listTransform(dom)
                        custom_xslt = True
                    else:
                        newdom = transform(dom)
                except Exception, e:
                    # We use the default one
                    # NOTE(review): if the metadataFormat lookup itself
                    # raised, metadataFormat is unbound (or left over from
                    # the previous iteration) when it is used below.
                    newdom = transform(dom)
                    custom_xslt = False

                # NOTE(review): registriesName is keyed by str(registryId);
                # this lookup assumes instanceResult['registry'] is already
                # a string - confirm.
                context = RequestContext(request, {'id':str(instanceResult['_id']),
                                   'xml': str(newdom),
                                   'title': instanceResult['identifier'],
                                   'custom_xslt': custom_xslt,
                                   'schema_name': metadataFormat.metadataPrefix,
                                   'registry_name': registriesName[instanceResult['registry']],
                                   'oai_pmh': True})


                resultString+= template.render(context)

        else:
            # Suggestion mode: only the first 20 results are scanned.
            for instanceResult in instanceResults[:20]:
                # Build a pattern matching each keyword word, alone or
                # followed by more word characters (the pattern depends only
                # on keyword but is rebuilt for every result).
                wordList = re.sub("[^\w]", " ",  keyword).split()
                wordList = [x + "|" + x +"\w+" for x in wordList]
                wordList = '|'.join(wordList)
                listWholeKeywords = re.findall("\\b("+ wordList +")\\b", xmltodict.unparse(instanceResult['metadata']).encode('utf-8'), flags=re.IGNORECASE)
                labels = list(set(listWholeKeywords))

                # Add each lower-cased match once as a label/value pair.
                for label in labels:
                    label = label.lower()
                    result_json = {}
                    result_json['label'] = label
                    result_json['value'] = label
                    if not result_json in resultsByKeyword:
                        resultsByKeyword.append(result_json)
def get_results_by_instance_keyword(request):
    """Run a keyword search over OAI-PMH records and build results or suggestions.

    Reads ``keyword``, ``schemas[]``, ``userSchemas[]``, ``refinements[]`` and
    the optional ``onlySuggestions`` flag from ``request.GET``, resolves the
    matching metadata format ids and calls ``OaiRecord.executeFullTextQuery``.
    When ``onlySuggestions`` is false every hit is rendered into
    ``resultString`` (and summarised in ``results``); otherwise keyword
    suggestions are collected in ``resultsByKeyword``.

    Side effect: stores the JSON description of a "Local" ``Instance`` in
    ``request.session['instancesExplore']``.

    NOTE(review): no ``return`` statement is visible in this part of the
    function, so how the accumulated values are handed back cannot be told
    from here.
    """
    print 'BEGIN def getResultsKeyword(request)'
    resultsByKeyword = []
    results = []
    resultString = ""

    # Describe the local instance and remember it in the session.
    json_instances = []
    if 'HTTPS' in request.META['SERVER_PROTOCOL']:
        protocol = "https"
    else:
        protocol = "http"
    instance = Instance(name="Local",
                        protocol=protocol,
                        address=request.META['REMOTE_ADDR'],
                        port=request.META['SERVER_PORT'],
                        access_token="token",
                        refresh_token="token")
    json_instances.append(instance.to_json())
    request.session['instancesExplore'] = json_instances
    # NOTE(review): sessionName is not read in the visible part of this function.
    sessionName = "resultsExploreOaiPMh" + instance['name']

    # Read the search parameters from the query string.
    try:
        keyword = request.GET['keyword']
        schemas = request.GET.getlist('schemas[]')
        userSchemas = request.GET.getlist('userSchemas[]')
        refinements = refinements_to_mongo(
            request.GET.getlist('refinements[]'))
        if 'onlySuggestions' in request.GET:
            onlySuggestions = json.loads(request.GET['onlySuggestions'])
        else:
            onlySuggestions = False
    # NOTE(review): bare except - any failure above (including inside
    # refinements_to_mongo) silently switches to suggestion mode with an
    # empty keyword and no filters.
    except:
        keyword = ''
        schemas = []
        userSchemas = []
        refinements = {}
        onlySuggestions = True
    # We get all template versions for the given schemas
    # First, we take care of user defined schema
    templatesIDUser = Template.objects(title__in=userSchemas).distinct(
        field="id")
    templatesIDUser = [str(x) for x in templatesIDUser]

    # Take care of the rest, with versions
    templatesVersions = Template.objects(title__in=schemas).distinct(
        field="templateVersion")

    # We get all templates ID, for all versions
    allTemplatesIDCommon = TemplateVersion.objects(
        pk__in=templatesVersions, isDeleted=False).distinct(field="versions")
    # We remove the deleted versions
    allTemplatesIDCommonRemoved = TemplateVersion.objects(
        pk__in=templatesVersions,
        isDeleted=False).distinct(field="deletedVersions")
    templatesIDCommon = list(
        set(allTemplatesIDCommon) - set(allTemplatesIDCommonRemoved))

    templatesID = templatesIDUser + templatesIDCommon
    # We retrieve deactivated registries so as not to get their metadata formats
    deactivatedRegistries = [
        str(x.id)
        for x in OaiRegistry.objects(isDeactivated=True).order_by('id')
    ]
    metadataFormatsID = OaiMetadataFormat.objects(
        template__in=templatesID,
        registry__not__in=deactivatedRegistries).distinct(field="id")

    instanceResults = OaiRecord.executeFullTextQuery(keyword,
                                                     metadataFormatsID,
                                                     refinements)
    if len(instanceResults) > 0:
        if not onlySuggestions:
            # Default XSLT and result template, only needed when rendering.
            xsltPath = os.path.join(settings.SITE_ROOT,
                                    'static/resources/xsl/xml2html.xsl')
            xslt = etree.parse(xsltPath)
            transform = etree.XSLT(xslt)
            template = loader.get_template(
                'oai_pmh/explore/explore_result_keyword.html')

        # Retrieve schemas and registries once, to avoid fetching them for each result
        registriesName = {}
        objMetadataFormats = {}
        listRegistriesID = set([x['registry'] for x in instanceResults])
        for registryId in listRegistriesID:
            obj = OaiRegistry.objects(pk=registryId).get()
            registriesName[str(registryId)] = obj.name
        listSchemaId = set([x['metadataformat'] for x in instanceResults])
        for schemaId in listSchemaId:
            obj = OaiMetadataFormat.objects(pk=schemaId).get()
            objMetadataFormats[str(schemaId)] = obj

        # NOTE(review): listItems is not read in the visible part of this function.
        listItems = []
        # Local aliases for the callables used inside the result loop.
        xmltodictunparse = xmltodict.unparse
        appendResult = results.append
        toXML = etree.XML
        parse = etree.parse
        XSLT = etree.XSLT
        if not onlySuggestions:
            for instanceResult in instanceResults:
                custom_xslt = False
                appendResult({
                    'title':
                    instanceResult['identifier'],
                    'content':
                    xmltodictunparse(instanceResult['metadata']),
                    'id':
                    str(instanceResult['_id'])
                })
                dom = toXML(
                    str(
                        xmltodictunparse(
                            instanceResult['metadata']).encode('utf-8')))
                # Check if a custom list result XSLT has to be used
                try:
                    metadataFormat = objMetadataFormats[str(
                        instanceResult['metadataformat'])]
                    if metadataFormat.template.ResultXsltList:
                        listXslt = parse(
                            BytesIO(
                                metadataFormat.template.ResultXsltList.content.
                                encode('utf-8')))
                        listTransform = XSLT(listXslt)
                        newdom = listTransform(dom)
                        custom_xslt = True
                    else:
                        newdom = transform(dom)
                except Exception, e:
                    # We use the default one
                    # NOTE(review): if the metadataFormat lookup itself
                    # raised, metadataFormat is unbound (or left over from
                    # the previous iteration) when it is used below.
                    newdom = transform(dom)
                    custom_xslt = False

                # NOTE(review): registriesName is keyed by str(registryId);
                # the lookup below assumes instanceResult['registry'] is
                # already a string - confirm.
                context = RequestContext(
                    request, {
                        'id': str(instanceResult['_id']),
                        'xml': str(newdom),
                        'title': instanceResult['identifier'],
                        'custom_xslt': custom_xslt,
                        'schema_name': metadataFormat.metadataPrefix,
                        'registry_name':
                        registriesName[instanceResult['registry']],
                        'oai_pmh': True
                    })

                resultString += template.render(context)

        else:
            # Suggestion mode: only the first 20 results are scanned.
            for instanceResult in instanceResults[:20]:
                # Build a pattern matching each keyword word, alone or
                # followed by more word characters (the pattern depends only
                # on keyword but is rebuilt for every result).
                wordList = re.sub("[^\w]", " ", keyword).split()
                wordList = [x + "|" + x + "\w+" for x in wordList]
                wordList = '|'.join(wordList)
                listWholeKeywords = re.findall(
                    "\\b(" + wordList + ")\\b",
                    xmltodict.unparse(
                        instanceResult['metadata']).encode('utf-8'),
                    flags=re.IGNORECASE)
                labels = list(set(listWholeKeywords))

                # Add each lower-cased match once as a label/value pair.
                for label in labels:
                    label = label.lower()
                    result_json = {}
                    result_json['label'] = label
                    result_json['value'] = label
                    if not result_json in resultsByKeyword:
                        resultsByKeyword.append(result_json)
Beispiel #17
0
def get_results_by_instance_keyword(request):
    """Run a keyword search over OAI-PMH records and build results or suggestions.

    Reads ``keyword``, ``schemas[]``, ``userSchemas[]``, ``refinements`` (a
    JSON string), ``registries[]`` and the optional ``onlySuggestions`` flag
    from ``request.POST``, resolves the metadata format ids through
    ``_get_metadata_formats_id`` and calls ``OaiRecord.executeFullTextQuery``.
    When ``onlySuggestions`` is false every hit is rendered into
    ``resultString`` (and summarised in ``results``); otherwise keyword
    suggestions are collected in ``resultsByKeyword``.

    Side effect: stores the JSON description of a "Local" ``Instance`` in
    ``request.session['instancesExplore']``.

    NOTE(review): no ``return`` statement is visible in this part of the
    function, so how the accumulated values are handed back cannot be told
    from here.
    """
    print 'BEGIN def getResultsKeyword(request)'
    resultsByKeyword = []
    results = []
    resultString = ""

    # Describe the local instance and remember it in the session.
    json_instances = []
    if 'HTTPS' in request.META['SERVER_PROTOCOL']:
        protocol = "https"
    else:
        protocol = "http"
    instance = Instance(name="Local",
                        protocol=protocol,
                        address=request.META['REMOTE_ADDR'],
                        port=request.META['SERVER_PORT'],
                        access_token="token",
                        refresh_token="token")
    json_instances.append(instance.to_json())
    request.session['instancesExplore'] = json_instances
    # NOTE(review): sessionName is not read in the visible part of this function.
    sessionName = "resultsExploreOaiPMh" + instance['name']

    # Read the search parameters from the POST data, with defaults for
    # missing entries. There is no try/except here, so invalid JSON in
    # 'refinements' or 'onlySuggestions' propagates to the caller.
    keyword = request.POST.get('keyword', '')
    schemas = request.POST.getlist('schemas[]', [])
    user_schemas = request.POST.getlist('userSchemas[]', [])
    refinements = refinements_to_mongo(
        json.loads(request.POST.get('refinements', '{}')))
    registries = request.POST.getlist('registries[]', [])
    if 'onlySuggestions' in request.POST:
        onlySuggestions = json.loads(request.POST['onlySuggestions'])
    else:
        onlySuggestions = False

    metadata_format_ids = _get_metadata_formats_id(schemas=schemas,
                                                   user_schemas=user_schemas,
                                                   registries=registries)
    instanceResults = OaiRecord.executeFullTextQuery(keyword,
                                                     metadata_format_ids,
                                                     refinements)
    if len(instanceResults) > 0:
        if not onlySuggestions:
            # Default XSLT and result template, only needed when rendering.
            xsltPath = os.path.join(settings.SITE_ROOT,
                                    'static/resources/xsl/xml2html.xsl')
            xslt = etree.parse(xsltPath)
            transform = etree.XSLT(xslt)
            template = loader.get_template(
                'oai_pmh/explore/explore_result_keyword.html')

        # Retrieve schemas and registries once, to avoid fetching them for each result
        registriesName = {}
        objMetadataFormats = {}
        listRegistriesID = set([x['registry'] for x in instanceResults])
        registriesURL = {}
        for registryId in listRegistriesID:
            obj = OaiRegistry.objects(pk=registryId).get()
            registriesName[str(registryId)] = obj.name
            registriesURL[str(registryId)] = obj.url
        listSchemaId = set([x['metadataformat'] for x in instanceResults])
        for schemaId in listSchemaId:
            obj = OaiMetadataFormat.objects(pk=schemaId).get()
            objMetadataFormats[str(schemaId)] = obj

        # NOTE(review): listItems is not read in the visible part of this function.
        listItems = []
        # Local aliases for the callables used inside the result loop.
        xmltodictunparse = XMLdata.unparse
        appendResult = results.append
        toXML = etree.XML
        parse = etree.parse
        XSLT = etree.XSLT
        if not onlySuggestions:
            for instanceResult in instanceResults:
                custom_xslt = False
                appendResult({
                    'title':
                    instanceResult['identifier'],
                    'content':
                    xmltodictunparse(instanceResult['metadata']),
                    'id':
                    str(instanceResult['_id'])
                })
                dom = toXML(
                    str(
                        xmltodictunparse(
                            instanceResult['metadata']).encode('utf-8')))
                # Check if a custom list result XSLT has to be used
                try:
                    metadataFormat = objMetadataFormats[str(
                        instanceResult['metadataformat'])]
                    if metadataFormat.template.ResultXsltList:
                        listXslt = parse(
                            BytesIO(
                                metadataFormat.template.ResultXsltList.content.
                                encode('utf-8')))
                        listTransform = XSLT(listXslt)
                        newdom = listTransform(dom)
                        custom_xslt = True
                    else:
                        newdom = transform(dom)
                except Exception, e:
                    # We use the default one
                    # NOTE(review): if the metadataFormat lookup itself
                    # raised, metadataFormat is unbound (or left over from
                    # the previous iteration) when it is used below.
                    newdom = transform(dom)
                    custom_xslt = False

                # NOTE(review): registriesName / registriesURL are keyed by
                # str(registryId); the lookups below assume
                # instanceResult['registry'] is already a string - confirm.
                # Registry names longer than 30 characters are shortened to
                # their first 30 characters followed by "...".
                registry_name = registriesName[instanceResult['registry']]
                if len(registry_name) > 30:
                    registry_name = "{0}...".format(registry_name[:30])

                # Only the scheme and network location of the registry URL
                # are passed to the template.
                url = urlparse(registriesURL[instanceResult['registry']])
                context = RequestContext(
                    request, {
                        'id': str(instanceResult['_id']),
                        'xml': str(newdom),
                        'title': instanceResult['identifier'],
                        'custom_xslt': custom_xslt,
                        'template_name': metadataFormat.template.title,
                        'registry_name': registry_name,
                        'registry_url': "{0}://{1}".format(
                            url.scheme, url.netloc),
                        'oai_pmh': True
                    })

                resultString += template.render(context)

        else:
            # Suggestion mode: only the first 20 results are scanned.
            for instanceResult in instanceResults[:20]:
                # Build a pattern matching each keyword word, alone or
                # followed by more word characters (the pattern depends only
                # on keyword but is rebuilt for every result).
                wordList = re.sub("[^\w]", " ", keyword).split()
                wordList = [x + "|" + x + "\w+" for x in wordList]
                wordList = '|'.join(wordList)
                listWholeKeywords = re.findall(
                    "\\b(" + wordList + ")\\b",
                    XMLdata.unparse(
                        instanceResult['metadata']).encode('utf-8'),
                    flags=re.IGNORECASE)
                labels = list(set(listWholeKeywords))

                # Add each lower-cased match once as a label/value pair.
                for label in labels:
                    label = label.lower()
                    result_json = {}
                    result_json['label'] = label
                    result_json['value'] = label
                    if not result_json in resultsByKeyword:
                        resultsByKeyword.append(result_json)
Beispiel #18
0
def get_results_by_instance_keyword(request):
    """Run a keyword search through ``OaiRecord.executeFullTextQuery``.

    Values read from ``request.GET``:

    * ``keyword`` -- the text handed to the full-text query.
    * ``schemas[]`` -- a list of JSON strings; the ``'oai-pmh'`` entry of each
      decoded object is concatenated into the list given to the query.
    * ``onlySuggestions`` -- optional JSON value; when truthy only keyword
      suggestions are collected, otherwise every result is transformed with
      XSLT and rendered with ``explore_result_keyword.html``.

    If reading or decoding any of these fails, the view falls back to an
    empty keyword, no schemas and suggestions-only mode.

    Side effect: a JSON description of a "Local" instance is stored in
    ``request.session['instancesExplore']``.

    NOTE(review): the part of this view visible here fills ``results``,
    ``resultsByKeyword``, ``resultString`` and ``sessionName`` but contains no
    ``return`` statement; whatever consumes those names is not shown.
    """
    print('BEGIN def getResultsKeyword(request)')
    resultsByKeyword = []
    results = []
    resultString = ""

    #Instance
    json_instances = []
    # NOTE(review): SERVER_PROTOCOL normally carries the HTTP version
    # (e.g. "HTTP/1.1"), so this test probably never selects "https";
    # request.is_secure() may be what was intended -- confirm before changing.
    if 'HTTPS' in request.META['SERVER_PROTOCOL']:
        protocol = "https"
    else:
        protocol = "http"
    instance = Instance(name="Local",
                        protocol=protocol,
                        address=request.META['REMOTE_ADDR'],
                        port=request.META['SERVER_PORT'],
                        access_token="token",
                        refresh_token="token")
    json_instances.append(instance.to_json())
    request.session['instancesExplore'] = json_instances
    sessionName = "resultsExploreOaiPMh" + instance['name']

    try:
        keyword = request.GET['keyword']
        schemas = request.GET.getlist('schemas[]')
        mergedSchemas = []
        for schema in schemas:
            t = json.loads(schema)
            mergedSchemas += t['oai-pmh']
        if 'onlySuggestions' in request.GET:
            onlySuggestions = json.loads(request.GET['onlySuggestions'])
        else:
            onlySuggestions = False
    except Exception:
        # Best-effort parsing: any malformed/missing parameter degrades to an
        # empty suggestions-only search.  ``Exception`` (rather than a bare
        # ``except``) keeps KeyboardInterrupt/SystemExit propagating.
        keyword = ''
        schemas = []
        onlySuggestions = True
        mergedSchemas = []

    instanceResults = OaiRecord.executeFullTextQuery(keyword, mergedSchemas)
    if len(instanceResults) > 0:
        if not onlySuggestions:
            # Default stylesheet, used when a format has no custom list XSLT
            # or when applying the custom one fails.
            xsltPath = os.path.join(settings.SITE_ROOT,
                                    'static/resources/xsl/xml2html.xsl')
            xslt = etree.parse(xsltPath)
            transform = etree.XSLT(xslt)

        #Retrieve schema and registries. Avoid to retrieve the information for each result
        registriesName = {}
        schemasName = {}
        listRegistriesID = set([x['registry'] for x in instanceResults])
        for registryId in listRegistriesID:
            obj = OaiRegistry.objects(pk=registryId).get()
            registriesName[str(registryId)] = obj.name
        listSchemaId = set([x['metadataformat'] for x in instanceResults])
        for schemaId in listSchemaId:
            obj = OaiMetadataFormat.objects(pk=schemaId).get()
            schemasName[str(schemaId)] = obj

        listItems = []
        if not onlySuggestions:
            for instanceResult in instanceResults:
                # Serialise the metadata once; it feeds both the raw result
                # entry and the DOM handed to the XSLT transform.
                content = unparse(instanceResult['metadata'])
                results.append({
                    'title': instanceResult['identifier'],
                    'content': content,
                    'id': str(instanceResult['_id'])
                })
                dom = etree.XML(str(content.encode('utf-8')))

                # Resolved before the try block so that a failing custom XSLT
                # can never leave ``schema`` unbound or pointing at the
                # previous result's format when the item is built below.
                schema = schemasName[str(instanceResult['metadataformat'])]
                custom_xslt = False
                #Check if a custom list result XSLT has to be used
                try:
                    if schema.ResultXsltList:
                        listXslt = etree.parse(
                            BytesIO(
                                schema.ResultXsltList.content.encode('utf-8')))
                        listTransform = etree.XSLT(listXslt)
                        newdom = listTransform(dom)
                        custom_xslt = True
                    else:
                        newdom = transform(dom)
                except Exception:
                    #We use the default one
                    newdom = transform(dom)
                    custom_xslt = False

                item = {
                    'id': str(instanceResult['_id']),
                    'xml': str(newdom),
                    'title': instanceResult['identifier'],
                    'custom_xslt': custom_xslt,
                    'schema_name': schema.metadataPrefix,
                    # registriesName is keyed by str(id) above, so the lookup
                    # must use the same conversion.
                    'registry_name':
                    registriesName[str(instanceResult['registry'])]
                }

                listItems.append(item)

        else:
            # The pattern depends only on the keyword, so build and compile it
            # once: each word matches itself or itself followed by more word
            # characters, on word boundaries, case-insensitively.
            wordList = re.sub(r"[^\w]", " ", keyword).split()
            if wordList:
                # An empty word list would give the pattern \b()\b, which
                # matches the empty string and produces a blank suggestion.
                alternatives = '|'.join(
                    [x + "|" + x + r"\w+" for x in wordList])
                keywordPattern = re.compile(
                    r"\b(" + alternatives + r")\b", flags=re.IGNORECASE)
                # Only the first 20 results are scanned for suggestions.
                for instanceResult in instanceResults[:20]:
                    listWholeKeywords = keywordPattern.findall(
                        unparse(instanceResult['metadata']).encode('utf-8'))
                    labels = list(set(listWholeKeywords))

                    for label in labels:
                        label = label.lower()
                        result_json = {'label': label, 'value': label}
                        if result_json not in resultsByKeyword:
                            resultsByKeyword.append(result_json)

        if not onlySuggestions:
            # Built once here instead of on every loop iteration; only the
            # final context (holding the complete list) was ever rendered.
            context = RequestContext(request, {'list_results': listItems})
            template = loader.get_template(
                'oai_pmh/explore/explore_result_keyword.html')
            resultString += template.render(context)