def get_results_by_instance_keyword(request):
    print 'BEGIN def getResultsKeyword(request)'
    resultsByKeyword = []
    results = []
    resultString = ""

    #Instance
    json_instances = []
    if 'HTTPS' in request.META['SERVER_PROTOCOL']:
        protocol = "https"
    else:
        protocol = "http"
    instance = Instance(name="Local", protocol=protocol, address=request.META['REMOTE_ADDR'], port=request.META['SERVER_PORT'], access_token="token", refresh_token="token")
    json_instances.append(instance.to_json())
    request.session['instancesExplore'] = json_instances
    sessionName = "resultsExploreOaiPMh" + instance['name']


    try:
        keyword = request.GET['keyword']
        schemas = request.GET.getlist('schemas[]')
        userSchemas = request.GET.getlist('userSchemas[]')
        refinements = refinements_to_mongo(request.GET.getlist('refinements[]'))
        if 'onlySuggestions' in request.GET:
            onlySuggestions = json.loads(request.GET['onlySuggestions'])
        else:
            onlySuggestions = False
    except:
        keyword = ''
        schemas = []
        userSchemas = []
        refinements = {}
        onlySuggestions = True
    #We get all template versions for the given schemas
    #First, we take care of user defined schema
    templatesIDUser = Template.objects(title__in=userSchemas).distinct(field="id")
    templatesIDUser = [str(x) for x in templatesIDUser]

    #Take care of the rest, with versions
    templatesVersions = Template.objects(title__in=schemas).distinct(field="templateVersion")

    #We get all templates ID, for all versions
    allTemplatesIDCommon = TemplateVersion.objects(pk__in=templatesVersions, isDeleted=False).distinct(field="versions")
    #We remove the removed version
    allTemplatesIDCommonRemoved = TemplateVersion.objects(pk__in=templatesVersions, isDeleted=False).distinct(field="deletedVersions")
    templatesIDCommon = list(set(allTemplatesIDCommon) - set(allTemplatesIDCommonRemoved))

    templatesID = templatesIDUser + templatesIDCommon
    #We retrieve deactivated registries so as not to get their metadata formats
    deactivatedRegistries = [str(x.id) for x in OaiRegistry.objects(isDeactivated=True).order_by('id')]
    metadataFormatsID = OaiMetadataFormat.objects(template__in=templatesID, registry__not__in=deactivatedRegistries).distinct(field="id")


    instanceResults = OaiRecord.executeFullTextQuery(keyword, metadataFormatsID, refinements)
    if len(instanceResults) > 0:
        if not onlySuggestions:
            xsltPath = os.path.join(settings.SITE_ROOT, 'static/resources/xsl/xml2html.xsl')
            xslt = etree.parse(xsltPath)
            transform = etree.XSLT(xslt)
            template = loader.get_template('oai_pmh/explore/explore_result_keyword.html')

        #Retrieve schema and registries. Avoid to retrieve the information for each result
        registriesName = {}
        objMetadataFormats = {}
        listRegistriesID = set([x['registry'] for x in instanceResults])
        for registryId in listRegistriesID:
            obj = OaiRegistry.objects(pk=registryId).get()
            registriesName[str(registryId)] = obj.name
        listSchemaId = set([x['metadataformat'] for x in instanceResults])
        for schemaId in listSchemaId:
            obj = OaiMetadataFormat.objects(pk=schemaId).get()
            objMetadataFormats[str(schemaId)] = obj

        listItems = []
        xmltodictunparse = xmltodict.unparse
        appendResult = results.append
        toXML = etree.XML
        parse = etree.parse
        XSLT = etree.XSLT
        if not onlySuggestions:
            for instanceResult in instanceResults:
                custom_xslt = False
                appendResult({'title':instanceResult['identifier'], 'content':xmltodictunparse(instanceResult['metadata']),'id':str(instanceResult['_id'])})
                dom = toXML(str(xmltodictunparse(instanceResult['metadata']).encode('utf-8')))
                #Check if a custom list result XSLT has to be used
                try:
                    metadataFormat = objMetadataFormats[str(instanceResult['metadataformat'])]
                    if metadataFormat.template.ResultXsltList:
                        listXslt = parse(BytesIO(metadataFormat.template.ResultXsltList.content.encode('utf-8')))
                        listTransform = XSLT(listXslt)
                        newdom = listTransform(dom)
                        custom_xslt = True
                    else:
                        newdom = transform(dom)
                except Exception, e:
                    #We use the default one
                    newdom = transform(dom)
                    custom_xslt = False

                context = RequestContext(request, {'id':str(instanceResult['_id']),
                                   'xml': str(newdom),
                                   'title': instanceResult['identifier'],
                                   'custom_xslt': custom_xslt,
                                   'schema_name': metadataFormat.metadataPrefix,
                                   'registry_name': registriesName[instanceResult['registry']],
                                   'oai_pmh': True})


                resultString+= template.render(context)

        else:
            for instanceResult in instanceResults[:20]:
                wordList = re.sub("[^\w]", " ",  keyword).split()
                wordList = [x + "|" + x +"\w+" for x in wordList]
                wordList = '|'.join(wordList)
                listWholeKeywords = re.findall("\\b("+ wordList +")\\b", xmltodict.unparse(instanceResult['metadata']).encode('utf-8'), flags=re.IGNORECASE)
                labels = list(set(listWholeKeywords))

                for label in labels:
                    label = label.lower()
                    result_json = {}
                    result_json['label'] = label
                    result_json['value'] = label
                    if not result_json in resultsByKeyword:
                        resultsByKeyword.append(result_json)
Example #2
0
def get_results_by_instance_keyword(request):
    print 'BEGIN def getResultsKeyword(request)'
    resultsByKeyword = []
    results = []
    resultString = ""

    #Instance
    json_instances = []
    if 'HTTPS' in request.META['SERVER_PROTOCOL']:
        protocol = "https"
    else:
        protocol = "http"
    instance = Instance(name="Local",
                        protocol=protocol,
                        address=request.META['REMOTE_ADDR'],
                        port=request.META['SERVER_PORT'],
                        access_token="token",
                        refresh_token="token")
    json_instances.append(instance.to_json())
    request.session['instancesExplore'] = json_instances
    sessionName = "resultsExploreOaiPMh" + instance['name']

    try:
        keyword = request.GET['keyword']
        schemas = request.GET.getlist('schemas[]')
        mergedSchemas = []
        for schema in schemas:
            t = json.loads(schema)
            mergedSchemas += t['oai-pmh']
        if 'onlySuggestions' in request.GET:
            onlySuggestions = json.loads(request.GET['onlySuggestions'])
        else:
            onlySuggestions = False
    except:
        keyword = ''
        schemas = []
        onlySuggestions = True
        mergedSchemas = []

    instanceResults = OaiRecord.executeFullTextQuery(keyword, mergedSchemas)
    if len(instanceResults) > 0:
        if not onlySuggestions:
            xsltPath = os.path.join(settings.SITE_ROOT,
                                    'static/resources/xsl/xml2html.xsl')
            xslt = etree.parse(xsltPath)
            transform = etree.XSLT(xslt)

        #Retrieve schema and registries. Avoid to retrieve the information for each result
        registriesName = {}
        schemasName = {}
        listRegistriesID = set([x['registry'] for x in instanceResults])
        for registryId in listRegistriesID:
            obj = OaiRegistry.objects(pk=registryId).get()
            registriesName[str(registryId)] = obj.name
        listSchemaId = set([x['metadataformat'] for x in instanceResults])
        for schemaId in listSchemaId:
            obj = OaiMetadataFormat.objects(pk=schemaId).get()
            schemasName[str(schemaId)] = obj

        listItems = []
        xmltodictunparse = unparse
        appendResult = results.append
        toXML = etree.XML
        parse = etree.parse
        XSLT = etree.XSLT
        if not onlySuggestions:
            for instanceResult in instanceResults:
                custom_xslt = False
                appendResult({
                    'title':
                    instanceResult['identifier'],
                    'content':
                    xmltodictunparse(instanceResult['metadata']),
                    'id':
                    str(instanceResult['_id'])
                })
                dom = toXML(
                    str(
                        xmltodictunparse(
                            instanceResult['metadata']).encode('utf-8')))
                #Check if a custom list result XSLT has to be used
                try:
                    schema = schemasName[str(instanceResult['metadataformat'])]
                    if schema.ResultXsltList:
                        listXslt = parse(
                            BytesIO(
                                schema.ResultXsltList.content.encode('utf-8')))
                        listTransform = XSLT(listXslt)
                        newdom = listTransform(dom)
                        custom_xslt = True
                    else:
                        newdom = transform(dom)
                except Exception, e:
                    #We use the default one
                    newdom = transform(dom)
                    custom_xslt = False

                item = {
                    'id': str(instanceResult['_id']),
                    'xml': str(newdom),
                    'title': instanceResult['identifier'],
                    'custom_xslt': custom_xslt,
                    'schema_name': schema.metadataPrefix,
                    'registry_name': registriesName[instanceResult['registry']]
                }

                listItems.append(item)
                context = RequestContext(request, {'list_results': listItems})

        else:
            for instanceResult in instanceResults[:20]:
                wordList = re.sub("[^\w]", " ", keyword).split()
                wordList = [x + "|" + x + "\w+" for x in wordList]
                wordList = '|'.join(wordList)
                listWholeKeywords = re.findall(
                    "\\b(" + wordList + ")\\b",
                    unparse(instanceResult['metadata']).encode('utf-8'),
                    flags=re.IGNORECASE)
                labels = list(set(listWholeKeywords))

                for label in labels:
                    label = label.lower()
                    result_json = {}
                    result_json['label'] = label
                    result_json['value'] = label
                    if not result_json in resultsByKeyword:
                        resultsByKeyword.append(result_json)

        if not onlySuggestions:
            template = loader.get_template(
                'oai_pmh/explore/explore_result_keyword.html')
            resultString += template.render(context)
Example #3
0
def get_results_by_instance_keyword(request):
    print 'BEGIN def getResultsKeyword(request)'
    resultsByKeyword = []
    results = []
    resultString = ""

    #Instance
    json_instances = []
    if 'HTTPS' in request.META['SERVER_PROTOCOL']:
        protocol = "https"
    else:
        protocol = "http"
    instance = Instance(name="Local",
                        protocol=protocol,
                        address=request.META['REMOTE_ADDR'],
                        port=request.META['SERVER_PORT'],
                        access_token="token",
                        refresh_token="token")
    json_instances.append(instance.to_json())
    request.session['instancesExplore'] = json_instances
    sessionName = "resultsExploreOaiPMh" + instance['name']

    keyword = request.POST.get('keyword', '')
    schemas = request.POST.getlist('schemas[]', [])
    user_schemas = request.POST.getlist('userSchemas[]', [])
    refinements = refinements_to_mongo(
        json.loads(request.POST.get('refinements', '{}')))
    registries = request.POST.getlist('registries[]', [])
    if 'onlySuggestions' in request.POST:
        onlySuggestions = json.loads(request.POST['onlySuggestions'])
    else:
        onlySuggestions = False

    metadata_format_ids = _get_metadata_formats_id(schemas=schemas,
                                                   user_schemas=user_schemas,
                                                   registries=registries)
    instanceResults = OaiRecord.executeFullTextQuery(keyword,
                                                     metadata_format_ids,
                                                     refinements)
    if len(instanceResults) > 0:
        if not onlySuggestions:
            xsltPath = os.path.join(settings.SITE_ROOT,
                                    'static/resources/xsl/xml2html.xsl')
            xslt = etree.parse(xsltPath)
            transform = etree.XSLT(xslt)
            template = loader.get_template(
                'oai_pmh/explore/explore_result_keyword.html')

        #Retrieve schema and registries. Avoid to retrieve the information for each result
        registriesName = {}
        objMetadataFormats = {}
        listRegistriesID = set([x['registry'] for x in instanceResults])
        registriesURL = {}
        for registryId in listRegistriesID:
            obj = OaiRegistry.objects(pk=registryId).get()
            registriesName[str(registryId)] = obj.name
            registriesURL[str(registryId)] = obj.url
        listSchemaId = set([x['metadataformat'] for x in instanceResults])
        for schemaId in listSchemaId:
            obj = OaiMetadataFormat.objects(pk=schemaId).get()
            objMetadataFormats[str(schemaId)] = obj

        listItems = []
        xmltodictunparse = XMLdata.unparse
        appendResult = results.append
        toXML = etree.XML
        parse = etree.parse
        XSLT = etree.XSLT
        if not onlySuggestions:
            for instanceResult in instanceResults:
                custom_xslt = False
                appendResult({
                    'title':
                    instanceResult['identifier'],
                    'content':
                    xmltodictunparse(instanceResult['metadata']),
                    'id':
                    str(instanceResult['_id'])
                })
                dom = toXML(
                    str(
                        xmltodictunparse(
                            instanceResult['metadata']).encode('utf-8')))
                #Check if a custom list result XSLT has to be used
                try:
                    metadataFormat = objMetadataFormats[str(
                        instanceResult['metadataformat'])]
                    if metadataFormat.template.ResultXsltList:
                        listXslt = parse(
                            BytesIO(
                                metadataFormat.template.ResultXsltList.content.
                                encode('utf-8')))
                        listTransform = XSLT(listXslt)
                        newdom = listTransform(dom)
                        custom_xslt = True
                    else:
                        newdom = transform(dom)
                except Exception, e:
                    #We use the default one
                    newdom = transform(dom)
                    custom_xslt = False

                registry_name = registriesName[instanceResult['registry']]
                if len(registry_name) > 30:
                    registry_name = "{0}...".format(registry_name[:30])

                url = urlparse(registriesURL[instanceResult['registry']])
                context = RequestContext(
                    request, {
                        'id': str(instanceResult['_id']),
                        'xml': str(newdom),
                        'title': instanceResult['identifier'],
                        'custom_xslt': custom_xslt,
                        'template_name': metadataFormat.template.title,
                        'registry_name': registry_name,
                        'registry_url': "{0}://{1}".format(
                            url.scheme, url.netloc),
                        'oai_pmh': True
                    })

                resultString += template.render(context)

        else:
            for instanceResult in instanceResults[:20]:
                wordList = re.sub("[^\w]", " ", keyword).split()
                wordList = [x + "|" + x + "\w+" for x in wordList]
                wordList = '|'.join(wordList)
                listWholeKeywords = re.findall(
                    "\\b(" + wordList + ")\\b",
                    XMLdata.unparse(
                        instanceResult['metadata']).encode('utf-8'),
                    flags=re.IGNORECASE)
                labels = list(set(listWholeKeywords))

                for label in labels:
                    label = label.lower()
                    result_json = {}
                    result_json['label'] = label
                    result_json['value'] = label
                    if not result_json in resultsByKeyword:
                        resultsByKeyword.append(result_json)
def get_results_by_instance_keyword(request):
    print 'BEGIN def getResultsKeyword(request)'
    resultsByKeyword = []
    results = []
    resultString = ""

    #Instance
    json_instances = []
    if 'HTTPS' in request.META['SERVER_PROTOCOL']:
        protocol = "https"
    else:
        protocol = "http"
    instance = Instance(name="Local",
                        protocol=protocol,
                        address=request.META['REMOTE_ADDR'],
                        port=request.META['SERVER_PORT'],
                        access_token="token",
                        refresh_token="token")
    json_instances.append(instance.to_json())
    request.session['instancesExplore'] = json_instances
    sessionName = "resultsExploreOaiPMh" + instance['name']

    try:
        keyword = request.GET['keyword']
        schemas = request.GET.getlist('schemas[]')
        userSchemas = request.GET.getlist('userSchemas[]')
        refinements = refinements_to_mongo(
            request.GET.getlist('refinements[]'))
        if 'onlySuggestions' in request.GET:
            onlySuggestions = json.loads(request.GET['onlySuggestions'])
        else:
            onlySuggestions = False
    except:
        keyword = ''
        schemas = []
        userSchemas = []
        refinements = {}
        onlySuggestions = True
    #We get all template versions for the given schemas
    #First, we take care of user defined schema
    templatesIDUser = Template.objects(title__in=userSchemas).distinct(
        field="id")
    templatesIDUser = [str(x) for x in templatesIDUser]

    #Take care of the rest, with versions
    templatesVersions = Template.objects(title__in=schemas).distinct(
        field="templateVersion")

    #We get all templates ID, for all versions
    allTemplatesIDCommon = TemplateVersion.objects(
        pk__in=templatesVersions, isDeleted=False).distinct(field="versions")
    #We remove the removed version
    allTemplatesIDCommonRemoved = TemplateVersion.objects(
        pk__in=templatesVersions,
        isDeleted=False).distinct(field="deletedVersions")
    templatesIDCommon = list(
        set(allTemplatesIDCommon) - set(allTemplatesIDCommonRemoved))

    templatesID = templatesIDUser + templatesIDCommon
    #We retrieve deactivated registries so as not to get their metadata formats
    deactivatedRegistries = [
        str(x.id)
        for x in OaiRegistry.objects(isDeactivated=True).order_by('id')
    ]
    metadataFormatsID = OaiMetadataFormat.objects(
        template__in=templatesID,
        registry__not__in=deactivatedRegistries).distinct(field="id")

    instanceResults = OaiRecord.executeFullTextQuery(keyword,
                                                     metadataFormatsID,
                                                     refinements)
    if len(instanceResults) > 0:
        if not onlySuggestions:
            xsltPath = os.path.join(settings.SITE_ROOT,
                                    'static/resources/xsl/xml2html.xsl')
            xslt = etree.parse(xsltPath)
            transform = etree.XSLT(xslt)
            template = loader.get_template(
                'oai_pmh/explore/explore_result_keyword.html')

        #Retrieve schema and registries. Avoid to retrieve the information for each result
        registriesName = {}
        objMetadataFormats = {}
        listRegistriesID = set([x['registry'] for x in instanceResults])
        for registryId in listRegistriesID:
            obj = OaiRegistry.objects(pk=registryId).get()
            registriesName[str(registryId)] = obj.name
        listSchemaId = set([x['metadataformat'] for x in instanceResults])
        for schemaId in listSchemaId:
            obj = OaiMetadataFormat.objects(pk=schemaId).get()
            objMetadataFormats[str(schemaId)] = obj

        listItems = []
        xmltodictunparse = xmltodict.unparse
        appendResult = results.append
        toXML = etree.XML
        parse = etree.parse
        XSLT = etree.XSLT
        if not onlySuggestions:
            for instanceResult in instanceResults:
                custom_xslt = False
                appendResult({
                    'title':
                    instanceResult['identifier'],
                    'content':
                    xmltodictunparse(instanceResult['metadata']),
                    'id':
                    str(instanceResult['_id'])
                })
                dom = toXML(
                    str(
                        xmltodictunparse(
                            instanceResult['metadata']).encode('utf-8')))
                #Check if a custom list result XSLT has to be used
                try:
                    metadataFormat = objMetadataFormats[str(
                        instanceResult['metadataformat'])]
                    if metadataFormat.template.ResultXsltList:
                        listXslt = parse(
                            BytesIO(
                                metadataFormat.template.ResultXsltList.content.
                                encode('utf-8')))
                        listTransform = XSLT(listXslt)
                        newdom = listTransform(dom)
                        custom_xslt = True
                    else:
                        newdom = transform(dom)
                except Exception, e:
                    #We use the default one
                    newdom = transform(dom)
                    custom_xslt = False

                context = RequestContext(
                    request, {
                        'id': str(instanceResult['_id']),
                        'xml': str(newdom),
                        'title': instanceResult['identifier'],
                        'custom_xslt': custom_xslt,
                        'schema_name': metadataFormat.metadataPrefix,
                        'registry_name':
                        registriesName[instanceResult['registry']],
                        'oai_pmh': True
                    })

                resultString += template.render(context)

        else:
            for instanceResult in instanceResults[:20]:
                wordList = re.sub("[^\w]", " ", keyword).split()
                wordList = [x + "|" + x + "\w+" for x in wordList]
                wordList = '|'.join(wordList)
                listWholeKeywords = re.findall(
                    "\\b(" + wordList + ")\\b",
                    xmltodict.unparse(
                        instanceResult['metadata']).encode('utf-8'),
                    flags=re.IGNORECASE)
                labels = list(set(listWholeKeywords))

                for label in labels:
                    label = label.lower()
                    result_json = {}
                    result_json['label'] = label
                    result_json['value'] = label
                    if not result_json in resultsByKeyword:
                        resultsByKeyword.append(result_json)