def test_update_publish_draft(self):
    """Publishing a draft of an already published record updates that record.

    Creates a published record and a draft (``FormData``) holding new XML
    that points at it, requests ``/dashboard/update_publish_draft`` through
    the logged-in admin client helper, then checks that the stored record is
    still published, belongs to the admin, carries the new status and has
    new modification/publication dates.
    """
    status = Status.ACTIVE
    new_xml = ("<Resource localid='' status='" + status + "'><identity>"
               "<title>My new software</title></identity><curation><publisher>PF</publisher><contact><name></name>"
               "</contact></curation><content><description>This is a new record</description><subject></subject>"
               "<referenceURL></referenceURL></content></Resource>")
    record_id = self.createXMLData(ispublished=True)
    xml_before = XMLdata.get(record_id)
    self.assertNotEqual(new_xml, XMLdata.unparse(xml_before['content']))
    admin_id = self.getAdmin().id
    template = self.createTemplate()
    # Nothing is stored before the draft is created.
    self.assertEqual(len(SchemaElement.objects().all()), 0)
    self.assertEqual(len(FormData.objects().all()), 0)
    draft = self.createFormData(user=admin_id, name='name', template=str(template.id), xml_data=new_xml,
                                xml_data_id=str(record_id))
    self.doRequestGetAdminClientLogged(url='/dashboard/update_publish_draft',
                                       data={'draft_id': str(draft.id)})
    xml_after = XMLdata.get(record_id)
    # The draft (and any schema elements) must be gone once it has been published.
    self.assertEqual(len(SchemaElement.objects().all()), 0)
    self.assertEqual(len(FormData.objects().all()), 0)
    # NOTE(review): ``.text`` is only the root element's leading text, so this
    # comparison is weak -- confirm whether a full-tree comparison was intended.
    self.assertEqual(etree.XML(new_xml).text,
                     etree.XML(str(XMLdata.unparse(xml_after['content']))).text)
    self.assertEqual(True, xml_after.get('ispublished'))
    self.assertEqual(str(admin_id), xml_after.get('iduser'))
    self.assertNotEqual(xml_before.get('lastmodificationdate'), xml_after.get('lastmodificationdate'))
    self.assertNotEqual(xml_before.get('publicationdate'), xml_after.get('publicationdate'))
    self.assertEqual(status, xml_after.get('status'))
def explore_detail_result_keyword(request):
    """Transform an OAI-PMH record's metadata with the detailed-result XSLT.

    Reads the record id from ``request.GET["id"]`` and an optional
    ``request.GET["title"]`` (defaulting to the record identifier), unparses
    the record's ordered metadata to XML and applies either the XSLT attached
    to the record's metadata-format template (``ResultXsltDetailed``) or the
    default ``xml2html.xsl`` stylesheet.

    NOTE(review): ``template``, ``title`` and ``newdom`` are computed but not
    used in the visible body -- the render/return step appears to be missing.
    """
    template = loader.get_template("oai_pmh/explore/explore_detail_results_keyword.html")
    result_id = request.GET["id"]
    record = OaiRecord.objects.get(pk=result_id)
    if "title" in request.GET:
        title = request.GET["title"]
    else:
        title = record.identifier
    xmlString = XMLdata.unparse(record.getMetadataOrdered()).encode("utf-8")
    xsltPath = os.path.join(settings.SITE_ROOT, "static", "resources", "xsl", "xml2html.xsl")
    xslt = etree.parse(xsltPath)
    transform = etree.XSLT(xslt)
    dom = etree.fromstring(str(xmlString))

    # Check if a custom detailed result XSLT has to be used
    try:
        metadataFormat = record.metadataformat
        if metadataFormat.template.ResultXsltDetailed:
            listXslt = etree.parse(BytesIO(metadataFormat.template.ResultXsltDetailed.content.encode("utf-8")))
            # Keep the custom transform in its own name: rebinding ``transform``
            # would make the fallback below re-run the stylesheet that just failed.
            customTransform = etree.XSLT(listXslt)
            newdom = customTransform(dom)
        else:
            newdom = transform(dom)
    except Exception:
        # We use the default one
        newdom = transform(dom)
Example #3
0
def curate_edit_data(request):
    """Prepare the curate session for editing an existing record.

    Loads the record named by ``request.GET['id']``, deletes the current
    user's previous edit forms (those that have an ``xml_data_id``), stores a
    new ``FormData`` seeded with the record's XML and points the session at
    it (``curate_edit``, ``currentTemplateID``, ``curateFormData``). Stale
    ``form_id`` / ``xmlDocTree`` session entries are removed.

    :param request: HTTP request carrying the record id in ``GET['id']``
    :raises MDCSError: if any step above fails
    """
    try:
        xml_data_id = request.GET['id']
        xml_data = XMLdata.get(xml_data_id)
        json_content = xml_data['content']
        xml_content = XMLdata.unparse(json_content)
        request.session['curate_edit'] = True
        request.session['currentTemplateID'] = xml_data['schema']
        # remove previously created forms when editing a new one
        previous_forms = FormData.objects(user=str(request.user.id),
                                          xml_data_id__exists=True)

        for previous_form in previous_forms:
            if previous_form.schema_element_root is not None:
                delete_branch_from_db(previous_form.schema_element_root.pk)
            previous_form.delete()

        form_data = FormData(user=str(request.user.id),
                             template=xml_data['schema'],
                             name=xml_data['title'],
                             xml_data=xml_content,
                             xml_data_id=xml_data_id)
        form_data.save()

        request.session['curateFormData'] = str(form_data.pk)

        if 'form_id' in request.session:
            del request.session['form_id']
        if 'xmlDocTree' in request.session:
            del request.session['xmlDocTree']
    except Exception:
        # Every failure is reported as a missing document. ``Exception`` rather
        # than a bare ``except`` so SystemExit/KeyboardInterrupt still propagate.
        raise MDCSError("The document you are looking for doesn't exist.")
def explore_detail_result_process(request):
    """Transform a stored record's XML with the detailed-result XSLT.

    Loads the record named by ``request.GET['id']``, takes the title from
    ``request.GET['title']`` when present (otherwise from the record), and
    applies either the record template's ``ResultXsltDetailed`` stylesheet or
    the default ``xml2html.xsl``. If the custom stylesheet fails, the default
    one is applied instead.

    NOTE(review): ``title`` and ``newdom`` are computed but not used in the
    visible body -- the render/return step appears to be missing.
    """
    result_id = request.GET['id']
    record = XMLdata.get(result_id)
    schemaId = record['schema']
    if 'title' in request.GET:
        title = request.GET['title']
    else:
        title = record['title']
    xmlString = XMLdata.unparse(record['content']).encode('utf-8')
    xsltPath = os.path.join(settings.SITE_ROOT, 'static', 'resources', 'xsl', 'xml2html.xsl')
    xslt = etree.parse(xsltPath)
    transform = etree.XSLT(xslt)

    #Check if a custom detailed result XSLT has to be used
    schema = Template.objects.get(pk=schemaId)
    if xmlString != "":
        # Parse before entering the try: if parsing fails there is no DOM for
        # the fallback to transform, so the parse error should surface as-is.
        dom = etree.fromstring(str(xmlString))
        try:
            if schema.ResultXsltDetailed:
                shortXslt = etree.parse(BytesIO(schema.ResultXsltDetailed.content.encode('utf-8')))
                shortTransform = etree.XSLT(shortXslt)
                newdom = shortTransform(dom)
            else:
                newdom = transform(dom)
        except Exception:
            #We use the default one
            newdom = transform(dom)
def explore_detail_result_process(request):
    """Transform a stored record's XML with the detailed-result XSLT.

    The record is looked up from ``request.GET['id']``; the title comes from
    ``request.GET['title']`` when given, otherwise from the record. The XML
    is transformed with the template's ``ResultXsltDetailed`` stylesheet when
    one is set, else with the default ``xml2html.xsl``; a failing custom
    stylesheet falls back to the default.

    NOTE(review): ``title`` and ``newdom`` are computed but not used in the
    visible body -- the render/return step appears to be missing.
    """
    result_id = request.GET['id']
    record = XMLdata.get(result_id)
    schemaId = record['schema']
    if 'title' in request.GET:
        title = request.GET['title']
    else:
        title = record['title']
    xmlString = XMLdata.unparse(record['content']).encode('utf-8')
    xsltPath = os.path.join(settings.SITE_ROOT, 'static', 'resources', 'xsl',
                            'xml2html.xsl')
    xslt = etree.parse(xsltPath)
    transform = etree.XSLT(xslt)

    #Check if a custom detailed result XSLT has to be used
    schema = Template.objects.get(pk=schemaId)
    if xmlString != "":
        # Parsing stays outside the try so a malformed document raises its own
        # error instead of an unbound ``dom`` inside the fallback.
        dom = etree.fromstring(str(xmlString))
        try:
            if schema.ResultXsltDetailed:
                shortXslt = etree.parse(
                    BytesIO(schema.ResultXsltDetailed.content.encode('utf-8')))
                shortTransform = etree.XSLT(shortXslt)
                newdom = shortTransform(dom)
            else:
                newdom = transform(dom)
        except Exception:
            #We use the default one
            newdom = transform(dom)
def explore_detail_result_keyword(request):
    """Transform an OAI-PMH record's metadata with the detailed-result XSLT.

    The record is looked up from ``request.GET['id']``; the title comes from
    ``request.GET['title']`` when given, otherwise from the record
    identifier. The ordered metadata is unparsed to XML and transformed with
    the metadata-format template's ``ResultXsltDetailed`` stylesheet when one
    is set, else with the default ``xml2html.xsl``.

    NOTE(review): ``template``, ``title`` and ``newdom`` are computed but not
    used in the visible body -- the render/return step appears to be missing.
    """
    template = loader.get_template(
        'oai_pmh/explore/explore_detail_results_keyword.html')
    result_id = request.GET['id']
    record = OaiRecord.objects.get(pk=result_id)
    if 'title' in request.GET:
        title = request.GET['title']
    else:
        title = record.identifier
    xmlString = XMLdata.unparse(record.getMetadataOrdered()).encode('utf-8')
    xsltPath = os.path.join(settings.SITE_ROOT, 'static', 'resources', 'xsl',
                            'xml2html.xsl')
    xslt = etree.parse(xsltPath)
    transform = etree.XSLT(xslt)
    dom = etree.fromstring(str(xmlString))

    #Check if a custom detailed result XSLT has to be used
    try:
        metadataFormat = record.metadataformat
        if metadataFormat.template.ResultXsltDetailed:
            listXslt = etree.parse(
                BytesIO(
                    metadataFormat.template.ResultXsltDetailed.content.encode(
                        'utf-8')))
            # A separate name keeps ``transform`` bound to the default
            # stylesheet, so the fallback below really uses the default.
            customTransform = etree.XSLT(listXslt)
            newdom = customTransform(dom)
        else:
            newdom = transform(dom)
    except Exception:
        #We use the default one
        newdom = transform(dom)
Example #7
0
def dashboard_detail_record(request, otherUser=None):
    """Render the dashboard detail page for a draft form or a stored record.

    ``request.GET['id']`` names the item and ``request.GET['type']`` says
    whether it is a ``'form'`` (``FormData``) or a ``'record'`` (``XMLdata``).
    The item's XML is transformed with its template's ``ResultXsltDetailed``
    stylesheet when one is set, otherwise (or if that fails) with the default
    ``xml2html.xsl``.

    :param request: HTTP request with ``id`` and ``type`` in ``GET``
    :param otherUser: when truthy, the "other users" page template is used
        and the value is added to the context as ``user_id``
    :return: ``HttpResponse`` with the rendered page
    :raises Exception: if ``type`` is neither ``'form'`` nor ``'record'``
    """
    record_id = request.GET['id']
    record_type = request.GET['type']
    if otherUser:
        template = loader.get_template('dashboard/my_dashboard_detail_recordotherusers.html')
    else:
        template = loader.get_template('dashboard/my_dashboard_detail_record.html')

    if record_type == 'form':
        form_data = FormData.objects.get(pk=ObjectId(record_id))
        xml_string = form_data.xml_data.encode(encoding='UTF-8')
        title = form_data.name
        schema_id = form_data.template
    elif record_type == 'record':
        record = XMLdata.get(record_id)
        title = record['title']
        schema_id = record['schema']
        xml_string = XMLdata.unparse(record['content']).encode('utf-8')
    else:
        raise Exception("Unknow record type: " + str(record_type))

    xslt_path = os.path.join(settings.SITE_ROOT, 'static', 'resources', 'xsl', 'xml2html.xsl')
    xslt = etree.parse(xslt_path)
    transform = etree.XSLT(xslt)

    if xml_string != "":
        # Parse outside the try: without a DOM the fallback has nothing to
        # transform, so a parse failure should surface as the parse error.
        dom = etree.fromstring(xml_string)
        # Check if a custom detailed result XSLT has to be used
        try:
            schema = Template.objects.get(pk=schema_id)

            if schema.ResultXsltDetailed:
                short_xslt = etree.parse(BytesIO(schema.ResultXsltDetailed.content.encode('utf-8')))
                short_transform = etree.XSLT(short_xslt)
                newdom = short_transform(dom)
            else:
                newdom = transform(dom)
        except Exception:
            # We use the default one
            newdom = transform(dom)
    else:
        newdom = 'No data has been saved to this form yet.'

    result = str(newdom)

    context = RequestContext(request, {
        'XMLHolder': result,
        'title': title,
        'type': record_type,
    })
    if otherUser:
        context["user_id"] = otherUser

    return HttpResponse(template.render(context))
 def test_parse_unparse_test(self):
     """Parsing ``test.xml`` and unparsing it again yields the same XML.

     Both sides are passed through ``_strip`` before being compared.
     """
     # Only the read needs the file handle; close it before doing the work.
     with open(join(RESOURCES_PATH, 'test.xml'), 'r') as data_file:
         data_content = data_file.read()
     # test parsing
     xml_data = XMLdata(xml=data_content)
     # test unparsing
     xml_string = XMLdata.unparse(xml_data.content['content'])
     self.assertEqual(_strip(data_content), _strip(xml_string))
 def test_parse_unparse_test(self):
     """Round-trip ``test.xml`` through ``XMLdata`` parsing and unparsing.

     The unparsed XML must equal the file content once both have been passed
     through ``_strip``.
     """
     # The file is only needed for the read, so it is closed straight away.
     with open(join(RESOURCES_PATH, 'test.xml'), 'r') as data_file:
         data_content = data_file.read()
     # test parsing
     xml_data = XMLdata(xml=data_content)
     # test unparsing
     xml_string = XMLdata.unparse(xml_data.content['content'])
     self.assertEqual(_strip(data_content), _strip(xml_string))
Example #10
0
def retrieve_xml(docID):
    """ Get the XML associated with the id

    :param docID: id passed to ``XMLdata.getXMLdata``
    :return: the document's XML, encoded as UTF-8
    """
    xml_data = XMLdata.getXMLdata(docID)
    # The payload is picked positionally (4th item of the stored document);
    # presumably this is the 'content' entry -- TODO confirm. ``list()`` keeps
    # the indexing valid when ``items()`` returns a view instead of a list.
    payload = list(xml_data.items())[3][1]
    # The JSON round-trip rebuilds every nested mapping as an OrderedDict.
    xml_dta = json.dumps(payload, sort_keys=False)
    xml_d = json.loads(xml_dta, object_pairs_hook=OrderedDict)
    xsdDocData = XMLdata.unparse(xml_d)
    xsdEncoded = xsdDocData.encode('utf-8')

    return xsdEncoded
Example #11
0
 def test_update_publish_draft(self):
     """Publishing a draft of an already published record updates that record.

     A published record and a draft (``FormData``) with new XML pointing at
     it are created, ``/dashboard/update_publish_draft`` is requested through
     the logged-in admin client helper, and the stored record is then
     expected to stay published, belong to the admin, carry the new status
     and have new modification/publication dates.
     """
     status = Status.ACTIVE
     new_xml = ("<Resource localid='' status='" + status + "'><identity>"
                "<title>My new software</title></identity><curation><publisher>PF</publisher><contact><name></name>"
                "</contact></curation><content><description>This is a new record</description><subject></subject>"
                "<referenceURL></referenceURL></content></Resource>")
     record_id = self.createXMLData(ispublished=True)
     xml_before = XMLdata.get(record_id)
     self.assertNotEqual(new_xml, XMLdata.unparse(xml_before['content']))
     admin_id = self.getAdmin().id
     template = self.createTemplate()
     # Nothing is stored before the draft is created.
     self.assertEqual(len(SchemaElement.objects().all()), 0)
     self.assertEqual(len(FormData.objects().all()), 0)
     draft = self.createFormData(user=admin_id,
                                 name='name',
                                 template=str(template.id),
                                 xml_data=new_xml,
                                 xml_data_id=str(record_id))
     self.doRequestGetAdminClientLogged(
         url='/dashboard/update_publish_draft',
         data={'draft_id': str(draft.id)})
     xml_after = XMLdata.get(record_id)
     # The draft (and any schema elements) must be gone once published.
     self.assertEqual(len(SchemaElement.objects().all()), 0)
     self.assertEqual(len(FormData.objects().all()), 0)
     # NOTE(review): ``.text`` is only the root element's leading text, so
     # this comparison is weak -- confirm whether a full-tree comparison was
     # intended.
     self.assertEqual(
         etree.XML(new_xml).text,
         etree.XML(str(XMLdata.unparse(xml_after['content']))).text)
     self.assertEqual(True, xml_after.get('ispublished'))
     self.assertEqual(str(admin_id), xml_after.get('iduser'))
     self.assertNotEqual(xml_before.get('lastmodificationdate'),
                         xml_after.get('lastmodificationdate'))
     self.assertNotEqual(xml_before.get('publicationdate'),
                         xml_after.get('publicationdate'))
     self.assertEqual(status, xml_after.get('status'))
Example #12
0
def render_xml_as_html(value):
    """Transform ``value`` with the ``xml2html.xsl`` stylesheet, best effort.

    ``value`` is unparsed to XML with ``XMLdata.unparse`` and transformed
    with the default stylesheet.

    :param value: data accepted by ``XMLdata.unparse``
    :return: the transformed document as a string, or ``value`` unchanged
        when the unparsed XML is empty or any step fails
    """
    try:
        xsltPath = os.path.join(settings.SITE_ROOT, 'static', 'resources',
                                'xsl', 'xml2html.xsl')
        transform = etree.XSLT(etree.parse(xsltPath))
        xmlString = XMLdata.unparse(value)
        if xmlString == "":
            return value
        dom = etree.XML(xmlString.encode('utf-8'))
        return str(transform(dom))
    except Exception:
        # Deliberate best effort: fall back to the raw value when it cannot be
        # rendered. ``Exception`` (not a bare ``except``) lets
        # SystemExit/KeyboardInterrupt propagate.
        return value
def curate_edit_data(request):
    """Prepare the curate session for editing an existing record.

    Does nothing when ``request.GET['useForm']`` is ``'true'``. Otherwise the
    current user's previous edit forms (with an ``xml_data_id`` and
    ``isNewVersionOfRecord=False``) are deleted, the form for the record
    named by ``request.GET['id']`` is reused or created from the record's
    XML, and the session is pointed at it (``curate_edit``,
    ``currentTemplateID``, ``curate_edit_data``, ``curateFormData``). Stale
    ``form_id`` / ``xmlDocTree`` session entries are removed.

    :param request: HTTP request carrying ``id`` (and optionally ``useForm``)
    :raises MDCSError: if any step above fails
    """
    try:
        if request.GET.get('useForm') == 'true':
            # An existing form is being reused; the session is left untouched.
            return

        xml_data_id = request.GET['id']
        request.session['curate_edit'] = True
        # remove previously created forms when editing a new one
        previous_forms = FormData.objects(user=str(request.user.id),
                                          xml_data_id__exists=True,
                                          isNewVersionOfRecord=False)
        for previous_form in previous_forms:
            if previous_form.schema_element_root is not None:
                delete_branch_from_db(previous_form.schema_element_root.pk)
            previous_form.delete()

        #Check if a form_data already exists for this record
        form_data = FormData.objects(xml_data_id=xml_data_id).all().first()
        if not form_data:
            xml_data = XMLdata.get(xml_data_id)
            json_content = xml_data['content']
            xml_content = XMLdata.unparse(json_content)
            form_data = FormData(user=str(request.user.id),
                                 template=xml_data['schema'],
                                 name=xml_data['title'],
                                 xml_data=xml_content,
                                 xml_data_id=xml_data_id,
                                 isNewVersionOfRecord=xml_data.get(
                                     'ispublished', False))
            form_data.save()

        request.session['currentTemplateID'] = form_data.template
        request.session['curate_edit_data'] = form_data.xml_data
        request.session['curateFormData'] = str(form_data.pk)

        if 'form_id' in request.session:
            del request.session['form_id']
        if 'xmlDocTree' in request.session:
            del request.session['xmlDocTree']
    except Exception:
        # Every failure is reported as a missing document. ``Exception`` rather
        # than a bare ``except`` so SystemExit/KeyboardInterrupt still propagate.
        raise MDCSError("The document you are looking for doesn't exist.")
Example #14
0
def dashboard_detail_resource(request):
    """Transform a dashboard resource (draft form or record) with XSLT.

    ``request.GET['id']`` names the item and ``request.GET['type']`` says
    whether it is a ``'form'`` (``FormData``) or a ``'record'`` (``XMLdata``).
    The item's XML is transformed with its template's ``ResultXsltDetailed``
    stylesheet when one is set, otherwise (or if that fails) with the default
    ``xml2html.xsl``. Empty XML yields the text ``"No data to display"``.

    :raises Exception: if ``type`` is neither ``'form'`` nor ``'record'``

    NOTE(review): ``template``, ``title`` and ``newdom`` are computed but not
    used in the visible body -- the render/return step appears to be missing.
    """
    template = loader.get_template(
        'dashboard/my_dashboard_detail_resource.html')
    result_id = request.GET['id']
    record_type = request.GET['type']

    if record_type == 'form':
        form_data = FormData.objects.get(pk=ObjectId(result_id))
        xmlString = form_data.xml_data.encode('utf-8')
        title = form_data.name
        schemaId = form_data.template
    elif record_type == 'record':
        record = XMLdata.get(result_id)
        title = record['title']
        schemaId = record['schema']
        xmlString = XMLdata.unparse(record['content']).encode('utf-8')
    else:
        # Fail with a clear message instead of an unbound-variable error later.
        raise Exception("Unknown record type: " + str(record_type))

    xsltPath = os.path.join(settings.SITE_ROOT, 'static', 'resources', 'xsl',
                            'xml2html.xsl')
    xslt = etree.parse(xsltPath)
    transform = etree.XSLT(xslt)

    if xmlString != "":
        # Parse outside the try: without a DOM the fallback has nothing to
        # transform, so a parse failure should surface as the parse error.
        dom = etree.fromstring(str(xmlString))
        #Check if a custom detailed result XSLT has to be used
        try:
            schema = Template.objects.get(pk=schemaId)
            if schema.ResultXsltDetailed:
                shortXslt = etree.parse(
                    BytesIO(schema.ResultXsltDetailed.content.encode('utf-8')))
                shortTransform = etree.XSLT(shortXslt)
                newdom = shortTransform(dom)
            else:
                newdom = transform(dom)
        except Exception:
            #We use the default one
            newdom = transform(dom)
    else:
        newdom = "No data to display"
Example #15
0
def explore_detail_result_keyword(request):
    """Render the detail page of an OAI-PMH record.

    The record is looked up from ``request.GET['id']``; its metadata is
    unparsed to XML, transformed with the default ``xml2html.xsl`` stylesheet
    and handed to the ``explore_detail_results_keyword.html`` template as
    ``XMLHolder``, together with a ``title`` taken from
    ``request.GET['title']`` or, failing that, the record identifier.

    :return: ``HttpResponse`` with the rendered page
    """
    page = loader.get_template(
        'oai_pmh/explore/explore_detail_results_keyword.html')
    record = OaiRecord.objects.get(pk=request.GET['id'])
    title = request.GET['title'] if 'title' in request.GET else record.identifier

    xml_bytes = XMLdata.unparse(record.metadata).encode('utf-8')
    stylesheet = etree.parse(
        os.path.join(settings.SITE_ROOT, 'static', 'resources', 'xsl',
                     'xml2html.xsl'))
    to_html = etree.XSLT(stylesheet)
    html_tree = to_html(etree.fromstring(str(xml_bytes)))

    context = RequestContext(request, {
        'XMLHolder': str(html_tree),
        'title': title,
    })
    return HttpResponse(page.render(context))
def dashboard_detail_resource(request):
    """Transform a dashboard resource (draft form or record) with XSLT.

    The item is named by ``request.GET['id']`` and its kind by
    ``request.GET['type']``: ``'form'`` loads a ``FormData``, ``'record'``
    loads an ``XMLdata`` document. Its XML is transformed with the template's
    ``ResultXsltDetailed`` stylesheet when one is set, otherwise (or if that
    fails) with the default ``xml2html.xsl``. Empty XML yields the text
    ``"No data to display"``.

    :raises Exception: if ``type`` is neither ``'form'`` nor ``'record'``

    NOTE(review): ``template``, ``title`` and ``newdom`` are computed but not
    used in the visible body -- the render/return step appears to be missing.
    """
    template = loader.get_template('dashboard/my_dashboard_detail_resource.html')
    result_id = request.GET['id']
    record_type = request.GET['type']

    if record_type == 'form':
        form_data = FormData.objects.get(pk=ObjectId(result_id))
        xmlString = form_data.xml_data.encode('utf-8')
        title = form_data.name
        schemaId = form_data.template
    elif record_type == 'record':
        record = XMLdata.get(result_id)
        title = record['title']
        schemaId = record['schema']
        xmlString = XMLdata.unparse(record['content']).encode('utf-8')
    else:
        # Fail with a clear message instead of an unbound-variable error later.
        raise Exception("Unknown record type: " + str(record_type))

    xsltPath = os.path.join(settings.SITE_ROOT, 'static', 'resources', 'xsl', 'xml2html.xsl')
    xslt = etree.parse(xsltPath)
    transform = etree.XSLT(xslt)

    if xmlString != "":
        # Parsing stays outside the try so a malformed document raises its own
        # error instead of an unbound ``dom`` inside the fallback.
        dom = etree.fromstring(str(xmlString))
        #Check if a custom detailed result XSLT has to be used
        try:
            schema = Template.objects.get(pk=schemaId)
            if schema.ResultXsltDetailed:
                shortXslt = etree.parse(BytesIO(schema.ResultXsltDetailed.content.encode('utf-8')))
                shortTransform = etree.XSLT(shortXslt)
                newdom = shortTransform(dom)
            else:
                newdom = transform(dom)
        except Exception:
            #We use the default one
            newdom = transform(dom)
    else:
        newdom = "No data to display"
Example #17
0
def get_results_by_instance_keyword(request):
    """Run a full-text keyword search over OAI-PMH records.

    Reads ``keyword``, ``schemas[]``, ``userSchemas[]``, ``refinements``,
    ``registries[]`` and ``onlySuggestions`` from ``request.POST``, stores a
    "Local" instance in ``request.session['instancesExplore']`` and queries
    ``OaiRecord.executeFullTextQuery``.

    When ``onlySuggestions`` is false, every result is transformed with XSLT
    (the metadata format's ``ResultXsltList`` when set, otherwise -- or if it
    fails -- the default ``xml2html.xsl``) and rendered with
    ``explore_result_keyword.html`` into ``resultString``. Otherwise the
    first 20 results are scanned for words that equal or start with a keyword
    term, and the lower-cased matches are collected in ``resultsByKeyword``.

    NOTE(review): ``results``, ``resultString`` and ``resultsByKeyword`` are
    built but not returned in the visible body -- the final step appears to
    be missing.
    """
    print('BEGIN def getResultsKeyword(request)')
    resultsByKeyword = []
    results = []
    resultString = ""

    #Instance
    json_instances = []
    if 'HTTPS' in request.META['SERVER_PROTOCOL']:
        protocol = "https"
    else:
        protocol = "http"
    instance = Instance(name="Local",
                        protocol=protocol,
                        address=request.META['REMOTE_ADDR'],
                        port=request.META['SERVER_PORT'],
                        access_token="token",
                        refresh_token="token")
    json_instances.append(instance.to_json())
    request.session['instancesExplore'] = json_instances
    sessionName = "resultsExploreOaiPMh" + instance['name']

    keyword = request.POST.get('keyword', '')
    schemas = request.POST.getlist('schemas[]', [])
    user_schemas = request.POST.getlist('userSchemas[]', [])
    refinements = refinements_to_mongo(
        json.loads(request.POST.get('refinements', '{}')))
    registries = request.POST.getlist('registries[]', [])
    if 'onlySuggestions' in request.POST:
        onlySuggestions = json.loads(request.POST['onlySuggestions'])
    else:
        onlySuggestions = False

    metadata_format_ids = _get_metadata_formats_id(schemas=schemas,
                                                   user_schemas=user_schemas,
                                                   registries=registries)
    instanceResults = OaiRecord.executeFullTextQuery(keyword,
                                                     metadata_format_ids,
                                                     refinements)
    if len(instanceResults) > 0:
        if not onlySuggestions:
            xsltPath = os.path.join(settings.SITE_ROOT,
                                    'static/resources/xsl/xml2html.xsl')
            xslt = etree.parse(xsltPath)
            transform = etree.XSLT(xslt)
            template = loader.get_template(
                'oai_pmh/explore/explore_result_keyword.html')

        #Retrieve schema and registries. Avoid to retrieve the information for each result
        registriesName = {}
        objMetadataFormats = {}
        listRegistriesID = set([x['registry'] for x in instanceResults])
        registriesURL = {}
        for registryId in listRegistriesID:
            obj = OaiRegistry.objects(pk=registryId).get()
            registriesName[str(registryId)] = obj.name
            registriesURL[str(registryId)] = obj.url
        listSchemaId = set([x['metadataformat'] for x in instanceResults])
        for schemaId in listSchemaId:
            obj = OaiMetadataFormat.objects(pk=schemaId).get()
            objMetadataFormats[str(schemaId)] = obj

        listItems = []
        xmltodictunparse = XMLdata.unparse
        appendResult = results.append
        toXML = etree.XML
        parse = etree.parse
        XSLT = etree.XSLT
        if not onlySuggestions:
            for instanceResult in instanceResults:
                custom_xslt = False
                # Unparse once; the XML feeds both the raw result list and the DOM.
                content = xmltodictunparse(instanceResult['metadata'])
                appendResult({
                    'title': instanceResult['identifier'],
                    'content': content,
                    'id': str(instanceResult['_id'])
                })
                dom = toXML(str(content.encode('utf-8')))
                #Check if a custom list result XSLT has to be used
                try:
                    metadataFormat = objMetadataFormats[str(
                        instanceResult['metadataformat'])]
                    if metadataFormat.template.ResultXsltList:
                        listXslt = parse(
                            BytesIO(
                                metadataFormat.template.ResultXsltList.content.
                                encode('utf-8')))
                        listTransform = XSLT(listXslt)
                        newdom = listTransform(dom)
                        custom_xslt = True
                    else:
                        newdom = transform(dom)
                except Exception:
                    #We use the default one
                    newdom = transform(dom)
                    custom_xslt = False

                registry_name = registriesName[instanceResult['registry']]
                if len(registry_name) > 30:
                    registry_name = "{0}...".format(registry_name[:30])

                url = urlparse(registriesURL[instanceResult['registry']])
                context = RequestContext(
                    request, {
                        'id': str(instanceResult['_id']),
                        'xml': str(newdom),
                        'title': instanceResult['identifier'],
                        'custom_xslt': custom_xslt,
                        'template_name': metadataFormat.template.title,
                        'registry_name': registry_name,
                        'registry_url': "{0}://{1}".format(
                            url.scheme, url.netloc),
                        'oai_pmh': True
                    })

                resultString += template.render(context)

        else:
            # The pattern depends only on the keyword, so it is built once:
            # each term matches either exactly or as the prefix of a longer word.
            wordList = re.sub(r"[^\w]", " ", keyword).split()
            wordList = '|'.join([x + "|" + x + r"\w+" for x in wordList])
            keywordPattern = re.compile(r"\b(" + wordList + r")\b",
                                        flags=re.IGNORECASE)
            for instanceResult in instanceResults[:20]:
                listWholeKeywords = keywordPattern.findall(
                    XMLdata.unparse(
                        instanceResult['metadata']).encode('utf-8'))
                labels = list(set(listWholeKeywords))

                for label in labels:
                    label = label.lower()
                    result_json = {}
                    result_json['label'] = label
                    result_json['value'] = label
                    if result_json not in resultsByKeyword:
                        resultsByKeyword.append(result_json)
def get_results_by_instance_keyword(request):
    """Run a full-text keyword search over OAI-PMH records.

    Reads ``keyword``, ``schemas[]``, ``userSchemas[]``, ``refinements[]``,
    ``registries[]`` and ``onlySuggestions`` from ``request.GET``; if reading
    them fails, an empty suggestions-only search is used. The template ids
    for the requested schemas (user-defined titles plus all non-deleted
    versions of the common ones) select the metadata formats to query,
    restricted to the given registries or, when none are given, excluding
    deactivated registries.

    When ``onlySuggestions`` is false, every result is transformed with XSLT
    (the metadata format's ``ResultXsltList`` when set, otherwise -- or if it
    fails -- the default ``xml2html.xsl``) and rendered with
    ``explore_result_keyword.html`` into ``resultString``. Otherwise the
    first 20 results are scanned for words that equal or start with a keyword
    term, and the lower-cased matches are collected in ``resultsByKeyword``.

    NOTE(review): ``results``, ``resultString`` and ``resultsByKeyword`` are
    built but not returned in the visible body -- the final step appears to
    be missing.
    """
    print('BEGIN def getResultsKeyword(request)')
    resultsByKeyword = []
    results = []
    resultString = ""

    #Instance
    json_instances = []
    if 'HTTPS' in request.META['SERVER_PROTOCOL']:
        protocol = "https"
    else:
        protocol = "http"
    instance = Instance(name="Local", protocol=protocol, address=request.META['REMOTE_ADDR'], port=request.META['SERVER_PORT'], access_token="token", refresh_token="token")
    json_instances.append(instance.to_json())
    request.session['instancesExplore'] = json_instances
    sessionName = "resultsExploreOaiPMh" + instance['name']


    try:
        keyword = request.GET['keyword']
        schemas = request.GET.getlist('schemas[]')
        userSchemas = request.GET.getlist('userSchemas[]')
        refinements = refinements_to_mongo(request.GET.getlist('refinements[]'))
        if 'onlySuggestions' in request.GET:
            onlySuggestions = json.loads(request.GET['onlySuggestions'])
        else:
            onlySuggestions = False
        registries = request.GET.getlist('registries[]')
    except Exception:
        # Missing or malformed parameters fall back to an empty suggestions
        # search. ``Exception`` (not a bare ``except``) lets
        # SystemExit/KeyboardInterrupt propagate.
        keyword = ''
        schemas = []
        userSchemas = []
        refinements = {}
        onlySuggestions = True
        registries = []

    #We get all template versions for the given schemas
    #First, we take care of user defined schema
    templatesIDUser = Template.objects(title__in=userSchemas).distinct(field="id")
    templatesIDUser = [str(x) for x in templatesIDUser]

    #Take care of the rest, with versions
    templatesVersions = Template.objects(title__in=schemas).distinct(field="templateVersion")

    #We get all templates ID, for all versions
    allTemplatesIDCommon = TemplateVersion.objects(pk__in=templatesVersions, isDeleted=False).distinct(field="versions")
    #We remove the removed version
    allTemplatesIDCommonRemoved = TemplateVersion.objects(pk__in=templatesVersions, isDeleted=False).distinct(field="deletedVersions")
    templatesIDCommon = list(set(allTemplatesIDCommon) - set(allTemplatesIDCommonRemoved))

    templatesID = templatesIDUser + templatesIDCommon
    if len(registries) == 0:
        #We retrieve deactivated registries so as not to get their metadata formats
        deactivatedRegistries = [str(x.id) for x in OaiRegistry.objects(isDeactivated=True).order_by('id')]
        metadataFormatsID = OaiMetadataFormat.objects(template__in=templatesID, registry__not__in=deactivatedRegistries).distinct(field="id")
    else:
        #We retrieve registries from the refinement
        metadataFormatsID = OaiMetadataFormat.objects(template__in=templatesID, registry__in=registries).distinct(field="id")


    instanceResults = OaiRecord.executeFullTextQuery(keyword, metadataFormatsID, refinements)
    if len(instanceResults) > 0:
        if not onlySuggestions:
            xsltPath = os.path.join(settings.SITE_ROOT, 'static/resources/xsl/xml2html.xsl')
            xslt = etree.parse(xsltPath)
            transform = etree.XSLT(xslt)
            template = loader.get_template('oai_pmh/explore/explore_result_keyword.html')

        #Retrieve schema and registries. Avoid to retrieve the information for each result
        registriesName = {}
        objMetadataFormats = {}
        listRegistriesID = set([x['registry'] for x in instanceResults])
        for registryId in listRegistriesID:
            obj = OaiRegistry.objects(pk=registryId).get()
            registriesName[str(registryId)] = obj.name
        listSchemaId = set([x['metadataformat'] for x in instanceResults])
        for schemaId in listSchemaId:
            obj = OaiMetadataFormat.objects(pk=schemaId).get()
            objMetadataFormats[str(schemaId)] = obj

        listItems = []
        xmltodictunparse = XMLdata.unparse
        appendResult = results.append
        toXML = etree.XML
        parse = etree.parse
        XSLT = etree.XSLT
        if not onlySuggestions:
            for instanceResult in instanceResults:
                custom_xslt = False
                # Unparse once; the XML feeds both the raw result list and the DOM.
                content = xmltodictunparse(instanceResult['metadata'])
                appendResult({'title':instanceResult['identifier'], 'content':content,'id':str(instanceResult['_id'])})
                dom = toXML(str(content.encode('utf-8')))
                #Check if a custom list result XSLT has to be used
                try:
                    metadataFormat = objMetadataFormats[str(instanceResult['metadataformat'])]
                    if metadataFormat.template.ResultXsltList:
                        listXslt = parse(BytesIO(metadataFormat.template.ResultXsltList.content.encode('utf-8')))
                        listTransform = XSLT(listXslt)
                        newdom = listTransform(dom)
                        custom_xslt = True
                    else:
                        newdom = transform(dom)
                except Exception:
                    #We use the default one
                    newdom = transform(dom)
                    custom_xslt = False

                registry_name = registriesName[instanceResult['registry']]
                if len(registry_name) > 30:
                    registry_name = "{0}...".format(registry_name[:30])

                context = RequestContext(request, {'id':str(instanceResult['_id']),
                                   'xml': str(newdom),
                                   'title': instanceResult['identifier'],
                                   'custom_xslt': custom_xslt,
                                   'template_name': metadataFormat.template.title,
                                   'registry_name': registry_name,
                                   'oai_pmh': True})


                resultString+= template.render(context)

        else:
            # The pattern depends only on the keyword, so it is built once:
            # each term matches either exactly or as the prefix of a longer word.
            wordList = re.sub(r"[^\w]", " ", keyword).split()
            wordList = '|'.join([x + "|" + x + r"\w+" for x in wordList])
            keywordPattern = re.compile(r"\b(" + wordList + r")\b", flags=re.IGNORECASE)
            for instanceResult in instanceResults[:20]:
                listWholeKeywords = keywordPattern.findall(XMLdata.unparse(instanceResult['metadata']).encode('utf-8'))
                labels = list(set(listWholeKeywords))

                for label in labels:
                    label = label.lower()
                    result_json = {}
                    result_json['label'] = label
                    result_json['value'] = label
                    if result_json not in resultsByKeyword:
                        resultsByKeyword.append(result_json)
    def get_record(self):
        """Render the single record designated by ``self.identifier``.

        The identifier is validated through ``self.check_identifier()`` and
        searched among published records only. When the metadata format
        matching ``self.metadataPrefix`` is not flagged ``isTemplate``, the
        record content is transformed with the activated XSLT registered for
        that metadata format and the record's template.

        Returns:
            The value of ``self.render_to_response(record_info)`` on success,
            or of ``self.errors(e.errors)`` when an ``OAIExceptions`` is
            raised while building the record.

        Raises (internally, then reported through ``self.errors``):
            idDoesNotExist: the identifier cannot be converted to an
                ``ObjectId`` or matches no published record.
            cannotDisseminateFormat: the metadata format or its XSLT cannot
                be resolved for this record.
        """
        try:
            # True when the XML has to go through an XSLT transformation
            hasToBeTransformed = False
            xslt = None
            # Check if the identifier pattern is OK
            record_id = self.check_identifier()
            # Template name
            self.template_name = 'oai_pmh/xml/get_record.xml'
            # Convert the id to an ObjectId; any failure means a bad identifier
            try:
                object_id = ObjectId(record_id)
            except Exception:
                raise idDoesNotExist(self.identifier)
            # The record has to be published
            query = {'_id': object_id, 'ispublished': True}
            data = XMLdata.executeQueryFullResult(query)
            # This id doesn't exist
            if len(data) == 0:
                raise idDoesNotExist(self.identifier)
            data = data[0]
            # Get the template for the identifier
            template = data['schema']
            # Retrieve sets for this template
            sets = OaiMySet.objects(templates=template).all()
            # Retrieve the XSLT for the transformation
            try:
                # Get the metadata format for the provided prefix
                myMetadataFormat = OaiMyMetadataFormat.objects.get(
                    metadataPrefix=self.metadataPrefix)
                # A prefix that is not tied to a template needs an XSLT
                if not myMetadataFormat.isTemplate:
                    hasToBeTransformed = True
                    # Get information about the XSLT for the MF and template
                    objTempMfXslt = OaiTemplMfXslt.objects(
                        myMetadataFormat=myMetadataFormat,
                        template=template,
                        activated=True).get()
                    xslt = objTempMfXslt.xslt
                    # No XSLT stored for this association
                    if not xslt:
                        raise cannotDisseminateFormat(self.metadataPrefix)
            except Exception:
                # Any lookup failure is reported as an unsupported format.
                # ``Exception`` (not a bare except) so that KeyboardInterrupt
                # and SystemExit are no longer converted into an OAI error.
                raise cannotDisseminateFormat(self.metadataPrefix)

            # Transform XML data
            dataToTransform = [{
                'title': data['_id'],
                'content': self.cleanXML(XMLdata.unparse(data['content']))
            }]
            if hasToBeTransformed:
                dataXML = self.getXMLTranformXSLT(dataToTransform, xslt)
            else:
                dataXML = dataToTransform

            # Fill the response
            record_info = {
                'identifier': self.identifier,
                'last_modified': self.get_last_modified_date(data),
                'sets': sets,
                'XML': dataXML[0]['content'],
                'deleted': data.get('status', '') == Status.DELETED
            }
            return self.render_to_response(record_info)
        except OAIExceptions as e:
            return self.errors(e.errors)
 for template in templatesID:
     #Retrieve sets for this template
     sets = OaiMySet.objects(templates=template).all()
     query['schema'] = template
     #The record has to be published
     query['ispublished'] = True
     #Get all records for this template
     data = XMLdata.executeQueryFullResult(query)
     #IF no records, go to the next template
     if len(data) == 0:
         continue
     dataToTransform = [{
         'title':
         x['_id'],
         'content':
         self.cleanXML(XMLdata.unparse(x['content']))
     } for x in data]
     if myMetadataFormat.isTemplate:
         #No transformation needed
         dataXML = dataToTransform
     else:
         #Get the XSLT file
         xslt = objTempMfXslt(template=template).get().xslt
         #Transform all XML data (1 call)
         dataXML = self.getXMLTranformXSLT(dataToTransform, xslt)
     #Add each record
     for elt in data:
         identifier = '%s:%s:id/%s' % (settings.OAI_SCHEME,
                                       settings.OAI_REPO_IDENTIFIER,
                                       elt['_id'])
         xmlStr = filter(lambda xml: xml['title'] == elt['_id'],
def get_results_by_instance_keyword(request):
    """Search harvested OAI-PMH records by keyword for the explore page.

    Reads ``keyword``, ``schemas[]``, ``userSchemas[]``, ``refinements[]``,
    ``registries[]`` and the optional ``onlySuggestions`` flag from
    ``request.GET``. If reading any of them fails, the search falls back to
    an empty keyword in suggestion-only mode.

    Side effects and results built here:
        * ``request.session['instancesExplore']`` receives the JSON
          description of a "Local" ``Instance``.
        * When ``onlySuggestions`` is false, every matching record is added
          to ``results`` and its rendered HTML is appended to
          ``resultString``.
        * Otherwise the first 20 matching records are scanned and the
          distinct lower-cased words starting with one of the keyword's
          words are collected in ``resultsByKeyword``.
    """
    print('BEGIN def getResultsKeyword(request)')
    resultsByKeyword = []
    results = []
    resultString = ""

    # Instance
    json_instances = []
    if 'HTTPS' in request.META['SERVER_PROTOCOL']:
        protocol = "https"
    else:
        protocol = "http"
    instance = Instance(name="Local",
                        protocol=protocol,
                        address=request.META['REMOTE_ADDR'],
                        port=request.META['SERVER_PORT'],
                        access_token="token",
                        refresh_token="token")
    json_instances.append(instance.to_json())
    request.session['instancesExplore'] = json_instances
    sessionName = "resultsExploreOaiPMh" + instance['name']

    try:
        keyword = request.GET['keyword']
        schemas = request.GET.getlist('schemas[]')
        userSchemas = request.GET.getlist('userSchemas[]')
        refinements = refinements_to_mongo(
            request.GET.getlist('refinements[]'))
        if 'onlySuggestions' in request.GET:
            onlySuggestions = json.loads(request.GET['onlySuggestions'])
        else:
            onlySuggestions = False
        registries = request.GET.getlist('registries[]')
    except Exception:
        # Deliberate best effort: a missing or malformed parameter degrades
        # to an empty suggestion search. ``Exception`` rather than a bare
        # except so KeyboardInterrupt/SystemExit still propagate.
        keyword = ''
        schemas = []
        userSchemas = []
        refinements = {}
        onlySuggestions = True
        registries = []

    # We get all template versions for the given schemas
    # First, we take care of user defined schema
    templatesIDUser = Template.objects(title__in=userSchemas).distinct(
        field="id")
    templatesIDUser = [str(x) for x in templatesIDUser]

    # Take care of the rest, with versions
    templatesVersions = Template.objects(title__in=schemas).distinct(
        field="templateVersion")

    # We get all templates ID, for all versions
    allTemplatesIDCommon = TemplateVersion.objects(
        pk__in=templatesVersions, isDeleted=False).distinct(field="versions")
    # We remove the removed version
    allTemplatesIDCommonRemoved = TemplateVersion.objects(
        pk__in=templatesVersions,
        isDeleted=False).distinct(field="deletedVersions")
    templatesIDCommon = list(
        set(allTemplatesIDCommon) - set(allTemplatesIDCommonRemoved))

    templatesID = templatesIDUser + templatesIDCommon
    if len(registries) == 0:
        # We retrieve deactivated registries so as not to get their
        # metadata formats
        deactivatedRegistries = [
            str(x.id)
            for x in OaiRegistry.objects(isDeactivated=True).order_by('id')
        ]
        metadataFormatsID = OaiMetadataFormat.objects(
            template__in=templatesID,
            registry__not__in=deactivatedRegistries).distinct(field="id")
    else:
        # We retrieve registries from the refinement
        metadataFormatsID = OaiMetadataFormat.objects(
            template__in=templatesID,
            registry__in=registries).distinct(field="id")

    instanceResults = OaiRecord.executeFullTextQuery(keyword,
                                                     metadataFormatsID,
                                                     refinements)
    if len(instanceResults) > 0:
        if not onlySuggestions:
            xsltPath = os.path.join(settings.SITE_ROOT,
                                    'static/resources/xsl/xml2html.xsl')
            xslt = etree.parse(xsltPath)
            transform = etree.XSLT(xslt)
            template = loader.get_template(
                'oai_pmh/explore/explore_result_keyword.html')

        # Retrieve schema and registries once, instead of once per result
        registriesName = {}
        objMetadataFormats = {}
        listRegistriesID = set([x['registry'] for x in instanceResults])
        for registryId in listRegistriesID:
            obj = OaiRegistry.objects(pk=registryId).get()
            registriesName[str(registryId)] = obj.name
        listSchemaId = set([x['metadataformat'] for x in instanceResults])
        for schemaId in listSchemaId:
            obj = OaiMetadataFormat.objects(pk=schemaId).get()
            objMetadataFormats[str(schemaId)] = obj

        listItems = []
        xmltodictunparse = XMLdata.unparse
        appendResult = results.append
        toXML = etree.XML
        parse = etree.parse
        XSLT = etree.XSLT
        if not onlySuggestions:
            for instanceResult in instanceResults:
                custom_xslt = False
                # Serialize the metadata once and reuse it for both the
                # stored result and the DOM to transform
                content = xmltodictunparse(instanceResult['metadata'])
                appendResult({
                    'title': instanceResult['identifier'],
                    'content': content,
                    'id': str(instanceResult['_id'])
                })
                dom = toXML(str(content.encode('utf-8')))
                # Check if a custom list result XSLT has to be used
                try:
                    metadataFormat = objMetadataFormats[str(
                        instanceResult['metadataformat'])]
                    if metadataFormat.template.ResultXsltList:
                        listXslt = parse(
                            BytesIO(
                                metadataFormat.template.ResultXsltList.content.
                                encode('utf-8')))
                        listTransform = XSLT(listXslt)
                        newdom = listTransform(dom)
                        custom_xslt = True
                    else:
                        newdom = transform(dom)
                except Exception:
                    # We use the default one
                    newdom = transform(dom)
                    custom_xslt = False

                # registriesName is keyed by str(id); normalize the lookup
                # key the same way, as done for the metadata format above
                registry_name = registriesName[str(
                    instanceResult['registry'])]
                if len(registry_name) > 30:
                    registry_name = "{0}...".format(registry_name[:30])

                context = RequestContext(
                    request, {
                        'id': str(instanceResult['_id']),
                        'xml': str(newdom),
                        'title': instanceResult['identifier'],
                        'custom_xslt': custom_xslt,
                        'template_name': metadataFormat.template.title,
                        'registry_name': registry_name,
                        'oai_pmh': True
                    })

                resultString += template.render(context)

        else:
            # The pattern depends only on the keyword: build and compile it
            # once instead of once per record. Each word matches itself or
            # any longer word it is a prefix of.
            words = re.sub(r"[^\w]", " ", keyword).split()
            alternatives = '|'.join([w + "|" + w + r"\w+" for w in words])
            keywordPattern = re.compile(r"\b(" + alternatives + r")\b",
                                        flags=re.IGNORECASE)
            # Labels already suggested, for constant-time de-duplication
            seenLabels = set()
            for instanceResult in instanceResults[:20]:
                recordText = XMLdata.unparse(
                    instanceResult['metadata']).encode('utf-8')
                for label in set(keywordPattern.findall(recordText)):
                    label = label.lower()
                    if label not in seenLabels:
                        seenLabels.add(label)
                        resultsByKeyword.append({
                            'label': label,
                            'value': label
                        })
         templatesID = set(templatesID).intersection([str(x.id) for x in setsTemplates.templates])
     except Exception, e:
         raise noRecordsMatch
 #For each template found
 for template in templatesID:
     #Retrieve sets for this template
     sets = OaiMySet.objects(templates=template).all()
     query['schema'] = template
     #The record has to be published
     query['ispublished'] = True
     #Get all records for this template
     data = XMLdata.executeQueryFullResult(query)
     #IF no records, go to the next template
     if len(data) == 0:
         continue
     dataToTransform = [{'title': x['_id'], 'content': self.cleanXML(XMLdata.unparse(x['content']))} for x in data]
     if myMetadataFormat.isTemplate:
         #No transformation needed
         dataXML = dataToTransform
     else:
         #Get the XSLT file
         xslt = objTempMfXslt(template=template).get().xslt
         #Transform all XML data (1 call)
         dataXML = self.getXMLTranformXSLT(dataToTransform, xslt)
     #Add each record
     for elt in data:
         identifier = '%s:%s:id/%s' % (settings.OAI_SCHEME, settings.OAI_REPO_IDENTIFIER,
               elt['_id'])
         xmlStr = filter(lambda xml: xml['title'] == elt['_id'], dataXML)[0]
         record_info = {
             'identifier': identifier,
    def get_record(self):
        """Render the single record designated by ``self.identifier``.

        The identifier is validated through ``self.check_identifier()`` and
        searched among published records only. When the metadata format
        matching ``self.metadataPrefix`` is not flagged ``isTemplate``, the
        record content is transformed with the activated XSLT registered for
        that metadata format and the record's template.

        Returns:
            The value of ``self.render_to_response(record_info)`` on success,
            or of ``self.errors(e.errors)`` when an ``OAIExceptions`` is
            raised while building the record.

        Raises (internally, then reported through ``self.errors``):
            idDoesNotExist: the identifier cannot be converted to an
                ``ObjectId`` or matches no published record.
            cannotDisseminateFormat: the metadata format or its XSLT cannot
                be resolved for this record.
        """
        try:
            # True when the XML has to go through an XSLT transformation
            hasToBeTransformed = False
            xslt = None
            # Check if the identifier pattern is OK
            record_id = self.check_identifier()
            # Template name
            self.template_name = 'oai_pmh/xml/get_record.xml'
            # Convert the id to an ObjectId; any failure means a bad identifier
            try:
                object_id = ObjectId(record_id)
            except Exception:
                raise idDoesNotExist(self.identifier)
            # The record has to be published
            query = {'_id': object_id, 'ispublished': True}
            data = XMLdata.executeQueryFullResult(query)
            # This id doesn't exist
            if len(data) == 0:
                raise idDoesNotExist(self.identifier)
            data = data[0]
            # Get the template for the identifier
            template = data['schema']
            # Retrieve sets for this template
            sets = OaiMySet.objects(templates=template).all()
            # Retrieve the XSLT for the transformation
            try:
                # Get the metadata format for the provided prefix
                myMetadataFormat = OaiMyMetadataFormat.objects.get(
                    metadataPrefix=self.metadataPrefix)
                # A prefix that is not tied to a template needs an XSLT
                if not myMetadataFormat.isTemplate:
                    hasToBeTransformed = True
                    # Get information about the XSLT for the MF and template
                    objTempMfXslt = OaiTemplMfXslt.objects(
                        myMetadataFormat=myMetadataFormat,
                        template=template,
                        activated=True).get()
                    xslt = objTempMfXslt.xslt
                    # No XSLT stored for this association
                    if not xslt:
                        raise cannotDisseminateFormat(self.metadataPrefix)
            except Exception:
                # Any lookup failure is reported as an unsupported format.
                # ``Exception`` (not a bare except) so that KeyboardInterrupt
                # and SystemExit are no longer converted into an OAI error.
                raise cannotDisseminateFormat(self.metadataPrefix)

            # Transform XML data
            dataToTransform = [{
                'title': data['_id'],
                'content': self.cleanXML(XMLdata.unparse(data['content']))
            }]
            if hasToBeTransformed:
                dataXML = self.getXMLTranformXSLT(dataToTransform, xslt)
            else:
                dataXML = dataToTransform

            # Fill the response
            record_info = {
                'identifier': self.identifier,
                'last_modified': self.get_last_modified_date(data),
                'sets': sets,
                'XML': dataXML[0]['content'],
                'deleted': data.get('status', '') == Status.DELETED
            }
            return self.render_to_response(record_info)
        except OAIExceptions as e:
            return self.errors(e.errors)