def test_update_publish_draft(self):
    """Check that publishing a draft updates the published record it targets.

    A published record and a draft (form data) referencing it are created,
    '/dashboard/update_publish_draft' is requested for the draft, and the
    stored record is then compared with the draft and with its former state.
    """
    status = Status.ACTIVE
    draft_xml = ("<Resource localid='' status='" + status + "'><identity>"
                 "<title>My new software</title></identity><curation><publisher>PF</publisher><contact><name></name>"
                 "</contact></curation><content><description>This is a new record</description><subject></subject>"
                 "<referenceURL></referenceURL></content></Resource>")

    record_id = self.createXMLData(ispublished=True)
    record_before = XMLdata.get(record_id)
    # The draft must differ from what is stored, otherwise the test proves nothing.
    self.assertNotEquals(draft_xml, XMLdata.unparse(record_before['content']))

    admin_id = self.getAdmin().id
    template = self.createTemplate()
    self.assertEqual(len(SchemaElement.objects().all()), 0)
    self.assertEqual(len(FormData.objects().all()), 0)

    draft = self.createFormData(user=admin_id,
                                name='name',
                                template=str(template.id),
                                xml_data=draft_xml,
                                xml_data_id=str(record_id))
    self.doRequestGetAdminClientLogged(url='/dashboard/update_publish_draft',
                                       data={'draft_id': str(draft.id)})

    record_after = XMLdata.get(record_id)
    # No schema element and no form data document is expected after the request.
    self.assertEqual(len(SchemaElement.objects().all()), 0)
    self.assertEqual(len(FormData.objects().all()), 0)

    stored_xml = str(XMLdata.unparse(record_after['content']))
    self.assertEquals(etree.XML(draft_xml).text, etree.XML(stored_xml).text)
    self.assertEquals(True, record_after.get('ispublished'))
    self.assertEquals(str(admin_id), record_after.get('iduser'))
    self.assertNotEquals(record_before.get('lastmodificationdate'),
                         record_after.get('lastmodificationdate'))
    self.assertNotEquals(record_before.get('publicationdate'),
                         record_after.get('publicationdate'))
    self.assertEquals(status, record_after.get('status'))
def explore_detail_result_keyword(request):
    """Transform the metadata of one OAI record with an XSLT stylesheet.

    Reads ``id`` (and optionally ``title``) from ``request.GET``, loads the
    matching ``OaiRecord`` and turns its ordered metadata into an XML tree.
    The tree is transformed with the detailed-result XSLT attached to the
    record's metadata format template when there is one, otherwise with the
    default ``xml2html.xsl``. The result is left in ``newdom``; the visible
    body does not build or return a response.
    """
    template = loader.get_template("oai_pmh/explore/explore_detail_results_keyword.html")
    result_id = request.GET["id"]
    record = OaiRecord.objects.get(pk=result_id)
    if "title" in request.GET:
        title = request.GET["title"]
    else:
        title = record.identifier

    xmlString = XMLdata.unparse(record.getMetadataOrdered()).encode("utf-8")
    xsltPath = os.path.join(settings.SITE_ROOT, "static", "resources", "xsl", "xml2html.xsl")
    xslt = etree.parse(xsltPath)
    transform = etree.XSLT(xslt)
    dom = etree.fromstring(str(xmlString))

    # Check if a custom detailed result XSLT has to be used
    try:
        metadataFormat = record.metadataformat
        if metadataFormat.template.ResultXsltDetailed:
            listXslt = etree.parse(BytesIO(metadataFormat.template.ResultXsltDetailed.content.encode("utf-8")))
            # Keep the custom stylesheet under its own name: rebinding
            # `transform` here made the fallback below re-run the very
            # stylesheet that had just failed instead of the default one.
            detailedTransform = etree.XSLT(listXslt)
            newdom = detailedTransform(dom)
        else:
            newdom = transform(dom)
    except Exception:
        # We use the default one
        newdom = transform(dom)
def curate_edit_data(request):
    """Prepare the session for editing an existing XML document.

    Loads the document whose id is in ``request.GET['id']``, flags the session
    as being in edit mode, deletes the current user's previous forms that are
    linked to an XML document, stores a new ``FormData`` holding the document
    content and records its primary key in the session.

    :raises MDCSError: when any step above fails with a regular exception.
    """
    try:
        xml_data_id = request.GET['id']
        xml_data = XMLdata.get(xml_data_id)
        json_content = xml_data['content']
        xml_content = XMLdata.unparse(json_content)

        request.session['curate_edit'] = True
        request.session['currentTemplateID'] = xml_data['schema']

        # remove previously created forms when editing a new one
        previous_forms = FormData.objects(user=str(request.user.id), xml_data_id__exists=True)
        for previous_form in previous_forms:
            if previous_form.schema_element_root is not None:
                delete_branch_from_db(previous_form.schema_element_root.pk)
            previous_form.delete()

        form_data = FormData(user=str(request.user.id),
                             template=xml_data['schema'],
                             name=xml_data['title'],
                             xml_data=xml_content,
                             xml_data_id=xml_data_id)
        form_data.save()
        request.session['curateFormData'] = str(form_data.pk)

        # Drop state left over from a previous editing session.
        for stale_key in ('form_id', 'xmlDocTree'):
            if stale_key in request.session:
                del request.session[stale_key]
    except Exception:
        # `except Exception` (not a bare `except`) so that SystemExit and
        # KeyboardInterrupt are not reported as a missing document.
        raise MDCSError("The document you are looking for doesn't exist.")
def explore_detail_result_process(request):
    """Transform one stored XML document with an XSLT stylesheet.

    The document is looked up from ``request.GET['id']``. Its content is
    transformed with the detailed-result XSLT of its template when one is
    set, otherwise with the default ``xml2html.xsl``. The outcome is left in
    ``newdom``; the visible body does not build or return a response.
    """
    result_id = request.GET['id']
    record = XMLdata.get(result_id)
    schemaId = record['schema']
    title = request.GET['title'] if 'title' in request.GET else record['title']
    xmlString = XMLdata.unparse(record['content']).encode('utf-8')

    default_xslt_path = os.path.join(settings.SITE_ROOT, 'static', 'resources', 'xsl', 'xml2html.xsl')
    transform = etree.XSLT(etree.parse(default_xslt_path))

    # Check if a custom detailed result XSLT has to be used
    schema = Template.objects.get(pk=schemaId)
    try:
        if xmlString != "":
            dom = etree.fromstring(str(xmlString))
            if not schema.ResultXsltDetailed:
                newdom = transform(dom)
            else:
                custom_source = BytesIO(schema.ResultXsltDetailed.content.encode('utf-8'))
                shortTransform = etree.XSLT(etree.parse(custom_source))
                newdom = shortTransform(dom)
    except Exception:
        # We use the default one
        # NOTE(review): `dom` is unbound here when etree.fromstring itself failed.
        newdom = transform(dom)
def explore_detail_result_process(request):
    """Apply the detailed-result XSLT (or the default one) to a stored document.

    ``request.GET['id']`` selects the document and ``request.GET['title']``,
    when present, overrides its stored title. The transformed tree is left in
    ``newdom``; the visible body does not build or return a response.
    """
    result_id = request.GET['id']
    stored = XMLdata.get(result_id)
    schemaId = stored['schema']
    if 'title' not in request.GET:
        title = stored['title']
    else:
        title = request.GET['title']
    xmlString = XMLdata.unparse(stored['content']).encode('utf-8')

    xsltPath = os.path.join(settings.SITE_ROOT, 'static', 'resources', 'xsl', 'xml2html.xsl')
    transform = etree.XSLT(etree.parse(xsltPath))

    # Check if a custom detailed result XSLT has to be used
    schema = Template.objects.get(pk=schemaId)
    try:
        if xmlString != "":
            dom = etree.fromstring(str(xmlString))
            if schema.ResultXsltDetailed:
                encoded_xslt = schema.ResultXsltDetailed.content.encode('utf-8')
                shortXslt = etree.parse(BytesIO(encoded_xslt))
                newdom = etree.XSLT(shortXslt)(dom)
            else:
                newdom = transform(dom)
    except Exception:
        # We use the default one
        # NOTE(review): `dom` is unbound here when etree.fromstring itself failed.
        newdom = transform(dom)
def explore_detail_result_keyword(request):
    """Transform the metadata of one OAI record with an XSLT stylesheet.

    Reads ``id`` (and optionally ``title``) from ``request.GET``, loads the
    matching ``OaiRecord`` and turns its ordered metadata into an XML tree.
    The tree is transformed with the detailed-result XSLT attached to the
    record's metadata format template when there is one, otherwise with the
    default ``xml2html.xsl``. The result is left in ``newdom``; the visible
    body does not build or return a response.
    """
    template = loader.get_template(
        'oai_pmh/explore/explore_detail_results_keyword.html')
    result_id = request.GET['id']
    record = OaiRecord.objects.get(pk=result_id)
    if 'title' in request.GET:
        title = request.GET['title']
    else:
        title = record.identifier

    xmlString = XMLdata.unparse(record.getMetadataOrdered()).encode('utf-8')
    xsltPath = os.path.join(settings.SITE_ROOT, 'static', 'resources', 'xsl',
                            'xml2html.xsl')
    xslt = etree.parse(xsltPath)
    transform = etree.XSLT(xslt)
    dom = etree.fromstring(str(xmlString))

    # Check if a custom detailed result XSLT has to be used
    try:
        metadataFormat = record.metadataformat
        if metadataFormat.template.ResultXsltDetailed:
            listXslt = etree.parse(
                BytesIO(
                    metadataFormat.template.ResultXsltDetailed.content.encode(
                        'utf-8')))
            # Keep the custom stylesheet under its own name: rebinding
            # `transform` here made the fallback below re-run the very
            # stylesheet that had just failed instead of the default one.
            detailedTransform = etree.XSLT(listXslt)
            newdom = detailedTransform(dom)
        else:
            newdom = transform(dom)
    except Exception:
        # We use the default one
        newdom = transform(dom)
def dashboard_detail_record(request, otherUser=None):
    """Render the detail page of a dashboard form or record.

    :param request: request whose GET data carries ``id`` and ``type``
        (``'form'`` or ``'record'``).
    :param otherUser: when truthy, the "other users" template is rendered and
        the value is exposed to it as ``user_id``.
    :return: HttpResponse with the rendered template.
    :raises Exception: when ``type`` is neither ``'form'`` nor ``'record'``.
    """
    record_id = request.GET['id']
    record_type = request.GET['type']

    if otherUser:
        template_name = 'dashboard/my_dashboard_detail_recordotherusers.html'
    else:
        template_name = 'dashboard/my_dashboard_detail_record.html'
    template = loader.get_template(template_name)

    if record_type == 'record':
        record = XMLdata.get(record_id)
        title = record['title']
        schema_id = record['schema']
        xml_string = XMLdata.unparse(record['content']).encode('utf-8')
    elif record_type == 'form':
        form_data = FormData.objects.get(pk=ObjectId(record_id))
        xml_string = form_data.xml_data.encode(encoding='UTF-8')
        title = form_data.name
        schema_id = form_data.template
    else:
        raise Exception("Unknow record type: " + str(record_type))

    default_xslt = os.path.join(settings.SITE_ROOT, 'static', 'resources', 'xsl', 'xml2html.xsl')
    transform = etree.XSLT(etree.parse(default_xslt))

    dom = ''
    # Check if a custom detailed result XSLT has to be used
    try:
        if xml_string == "":
            newdom = 'No data has been saved to this form yet.'
        else:
            dom = etree.fromstring(xml_string)
            schema = Template.objects.get(pk=schema_id)
            if schema.ResultXsltDetailed:
                custom_source = BytesIO(schema.ResultXsltDetailed.content.encode('utf-8'))
                short_transform = etree.XSLT(etree.parse(custom_source))
                newdom = short_transform(dom)
            else:
                newdom = transform(dom)
    except Exception:
        # We use the default one
        # NOTE(review): `dom` is still '' here when parsing the XML itself failed.
        newdom = transform(dom)

    context = RequestContext(request, {
        'XMLHolder': str(newdom),
        'title': title,
        'type': record_type,
    })
    if otherUser:
        context["user_id"] = otherUser
    return HttpResponse(template.render(context))
def test_parse_unparse_test(self):
    """Parsing 'test.xml' into XMLdata and unparsing it must give the same XML.

    Both strings go through ``_strip`` before being compared.
    """
    xml_path = join(RESOURCES_PATH, 'test.xml')
    with open(xml_path, 'r') as xml_file:
        original_xml = xml_file.read()

    # test parsing
    parsed = XMLdata(xml=original_xml)
    # test unparsing
    regenerated_xml = XMLdata.unparse(parsed.content['content'])

    self.assertEquals(_strip(original_xml), _strip(regenerated_xml))
def test_parse_unparse_test(self):
    """Round-trip 'test.xml' through XMLdata parsing and unparsing.

    The test passes when the original and regenerated XML are equal once
    both have been passed through ``_strip``.
    """
    with open(join(RESOURCES_PATH, 'test.xml'), 'r') as source:
        expected = source.read()

    # test parsing, then unparsing of the parsed content
    document = XMLdata(xml=expected)
    actual = XMLdata.unparse(document.content['content'])

    self.assertEquals(_strip(expected), _strip(actual))
def retrieve_xml(docID):
    """
    Get the XML associated to a document id.

    :param docID: id handed to ``XMLdata.getXMLdata``
    :return: the unparsed XML, encoded as UTF-8
    """
    document = XMLdata.getXMLdata(docID)
    # NOTE(review): the value is picked by position (4th item); presumably
    # this is the document content - confirm against XMLdata.getXMLdata.
    raw_content = document.items()[3][1]
    # Round-trip through JSON so every nested mapping becomes an OrderedDict.
    serialized = json.dumps(raw_content, sort_keys=False)
    ordered_content = json.loads(serialized, object_pairs_hook=OrderedDict)
    return XMLdata.unparse(ordered_content).encode('utf-8')
def test_update_publish_draft(self):
    """Check that publishing a draft updates the published record it targets.

    A published record and a draft (form data) referencing it are created,
    '/dashboard/update_publish_draft' is requested for the draft, and the
    stored record is then compared with the draft and with its former state.
    """
    status = Status.ACTIVE
    draft_xml = ("<Resource localid='' status='" + status + "'><identity>"
                 "<title>My new software</title></identity><curation><publisher>PF</publisher><contact><name></name>"
                 "</contact></curation><content><description>This is a new record</description><subject></subject>"
                 "<referenceURL></referenceURL></content></Resource>")

    record_id = self.createXMLData(ispublished=True)
    record_before = XMLdata.get(record_id)
    # The draft must differ from what is stored, otherwise the test proves nothing.
    self.assertNotEquals(draft_xml, XMLdata.unparse(record_before['content']))

    admin_id = self.getAdmin().id
    template = self.createTemplate()
    self.assertEqual(len(SchemaElement.objects().all()), 0)
    self.assertEqual(len(FormData.objects().all()), 0)

    draft = self.createFormData(user=admin_id,
                                name='name',
                                template=str(template.id),
                                xml_data=draft_xml,
                                xml_data_id=str(record_id))
    self.doRequestGetAdminClientLogged(url='/dashboard/update_publish_draft',
                                       data={'draft_id': str(draft.id)})

    record_after = XMLdata.get(record_id)
    # No schema element and no form data document is expected after the request.
    self.assertEqual(len(SchemaElement.objects().all()), 0)
    self.assertEqual(len(FormData.objects().all()), 0)

    stored_xml = str(XMLdata.unparse(record_after['content']))
    self.assertEquals(etree.XML(draft_xml).text, etree.XML(stored_xml).text)
    self.assertEquals(True, record_after.get('ispublished'))
    self.assertEquals(str(admin_id), record_after.get('iduser'))
    self.assertNotEquals(record_before.get('lastmodificationdate'),
                         record_after.get('lastmodificationdate'))
    self.assertNotEquals(record_before.get('publicationdate'),
                         record_after.get('publicationdate'))
    self.assertEquals(status, record_after.get('status'))
def render_xml_as_html(value):
    """Render a parsed XML document as HTML using the default XSLT.

    :param value: structure accepted by ``XMLdata.unparse``.
    :return: the XSLT output as a string; ``value`` itself, unchanged, when
        the unparsed XML is empty or when any step raises a regular exception.
    """
    try:
        xsltPath = os.path.join(settings.SITE_ROOT, 'static', 'resources', 'xsl', 'xml2html.xsl')
        transform = etree.XSLT(etree.parse(xsltPath))
        xmlString = XMLdata.unparse(value)
        if xmlString == "":
            return value
        dom = etree.XML(xmlString.encode('utf-8'))
        return str(transform(dom))
    except Exception:
        # Best effort: fall back to the raw value. `except Exception` rather
        # than a bare `except` so SystemExit/KeyboardInterrupt still propagate.
        return value
def curate_edit_data(request):
    """Prepare the session for editing an existing XML document.

    Nothing is done when ``request.GET['useForm']`` equals ``'true'``.
    Otherwise the session is flagged as being in edit mode, the current
    user's previous forms that are linked to an XML document (and are not a
    new version of a record) are deleted, and the ``FormData`` attached to
    ``request.GET['id']`` is reused or created from the stored document. Its
    template, XML and primary key are then recorded in the session.

    :raises MDCSError: when any step above fails with a regular exception.
    """
    try:
        use_existing_form = ('useForm' in request.GET and
                             request.GET['useForm'] == 'true')
        if not use_existing_form:
            xml_data_id = request.GET['id']
            request.session['curate_edit'] = True

            # remove previously created forms when editing a new one
            previous_forms = FormData.objects(user=str(request.user.id),
                                              xml_data_id__exists=True,
                                              isNewVersionOfRecord=False)
            for previous_form in previous_forms:
                if previous_form.schema_element_root is not None:
                    delete_branch_from_db(previous_form.schema_element_root.pk)
                previous_form.delete()

            # Check if a form_data already exists for this record
            form_data = FormData.objects(xml_data_id=xml_data_id).all().first()
            if not form_data:
                xml_data = XMLdata.get(xml_data_id)
                json_content = xml_data['content']
                xml_content = XMLdata.unparse(json_content)
                form_data = FormData(user=str(request.user.id),
                                     template=xml_data['schema'],
                                     name=xml_data['title'],
                                     xml_data=xml_content,
                                     xml_data_id=xml_data_id,
                                     isNewVersionOfRecord=xml_data.get(
                                         'ispublished', False))
                form_data.save()

            request.session['currentTemplateID'] = form_data.template
            request.session['curate_edit_data'] = form_data.xml_data
            request.session['curateFormData'] = str(form_data.pk)

            # Drop state left over from a previous editing session.
            for stale_key in ('form_id', 'xmlDocTree'):
                if stale_key in request.session:
                    del request.session[stale_key]
    except Exception:
        # `except Exception` (not a bare `except`) so that SystemExit and
        # KeyboardInterrupt are not reported as a missing document.
        raise MDCSError("The document you are looking for doesn't exist.")
def dashboard_detail_resource(request):
    """Transform a dashboard form or record with an XSLT stylesheet.

    ``request.GET`` must carry ``id`` and ``type`` (``'form'`` or
    ``'record'``). The XML is transformed with the detailed-result XSLT of
    its template when one is set, otherwise with the default
    ``xml2html.xsl``; an empty XML yields the text "No data to display". The
    outcome is left in ``newdom``; the visible body does not build or return
    a response.

    :raises Exception: when ``type`` is neither ``'form'`` nor ``'record'``.
    """
    template = loader.get_template(
        'dashboard/my_dashboard_detail_resource.html')
    result_id = request.GET['id']
    # NOTE(review): `type` shadows the builtin; the name is kept in case code
    # outside the visible body relies on it.
    type = request.GET['type']

    if type == 'form':
        form_data = FormData.objects.get(pk=ObjectId(result_id))
        xmlString = form_data.xml_data.encode('utf-8')
        title = form_data.name
        schemaId = form_data.template
    elif type == 'record':
        xmlString = XMLdata.get(result_id)
        title = xmlString['title']
        schemaId = xmlString['schema']
        xmlString = XMLdata.unparse(xmlString['content']).encode('utf-8')
    else:
        # Fail explicitly: without this branch an unknown type surfaced later
        # as an UnboundLocalError on `xmlString` / `dom`.
        raise Exception("Unknown record type: " + str(type))

    xsltPath = os.path.join(settings.SITE_ROOT, 'static', 'resources', 'xsl',
                            'xml2html.xsl')
    xslt = etree.parse(xsltPath)
    transform = etree.XSLT(xslt)

    # Check if a custom detailed result XSLT has to be used
    try:
        if xmlString != "":
            dom = etree.fromstring(str(xmlString))
            schema = Template.objects.get(pk=schemaId)
            if schema.ResultXsltDetailed:
                shortXslt = etree.parse(
                    BytesIO(schema.ResultXsltDetailed.content.encode('utf-8')))
                shortTransform = etree.XSLT(shortXslt)
                newdom = shortTransform(dom)
            else:
                newdom = transform(dom)
        else:
            newdom = "No data to display"
    except Exception:
        # We use the default one
        newdom = transform(dom)
def explore_detail_result_keyword(request):
    """Render the detail page of one OAI record.

    The record is selected by ``request.GET['id']``. Its metadata is unparsed
    to XML, transformed with the default ``xml2html.xsl`` and handed to the
    template as ``XMLHolder`` together with a title (``request.GET['title']``
    when given, otherwise the record identifier).

    :return: HttpResponse with the rendered template.
    """
    template = loader.get_template(
        'oai_pmh/explore/explore_detail_results_keyword.html')
    record = OaiRecord.objects.get(pk=request.GET['id'])
    title = request.GET['title'] if 'title' in request.GET else record.identifier

    xml_bytes = XMLdata.unparse(record.metadata).encode('utf-8')
    xslt_path = os.path.join(settings.SITE_ROOT, 'static', 'resources', 'xsl',
                             'xml2html.xsl')
    to_html = etree.XSLT(etree.parse(xslt_path))
    html_tree = to_html(etree.fromstring(str(xml_bytes)))

    context = RequestContext(request, {'XMLHolder': str(html_tree), 'title': title})
    return HttpResponse(template.render(context))
def dashboard_detail_resource(request):
    """Transform a dashboard form or record with an XSLT stylesheet.

    ``request.GET`` must carry ``id`` and ``type`` (``'form'`` or
    ``'record'``). The XML is transformed with the detailed-result XSLT of
    its template when one is set, otherwise with the default
    ``xml2html.xsl``; an empty XML yields the text "No data to display". The
    outcome is left in ``newdom``; the visible body does not build or return
    a response.

    :raises Exception: when ``type`` is neither ``'form'`` nor ``'record'``.
    """
    template = loader.get_template('dashboard/my_dashboard_detail_resource.html')
    result_id = request.GET['id']
    # NOTE(review): `type` shadows the builtin; the name is kept in case code
    # outside the visible body relies on it.
    type = request.GET['type']

    if type == 'form':
        form_data = FormData.objects.get(pk=ObjectId(result_id))
        xmlString = form_data.xml_data.encode('utf-8')
        title = form_data.name
        schemaId = form_data.template
    elif type == 'record':
        xmlString = XMLdata.get(result_id)
        title = xmlString['title']
        schemaId = xmlString['schema']
        xmlString = XMLdata.unparse(xmlString['content']).encode('utf-8')
    else:
        # Fail explicitly: without this branch an unknown type surfaced later
        # as an UnboundLocalError on `xmlString` / `dom`.
        raise Exception("Unknown record type: " + str(type))

    xsltPath = os.path.join(settings.SITE_ROOT, 'static', 'resources', 'xsl', 'xml2html.xsl')
    xslt = etree.parse(xsltPath)
    transform = etree.XSLT(xslt)

    # Check if a custom detailed result XSLT has to be used
    try:
        if xmlString != "":
            dom = etree.fromstring(str(xmlString))
            schema = Template.objects.get(pk=schemaId)
            if schema.ResultXsltDetailed:
                shortXslt = etree.parse(BytesIO(schema.ResultXsltDetailed.content.encode('utf-8')))
                shortTransform = etree.XSLT(shortXslt)
                newdom = shortTransform(dom)
            else:
                newdom = transform(dom)
        else:
            newdom = "No data to display"
    except Exception:
        # We use the default one
        newdom = transform(dom)
def get_results_by_instance_keyword(request):
    """Run a keyword search over OAI records and prepare results or suggestions.

    Search parameters are read from ``request.POST``. When ``onlySuggestions``
    is false every hit is appended to ``results`` and rendered with the
    result template into ``resultString`` (list XSLT of the metadata format
    template when set, default ``xml2html.xsl`` otherwise). When it is true,
    words matching the keyword are collected from the first 20 hits into
    ``resultsByKeyword``. The visible body does not return a response.
    """
    print('BEGIN def getResultsKeyword(request)')
    resultsByKeyword = []
    results = []
    resultString = ""

    # Instance
    protocol = "https" if 'HTTPS' in request.META['SERVER_PROTOCOL'] else "http"
    instance = Instance(name="Local",
                        protocol=protocol,
                        address=request.META['REMOTE_ADDR'],
                        port=request.META['SERVER_PORT'],
                        access_token="token",
                        refresh_token="token")
    json_instances = [instance.to_json()]
    request.session['instancesExplore'] = json_instances
    sessionName = "resultsExploreOaiPMh" + instance['name']

    # Search parameters
    keyword = request.POST.get('keyword', '')
    schemas = request.POST.getlist('schemas[]', [])
    user_schemas = request.POST.getlist('userSchemas[]', [])
    raw_refinements = json.loads(request.POST.get('refinements', '{}'))
    refinements = refinements_to_mongo(raw_refinements)
    registries = request.POST.getlist('registries[]', [])
    onlySuggestions = False
    if 'onlySuggestions' in request.POST:
        onlySuggestions = json.loads(request.POST['onlySuggestions'])

    metadata_format_ids = _get_metadata_formats_id(schemas=schemas,
                                                   user_schemas=user_schemas,
                                                   registries=registries)
    instanceResults = OaiRecord.executeFullTextQuery(keyword,
                                                     metadata_format_ids,
                                                     refinements)
    if len(instanceResults) > 0:
        if not onlySuggestions:
            xsltPath = os.path.join(settings.SITE_ROOT,
                                    'static/resources/xsl/xml2html.xsl')
            transform = etree.XSLT(etree.parse(xsltPath))
            template = loader.get_template(
                'oai_pmh/explore/explore_result_keyword.html')

        # Retrieve schema and registries. Avoid to retrieve the information
        # for each result
        registriesName = {}
        registriesURL = {}
        objMetadataFormats = {}
        for registryId in set(x['registry'] for x in instanceResults):
            registry = OaiRegistry.objects(pk=registryId).get()
            registriesName[str(registryId)] = registry.name
            registriesURL[str(registryId)] = registry.url
        for schemaId in set(x['metadataformat'] for x in instanceResults):
            objMetadataFormats[str(schemaId)] = OaiMetadataFormat.objects(
                pk=schemaId).get()

        listItems = []
        if onlySuggestions:
            # The pattern only depends on the keyword, so build it once.
            wordList = re.sub("[^\w]", " ", keyword).split()
            wordList = '|'.join([x + "|" + x + "\w+" for x in wordList])
            pattern = "\\b(" + wordList + ")\\b"
            for instanceResult in instanceResults[:20]:
                listWholeKeywords = re.findall(
                    pattern,
                    XMLdata.unparse(instanceResult['metadata']).encode('utf-8'),
                    flags=re.IGNORECASE)
                for label in set(listWholeKeywords):
                    label = label.lower()
                    result_json = {'label': label, 'value': label}
                    if result_json not in resultsByKeyword:
                        resultsByKeyword.append(result_json)
        else:
            for instanceResult in instanceResults:
                results.append({
                    'title': instanceResult['identifier'],
                    'content': XMLdata.unparse(instanceResult['metadata']),
                    'id': str(instanceResult['_id']),
                })
                dom = etree.XML(
                    str(XMLdata.unparse(
                        instanceResult['metadata']).encode('utf-8')))
                # Check if a custom list result XSLT has to be used
                try:
                    metadataFormat = objMetadataFormats[str(
                        instanceResult['metadataformat'])]
                    if metadataFormat.template.ResultXsltList:
                        listXslt = etree.parse(
                            BytesIO(metadataFormat.template.ResultXsltList.
                                    content.encode('utf-8')))
                        newdom = etree.XSLT(listXslt)(dom)
                        custom_xslt = True
                    else:
                        newdom = transform(dom)
                        custom_xslt = False
                except Exception:
                    # We use the default one
                    newdom = transform(dom)
                    custom_xslt = False

                registry_name = registriesName[instanceResult['registry']]
                if len(registry_name) > 30:
                    registry_name = "{0}...".format(registry_name[:30])
                url = urlparse(registriesURL[instanceResult['registry']])
                context = RequestContext(request, {
                    'id': str(instanceResult['_id']),
                    'xml': str(newdom),
                    'title': instanceResult['identifier'],
                    'custom_xslt': custom_xslt,
                    'template_name': metadataFormat.template.title,
                    'registry_name': registry_name,
                    'registry_url': "{0}://{1}".format(url.scheme, url.netloc),
                    'oai_pmh': True,
                })
                resultString += template.render(context)
def get_results_by_instance_keyword(request):
    """Run a keyword search over OAI records and prepare results or suggestions.

    Search parameters are read from ``request.GET``; when reading them fails,
    empty defaults are used and only suggestions are computed. Template ids
    are resolved from the schema titles, then metadata formats are selected
    for the given registries (or for all non-deactivated registries when none
    is given). When ``onlySuggestions`` is false every hit is appended to
    ``results`` and rendered into ``resultString``; otherwise words matching
    the keyword are collected from the first 20 hits into
    ``resultsByKeyword``. The visible body does not return a response.
    """
    print('BEGIN def getResultsKeyword(request)')
    resultsByKeyword = []
    results = []
    resultString = ""

    # Instance
    protocol = "https" if 'HTTPS' in request.META['SERVER_PROTOCOL'] else "http"
    instance = Instance(name="Local", protocol=protocol, address=request.META['REMOTE_ADDR'],
                        port=request.META['SERVER_PORT'], access_token="token", refresh_token="token")
    json_instances = [instance.to_json()]
    request.session['instancesExplore'] = json_instances
    sessionName = "resultsExploreOaiPMh" + instance['name']

    # Search parameters
    try:
        keyword = request.GET['keyword']
        schemas = request.GET.getlist('schemas[]')
        userSchemas = request.GET.getlist('userSchemas[]')
        refinements = refinements_to_mongo(request.GET.getlist('refinements[]'))
        onlySuggestions = False
        if 'onlySuggestions' in request.GET:
            onlySuggestions = json.loads(request.GET['onlySuggestions'])
        registries = request.GET.getlist('registries[]')
    except:
        keyword = ''
        schemas = []
        userSchemas = []
        refinements = {}
        onlySuggestions = True
        registries = []

    # We get all template versions for the given schemas
    # First, we take care of user defined schema
    templatesIDUser = [str(x) for x in Template.objects(title__in=userSchemas).distinct(field="id")]
    # Take care of the rest, with versions
    templatesVersions = Template.objects(title__in=schemas).distinct(field="templateVersion")
    # We get all templates ID, for all versions
    allTemplatesIDCommon = TemplateVersion.objects(pk__in=templatesVersions,
                                                   isDeleted=False).distinct(field="versions")
    # We remove the removed version
    allTemplatesIDCommonRemoved = TemplateVersion.objects(pk__in=templatesVersions,
                                                          isDeleted=False).distinct(field="deletedVersions")
    templatesIDCommon = list(set(allTemplatesIDCommon) - set(allTemplatesIDCommonRemoved))
    templatesID = templatesIDUser + templatesIDCommon

    if registries:
        # We retrieve registries from the refinement
        metadataFormatsID = OaiMetadataFormat.objects(template__in=templatesID,
                                                      registry__in=registries).distinct(field="id")
    else:
        # We retrieve deactivated registries so as not to get their metadata formats
        deactivatedRegistries = [str(x.id) for x in OaiRegistry.objects(isDeactivated=True).order_by('id')]
        metadataFormatsID = OaiMetadataFormat.objects(template__in=templatesID,
                                                      registry__not__in=deactivatedRegistries).distinct(field="id")

    instanceResults = OaiRecord.executeFullTextQuery(keyword, metadataFormatsID, refinements)
    if len(instanceResults) > 0:
        if not onlySuggestions:
            xsltPath = os.path.join(settings.SITE_ROOT, 'static/resources/xsl/xml2html.xsl')
            transform = etree.XSLT(etree.parse(xsltPath))
            template = loader.get_template('oai_pmh/explore/explore_result_keyword.html')

        # Retrieve schema and registries. Avoid to retrieve the information for each result
        registriesName = {}
        objMetadataFormats = {}
        for registryId in set(x['registry'] for x in instanceResults):
            registriesName[str(registryId)] = OaiRegistry.objects(pk=registryId).get().name
        for schemaId in set(x['metadataformat'] for x in instanceResults):
            objMetadataFormats[str(schemaId)] = OaiMetadataFormat.objects(pk=schemaId).get()

        listItems = []
        if onlySuggestions:
            # The pattern only depends on the keyword, so build it once.
            wordList = re.sub("[^\w]", " ", keyword).split()
            wordList = '|'.join([x + "|" + x + "\w+" for x in wordList])
            pattern = "\\b(" + wordList + ")\\b"
            for instanceResult in instanceResults[:20]:
                listWholeKeywords = re.findall(pattern,
                                               XMLdata.unparse(instanceResult['metadata']).encode('utf-8'),
                                               flags=re.IGNORECASE)
                for label in set(listWholeKeywords):
                    label = label.lower()
                    result_json = {'label': label, 'value': label}
                    if result_json not in resultsByKeyword:
                        resultsByKeyword.append(result_json)
        else:
            for instanceResult in instanceResults:
                results.append({'title': instanceResult['identifier'],
                                'content': XMLdata.unparse(instanceResult['metadata']),
                                'id': str(instanceResult['_id'])})
                dom = etree.XML(str(XMLdata.unparse(instanceResult['metadata']).encode('utf-8')))
                # Check if a custom list result XSLT has to be used
                try:
                    metadataFormat = objMetadataFormats[str(instanceResult['metadataformat'])]
                    if metadataFormat.template.ResultXsltList:
                        listXslt = etree.parse(
                            BytesIO(metadataFormat.template.ResultXsltList.content.encode('utf-8')))
                        newdom = etree.XSLT(listXslt)(dom)
                        custom_xslt = True
                    else:
                        newdom = transform(dom)
                        custom_xslt = False
                except Exception:
                    # We use the default one
                    newdom = transform(dom)
                    custom_xslt = False

                registry_name = registriesName[instanceResult['registry']]
                if len(registry_name) > 30:
                    registry_name = "{0}...".format(registry_name[:30])
                context = RequestContext(request, {'id': str(instanceResult['_id']),
                                                   'xml': str(newdom),
                                                   'title': instanceResult['identifier'],
                                                   'custom_xslt': custom_xslt,
                                                   'template_name': metadataFormat.template.title,
                                                   'registry_name': registry_name,
                                                   'oai_pmh': True})
                resultString += template.render(context)
def get_record(self):
    """Answer a GetRecord request for ``self.identifier``.

    The published record matching the identifier is loaded, the metadata
    format for ``self.metadataPrefix`` is resolved and, when that format is
    not a template, the record XML is transformed with the activated XSLT
    registered for the format and the record's template.

    :return: the rendered response, or ``self.errors(...)`` when an
        ``OAIExceptions`` is raised along the way.
    """
    try:
        # Check if the identifier pattern is OK
        record_id = self.check_identifier()
        # Template name
        self.template_name = 'oai_pmh/xml/get_record.xml'

        # Convert id to ObjectId; the record has to be published
        try:
            query = {'_id': ObjectId(record_id), 'ispublished': True}
        except Exception:
            raise idDoesNotExist(self.identifier)

        matches = XMLdata.executeQueryFullResult(query)
        # This id doesn't exist
        if len(matches) == 0:
            raise idDoesNotExist(self.identifier)
        data = matches[0]

        # Get the template for the identifier and the sets for this template
        template = data['schema']
        sets = OaiMySet.objects(templates=template).all()

        # Retrieve the XSLT for the transformation
        needs_transformation = False
        try:
            # Get the metadataformat for the provided prefix
            myMetadataFormat = OaiMyMetadataFormat.objects.get(
                metadataPrefix=self.metadataPrefix)
            # If this metadata prefix is not associated to a template, we
            # need to retrieve the XSLT to do the transformation
            if not myMetadataFormat.isTemplate:
                needs_transformation = True
                # Get information about the XSLT for the MF and the template
                objTempMfXslt = OaiTemplMfXslt.objects(
                    myMetadataFormat=myMetadataFormat,
                    template=template,
                    activated=True).get()
                # If no information or deactivated
                if not objTempMfXslt.xslt:
                    raise cannotDisseminateFormat(self.metadataPrefix)
                xslt = objTempMfXslt.xslt
        except:
            raise cannotDisseminateFormat(self.metadataPrefix)

        # Transform XML data
        dataToTransform = [{
            'title': data['_id'],
            'content': self.cleanXML(XMLdata.unparse(data['content'])),
        }]
        if needs_transformation:
            dataXML = self.getXMLTranformXSLT(dataToTransform, xslt)
        else:
            dataXML = dataToTransform

        # Fill the response
        return self.render_to_response({
            'identifier': self.identifier,
            'last_modified': self.get_last_modified_date(data),
            'sets': sets,
            'XML': dataXML[0]['content'],
            'deleted': data.get('status', '') == Status.DELETED,
        })
    except OAIExceptions as e:
        return self.errors(e.errors)
for template in templatesID: #Retrieve sets for this template sets = OaiMySet.objects(templates=template).all() query['schema'] = template #The record has to be published query['ispublished'] = True #Get all records for this template data = XMLdata.executeQueryFullResult(query) #IF no records, go to the next template if len(data) == 0: continue dataToTransform = [{ 'title': x['_id'], 'content': self.cleanXML(XMLdata.unparse(x['content'])) } for x in data] if myMetadataFormat.isTemplate: #No transformation needed dataXML = dataToTransform else: #Get the XSLT file xslt = objTempMfXslt(template=template).get().xslt #Transform all XML data (1 call) dataXML = self.getXMLTranformXSLT(dataToTransform, xslt) #Add each record for elt in data: identifier = '%s:%s:id/%s' % (settings.OAI_SCHEME, settings.OAI_REPO_IDENTIFIER, elt['_id']) xmlStr = filter(lambda xml: xml['title'] == elt['_id'],
def get_results_by_instance_keyword(request):
    """Run a keyword search over OAI records and prepare results or suggestions.

    Search parameters are read from ``request.GET``; when reading them fails,
    empty defaults are used and only suggestions are computed. Template ids
    are resolved from the schema titles, then metadata formats are selected
    for the given registries (or for all non-deactivated registries when none
    is given). When ``onlySuggestions`` is false every hit is appended to
    ``results`` and rendered into ``resultString``; otherwise words matching
    the keyword are collected from the first 20 hits into
    ``resultsByKeyword``. The visible body does not return a response.
    """
    print('BEGIN def getResultsKeyword(request)')
    resultsByKeyword = []
    results = []
    resultString = ""

    # Instance
    protocol = "https" if 'HTTPS' in request.META['SERVER_PROTOCOL'] else "http"
    instance = Instance(name="Local",
                        protocol=protocol,
                        address=request.META['REMOTE_ADDR'],
                        port=request.META['SERVER_PORT'],
                        access_token="token",
                        refresh_token="token")
    json_instances = [instance.to_json()]
    request.session['instancesExplore'] = json_instances
    sessionName = "resultsExploreOaiPMh" + instance['name']

    # Search parameters
    try:
        keyword = request.GET['keyword']
        schemas = request.GET.getlist('schemas[]')
        userSchemas = request.GET.getlist('userSchemas[]')
        refinements = refinements_to_mongo(
            request.GET.getlist('refinements[]'))
        onlySuggestions = False
        if 'onlySuggestions' in request.GET:
            onlySuggestions = json.loads(request.GET['onlySuggestions'])
        registries = request.GET.getlist('registries[]')
    except:
        keyword = ''
        schemas = []
        userSchemas = []
        refinements = {}
        onlySuggestions = True
        registries = []

    # We get all template versions for the given schemas
    # First, we take care of user defined schema
    templatesIDUser = [
        str(x)
        for x in Template.objects(title__in=userSchemas).distinct(field="id")
    ]
    # Take care of the rest, with versions
    templatesVersions = Template.objects(title__in=schemas).distinct(
        field="templateVersion")
    # We get all templates ID, for all versions
    allTemplatesIDCommon = TemplateVersion.objects(
        pk__in=templatesVersions, isDeleted=False).distinct(field="versions")
    # We remove the removed version
    allTemplatesIDCommonRemoved = TemplateVersion.objects(
        pk__in=templatesVersions,
        isDeleted=False).distinct(field="deletedVersions")
    templatesIDCommon = list(
        set(allTemplatesIDCommon) - set(allTemplatesIDCommonRemoved))
    templatesID = templatesIDUser + templatesIDCommon

    if registries:
        # We retrieve registries from the refinement
        metadataFormatsID = OaiMetadataFormat.objects(
            template__in=templatesID,
            registry__in=registries).distinct(field="id")
    else:
        # We retrieve deactivated registries so as not to get their metadata
        # formats
        deactivatedRegistries = [
            str(x.id)
            for x in OaiRegistry.objects(isDeactivated=True).order_by('id')
        ]
        metadataFormatsID = OaiMetadataFormat.objects(
            template__in=templatesID,
            registry__not__in=deactivatedRegistries).distinct(field="id")

    instanceResults = OaiRecord.executeFullTextQuery(keyword,
                                                     metadataFormatsID,
                                                     refinements)
    if len(instanceResults) > 0:
        if not onlySuggestions:
            xsltPath = os.path.join(settings.SITE_ROOT,
                                    'static/resources/xsl/xml2html.xsl')
            transform = etree.XSLT(etree.parse(xsltPath))
            template = loader.get_template(
                'oai_pmh/explore/explore_result_keyword.html')

        # Retrieve schema and registries. Avoid to retrieve the information
        # for each result
        registriesName = {}
        objMetadataFormats = {}
        for registryId in set(x['registry'] for x in instanceResults):
            registriesName[str(registryId)] = OaiRegistry.objects(
                pk=registryId).get().name
        for schemaId in set(x['metadataformat'] for x in instanceResults):
            objMetadataFormats[str(schemaId)] = OaiMetadataFormat.objects(
                pk=schemaId).get()

        listItems = []
        if onlySuggestions:
            # The pattern only depends on the keyword, so build it once.
            wordList = re.sub("[^\w]", " ", keyword).split()
            wordList = '|'.join([x + "|" + x + "\w+" for x in wordList])
            pattern = "\\b(" + wordList + ")\\b"
            for instanceResult in instanceResults[:20]:
                listWholeKeywords = re.findall(
                    pattern,
                    XMLdata.unparse(
                        instanceResult['metadata']).encode('utf-8'),
                    flags=re.IGNORECASE)
                for label in set(listWholeKeywords):
                    label = label.lower()
                    result_json = {'label': label, 'value': label}
                    if result_json not in resultsByKeyword:
                        resultsByKeyword.append(result_json)
        else:
            for instanceResult in instanceResults:
                results.append({
                    'title': instanceResult['identifier'],
                    'content': XMLdata.unparse(instanceResult['metadata']),
                    'id': str(instanceResult['_id']),
                })
                dom = etree.XML(
                    str(XMLdata.unparse(
                        instanceResult['metadata']).encode('utf-8')))
                # Check if a custom list result XSLT has to be used
                try:
                    metadataFormat = objMetadataFormats[str(
                        instanceResult['metadataformat'])]
                    if metadataFormat.template.ResultXsltList:
                        listXslt = etree.parse(
                            BytesIO(metadataFormat.template.ResultXsltList.
                                    content.encode('utf-8')))
                        newdom = etree.XSLT(listXslt)(dom)
                        custom_xslt = True
                    else:
                        newdom = transform(dom)
                        custom_xslt = False
                except Exception:
                    # We use the default one
                    newdom = transform(dom)
                    custom_xslt = False

                registry_name = registriesName[instanceResult['registry']]
                if len(registry_name) > 30:
                    registry_name = "{0}...".format(registry_name[:30])
                context = RequestContext(request, {
                    'id': str(instanceResult['_id']),
                    'xml': str(newdom),
                    'title': instanceResult['identifier'],
                    'custom_xslt': custom_xslt,
                    'template_name': metadataFormat.template.title,
                    'registry_name': registry_name,
                    'oai_pmh': True,
                })
                resultString += template.render(context)
templatesID = set(templatesID).intersection([str(x.id) for x in setsTemplates.templates]) except Exception, e: raise noRecordsMatch #For each template found for template in templatesID: #Retrieve sets for this template sets = OaiMySet.objects(templates=template).all() query['schema'] = template #The record has to be published query['ispublished'] = True #Get all records for this template data = XMLdata.executeQueryFullResult(query) #IF no records, go to the next template if len(data) == 0: continue dataToTransform = [{'title': x['_id'], 'content': self.cleanXML(XMLdata.unparse(x['content']))} for x in data] if myMetadataFormat.isTemplate: #No transformation needed dataXML = dataToTransform else: #Get the XSLT file xslt = objTempMfXslt(template=template).get().xslt #Transform all XML data (1 call) dataXML = self.getXMLTranformXSLT(dataToTransform, xslt) #Add each record for elt in data: identifier = '%s:%s:id/%s' % (settings.OAI_SCHEME, settings.OAI_REPO_IDENTIFIER, elt['_id']) xmlStr = filter(lambda xml: xml['title'] == elt['_id'], dataXML)[0] record_info = { 'identifier': identifier,
def get_record(self):
    """Render one published record in the requested metadata format.

    Steps, in order:
      1. ``self.check_identifier()`` validates ``self.identifier`` and
         returns the record id.
      2. The record is fetched through ``XMLdata.executeQueryFullResult``
         with ``_id`` set to that id and ``ispublished`` set to True; the
         first result is used.
      3. The sets attached to the record's template (``data['schema']``)
         are loaded from ``OaiMySet``.
      4. The metadata format matching ``self.metadataPrefix`` is looked up.
         When that format is not itself a template, the activated XSLT
         registered for (format, template) is fetched and applied to the
         record's XML.
      5. The result is rendered with ``self.render_to_response``.

    Side effect: sets ``self.template_name`` to the get_record XML template.

    Returns:
        The value of ``self.render_to_response(record_info)`` on success,
        or ``self.errors(e.errors)`` when an ``OAIExceptions`` is caught.

    Raises:
        idDoesNotExist: the id cannot be converted to an ``ObjectId`` or no
            published record matches it.
        cannotDisseminateFormat: the metadata format or its XSLT cannot be
            resolved for this record's template.
        NOTE(review): both are only handled here if they derive from
        ``OAIExceptions``; that hierarchy is not visible from this block.
    """
    try:
        # Check if the identifier pattern is OK
        record_id = self.check_identifier()
        # Template name
        self.template_name = 'oai_pmh/xml/get_record.xml'

        # Convert id to ObjectId. Only the conversion is guarded: any
        # failure here is reported to the caller as an unknown identifier.
        try:
            object_id = ObjectId(record_id)
        except Exception:
            raise idDoesNotExist(self.identifier)

        # The record has to be published
        query = {'_id': object_id, 'ispublished': True}
        data = XMLdata.executeQueryFullResult(query)
        # This id doesn't exist
        if len(data) == 0:
            raise idDoesNotExist(self.identifier)
        data = data[0]

        # Get the template for the identifier
        template = data['schema']
        # Retrieve sets for this template
        sets = OaiMySet.objects(templates=template).all()

        # Whether the XML must go through an XSLT, and which one.
        hasToBeTransformed = False
        xslt = None
        try:
            # Get the metadataformat for the provided prefix
            myMetadataFormat = OaiMyMetadataFormat.objects.get(
                metadataPrefix=self.metadataPrefix)
            # If this metadata prefix is not associated to a template, we
            # need to retrieve the XSLT to do the transformation
            if not myMetadataFormat.isTemplate:
                hasToBeTransformed = True
                # Get information about the XSLT for the MF and the template
                objTempMfXslt = OaiTemplMfXslt.objects(
                    myMetadataFormat=myMetadataFormat,
                    template=template,
                    activated=True).get()
                # If no information or deactivated
                if not objTempMfXslt.xslt:
                    raise cannotDisseminateFormat(self.metadataPrefix)
                # Get the XSLT for the transformation
                xslt = objTempMfXslt.xslt
        except Exception:
            # Any lookup failure means the format cannot be served for this
            # record. ``Exception`` (rather than a bare ``except``) lets
            # SystemExit / KeyboardInterrupt propagate untouched.
            raise cannotDisseminateFormat(self.metadataPrefix)

        # Transform XML data
        dataToTransform = [{
            'title': data['_id'],
            'content': self.cleanXML(XMLdata.unparse(data['content'])),
        }]
        if hasToBeTransformed:
            dataXML = self.getXMLTranformXSLT(dataToTransform, xslt)
        else:
            dataXML = dataToTransform

        # Fill the response
        record_info = {
            'identifier': self.identifier,
            'last_modified': self.get_last_modified_date(data),
            'sets': sets,
            'XML': dataXML[0]['content'],
            'deleted': data.get('status', '') == Status.DELETED,
        }
        return self.render_to_response(record_info)
    except OAIExceptions as e:
        return self.errors(e.errors)