def import_xml(filename, copy_status=MASTER):
    """
    Import the resource described by the XML file at `filename`.

    The complete file content is read into memory and handed to
    xml_utils.import_from_string() together with INTERNAL (second
    argument; presumably the target publication status -- confirm against
    xml_utils) and the given `copy_status`.

    Returns the imported resource object on success, raises an Exception
    on failure.
    """
    # The context manager guarantees the file handle is released even when
    # reading fails part-way through.
    with open(filename) as xml_file:
        xml_string = xml_file.read()
    return xml_utils.import_from_string(xml_string, INTERNAL, copy_status)
def _add_resource(repository, remote_id, metadataPrefix, raw_xml_record,
                  source_url):
    """
    Import one harvested record and register it in `repository`.

    The raw record is first converted with _convert_to_MSschema(). When a
    `source_url` is given, the record is stored through
    add_or_update_resource() using `remote_id` as identifier and
    `source_url` as both source node and source URL; the resource then
    belongs to that remote repository. Otherwise the record is imported
    with import_from_string() as a MASTER copy tied to DJANGO_URL, i.e.
    this node becomes the master node of the resource.

    Afterwards `repository[remote_id]` is set to the two-element list
    [storage object of the resource, checksum of the raw record].

    Returns the imported resource.
    """
    # TODO: copy_status = PROXY or MASTER. If PROXY then source_node=None
    # and source_url the harvested repo's url. If MASTER then
    # source_node=None and source_url=our repo's url
    converted_record = _convert_to_MSschema(metadataPrefix, raw_xml_record)

    if source_url:
        # Record comes from a remote node: keep the remote identifier.
        resource = add_or_update_resource(
            None, converted_record, None,
            source_node=source_url,
            identifier=remote_id,
            publication_status=INGESTED,
            source_url=source_url)
    else:
        # No remote source: this node is the master of the new resource.
        resource = import_from_string(
            converted_record, INGESTED, MASTER, DJANGO_URL)

    # The checksum is taken over the record as harvested, not over the
    # converted version.
    record_checksum = _compute_checksum(raw_xml_record)
    repository[remote_id] = [resource.storage_object, record_checksum]
    return resource
def build_target_metadata(data_dict):
    """
    Create the metadata record of a processed resource and link it to its
    source resource.

    Starting from the source resource's metadata XML, the function
    marks name and description as "(Processed)", replaces the text format
    information, adjusts the size information, adds a relation to the
    source resource and a version element ("2.0"), imports the result as a
    new INGESTED/MASTER resource, moves the processed files into the new
    resource's storage folder and finally updates the source resource
    (version "1.0" plus the opposite relation).

    Keys read from `data_dict`:
      - "resource_info": dict with "metadata" (XML string), "owners" and
        "resource" (the source resource object)
      - "resource_id": id of the source resource
      - "mimetype": key into data_Format and relations_map
      - "size_info" (optional): dict with "files" and "size_sum"
      - "path": directory holding the processed files; it is emptied into
        the new resource's storage folder and then deleted

    Returns None.
    """
    resource_info = data_dict.get("resource_info")
    mimetype = data_dict.get("mimetype")
    xml = etree.fromstring(resource_info.get("metadata"))

    # # # PROCESS NAME
    resourceName = xml.find(
        "{}identificationInfo/{}resourceName[@lang='en']".format(ns, ns),
        namespaces=NS)
    add_to_name = u"(Processed)"
    resourceName.text = u"{} {}".format(resourceName.text, add_to_name)
    # Single-argument print(...) behaves the same with and without
    # print_function.
    print("Adding {} with source id{}".format(
        smart_str(resourceName.text), data_dict.get("resource_id")))

    # # # PROCESS DESCRIPTION
    description = xml.find(
        "{}identificationInfo/{}description[@lang='en']".format(ns, ns),
        namespaces=NS)
    add_to_descr = u"(Processed)"
    description.text = u"{} {}".format(description.text, add_to_descr)

    # # # EDIT TEXTFORMAT
    # Drop every existing textFormatInfo and add a single data format
    # element (chosen by mimetype) after the last sizeInfo.
    for tf in xml.findall(".//{}textFormatInfo".format(ns), namespaces=NS):
        tf.getparent().remove(tf)
    last_size_info = xml.xpath(u'//ms:sizeInfo[last()]', namespaces=NS)
    last_size_info[0].addnext(
        create_data_format_element(data_Format.get(mimetype)))

    # # get the languageInfo to append after it
    languageInfo = xml.xpath(u'//ms:languageInfo[last()]', namespaces=NS)

    # HANDLE SIZES
    # Delete existing sizes and sizePerLanguage only if the processed
    # resource is TMX or TBX. The comparison must be by value (==): an
    # identity test against a string literal only succeeds when both
    # strings happen to be the same interned object.
    if mimetype == "TMX" or mimetype == "TBX":
        size_infos = xml.findall(".//{}sizeInfo".format(ns), namespaces=NS)
        size_per_lang = xml.findall(".//{}sizePerLanguage".format(ns),
                                    namespaces=NS)
        for sin in size_infos:
            sin.getparent().remove(sin)
        for spl in size_per_lang:
            spl.getparent().remove(spl)

    # Size information is optional: a missing "size_info" entry (or a
    # missing key inside it) simply means no size element is added.
    try:
        size_info = data_dict["size_info"]
        if size_info["files"] > 1:
            languageInfo[0].addnext(
                create_size_element("files", size_info["files"]))
        if (mimetype == 'TMX' or mimetype == 'TBX') \
                and size_info["size_sum"] > 0:
            languageInfo[0].addnext(
                create_size_element(
                    "translationUnits" if mimetype == 'TMX' else "terms",
                    size_info["size_sum"]))
    except KeyError:
        pass

    # HANDLE RELATIONS AND VERSIONS
    # For each new resource, add the proper relation and version to that
    # resource and its source.

    # Processed resource
    # find the last resourceCreationInfo to add relation after it
    resourceCreationInfo = xml.xpath(u'//ms:resourceCreationInfo[last()]',
                                     namespaces=NS)
    # create a relation according to the mimetype of the new resource
    relation = create_relation_element(
        relations_map.get(mimetype)[0],
        str(data_dict.get("resource_id")))
    # REMEMBER TO ADD THE OPPOSITE RELATION TO SOURCE RESOURCE AFTER
    # IMPORT/SAVE
    resourceCreationInfo[0].addnext(relation)

    # create a version element after metadataInfo
    # <versionInfo>
    #   <version>2.0</version>
    # </versionInfo>
    metadataInfo = xml.xpath(u'//ms:metadataInfo[last()]', namespaces=NS)
    metadataInfo[0].addnext(create_version_element(u"2.0"))

    # Remove the last-updated date since this is a new record; the element
    # may be absent, in which case there is nothing to remove.
    updated = metadataInfo[0].find("{}metadataLastDateUpdated".format(ns),
                                   namespaces=NS)
    if updated is not None:
        updated.getparent().remove(updated)

    # create the new record
    new_resource = import_from_string(etree.tostring(xml), INGESTED, MASTER)
    # now add the owners we got from the source resource
    new_resource.owners = resource_info.get("owners")
    # edit the creation date in metadataInfo
    new_resource.metadataInfo.metadataCreationDate = date.today()
    new_resource.metadataInfo.save()
    new_resource.resourceCreationInfo.fundingProject.add(
        get_or_create_project())
    new_resource.save()

    # and finally "upload" the dataset and validation report to the
    # respective folder, then drop the now empty working directory
    processed_path = data_dict.get("path")
    for ftomove in os.listdir(processed_path):
        shutil.move(
            os.path.join(processed_path, ftomove),
            os.path.join(new_resource.storage_object._storage_folder(),
                         ftomove))
    shutil.rmtree(processed_path)

    # MODIFY SOURCE RESOURCE
    # The source gets version "1.0" and the opposite relation, pointing at
    # the id of the newly created resource.
    source = resource_info.get("resource")
    source.versionInfo = versionInfoType_model.objects.create(version=u"1.0")
    target_resource = targetResourceInfoType_model.objects.create(
        targetResourceNameURI=str(new_resource.id))
    source_relation = relationInfoType_model.objects.create(
        relationType=relations_map.get(mimetype)[1],
        relatedResource=target_resource)
    source_relation.back_to_resourceinfotype_model = source
    source_relation.save()
    source.save()