Exemple #1
0
def import_xml(filename, copy_status=MASTER):
    """
    Import the resource described by the XML file at `filename`.

    The whole file is read into a string and handed to
    `xml_utils.import_from_string` together with the INTERNAL constant and
    the given `copy_status`.

    Returns the imported resource object on success, raises an Exception on
    failure.
    """
    # The context manager guarantees the file is closed even if read() raises.
    with open(filename) as _xml:
        _xml_string = _xml.read()
    return xml_utils.import_from_string(_xml_string, INTERNAL, copy_status)
Exemple #2
0
def import_xml(filename, copy_status=MASTER):
    """
    Import the resource described by the XML file at `filename`.

    The whole file is read into a string and handed to
    `xml_utils.import_from_string` together with the INTERNAL constant and
    the given `copy_status`.

    Returns the imported resource object on success, raises an Exception on
    failure.
    """
    # The context manager guarantees the file is closed even if read() raises.
    with open(filename) as _xml:
        _xml_string = _xml.read()
    return xml_utils.import_from_string(_xml_string, INTERNAL, copy_status)
Exemple #3
0
def _add_resource(repository, remote_id, metadataPrefix, raw_xml_record,
                  source_url):
    """
    Import one harvested record and register it in `repository`.

    The raw record is converted with `_convert_to_MSschema`, imported as a
    resource, and `repository[remote_id]` is set to
    `[storage_object, checksum]`, where the checksum is computed from the
    raw (unconverted) record.

    Returns the imported resource.
    """
    # TODO: copy_status = PROXY or MASTER. If PROXY then source_node=None
    # and source_url the harvested repo's url. If MASTER then source_node=None
    # and source_url=our repo's url
    converted_record = _convert_to_MSschema(metadataPrefix, raw_xml_record)

    if source_url:
        # The resource comes from a META-SHARE node: the imported resource
        # belongs to that repository and keeps the remote identifier.
        imported = add_or_update_resource(
            None, converted_record, None,
            source_node=source_url,
            identifier=remote_id,
            publication_status=INGESTED,
            source_url=source_url)
    else:
        # Otherwise this node becomes the master META-SHARE node of the
        # imported resource.
        imported = import_from_string(
            converted_record, INGESTED, MASTER, DJANGO_URL)

    record_checksum = _compute_checksum(raw_xml_record)
    storage_object = imported.storage_object
    repository[remote_id] = [storage_object, record_checksum]
    return imported
Exemple #4
0
def build_target_metadata(data_dict):
    """
    Create a new "(Processed)" resource from a source resource's metadata.

    Values read from `data_dict`:
      * "resource_info": mapping providing "metadata" (the source XML as a
        string), "owners" and "resource" (the source resource object)
      * "resource_id": identifier of the source resource
      * "mimetype": key into `data_Format` and `relations_map`
      * "size_info" (optional): mapping providing "files" and "size_sum"
      * "path": directory whose files are moved into the new resource's
        storage folder; the directory is removed afterwards

    The source XML is edited (name, description, data format, sizes,
    relation, version, dates), imported as a new resource, and the source
    resource is then given version "1.0" and the opposite relation pointing
    at the new resource.

    Returns None.
    """
    resource_info = data_dict.get("resource_info")
    mimetype = data_dict.get("mimetype")
    # Value equality, not identity: `is` on strings only works by accident of
    # interning and fails e.g. for unicode values.
    is_tmx_or_tbx = mimetype in ("TMX", "TBX")

    xml = etree.fromstring(resource_info.get("metadata"))

    # PROCESS NAME
    resourceName = xml.find(
        "{}identificationInfo/{}resourceName[@lang='en']".format(ns, ns),
        namespaces=NS)
    add_to_name = u"(Processed)"
    resourceName.text = u"{} {}".format(resourceName.text, add_to_name)
    # Single-argument parenthesised form prints identically on Python 2 and
    # is also valid Python 3 syntax.
    print("Adding {} with source id{}".format(smart_str(resourceName.text),
                                              data_dict.get("resource_id")))

    # PROCESS DESCRIPTION
    description = xml.find(
        "{}identificationInfo/{}description[@lang='en']".format(ns, ns),
        namespaces=NS)
    add_to_descr = u"(Processed)"
    description.text = u"{} {}".format(description.text, add_to_descr)

    # EDIT TEXTFORMAT
    # Drop every existing textFormatInfo and insert a single new data format
    # element right after a sizeInfo element.
    for tf in xml.findall(".//{}textFormatInfo".format(ns), namespaces=NS):
        tf.getparent().remove(tf)
    # NOTE(review): `[last()]` is evaluated per parent, so this may match
    # several nodes; the first match in document order is used.
    last_size_info = xml.xpath(u'//ms:sizeInfo[last()]', namespaces=NS)
    last_size_info[0].addnext(
        create_data_format_element(data_Format.get(mimetype)))

    # get the languageInfo to append the new sizes after it
    languageInfo = xml.xpath(u'//ms:languageInfo[last()]', namespaces=NS)

    # HANDLE SIZES
    # delete existing sizes and sizePerLanguage only if processed is TMX/TBX
    if is_tmx_or_tbx:
        size_infos = xml.findall(".//{}sizeInfo".format(ns), namespaces=NS)
        size_per_lang = xml.findall(".//{}sizePerLanguage".format(ns),
                                    namespaces=NS)
        for sin in size_infos:
            sin.getparent().remove(sin)
        for spl in size_per_lang:
            spl.getparent().remove(spl)

    # Size information is optional: a missing "size_info", "files" or
    # "size_sum" key deliberately results in no (further) size element.
    try:
        size_info = data_dict["size_info"]
        if size_info["files"] > 1:
            languageInfo[0].addnext(
                create_size_element("files", size_info["files"]))
        if is_tmx_or_tbx and size_info["size_sum"] > 0:
            size_unit = "translationUnits" if mimetype == 'TMX' else "terms"
            languageInfo[0].addnext(
                create_size_element(size_unit, size_info["size_sum"]))
    except KeyError:
        pass

    # HANDLE RELATIONS AND VERSIONS
    # For each new resource, add the proper relation and version to that
    # resource and its source

    # Processed resource
    # find the last resourceCreationInfo to add the relation after it
    resourceCreationInfo = xml.xpath(u'//ms:resourceCreationInfo[last()]',
                                     namespaces=NS)

    # create a relation according to the mimetype of the new resource
    relation = create_relation_element(
        relations_map.get(mimetype)[0],
        str(data_dict.get("resource_id")))

    # REMEMBER TO ADD THE OPPOSITE RELATION TO SOURCE RESOURCE AFTER
    # IMPORT/SAVE (done at the end of this function)
    resourceCreationInfo[0].addnext(relation)

    # create a version element after metadataInfo
    # <versionInfo>
    #     <version>2.0</version>
    # </versionInfo>
    metadataInfo = xml.xpath(u'//ms:metadataInfo[last()]', namespaces=NS)
    metadataInfo[0].addnext(create_version_element(u"2.0"))

    # remove the last-updated date since it's a new record; the element may
    # be absent, in which case there is nothing to remove
    updated = metadataInfo[0].find("{}metadataLastDateUpdated".format(ns),
                                   namespaces=NS)
    if updated is not None:
        updated.getparent().remove(updated)

    # create the new record
    new_resource = import_from_string(etree.tostring(xml), INGESTED, MASTER)
    # now add the owners we got from the source resource
    new_resource.owners = resource_info.get("owners")
    # set the creation date in metadataInfo to today
    new_resource.metadataInfo.metadataCreationDate = date.today()
    new_resource.metadataInfo.save()

    new_resource.resourceCreationInfo.fundingProject.add(
        get_or_create_project())
    new_resource.save()

    # and finally "upload" the dataset and validation report to the
    # respective folder, then drop the now-empty source directory
    upload_dir = data_dict.get("path")
    for ftomove in os.listdir(upload_dir):
        shutil.move(
            os.path.join(upload_dir, ftomove),
            os.path.join(new_resource.storage_object._storage_folder(),
                         ftomove))
    shutil.rmtree(upload_dir)

    # MODIFY SOURCE RESOURCE
    # Give the source version "1.0" and the opposite relation, targeting the
    # newly created resource.
    source = resource_info.get("resource")
    source.versionInfo = versionInfoType_model.objects.create(version=u"1.0")
    target_resource = targetResourceInfoType_model.objects.create(
        targetResourceNameURI=str(new_resource.id))
    source_relation = relationInfoType_model.objects.create(
        relationType=relations_map.get(mimetype)[1],
        relatedResource=target_resource)
    source_relation.back_to_resourceinfotype_model = source
    source_relation.save()
    source.save()