Example #1
0
def transform_lines(url):
    """ Parse an XML file, number its lines and make it CTS-compliant

    The language, URN, output target and parsed document all come from
    ``common.parse(url)``. Every ``l`` element then receives a document-wide
    sequential ``@n`` (an existing value is overwritten), ``div1`` and ``div2``
    elements are renamed to ``div``, and the result is handed to
    ``common.write_and_clean`` together with a single "line" citation.

    A failure while writing is reported on stdout and is not re-raised.

    :param url: A Perseus Github Raw address
    :type url: str

    """

    lang, urn, target, parsed = common.parse(url)

    # URNs carrying neither a "grc" nor a "lat" marker are treated as translations
    if "grc" not in urn and "lat" not in urn:
        type_text = "translation"
    else:
        type_text = "edition"

    # Number every line across the whole document, starting at 1.
    # .set(_, _) accepts only strings, hence str().
    for number, line in enumerate(parsed.xpath("//l"), start=1):
        line.set("n", str(number))

    # Rename div1 and div2 to plain div. Mind the |// in the XPath union!
    for div in parsed.xpath("//div1|//div2"):
        div.tag = "div"

    # refsDecl information for CTS: a single citation level addressing lines
    citations = [
        MyCapytain.resources.texts.tei.Citation(
            name="line", refsDecl="/tei:TEI/tei:text/tei:body/tei:div[@type='" + type_text + "']//tei:l[@n='$1']"
        )
    ]

    try:
        common.write_and_clean(urn, lang, parsed, citations, target)
    except Exception as E:
        # Best-effort conversion: report the failure and its cause, but let
        # KeyboardInterrupt / SystemExit propagate (a bare except would not).
        print(urn + " failed")
        print(E)
def _normalize_div_level(parsed, tag):
    """ Rename every ``tag`` element to ``div`` and mark it as a textpart

    For each matching element the former ``@type`` is copied to ``@subtype``,
    ``@type`` becomes ``"textpart"`` and, when the element has no ``@n``, its
    1-based position among all matches in the document is used as ``@n``.

    :param parsed: Parsed XML document exposing ``xpath``
    :param tag: Name of the numbered div element to convert (e.g. ``"div1"``)
    :type tag: str
    :returns: The matched elements and the original ``@type`` of the last one
        (``None`` when nothing matched)
    :rtype: tuple

    """
    group = parsed.xpath("//" + tag)
    subtype = None
    # NOTE(review): the position counter runs over the whole document and is
    # not reset per parent div - confirm this is intended for nested levels.
    for position, div in enumerate(group, start=1):
        div.tag = "div"
        # To deal with different subtypes, the former @type moves to @subtype
        subtype = div.get("type")
        div.set("subtype", subtype)
        div.set("type", "textpart")

        # Keep an existing @n; only fill in missing ones
        if div.get("n") is None:
            div.set("n", str(position))
    return group, subtype


def transform(url):
    """ Parse an XML file and make its div1/div2/div3 hierarchy CTS-compliant

    The language, URN, output target and parsed document all come from
    ``common.parse(url)``. ``div1``, ``div2`` and ``div3`` elements are renamed
    to ``div`` with ``@type="textpart"`` (their former ``@type`` moves to
    ``@subtype``), one citation per level that is present is built, and the
    result is handed to ``common.write_and_clean``.

    Failures (no ``div1`` in the document, or an error while writing) are
    reported on stdout and are not re-raised.

    :param url: A Perseus Github Raw address
    :type url: str

    """

    lang, urn, target, parsed = common.parse(url)

    # URNs carrying neither a "grc" nor a "lat" marker are treated as translations
    if "grc" not in urn and "lat" not in urn:
        type_text = "translation"
    else:
        type_text = "edition"

    # Change div1 to div, moving their @type to @subtype
    div1_group, div1_subtype = _normalize_div_level(parsed, "div1")
    if not div1_group:
        # The top citation level is named after the div1 @type; without any
        # div1 there is nothing to cite, so report instead of crashing later.
        print(urn + " failed")
        print("no div1 element found")
        return

    # Change div2 to div, moving their @type to @subtype
    div2_group, div2_subtype = _normalize_div_level(parsed, "div2")

    # Change div3 to div, moving their @type to @subtype
    div3_group, div3_subtype = _normalize_div_level(parsed, "div3")

    # Add refsDecl information for CTS, deepest level first
    level_1 = "/tei:TEI/tei:text/tei:body/tei:div[@type='" + type_text + "']/tei:div[@n='$1']"
    level_2 = level_1 + "/tei:div[@n='$2']"
    level_3 = level_2 + "/tei:div[@n='$3']"

    citations = []
    # Used only if div3 > 0
    if div3_group:
        citations.append(
            MyCapytain.resources.texts.tei.Citation(
                name=div3_subtype,
                refsDecl=level_3
            )
        )
    # Used only if div2 > 0
    if div2_group:
        citations.append(
            MyCapytain.resources.texts.tei.Citation(
                name=div2_subtype,
                refsDecl=level_2
            )
        )
    citations.append(
        MyCapytain.resources.texts.tei.Citation(
            name=div1_subtype,
            refsDecl=level_1
        )
    )

    try:
        common.write_and_clean(urn, lang, parsed, citations, target)
    except Exception as E:
        # Best-effort conversion: report the failure and its cause
        print(urn + " failed")
        print(E)