Ejemplos de scrape en Python, ejemplos de lat_epig.parse.scrape en Python

Ejemplo n.º 1

0

Mostrar archivo

def test_single_valued_datespan():
    """A span whose start and end coincide fills all four date fields.

    Record EDCS-24900077 carries the dating "-68 to -68".
    """
    query = argparse.Namespace(
        EDCS='24900077',
        publication=None,
        province=None,
        place=None,
        operator='and',
        term2=None,
        dating_from=None,
        dating_to=None,
        inscription_genus=None,
        and_not_inscription_genus=None,
        to_file=None,
        from_file=None,
        debug=True,
        term1='%',
    )

    record = scrape(query, prevent_write=True)[0]

    # Every date column is expected to hold the same single year.
    for field in ('dating from', 'dating to', 'date not before',
                  'date not after'):
        assert record[field] == -68

Ejemplo n.º 2

0

Mostrar archivo

def test_missing_first_date():
    """A dating with no start year leaves the "from" fields empty.

    Record EDCS-34901010 carries the dating " to 100": only the end year is
    present, so both lower-bound fields are expected to be None.
    """
    args = argparse.Namespace(EDCS='34901010',
                              publication=None,
                              province=None,
                              place=None,
                              operator='and',
                              term2=None,
                              dating_from=None,
                              dating_to=None,
                              inscription_genus=None,
                              and_not_inscription_genus=None,
                              to_file=None,
                              from_file=None,
                              debug=True,
                              term1='%')

    test_output = scrape(args, prevent_write=True)

    # Identity, not equality, is the correct check for the None singleton.
    assert test_output[0]['dating from'] is None
    assert test_output[0]['dating to'] == 100
    assert test_output[0]['date not before'] is None
    assert test_output[0]['date not after'] == 100

Ejemplo n.º 3

0

Mostrar archivo

Archivo: test_inscriptions.py Proyecto: mqAncientHistory/EpigraphyScraperNotebook

def test_inscription_vir():
    """Both cleaned inscription columns split "IIIIvir" into "IIII vir ".

    Equivalent command line: ./parse.py -e 24900101  % --debug
    """
    query = argparse.Namespace(
        EDCS='24900101',
        publication=None,
        province=None,
        place=None,
        operator='and',
        term2=None,
        dating_from=None,
        dating_to=None,
        inscription_genus=None,
        and_not_inscription_genus=None,
        to_file=None,
        from_file=None,
        debug=True,
        term1='%',
    )

    record = scrape(query,
                    prevent_write=True,
                    show_inscription_transform=True)[0]
    cleaned_columns = ('inscription conservative cleaning',
                       'inscription interpretive cleaning')

    # The raw text keeps the fused form ...
    assert "IIIIvir" in record['inscription']
    # ... while neither cleaned column does ...
    for column in cleaned_columns:
        assert "IIIIvir" not in record[column]
    # ... and both contain the separated form instead.
    for column in cleaned_columns:
        assert "IIII vir " in record[column]

Ejemplo n.º 4

0

Mostrar archivo

Archivo: test_inscriptions.py Proyecto: mqAncientHistory/EpigraphyScraperNotebook

def test_inscription_three_middle():
    """The "3" marker inside a bracketed span is gone after cleaning.

    Equivalent command line: ./parse.py -e 09000264 20700224 % --debug
    """
    query = argparse.Namespace(
        EDCS='20700224',
        publication=None,
        province=None,
        place=None,
        operator='and',
        term2=None,
        dating_from=None,
        dating_to=None,
        inscription_genus=None,
        and_not_inscription_genus=None,
        to_file=None,
        from_file=None,
        debug=True,
        term1='%',
    )

    record = scrape(query,
                    prevent_write=True,
                    show_inscription_transform=True)[0]
    raw_fragment = "mo[numentum 3 vi]/olaverit"

    # Present verbatim in the raw text, absent from both cleaned columns.
    assert raw_fragment in record['inscription']
    for column in ('inscription conservative cleaning',
                   'inscription interpretive cleaning'):
        assert raw_fragment not in record[column]

Ejemplo n.º 5

0

Mostrar archivo

Archivo: test_inscriptions.py Proyecto: mqAncientHistory/EpigraphyScraperNotebook

def test_inscription_substitution():
    """"<x=Y>" substitution markup does not survive either cleaning pass.

    Equivalent command line: ./parse.py -e 15300609  % --debug
    """
    # NOTE(review): the earlier comment on this test named record 34100092,
    # but the query below has always used 15300609 -- confirm the intended ID.
    query = argparse.Namespace(
        EDCS='15300609',
        publication=None,
        province=None,
        place=None,
        operator='and',
        term2=None,
        dating_from=None,
        dating_to=None,
        inscription_genus=None,
        and_not_inscription_genus=None,
        to_file=None,
        from_file=None,
        debug=True,
        term1='%',
    )

    record = scrape(query,
                    prevent_write=True,
                    show_inscription_transform=True)[0]
    raw_fragment = "sanc<t=I>issi<ma=AM>"

    # Present verbatim in the raw text, absent from both cleaned columns.
    assert raw_fragment in record['inscription']
    for column in ('inscription conservative cleaning',
                   'inscription interpretive cleaning'):
        assert raw_fragment not in record[column]

Ejemplo n.º 6

0

Mostrar archivo

Archivo: test_inscriptions.py Proyecto: mqAncientHistory/EpigraphyScraperNotebook

def test_inscription_expanded_abbreviations():
    """Parenthesised expansions are dropped conservatively, kept interpretively.

    Equivalent command line: ./parse.py -e 27000432  % --debug
    """
    query = argparse.Namespace(
        EDCS='27000432',
        publication=None,
        province=None,
        place=None,
        operator='and',
        term2=None,
        dating_from=None,
        dating_to=None,
        inscription_genus=None,
        and_not_inscription_genus=None,
        to_file=None,
        from_file=None,
        debug=True,
        term1='%',
    )

    record = scrape(query,
                    prevent_write=True,
                    show_inscription_transform=True)[0]
    abbreviated = "D(is) M(anibus)"

    assert abbreviated in record['inscription']
    assert abbreviated not in record['inscription conservative cleaning']
    # Conservative cleaning keeps only the letters outside the parentheses;
    # interpretive cleaning keeps the expanded words.
    assert "D M " in record['inscription conservative cleaning']
    assert "Dis Manibus" in record['inscription interpretive cleaning']

Ejemplo n.º 7

0

Mostrar archivo

Archivo: test_inscriptions.py Proyecto: mqAncientHistory/EpigraphyScraperNotebook

def test_inscription_restoration():
    """Bracketed restorations are dropped conservatively, kept interpretively.

    Equivalent command line: ./parse.py -e 34100092  % --debug
    """
    query = argparse.Namespace(
        EDCS='34100092',
        publication=None,
        province=None,
        place=None,
        operator='and',
        term2=None,
        dating_from=None,
        dating_to=None,
        inscription_genus=None,
        and_not_inscription_genus=None,
        to_file=None,
        from_file=None,
        debug=True,
        term1='%',
    )

    record = scrape(query,
                    prevent_write=True,
                    show_inscription_transform=True)[0]
    restored = "nillae c[oniugi]"

    assert restored in record['inscription']
    assert restored not in record['inscription conservative cleaning']
    # Conservative cleaning removes the restored letters; interpretive
    # cleaning keeps them and removes only the brackets.
    assert "nillae c " in record['inscription conservative cleaning']
    assert "nillae coniugi" in record['inscription interpretive cleaning']

Ejemplo n.º 8

0

Mostrar archivo

def test_random__middle_date():
    """Only the second of four lettered date slots is filled in.

    Record EDCS-32001032 carries the dating
    "a:  ;   b:  71 to 100;   c:  ;   d:".
    """
    query = argparse.Namespace(
        EDCS='32001032',
        publication=None,
        province=None,
        place=None,
        operator='and',
        term2=None,
        dating_from=None,
        dating_to=None,
        inscription_genus=None,
        and_not_inscription_genus=None,
        to_file=None,
        from_file=None,
        debug=True,
        term1='%',
    )

    record = scrape(query, prevent_write=True)[0]

    expected_years = (
        ('dating from', 71),
        ('dating to', 100),
        ('date not before', 71),
        ('date not after', 100),
    )
    for field, year in expected_years:
        assert record[field] == year

Ejemplo n.º 9

0

Mostrar archivo

def test_language_with_trailing_imbalanced_doublequote():
    """A language tag ending in a stray double quote lands in 'language'.

    Equivalent command line: ./parse.py -e 78800166  % --debug

    Output recorded in the original comment, as shown by
    csvcut -t -c 10,17 output/2021-07-30-EDCS_78800166+term1_%-1.tsv | csvlook

        | inscription | language |
        | ----------- | -------- |
        |  // GR"     | PALMYR   |
    """
    query = argparse.Namespace(
        EDCS='78800166',
        publication=None,
        province=None,
        place=None,
        operator='and',
        term2=None,
        dating_from=None,
        dating_to=None,
        inscription_genus=None,
        and_not_inscription_genus=None,
        to_file=None,
        from_file=None,
        debug=True,
        term1='%',
    )

    record = scrape(query,
                    prevent_write=True,
                    show_inscription_transform=True)[0]

    # The tag must move out of the inscription text and into the language list.
    assert 'GR"' not in record['inscription']
    assert "PALMYR, GR" in record['language']

Ejemplo n.º 10

0

Mostrar archivo

def test_no_letter():
    """A dating whose second span has no letter prefix is still parsed.

    The record's 'raw dating' is 'b:  96 to 96;  81 to 96'.
    """
    query = argparse.Namespace(
        EDCS='72300077',
        publication=None,
        province=None,
        place=None,
        operator='and',
        term2=None,
        dating_from=None,
        dating_to=None,
        inscription_genus=None,
        and_not_inscription_genus=None,
        to_file=None,
        from_file=None,
        debug=True,
        term1='%',
    )

    record = scrape(query, prevent_write=True)[0]

    expected_years = (
        ('dating from', 96),
        ('dating to', 96),
        ('date not before', 81),
        ('date not after', 96),
    )
    for field, year in expected_years:
        assert record[field] == year

Ejemplo n.º 11

0

Mostrar archivo

def test_place():
    """The 'place' column holds the place name, not an empty-list literal.

    Equivalent command line: ./parse.py -e 16201127  % --debug
    """
    query = argparse.Namespace(
        EDCS='16201127',
        publication=None,
        province=None,
        place=None,
        operator='and',
        term2=None,
        dating_from=None,
        dating_to=None,
        inscription_genus=None,
        and_not_inscription_genus=None,
        to_file=None,
        from_file=None,
        debug=True,
        term1='%',
    )

    record = scrape(query,
                    prevent_write=True,
                    show_inscription_transform=True)[0]

    assert '[]' not in record['place']
    assert "Acireale / Acium" in record['place']

Ejemplo n.º 12

0

Mostrar archivo

def test_material():
    """The 'Material' column holds the material, not an empty-list literal.

    Equivalent command line: ./parse.py -e 32001159  % --debug
    """
    query = argparse.Namespace(
        EDCS='32001159',
        publication=None,
        province=None,
        place=None,
        operator='and',
        term2=None,
        dating_from=None,
        dating_to=None,
        inscription_genus=None,
        and_not_inscription_genus=None,
        to_file=None,
        from_file=None,
        debug=True,
        term1='%',
    )

    record = scrape(query,
                    prevent_write=True,
                    show_inscription_transform=True)[0]

    assert '[]' not in record['Material']
    assert "lapis" in record['Material']

Ejemplo n.º 13

0

Mostrar archivo

def test_publication():
    """The 'publication' column holds the citation, not an empty-list literal.

    Equivalent command line: ./parse.py -e 78800166  % --debug
    """
    query = argparse.Namespace(
        EDCS='78800166',
        publication=None,
        province=None,
        place=None,
        operator='and',
        term2=None,
        dating_from=None,
        dating_to=None,
        inscription_genus=None,
        and_not_inscription_genus=None,
        to_file=None,
        from_file=None,
        debug=True,
        term1='%',
    )

    record = scrape(query,
                    prevent_write=True,
                    show_inscription_transform=True)[0]

    assert "AE 1937, 00075" in record['publication']
    assert "[]" not in record['publication']

Ejemplo n.º 14

0

Mostrar archivo

def test_EDCS_ID():
    """The 'EDCS-ID' column starts with "EDCS-" followed by eight digits.

    Equivalent command line: ./parse.py -e EDCS-07600345  % --debug
    """
    query = argparse.Namespace(
        EDCS='07600345',
        publication=None,
        province=None,
        place=None,
        operator='and',
        term2=None,
        dating_from=None,
        dating_to=None,
        inscription_genus=None,
        and_not_inscription_genus=None,
        to_file=None,
        from_file=None,
        debug=True,
        term1='%',
    )

    record = scrape(query,
                    prevent_write=True,
                    show_inscription_transform=True)[0]
    edcs_id = record['EDCS-ID']

    assert "[]" not in edcs_id
    # re.match anchors only at the start, so trailing text is tolerated.
    assert re.match(r"EDCS-[0-9]{8,8}", edcs_id)

Ejemplo n.º 15

0

Mostrar archivo

def test_no_letters_at_all():
    """A dating made only of unlettered spans is still parsed.

    The record's 'raw dating' is '163 to 170;  163 to 163'.
    Equivalent command line: ./parse.py -e 01000244 % --debug
    """
    query = argparse.Namespace(
        EDCS='01000244',
        publication=None,
        province=None,
        place=None,
        operator='and',
        term2=None,
        dating_from=None,
        dating_to=None,
        inscription_genus=None,
        and_not_inscription_genus=None,
        to_file=None,
        from_file=None,
        debug=True,
        term1='%',
    )

    record = scrape(query, prevent_write=True)[0]

    expected_years = (
        ('dating from', 163),
        ('dating to', 170),
        ('date not before', 163),
        ('date not after', 170),
    )
    for field, year in expected_years:
        assert record[field] == year

Ejemplo n.º 16

0

Mostrar archivo

def test_a_k_dates():
    """Eleven lettered spans (a-k) collapse into one span plus outer bounds.

    Record EDCS-72200182 carries the dating:
        a:  196 to 196;   b:  198 to 200;   c:  171 to 300;
        d:  208 to 218;   e:  180 to 222;   f:  228 to 228;
        g:  234 to 234;   h:  297 to 297;   i:  171 to 300;
        j:  171 to 300;   k:  171 to 300
    """
    query = argparse.Namespace(
        EDCS='72200182',
        publication=None,
        province=None,
        place=None,
        operator='and',
        term2=None,
        dating_from=None,
        dating_to=None,
        inscription_genus=None,
        and_not_inscription_genus=None,
        to_file=None,
        from_file=None,
        debug=True,
        term1='%',
    )

    record = scrape(query, prevent_write=True)[0]

    # 'dating from/to' match span "a"; the "not before/after" pair matches
    # the smallest start and largest end among the listed spans.
    expected_years = (
        ('dating from', 196),
        ('dating to', 196),
        ('date not before', 171),
        ('date not after', 300),
    )
    for field, year in expected_years:
        assert record[field] == year

Ejemplo n.º 17

0

Mostrar archivo

def test_comment():
    """A DOI comment lands in 'Comment' and not in the inscription text.

    Equivalent command line: ./parse.py -e 36400015  % --debug
    """
    query = argparse.Namespace(
        EDCS='36400015',
        publication=None,
        province=None,
        place=None,
        operator='and',
        term2=None,
        dating_from=None,
        dating_to=None,
        inscription_genus=None,
        and_not_inscription_genus=None,
        to_file=None,
        from_file=None,
        debug=True,
        term1='%',
    )

    record = scrape(query,
                    prevent_write=True,
                    show_inscription_transform=True)[0]
    doi_note = "comment DOI: 10.3406/crai.2005.22934"

    assert doi_note not in record['inscription']
    assert doi_note in record['Comment']

Ejemplo n.º 18

0

Mostrar archivo

def test_status():
    """The 'status' column holds the status text, not an empty-list literal.

    Equivalent command line: ./parse.py -e 55701594  % --debug
    """
    query = argparse.Namespace(
        EDCS='55701594',
        publication=None,
        province=None,
        place=None,
        operator='and',
        term2=None,
        dating_from=None,
        dating_to=None,
        inscription_genus=None,
        and_not_inscription_genus=None,
        to_file=None,
        from_file=None,
        debug=True,
        term1='%',
    )

    record = scrape(query,
                    prevent_write=True,
                    show_inscription_transform=True)[0]

    assert '[]' not in record['status']
    assert "sigilla impressa;  tituli fabricationis" in record['status']

Ejemplo n.º 19

0

Mostrar archivo

def test_digit_colon():
    """A dating slot labelled with a digit and colon is handled.

    Record EDCS-75100087 carries the dating "3:  ;  -27 to 37".
    """
    query = argparse.Namespace(
        EDCS='75100087',
        publication=None,
        province=None,
        place=None,
        operator='and',
        term2=None,
        dating_from=None,
        dating_to=None,
        inscription_genus=None,
        and_not_inscription_genus=None,
        to_file=None,
        from_file=None,
        debug=True,
        term1='%',
    )

    record = scrape(query, prevent_write=True)[0]

    expected_years = (
        ('dating from', -27),
        ('dating to', 37),
        ('date not before', -27),
        ('date not after', 37),
    )
    for field, year in expected_years:
        assert record[field] == year

Ejemplo n.º 20

0

Mostrar archivo

Archivo: test_inscriptions.py Proyecto: mqAncientHistory/EpigraphyScraperNotebook

def test_inscription_substitution_edh():
    """"<F=P>" keeps "P" under conservative and "F" under interpretive cleaning.

    Equivalent command line: ./parse.py -e 63600442 % --debug
    """
    query = argparse.Namespace(
        EDCS='63600442',
        publication=None,
        province=None,
        place=None,
        operator='and',
        term2=None,
        dating_from=None,
        dating_to=None,
        inscription_genus=None,
        and_not_inscription_genus=None,
        to_file=None,
        from_file=None,
        debug=True,
        term1='%',
    )

    record = scrape(query,
                    prevent_write=True,
                    show_inscription_transform=True)[0]

    # One expected fragment per output column, checked in column order.
    expected_fragments = (
        ('inscription', "<F=P>urius"),
        ('inscription conservative cleaning', "Purius"),
        ('inscription interpretive cleaning', "Furius"),
    )
    for column, fragment in expected_fragments:
        assert fragment in record[column]

Ejemplo n.º 21

0

Mostrar archivo

Archivo: interface.py Proyecto: mqAncientHistory/EpigraphyScraperNotebook

    def on_button_clicked(b):
        """Run a scrape from the current widget values and list the TSV outputs.

        Inside the ``out`` output context this handler:

        1. clears the previous output;
        2. copies every non-empty widget value onto ``args`` and resets the
           widget (``operator`` is copied but not reset);
        3. calls ``parse.scrape(args)``;
        4. displays links to every ``*.tsv`` file under ``output/``, newest
           first.

        ``out``, ``args`` and the widgets come from the enclosing scope, which
        is not shown here.

        Args:
            b: The argument supplied by the click callback; not used.
        """
        with out:
            out.clear_output(wait=True)

            if and_not_inscription_genus.value:
                args.and_not_inscription_genus = and_not_inscription_genus.value
                and_not_inscription_genus.value = ""

            if dating_from.value:
                args.dating_from = dating_from.value
                dating_from.value = ""

            if dating_to.value:
                args.dating_to = dating_to.value
                dating_to.value = ""

            if EDCS.value:
                args.EDCS = EDCS.value
                EDCS.value = ""

            if inscription_genus.value:
                args.inscription_genus = inscription_genus.value
                inscription_genus.value = ""

            # The operator widget keeps its selection between runs.
            if operator.value:
                args.operator = operator.value

            if place.value:
                args.place = place.value
                place.value = ""

            if province.value:
                args.province = province.value
                province.value = ""

            # Unlike the text fields, publication is wrapped in a list on args
            # and the widget is reset to an empty list.
            if publication.value:
                args.publication = [publication.value]
                publication.value = []

            if term1.value:
                args.term1 = term1.value
                term1.value = ""

            if term2.value:
                args.term2 = term2.value
                term2.value = ""

            display(
                HTML(
                    "<p>Getting the inscriptions. This may take a few minutes (or hours), depending on the number of search results.</p>"
                ))

            filename = parse.scrape(args)

            OUTPUTS = Path("output")

            # Sort the paths themselves by modification time, newest first.
            # Keying a dict by mtime would silently drop every file that
            # shares a timestamp with another one.
            newest_first = sorted(OUTPUTS.glob("*.tsv"),
                                  key=lambda path: path.stat().st_mtime,
                                  reverse=True)
            filenames = [(path.name, path) for path in newest_first]

            # Emit the whole list as one HTML fragment so the <ul> actually
            # wraps its <li> items instead of being split across outputs.
            items = "".join(f"<li><a href='{target}'>{label}</a></li>"
                            for label, target in filenames)
            display(HTML(f"<ul>{items}</ul>"))