def test_single_valued_datespan():
    """Check a record whose dating span has equal endpoints.

    EDCS-ID: EDCS-24900077, dating: -68 to -68.  All four derived date
    fields are expected to equal -68.
    """
    query = {
        "EDCS": "24900077",
        "publication": None,
        "province": None,
        "place": None,
        "operator": "and",
        "term2": None,
        "dating_from": None,
        "dating_to": None,
        "inscription_genus": None,
        "and_not_inscription_genus": None,
        "to_file": None,
        "from_file": None,
        "debug": True,
        "term1": "%",
    }
    record = scrape(argparse.Namespace(**query), prevent_write=True)[0]

    # Same checks, same order: from / to / not before / not after.
    for field in ("dating from", "dating to", "date not before",
                  "date not after"):
        assert record[field] == -68
def test_missing_first_date():
    """Check a record whose dating has no start year.

    EDCS-ID: EDCS-34901010, dating: "to 100".  The lower bounds are expected
    to be ``None`` while both upper bounds are 100.
    """
    args = argparse.Namespace(
        EDCS="34901010",
        publication=None,
        province=None,
        place=None,
        operator="and",
        term2=None,
        dating_from=None,
        dating_to=None,
        inscription_genus=None,
        and_not_inscription_genus=None,
        to_file=None,
        from_file=None,
        debug=True,
        term1="%",
    )
    record = scrape(args, prevent_write=True)[0]

    # Identity comparison: ``== None`` can be fooled by a custom ``__eq__``
    # and is flagged by PEP 8; the missing bound must be the None singleton.
    assert record["dating from"] is None
    assert record["dating to"] == 100
    assert record["date not before"] is None
    assert record["date not after"] == 100
def test_inscription_vir():
    """Check that "IIIIvir" is split into "IIII vir " by both cleanings.

    Equivalent CLI call: ./parse.py -e 24900101 % --debug
    """
    query = {
        "EDCS": "24900101",
        "publication": None,
        "province": None,
        "place": None,
        "operator": "and",
        "term2": None,
        "dating_from": None,
        "dating_to": None,
        "inscription_genus": None,
        "and_not_inscription_genus": None,
        "to_file": None,
        "from_file": None,
        "debug": True,
        "term1": "%",
    }
    record = scrape(argparse.Namespace(**query),
                    prevent_write=True,
                    show_inscription_transform=True)[0]
    cleaned_fields = ("inscription conservative cleaning",
                      "inscription interpretive cleaning")

    # The raw inscription keeps the joined form ...
    assert "IIIIvir" in record["inscription"]
    # ... which must be gone from both cleaned variants ...
    for field in cleaned_fields:
        assert "IIIIvir" not in record[field]
    # ... and replaced by the separated form.
    for field in cleaned_fields:
        assert "IIII vir " in record[field]
def test_inscription_three_middle():
    """Check that a "3" gap marker inside a restoration is cleaned away.

    Queries EDCS 20700224 (the original note also mentions 09000264).
    The raw text keeps "mo[numentum 3 vi]/olaverit"; neither cleaned
    variant may contain it.
    """
    query = {
        "EDCS": "20700224",
        "publication": None,
        "province": None,
        "place": None,
        "operator": "and",
        "term2": None,
        "dating_from": None,
        "dating_to": None,
        "inscription_genus": None,
        "and_not_inscription_genus": None,
        "to_file": None,
        "from_file": None,
        "debug": True,
        "term1": "%",
    }
    record = scrape(argparse.Namespace(**query),
                    prevent_write=True,
                    show_inscription_transform=True)[0]
    raw_fragment = "mo[numentum 3 vi]/olaverit"

    assert raw_fragment in record["inscription"]
    for field in ("inscription conservative cleaning",
                  "inscription interpretive cleaning"):
        assert raw_fragment not in record[field]
def test_inscription_substitution():
    """Check that "<x=Y>" substitution markup is removed by both cleanings.

    Equivalent CLI call: ./parse.py -e 15300609 % --debug
    """
    query = {
        "EDCS": "15300609",
        "publication": None,
        "province": None,
        "place": None,
        "operator": "and",
        "term2": None,
        "dating_from": None,
        "dating_to": None,
        "inscription_genus": None,
        "and_not_inscription_genus": None,
        "to_file": None,
        "from_file": None,
        "debug": True,
        "term1": "%",
    }
    record = scrape(argparse.Namespace(**query),
                    prevent_write=True,
                    show_inscription_transform=True)[0]
    raw_fragment = "sanc<t=I>issi<ma=AM>"

    assert raw_fragment in record["inscription"]
    for field in ("inscription conservative cleaning",
                  "inscription interpretive cleaning"):
        assert raw_fragment not in record[field]
def test_inscription_expanded_abbreviations():
    """Check how parenthesised abbreviation expansions are cleaned.

    Equivalent CLI call: ./parse.py -e 27000432 % --debug
    Conservative cleaning is expected to drop the expansion ("D M "),
    interpretive cleaning to keep it ("Dis Manibus").
    """
    query = {
        "EDCS": "27000432",
        "publication": None,
        "province": None,
        "place": None,
        "operator": "and",
        "term2": None,
        "dating_from": None,
        "dating_to": None,
        "inscription_genus": None,
        "and_not_inscription_genus": None,
        "to_file": None,
        "from_file": None,
        "debug": True,
        "term1": "%",
    }
    record = scrape(argparse.Namespace(**query),
                    prevent_write=True,
                    show_inscription_transform=True)[0]
    abbreviated = "D(is) M(anibus)"

    assert abbreviated in record["inscription"]
    assert abbreviated not in record["inscription conservative cleaning"]
    assert "D M " in record["inscription conservative cleaning"]
    assert "Dis Manibus" in record["inscription interpretive cleaning"]
def test_inscription_restoration():
    """Check how bracketed restorations are cleaned.

    Equivalent CLI call: ./parse.py -e 34100092 % --debug
    Conservative cleaning is expected to drop the restored text
    ("nillae c "), interpretive cleaning to keep it ("nillae coniugi").
    """
    query = {
        "EDCS": "34100092",
        "publication": None,
        "province": None,
        "place": None,
        "operator": "and",
        "term2": None,
        "dating_from": None,
        "dating_to": None,
        "inscription_genus": None,
        "and_not_inscription_genus": None,
        "to_file": None,
        "from_file": None,
        "debug": True,
        "term1": "%",
    }
    record = scrape(argparse.Namespace(**query),
                    prevent_write=True,
                    show_inscription_transform=True)[0]
    restored = "nillae c[oniugi]"

    assert restored in record["inscription"]
    assert restored not in record["inscription conservative cleaning"]
    assert "nillae c " in record["inscription conservative cleaning"]
    assert "nillae coniugi" in record["inscription interpretive cleaning"]
def test_random__middle_date():
    """Check a lettered dating where only one middle entry has years.

    EDCS-ID: EDCS-32001032, dating: "a: ; b: 71 to 100; c: ; d:".
    """
    query = {
        "EDCS": "32001032",
        "publication": None,
        "province": None,
        "place": None,
        "operator": "and",
        "term2": None,
        "dating_from": None,
        "dating_to": None,
        "inscription_genus": None,
        "and_not_inscription_genus": None,
        "to_file": None,
        "from_file": None,
        "debug": True,
        "term1": "%",
    }
    record = scrape(argparse.Namespace(**query), prevent_write=True)[0]

    expected = {
        "dating from": 71,
        "dating to": 100,
        "date not before": 71,
        "date not after": 100,
    }
    for field, year in expected.items():
        assert record[field] == year
def test_language_with_trailing_imbalanced_doublequote():
    """Check that a trailing 'GR"' ends up in the language column.

    Equivalent CLI call: ./parse.py -e 78800166 % --debug
    The original note records an earlier output in which the inscription
    column was '// GR"' and the language column only 'PALMYR'; the test
    expects 'GR"' to be absent from the inscription and the language to
    contain "PALMYR, GR".
    """
    query = {
        "EDCS": "78800166",
        "publication": None,
        "province": None,
        "place": None,
        "operator": "and",
        "term2": None,
        "dating_from": None,
        "dating_to": None,
        "inscription_genus": None,
        "and_not_inscription_genus": None,
        "to_file": None,
        "from_file": None,
        "debug": True,
        "term1": "%",
    }
    record = scrape(argparse.Namespace(**query),
                    prevent_write=True,
                    show_inscription_transform=True)[0]

    assert 'GR"' not in record["inscription"]
    assert "PALMYR, GR" in record["language"]
def test_no_letter():
    """Check a dating where the second span has no letter prefix.

    EDCS 72300077, raw dating: 'b: 96 to 96; 81 to 96'.
    """
    query = {
        "EDCS": "72300077",
        "publication": None,
        "province": None,
        "place": None,
        "operator": "and",
        "term2": None,
        "dating_from": None,
        "dating_to": None,
        "inscription_genus": None,
        "and_not_inscription_genus": None,
        "to_file": None,
        "from_file": None,
        "debug": True,
        "term1": "%",
    }
    record = scrape(argparse.Namespace(**query), prevent_write=True)[0]

    expected = {
        "dating from": 96,
        "dating to": 96,
        "date not before": 81,
        "date not after": 96,
    }
    for field, year in expected.items():
        assert record[field] == year
def test_place():
    """Check that the place column is populated and not a stringified list.

    Equivalent CLI call: ./parse.py -e 16201127 % --debug
    """
    query = {
        "EDCS": "16201127",
        "publication": None,
        "province": None,
        "place": None,
        "operator": "and",
        "term2": None,
        "dating_from": None,
        "dating_to": None,
        "inscription_genus": None,
        "and_not_inscription_genus": None,
        "to_file": None,
        "from_file": None,
        "debug": True,
        "term1": "%",
    }
    record = scrape(argparse.Namespace(**query),
                    prevent_write=True,
                    show_inscription_transform=True)[0]

    assert "[]" not in record["place"]
    assert "Acireale / Acium" in record["place"]
def test_material():
    """Check that the Material column is populated and not a stringified list.

    Equivalent CLI call: ./parse.py -e 32001159 % --debug
    """
    query = {
        "EDCS": "32001159",
        "publication": None,
        "province": None,
        "place": None,
        "operator": "and",
        "term2": None,
        "dating_from": None,
        "dating_to": None,
        "inscription_genus": None,
        "and_not_inscription_genus": None,
        "to_file": None,
        "from_file": None,
        "debug": True,
        "term1": "%",
    }
    record = scrape(argparse.Namespace(**query),
                    prevent_write=True,
                    show_inscription_transform=True)[0]

    assert "[]" not in record["Material"]
    assert "lapis" in record["Material"]
def test_publication():
    """Check that the publication column carries the expected reference.

    Equivalent CLI call: ./parse.py -e 78800166 % --debug
    """
    query = {
        "EDCS": "78800166",
        "publication": None,
        "province": None,
        "place": None,
        "operator": "and",
        "term2": None,
        "dating_from": None,
        "dating_to": None,
        "inscription_genus": None,
        "and_not_inscription_genus": None,
        "to_file": None,
        "from_file": None,
        "debug": True,
        "term1": "%",
    }
    record = scrape(argparse.Namespace(**query),
                    prevent_write=True,
                    show_inscription_transform=True)[0]

    assert "AE 1937, 00075" in record["publication"]
    assert "[]" not in record["publication"]
def test_EDCS_ID():
    """Check that the EDCS-ID column starts with "EDCS-" plus eight digits.

    Equivalent CLI call: ./parse.py -e EDCS-07600345 % --debug
    """
    query = {
        "EDCS": "07600345",
        "publication": None,
        "province": None,
        "place": None,
        "operator": "and",
        "term2": None,
        "dating_from": None,
        "dating_to": None,
        "inscription_genus": None,
        "and_not_inscription_genus": None,
        "to_file": None,
        "from_file": None,
        "debug": True,
        "term1": "%",
    }
    record = scrape(argparse.Namespace(**query),
                    prevent_write=True,
                    show_inscription_transform=True)[0]

    assert "[]" not in record["EDCS-ID"]
    # re.match anchors only at the start of the string.
    id_match = re.match(r"EDCS-[0-9]{8,8}", record["EDCS-ID"])
    assert id_match
def test_no_letters_at_all():
    """Check a dating with two spans and no letter prefixes at all.

    Equivalent CLI call: ./parse.py -e 01000244 % --debug
    Raw dating: '163 to 170; 163 to 163'.
    """
    query = {
        "EDCS": "01000244",
        "publication": None,
        "province": None,
        "place": None,
        "operator": "and",
        "term2": None,
        "dating_from": None,
        "dating_to": None,
        "inscription_genus": None,
        "and_not_inscription_genus": None,
        "to_file": None,
        "from_file": None,
        "debug": True,
        "term1": "%",
    }
    record = scrape(argparse.Namespace(**query), prevent_write=True)[0]

    expected = {
        "dating from": 163,
        "dating to": 170,
        "date not before": 163,
        "date not after": 170,
    }
    for field, year in expected.items():
        assert record[field] == year
def test_a_k_dates():
    """Check a dating with eleven lettered spans (a through k).

    EDCS-ID: EDCS-72200182, dating:
        a: 196 to 196; b: 198 to 200; c: 171 to 300; d: 208 to 218;
        e: 180 to 222; f: 228 to 228; g: 234 to 234; h: 297 to 297;
        i: 171 to 300; j: 171 to 300; k: 171 to 300
    """
    query = {
        "EDCS": "72200182",
        "publication": None,
        "province": None,
        "place": None,
        "operator": "and",
        "term2": None,
        "dating_from": None,
        "dating_to": None,
        "inscription_genus": None,
        "and_not_inscription_genus": None,
        "to_file": None,
        "from_file": None,
        "debug": True,
        "term1": "%",
    }
    record = scrape(argparse.Namespace(**query), prevent_write=True)[0]

    expected = {
        "dating from": 196,
        "dating to": 196,
        "date not before": 171,
        "date not after": 300,
    }
    for field, year in expected.items():
        assert record[field] == year
def test_comment():
    """Check that the DOI comment lands in Comment, not in the inscription.

    Equivalent CLI call: ./parse.py -e 36400015 % --debug
    """
    query = {
        "EDCS": "36400015",
        "publication": None,
        "province": None,
        "place": None,
        "operator": "and",
        "term2": None,
        "dating_from": None,
        "dating_to": None,
        "inscription_genus": None,
        "and_not_inscription_genus": None,
        "to_file": None,
        "from_file": None,
        "debug": True,
        "term1": "%",
    }
    record = scrape(argparse.Namespace(**query),
                    prevent_write=True,
                    show_inscription_transform=True)[0]
    doi_note = "comment DOI: 10.3406/crai.2005.22934"

    assert doi_note not in record["inscription"]
    assert doi_note in record["Comment"]
def test_status():
    """Check that the status column is populated and not a stringified list.

    Equivalent CLI call: ./parse.py -e 55701594 % --debug
    """
    query = {
        "EDCS": "55701594",
        "publication": None,
        "province": None,
        "place": None,
        "operator": "and",
        "term2": None,
        "dating_from": None,
        "dating_to": None,
        "inscription_genus": None,
        "and_not_inscription_genus": None,
        "to_file": None,
        "from_file": None,
        "debug": True,
        "term1": "%",
    }
    record = scrape(argparse.Namespace(**query),
                    prevent_write=True,
                    show_inscription_transform=True)[0]

    assert "[]" not in record["status"]
    assert "sigilla impressa; tituli fabricationis" in record["status"]
def test_digit_colon():
    """Check a dating whose prefix is a digit followed by a colon.

    EDCS-75100087, dating: "3: ; -27 to 37".
    """
    query = {
        "EDCS": "75100087",
        "publication": None,
        "province": None,
        "place": None,
        "operator": "and",
        "term2": None,
        "dating_from": None,
        "dating_to": None,
        "inscription_genus": None,
        "and_not_inscription_genus": None,
        "to_file": None,
        "from_file": None,
        "debug": True,
        "term1": "%",
    }
    record = scrape(argparse.Namespace(**query), prevent_write=True)[0]

    expected = {
        "dating from": -27,
        "dating to": 37,
        "date not before": -27,
        "date not after": 37,
    }
    for field, year in expected.items():
        assert record[field] == year
def test_inscription_substitution_edh():
    """Check which side of a "<F=P>" substitution each cleaning keeps.

    Equivalent CLI call: ./parse.py -e 63600442 % --debug
    Conservative cleaning is expected to give "Purius", interpretive
    cleaning "Furius".
    """
    query = {
        "EDCS": "63600442",
        "publication": None,
        "province": None,
        "place": None,
        "operator": "and",
        "term2": None,
        "dating_from": None,
        "dating_to": None,
        "inscription_genus": None,
        "and_not_inscription_genus": None,
        "to_file": None,
        "from_file": None,
        "debug": True,
        "term1": "%",
    }
    record = scrape(argparse.Namespace(**query),
                    prevent_write=True,
                    show_inscription_transform=True)[0]

    expected_fragments = {
        "inscription": "<F=P>urius",
        "inscription conservative cleaning": "Purius",
        "inscription interpretive cleaning": "Furius",
    }
    for field, fragment in expected_fragments.items():
        assert fragment in record[field]
def on_button_clicked(b):
    """Run a search from the current widget values and list the TSV outputs.

    Non-empty widget values are copied onto the module-level ``args``
    namespace and the widgets are reset (``operator`` keeps its selection).
    ``parse.scrape(args)`` is then called and every ``*.tsv`` file in the
    ``output`` directory is shown, newest first, as a list of links inside
    the ``out`` output widget.

    Args:
        b: Callback argument supplied by the caller; not used in the body.
    """
    from html import escape

    def _consume(attr, widget):
        # Copy a non-empty text widget onto ``args`` and clear the widget.
        if widget.value:
            setattr(args, attr, widget.value)
            widget.value = ""

    with out:
        out.clear_output(wait=True)

        # NOTE(review): values left on ``args`` by an earlier click are not
        # reset when the widget is now empty -- confirm this is intended.
        _consume("and_not_inscription_genus", and_not_inscription_genus)
        _consume("dating_from", dating_from)
        _consume("dating_to", dating_to)
        _consume("EDCS", EDCS)
        _consume("inscription_genus", inscription_genus)
        if operator.value:
            # The operator selection is deliberately left in place.
            args.operator = operator.value
        _consume("place", place)
        _consume("province", province)
        if publication.value:
            # Publication is passed on wrapped in a list and reset to [].
            args.publication = [publication.value]
            publication.value = []
        _consume("term1", term1)
        _consume("term2", term2)

        display(
            HTML(
                "<p>Getting the inscriptions. This may take a few minutes (or hours), depending on the number of search results.</p>"
            ))
        parse.scrape(args)

        # Sort the paths themselves by modification time.  Keying a dict by
        # mtime (as before) silently dropped files sharing a timestamp.
        newest_first = sorted(
            Path("output").glob("*.tsv"),
            key=lambda path: path.stat().st_mtime,
            reverse=True,
        )

        # Emit the list as ONE HTML fragment: separate display() calls for
        # "<ul>", each "<li>" and "</ul>" render as unrelated outputs.
        # File names are escaped before interpolation into the markup.
        items = "".join(
            f"<li><a href='{escape(str(path))}'>{escape(path.name)}</a></li>"
            for path in newest_first)
        display(HTML(f"<ul>{items}</ul>"))