Ejemplo n.º 1
0
 def test_redirect(self):
     """
     Check that a redirect page resolves to the page it points at.

     Two uniquely named pages are created (a content page and a redirect to
     it), the text reached through the redirect is compared with the text of
     the target page, and both pages are deleted afterwards.
     """
     redirect_to_name = "Redirect here {}".format(uuid4())
     redirect_from_name = "Redirect from {}".format(uuid4())
     content = "Some content {}".format(uuid4())
     create_wiki_page(page_name=redirect_to_name, content=content, summary="Unit test")
     create_redirect_wiki_page(page_name=redirect_from_name, redirect_to=redirect_to_name, summary="Unit test")
     redirect_from = get_wiki_page(page_name=redirect_from_name)
     redirect_to = get_wiki_page(page_name=redirect_to_name)
     try:
         self.assertEqual(redirect_from.redirects_to().text(), redirect_to.text())
     finally:
         # cleanup - runs even when the assertion fails, so a failing test
         # does not leave its pages behind on the wiki
         redirect_from.delete(reason="Unit test finished")
         redirect_to.delete(reason="Unit test finished")
Ejemplo n.º 2
0
def handle_categories(browse, create_category_pages):
    """
    Build the category links of an item from its 'subject' browse entries.

    Only subjects that match AUTHORITY_ID_PATTERN and whose term passes
    is_hebrew() are used. They are stored per authority id, so an id that
    appears several times yields a single category (the last term seen).

    :param browse: the item's 'browse' mapping; only its 'subject' entry is read
    :param create_category_pages: when truthy, create_wiki_page is called for every category
    :return: the concatenated, CR-prefixed category links ('' when there are none)
    """
    raw_subjects = browse.get('subject')
    entries = str_to_list(raw_subjects) if raw_subjects else {}

    terms_by_nnl = {}
    for entry in entries:
        found = re.search(AUTHORITY_ID_PATTERN, entry)
        if not found:
            continue
        # the authority id is group 3 without its first five characters
        authority_id = found.group(3)[5:]
        hebrew_term = found.group(1)
        if is_hebrew(hebrew_term):  # Hebrew term
            terms_by_nnl[authority_id] = hebrew_term

    links = []
    for hebrew_term in terms_by_nnl.values():
        page_title = "קטגוריה:{}".format(hebrew_term)
        links.append(CR + '[[{}]]'.format(page_title))
        if create_category_pages:
            create_wiki_page(page_title, "Creating empty category page", "")

    return "".join(links)
Ejemplo n.º 3
0
def handle_categories(browse, create_category_pages):
    """
    Build the category links of an item from its 'subject' browse entries.

    Only subjects that match AUTHORITY_ID_PATTERN and whose term passes
    is_hebrew() are used. They are stored per authority id, so an id that
    appears several times yields a single category (the last term seen).

    :param browse: the item's 'browse' mapping; only its 'subject' entry is read
    :param create_category_pages: when truthy, create_wiki_page is called for every category
    :return: the concatenated, CR-prefixed category links ('' when there are none)
    """
    raw_subjects = browse.get('subject')
    entries = str_to_list(raw_subjects) if raw_subjects else {}

    terms_by_nnl = {}
    for entry in entries:
        found = re.search(AUTHORITY_ID_PATTERN, entry)
        if not found:
            continue
        # the authority id is group 3 without its first five characters
        authority_id = found.group(3)[5:]
        hebrew_term = found.group(1)
        if is_hebrew(hebrew_term):  # Hebrew term
            terms_by_nnl[authority_id] = hebrew_term

    links = []
    for hebrew_term in terms_by_nnl.values():
        page_title = "קטגוריה:{}".format(hebrew_term)
        links.append(CR + '[[{}]]'.format(page_title))
        if create_category_pages:
            create_wiki_page(page_title,
                             "Creating empty category page", "")

    return "".join(links)
Ejemplo n.º 4
0
 def test_page_creation_deletion(self):
     """
     Create a uniquely named page, read it back, delete it and check that
     the page no longer has any text.
     """
     # creation & retrieval
     expected_text = "Some content {}".format(uuid4())
     name = "New page {}".format(uuid4())
     self._delete_page(name)  # start from a state where the page does not exist
     created = create_wiki_page(page_name=name, content=expected_text, summary="Unit test")
     print("Created page", created)
     fetched = get_wiki_page(name)
     self.assertEqual(expected_text, fetched.text())
     # deletion - per the original author's note, delete() is expected to
     # raise if the page does not exist
     fetched.delete(reason="Unit test finished")
     print("Deleted page", fetched)
     self.assertFalse(get_wiki_page(name).text())
Ejemplo n.º 5
0
def create_page_from_dictionary(item_dict, debug=None, create_category_pages=False, site=None):
    """
    Create a wiki page from a dictionary that describes a primo item.

    Unless ``debug`` is truthy the page is written under the item's record id
    and, when the cleaned title is Hebrew, a redirect page named after the
    (length-limited) title is created as well.

    :param item_dict: primo item as a dictionary/json; its 'control', 'display'
        and 'links' entries are read, plus 'sort' and 'browse' when needed
    :param debug: if truthy, print the markup instead of creating the pages
    :param create_category_pages: whether to create empty category pages when encountered
    :param site: passed through to create_wiki_page / create_redirect_wiki_page
    :return: page content in wiki markup
    """
    control = item_dict['control']
    document_id = control['recordid']
    sourcerecordid = control['sourcerecordid']
    originalsourceid = control['originalsourceid']
    display = item_dict['display']

    # Use the main (first) title only; the full title is displayed in another
    # location in the page. Fall back to the sort title when there is none.
    try:
        title = display['title']
        if isinstance(title, list):
            title = title[0]
    except (KeyError, IndexError):
        title = trim(item_dict['sort']['title'])
    item_type = display['type'].lower()

    try:
        display_type = type_dict[item_type][1]  # hebrew type as a definite article, e.g. כתב העת
        display_type += " "
    except Exception:
        display_type = ""
        print("Unrecognized type '{}'".format(item_type))

    # NOTE(review): this lookup still raises for a type missing from type_dict,
    # even though the lookup above tolerates it - confirm which is intended.
    creation_verb = type_dict[item_type][2]

    # Creators and contributors are handled as one ';'-separated list.
    creators_field = display.get('creator')
    contributor = display.get('contributor')
    if contributor:
        if creators_field:
            creators_field += ';' + contributor
        else:
            creators_field = contributor

    creator = None
    authors_to_id = {}
    if creators_field:
        # 'browse' is optional (see the category handling below), so a missing
        # author index simply leaves the name -> authority id mapping empty.
        author_entries = (item_dict.get('browse') or {}).get('author')
        if author_entries:
            authors_to_id = entries_to_authority_id(str_to_list(author_entries))
        names = [person_name(authors_to_id, name.strip()) for name in creators_field.split(";")]
        # dict.fromkeys drops duplicates but keeps the original order, so the
        # generated text is the same on every run (a set gives no such order).
        creator = comma_and(", ".join(dict.fromkeys(names)))

    summary = display.get('lds20')

    comments = str_to_list(display.get('lds05'))
    comments_section = None
    if comments:
        comments_section = CR.join(["* " + comment for comment in comments])

    # handle digital images: thumbnail display + links to digital images
    rosetta_link = item_dict["links"].get("linktorsrc")
    view_online = ''
    if rosetta_link:
        # handling the 'view' button & thumbnail image
        thumb_value = generate_thumb_link(rosetta_link)
        if thumb_value:
            view_online = VIEW_ONLINE.replace('{IE}', thumb_value[0]).replace('{title}', title)
    creationdate = display.get('creationdate')
    ispartof = display.get('ispartof')
    performed_by = str_to_list(display.get('lds35'))  # list

    performed_by_str = None
    if performed_by:
        performed_by_str = ", ".join(person_name(authors_to_id, performer) for performer in performed_by)

    source = display['source']
    lib_link = display.get('lds21')
    if not lib_link:
        lib_link = item_dict['links']['linktorsrc']
        # Drop whatever precedes the URL. str.find returns -1 when "http" is
        # absent; in that case keep the value as it is instead of slicing it
        # down to its last character.
        url_start = lib_link.find("http")
        if url_start > 0:
            lib_link = lib_link[url_start:]

    # Building page's Wikicode
    content = "{{DISPLAYTITLE:%s}}\n" % title
    content += "{}'''{}''' {}".format(display_type, title, creation_verb)
    if creator:
        content += " על ידי {}".format(creator)

    if creationdate:
        content += " בשנת {}".format(creationdate)
    content += CR
    if summary:
        content += CR + summary + CR

    if view_online:
        content += view_online + CR
    content += "==פרטים כלליים==" + CR
    if performed_by_str:
        content += LIST_ITEM.format("שם מבצע", performed_by_str)
    if ispartof:
        content += LIST_ITEM.format("מתוך", ispartof)
    if comments_section:
        content += comments_section
    content += CR + "==מידע נוסף==" + CR
    content += LIST_ITEM.format("מקור", source)
    content += "* מספר מערכת: [{} {}]\n".format(lib_link, sourcerecordid)
    content += "== קישורים נוספים ==\n"
    alef_link = ALEF_LINK.format(originalsourceid, sourcerecordid)
    content += "* [{} הפריט בקטלוג הספרייה הלאומית]\n".format(alef_link)

    browse = item_dict.get('browse')
    if browse:
        content += handle_categories(browse, create_category_pages)

    if debug:
        print(content)
    else:
        title = clean_title(title)
        if is_hebrew(title):
            title = limit_length(title)
            create_redirect_wiki_page(site, page_name=clean_title(title), redirect_to=document_id,
                                      summary="Creating redirect page for {}".format(document_id))
        create_wiki_page(site, page_name=document_id, summary="Created from primo", content=content)

    return content
Ejemplo n.º 6
0
def create_page_from_node(person_node, records_list, debug=None, create_category_pages=False, site=None):
    """
    Build the wiki markup of a person page from a neo4j node and, unless
    debugging, write it to the wiki.

    :param person_node: neo4j node; its 'id', 'person_name_heb' and 'data'
        (JSON text) entries are read
    :param records_list: nested mapping, relation -> record type -> records;
        each record is read through its 'language', 'rosetta', 'fl' and
        'title' entries and is also used to fill the ITEM template
    :param debug: when truthy the markup is printed instead of being written
    :param create_category_pages: accepted but not used by this function
    :param site: passed through to create_wiki_page
    :return: the page content in wiki markup
    """
    wiki_page_name = person_node['id']
    display_name = simple_person_name(person_node['person_name_heb'])
    authority_record = json.loads(person_node['data'])

    born_on = date8_to_heb_date(get_if_exists(authority_record, '046', 0, 'f'))
    died_on = date8_to_heb_date(get_if_exists(authority_record, '046', 0, 'g'))

    born_in = get_if_exists(authority_record, '370', 0, 'a')
    died_in = get_if_exists(authority_record, '370', 0, 'b')

    alias_entries = get_if_exists(authority_record, '400')
    aliases = BR.join(simple_person_name(entry['a']) for entry in alias_entries)

    # The address fields are looked up but not rendered anywhere in the page below.
    address = get_if_exists(authority_record, '371', 0, 'a')
    address_place = get_if_exists(authority_record, '371', 0, 'b')
    address_country = get_if_exists(authority_record, '371', 0, 'd')

    occupation = get_if_exists(authority_record, '374', 0, 'a')
    # field 375 holds MALE/FEMALE; it selects the grammatical gender of the book headings
    is_female = get_if_exists(authority_record, '375', 0, 'a').lower() == "female"

    portrait_html = ""

    content = "{{DISPLAYTITLE:%s}}\n" % display_name

    # Every section starts with its heading and opening markup; items are appended below.
    audio_section = ["==פריטי שמע==", OPENDIV]
    video_section = ["==פריטי וידאו==", OPENDIV]
    books_by_section = ["==ספרים שכתבה==" if is_female else "==ספרים שכתב==", OPENDIV]
    books_about_section = ["==ספרים אודותיה==" if is_female else "==ספרים אודותיו==", OPENDIV]
    gallery_cells = ["==גלריית תמונות==", '{| class="wikitable" border="1"']
    gallery_captions = ['|-']
    other_section = ["==אחר==", OPENDIV]

    for relation in records_list:
        records_by_type = records_list[relation]
        for record_type in records_by_type:
            for record in records_by_type[record_type]:
                # getting record type
                item_type = "other" if record_type == "other" else type_dict.get(record_type.lower())[3]

                # temporary - skip over non-hebrew records
                if item_type == 'print' and record['language'] and record['language'] != 'heb':
                    continue

                # the 'view' button & thumbnail image only exist for records with a rosetta link
                rosetta_link = record['rosetta'] or ''
                if len(rosetta_link) == 0:
                    view_markup = ''
                    thumb_markup = ''
                else:
                    view_markup = VIEW_ONLINE.format(extract_link(rosetta_link))
                    thumb_markup = generate_thumb(rosetta_link) or ''
                content_item = ITEM.replace('{view}', view_markup) \
                    .replace('{thumb}', thumb_markup) \
                    .format(**record)

                if relation == 'portrait_of':
                    portrait_html = (
                        '<img src="http://rosetta.nli.org.il/delivery/DeliveryManagerServlet?'
                        'dps_pid={}&dps_func=stream"'
                        ' style="max-height: 500px; max-width: 300px;"/>'
                    ).format(record['fl'])

                if item_type == 'photograph':
                    # the gallery stops growing once it holds six captions; it needs an 'fl' value
                    if len(gallery_captions) <= 6 and record.get('fl'):
                        gallery_cells.append(GALLERY_ITEM.format(record['fl']))
                        gallery_captions.append('| {}'.format(record['title']))
                    continue

                if item_type == 'print':
                    target = books_about_section if relation == 'subject_of' else books_by_section
                elif item_type == 'audio':
                    target = audio_section
                elif item_type == 'video':
                    target = video_section
                else:
                    target = other_section
                target.append(content_item)

    # Fill the template: every marker is replaced by the marker followed by its value.
    template = TEMPLATE
    for marker, value in (
            (template_name, display_name),
            (template_birth_date, born_on),
            (template_death_date, died_on),
            (template_birth_place, born_in),
            (template_death_place, died_in),
            (template_other_names, aliases),
            (template_occupation, occupation),
            (template_image_url, portrait_html),
    ):
        template = template.replace(marker, marker + value)

    content += template

    # Notes come from fields 670, 678 and 680, in that order.
    notes = []
    for field in ('670', '678', '680'):
        field_notes = authority_record.get(field)
        if field_notes:
            notes += field_notes

    if notes:
        content += CR
        content += "".join(note['a'] + BR for note in notes if note.get('a') and note.get('a') != "LCN")

    gallery_captions.append('|}')

    content += CR + "".join([
        CR.join(books_by_section) + CLOSEDIV + CR,
        CR.join(books_about_section) + CLOSEDIV + CR,
        CR.join(audio_section) + CLOSEDIV + CR,
        CR.join(video_section) + CLOSEDIV + CR,
        CR.join(gallery_cells + gallery_captions) + CR,
        CR.join(other_section) + CLOSEDIV + CR,
    ])

    if not debug:
        # NOTE: no redirect page is created for the person's name here; the
        # call that did so was disabled in the original code.
        create_wiki_page(site, page_name=wiki_page_name, summary="Created from primo", content=content)
    else:
        print(content)

    return content
Ejemplo n.º 7
0
def create_page_from_node(person_node,
                          records_list,
                          debug=None,
                          create_category_pages=False,
                          site=None):
    """
    Build the wiki markup of a person page from a neo4j node and, unless
    debugging, write it to the wiki.

    :param person_node: neo4j node; its 'id', 'person_name_heb' and 'data'
        (JSON text) entries are read
    :param records_list: nested mapping, relation -> record type -> records;
        each record is read through its 'language', 'rosetta', 'fl' and
        'title' entries and is also used to fill the ITEM template
    :param debug: when truthy the markup is printed instead of being written
    :param create_category_pages: accepted but not used by this function
    :param site: passed through to create_wiki_page
    :return: the page content in wiki markup
    """
    wiki_page_name = person_node['id']
    display_name = simple_person_name(person_node['person_name_heb'])
    authority_record = json.loads(person_node['data'])

    born_on = date8_to_heb_date(get_if_exists(authority_record, '046', 0, 'f'))
    died_on = date8_to_heb_date(get_if_exists(authority_record, '046', 0, 'g'))

    born_in = get_if_exists(authority_record, '370', 0, 'a')
    died_in = get_if_exists(authority_record, '370', 0, 'b')

    alias_entries = get_if_exists(authority_record, '400')
    aliases = BR.join(
        simple_person_name(entry['a']) for entry in alias_entries)

    # The address fields are looked up but not rendered anywhere in the page below.
    address = get_if_exists(authority_record, '371', 0, 'a')
    address_place = get_if_exists(authority_record, '371', 0, 'b')
    address_country = get_if_exists(authority_record, '371', 0, 'd')

    occupation = get_if_exists(authority_record, '374', 0, 'a')
    # field 375 holds MALE/FEMALE; it selects the grammatical gender of the book headings
    is_female = get_if_exists(authority_record, '375', 0,
                              'a').lower() == "female"

    portrait_html = ""

    content = "{{DISPLAYTITLE:%s}}\n" % display_name

    # Every section starts with its heading and opening markup; items are appended below.
    audio_section = ["==פריטי שמע==", OPENDIV]
    video_section = ["==פריטי וידאו==", OPENDIV]
    books_by_section = [
        "==ספרים שכתבה==" if is_female else "==ספרים שכתב==", OPENDIV
    ]
    books_about_section = [
        "==ספרים אודותיה==" if is_female else "==ספרים אודותיו==", OPENDIV
    ]
    gallery_cells = ["==גלריית תמונות==", '{| class="wikitable" border="1"']
    gallery_captions = ['|-']
    other_section = ["==אחר==", OPENDIV]

    for relation in records_list:
        records_by_type = records_list[relation]
        for record_type in records_by_type:
            for record in records_by_type[record_type]:
                # getting record type
                if record_type == "other":
                    item_type = "other"
                else:
                    item_type = type_dict.get(record_type.lower())[3]

                # temporary - skip over non-hebrew records
                if (item_type == 'print' and record['language']
                        and record['language'] != 'heb'):
                    continue

                # the 'view' button & thumbnail image only exist for records with a rosetta link
                rosetta_link = record['rosetta'] or ''
                if len(rosetta_link) == 0:
                    view_markup = ''
                    thumb_markup = ''
                else:
                    view_markup = VIEW_ONLINE.format(
                        extract_link(rosetta_link))
                    thumb_markup = generate_thumb(rosetta_link) or ''
                content_item = ITEM.replace('{view}', view_markup) \
                    .replace('{thumb}', thumb_markup) \
                    .format(**record)

                if relation == 'portrait_of':
                    portrait_html = (
                        '<img src="http://rosetta.nli.org.il/delivery/DeliveryManagerServlet?'
                        'dps_pid={}&dps_func=stream"'
                        ' style="max-height: 500px; max-width: 300px;"/>'
                    ).format(record['fl'])

                if item_type == 'photograph':
                    # the gallery stops growing once it holds six captions; it needs an 'fl' value
                    if len(gallery_captions) <= 6 and record.get('fl'):
                        gallery_cells.append(
                            GALLERY_ITEM.format(record['fl']))
                        gallery_captions.append('| {}'.format(
                            record['title']))
                    continue

                if item_type == 'print':
                    if relation == 'subject_of':
                        target = books_about_section
                    else:
                        target = books_by_section
                elif item_type == 'audio':
                    target = audio_section
                elif item_type == 'video':
                    target = video_section
                else:
                    target = other_section
                target.append(content_item)

    # Fill the template: every marker is replaced by the marker followed by its value.
    template = TEMPLATE
    for marker, value in (
        (template_name, display_name),
        (template_birth_date, born_on),
        (template_death_date, died_on),
        (template_birth_place, born_in),
        (template_death_place, died_in),
        (template_other_names, aliases),
        (template_occupation, occupation),
        (template_image_url, portrait_html),
    ):
        template = template.replace(marker, marker + value)

    content += template

    # Notes come from fields 670, 678 and 680, in that order.
    notes = []
    for field in ('670', '678', '680'):
        field_notes = authority_record.get(field)
        if field_notes:
            notes += field_notes

    if notes:
        content += CR
        content += "".join(note['a'] + BR for note in notes
                           if note.get('a') and note.get('a') != "LCN")

    gallery_captions.append('|}')

    content += CR + "".join([
        CR.join(books_by_section) + CLOSEDIV + CR,
        CR.join(books_about_section) + CLOSEDIV + CR,
        CR.join(audio_section) + CLOSEDIV + CR,
        CR.join(video_section) + CLOSEDIV + CR,
        CR.join(gallery_cells + gallery_captions) + CR,
        CR.join(other_section) + CLOSEDIV + CR,
    ])

    if not debug:
        # NOTE: no redirect page is created for the person's name here; the
        # call that did so was disabled in the original code.
        create_wiki_page(site,
                         page_name=wiki_page_name,
                         summary="Created from primo",
                         content=content)
    else:
        print(content)

    return content
Ejemplo n.º 8
0
def create_page_from_dictionary(item_dict,
                                debug=None,
                                create_category_pages=False,
                                site=None):
    """
    Create a wiki page from a dictionary that describes a primo item.

    Unless ``debug`` is truthy the page is written under the item's record id
    and, when the cleaned title is Hebrew, a redirect page named after the
    (length-limited) title is created as well.

    :param item_dict: primo item as a dictionary/json; its 'control', 'display'
        and 'links' entries are read, plus 'sort' and 'browse' when needed
    :param debug: if truthy, print the markup instead of creating the pages
    :param create_category_pages: whether to create empty category pages when encountered
    :param site: passed through to create_wiki_page / create_redirect_wiki_page
    :return: page content in wiki markup
    """
    control = item_dict['control']
    document_id = control['recordid']
    sourcerecordid = control['sourcerecordid']
    originalsourceid = control['originalsourceid']
    display = item_dict['display']

    # Use the main (first) title only; the full title is displayed in another
    # location in the page. Fall back to the sort title when there is none.
    try:
        title = display['title']
        if isinstance(title, list):
            title = title[0]
    except (KeyError, IndexError):
        title = trim(item_dict['sort']['title'])
    item_type = display['type'].lower()

    try:
        # hebrew type as a definite article, e.g. כתב העת
        display_type = type_dict[item_type][1]
        display_type += " "
    except Exception:
        display_type = ""
        print("Unrecognized type '{}'".format(item_type))

    # NOTE(review): this lookup still raises for a type missing from type_dict,
    # even though the lookup above tolerates it - confirm which is intended.
    creation_verb = type_dict[item_type][2]

    # Creators and contributors are handled as one ';'-separated list.
    creators_field = display.get('creator')
    contributor = display.get('contributor')
    if contributor:
        if creators_field:
            creators_field += ';' + contributor
        else:
            creators_field = contributor

    creator = None
    authors_to_id = {}
    if creators_field:
        # 'browse' is optional (see the category handling below), so a missing
        # author index simply leaves the name -> authority id mapping empty.
        author_entries = (item_dict.get('browse') or {}).get('author')
        if author_entries:
            authors_to_id = entries_to_authority_id(
                str_to_list(author_entries))
        names = [
            person_name(authors_to_id, name.strip())
            for name in creators_field.split(";")
        ]
        # dict.fromkeys drops duplicates but keeps the original order, so the
        # generated text is the same on every run (a set gives no such order).
        creator = comma_and(", ".join(dict.fromkeys(names)))

    summary = display.get('lds20')

    comments = str_to_list(display.get('lds05'))
    comments_section = None
    if comments:
        comments_section = CR.join(["* " + comment for comment in comments])

    # handle digital images: thumbnail display + links to digital images
    rosetta_link = item_dict["links"].get("linktorsrc")
    view_online = ''
    if rosetta_link:
        # handling the 'view' button & thumbnail image
        thumb_value = generate_thumb_link(rosetta_link)
        if thumb_value:
            view_online = VIEW_ONLINE.replace('{IE}', thumb_value[0])
            view_online = view_online.replace('{title}', title)
    creationdate = display.get('creationdate')
    ispartof = display.get('ispartof')
    performed_by = str_to_list(display.get('lds35'))  # list

    performed_by_str = None
    if performed_by:
        performed_by_str = ", ".join(
            person_name(authors_to_id, performer)
            for performer in performed_by)

    source = display['source']
    lib_link = display.get('lds21')
    if not lib_link:
        lib_link = item_dict['links']['linktorsrc']
        # Drop whatever precedes the URL. str.find returns -1 when "http" is
        # absent; in that case keep the value as it is instead of slicing it
        # down to its last character.
        url_start = lib_link.find("http")
        if url_start > 0:
            lib_link = lib_link[url_start:]

    # Building page's Wikicode
    content = "{{DISPLAYTITLE:%s}}\n" % title
    content += "{}'''{}''' {}".format(display_type, title, creation_verb)
    if creator:
        content += " על ידי {}".format(creator)

    if creationdate:
        content += " בשנת {}".format(creationdate)
    content += CR
    if summary:
        content += CR + summary + CR

    if view_online:
        content += view_online + CR
    content += "==פרטים כלליים==" + CR
    if performed_by_str:
        content += LIST_ITEM.format("שם מבצע", performed_by_str)
    if ispartof:
        content += LIST_ITEM.format("מתוך", ispartof)
    if comments_section:
        content += comments_section
    content += CR + "==מידע נוסף==" + CR
    content += LIST_ITEM.format("מקור", source)
    content += "* מספר מערכת: [{} {}]\n".format(lib_link, sourcerecordid)
    content += "== קישורים נוספים ==\n"
    alef_link = ALEF_LINK.format(originalsourceid, sourcerecordid)
    content += "* [{} הפריט בקטלוג הספרייה הלאומית]\n".format(alef_link)

    browse = item_dict.get('browse')
    if browse:
        content += handle_categories(browse, create_category_pages)

    if debug:
        print(content)
    else:
        title = clean_title(title)
        if is_hebrew(title):
            title = limit_length(title)
            create_redirect_wiki_page(
                site,
                page_name=clean_title(title),
                redirect_to=document_id,
                summary="Creating redirect page for {}".format(document_id))
        create_wiki_page(site,
                         page_name=document_id,
                         summary="Created from primo",
                         content=content)

    return content