from SPARQLWrapper import SPARQLWrapper

from data_extraction import map_wd_attribute, map_wd_response
from data_extraction.constants import *
from data_extraction.request_utils import send_http_request
from shared.blocklist import BLOCKLIST
from shared.utils import (
    chunks,
    language_config_to_list,
    setup_logger,
)

# Module-level logger; the log file path points into the "logs" directory that
# sits one level above the directory containing this file.
logger = setup_logger(
    "data_extraction.load_wd_entities",
    Path(__file__).parent.parent.absolute().joinpath(
        "logs", GET_WIKIDATA_ITEMS_LOG_FILENAME
    ),
)

# First field of every language-config entry (presumably the language key;
# confirm against shared.utils.language_config_to_list).
lang_keys = [entry[0] for entry in language_config_to_list()]


def query_artwork_qids(type_name: str, wikidata_id: str) -> List[str]:
    """Extracts all artwork QIDs from the wikidata SPARQL endpoint https://query.wikidata.org/

    Args:
        type_name: type name to extract from, only relevant for console output
        wikidata_id: wikidata qid related to the given type name

    Returns:
        A list of all qids of the provided wikidata_id
예제 #2
0
import datetime
import json
import sys
from pathlib import Path
from typing import Dict, List, Optional

from data_extraction.constants import *
from data_extraction.request_utils import send_http_request
from shared.constants import JSON
from shared.utils import chunks, create_new_path, language_config_to_list, setup_logger, check_state, write_state

# Module-level flag, disabled by default; its consumers are outside this excerpt.
RECOVER_MODE = False

# Module-level logger; the log file path points into the "logs" directory that
# sits one level above the directory containing this file.
logger = setup_logger(
    "data_extraction.get_wikipedia_extracts",
    Path(__file__).parent.parent.absolute().joinpath(
        "logs", GET_WIKIPEDIA_EXTRACS_LOG_FILENAME
    ),
)

# First field of every language-config entry (presumably the language key;
# confirm against shared.utils.language_config_to_list).
lang_keys = [entry[0] for entry in language_config_to_list()]


def get_wikipedia_page_ids(
        items: List[Dict],
        indices: List[int],
        langkey: str,
        timeout: Optional[int] = TIMEOUT,
        sleep_time: Optional[int] = SLEEP_TIME,
        maxlag: Optional[int] = MAX_LAG,
) -> Dict:
    """Function to get the wikipedia page ids from their label referenced in the sitelinks
예제 #3
0
from typing import List, Dict, Set, Iterator, Optional, Callable

from data_extraction import map_wd_attribute
from data_extraction import map_wd_response
from data_extraction.constants import *
from data_extraction.request_utils import send_http_request
from shared.utils import chunks, create_new_path, language_config_to_list, setup_logger
from shared.constants import JSON, CSV
from SPARQLWrapper import SPARQLWrapper

# Development switch, disabled by default; its consumers are outside this excerpt.
DEV = False
# The limit counts chunks (of 50 entries each), not single entries.
DEV_CHUNK_LIMIT = 2

# Module-level logger; the log file path points into the "logs" directory that
# sits one level above the directory containing this file.
logger = setup_logger(
    "data_extraction.get_wikidata_items",
    Path(__file__).parent.parent.absolute().joinpath(
        "logs", GET_WIKIDATA_ITEMS_LOG_FILENAME
    ),
)

# First field of every language-config entry (presumably the language key;
# confirm against shared.utils.language_config_to_list).
lang_keys = [entry[0] for entry in language_config_to_list()]


def query_artwork_qids(type_name: str, wikidata_id: str) -> List[str]:
    """Extracts all artwork QIDs from the wikidata SPARQL endpoint https://query.wikidata.org/

    Args:
        type_name: type name to extract from, only relevant for console output
        wikidata_id: wikidata qid related to the given type name

    Returns:
        A list of all qids of the provided wikidata_id
예제 #4
0
"""Mapping functions to extract information from wikidata JSON responses (especially entity attribtues) to the openArtBrowser data model
"""
import inspect
import re
from pathlib import Path
from typing import Any, Callable, Dict, List

from pywikibot import WbTime

from data_extraction.constants import *
from shared.utils import setup_logger

# Module-level logger; the log file path points into the "logs" directory that
# sits one level above the directory containing this file.
logger = setup_logger(
    "data_extraction.map_wd_attribute",
    Path(__file__).parent.parent.absolute().joinpath(
        "logs", WIKIDATA_MAP_ATTRIBUTE_LOG_FILENAME
    ),
)


def get_attribute_values_with_try_get_func(
    entity_dict: Dict,
    attribute_list: List,
    oab_type: str,
    try_get_func: Callable[[Dict, str, str, str], Any],
) -> Any:
    """Higher order function for map_wd_attribute function to bundle calls in for-loops

    Args:
        result: JSON response from wikidata
        attribute_list: attributes to extract with function
        oab_type: type name which is extracted
예제 #5
0
"""Functions to map a wikidata entity response to an openArtBrowser model
"""

from pathlib import Path
from typing import Dict, List, Optional

import data_extraction.map_wd_attribute as map_wd_attribute
from data_extraction.constants import *
from shared.utils import language_config_to_list, setup_logger

# Module-level logger; the log file path points into the "logs" directory that
# sits one level above the directory containing this file.
logger = setup_logger(
    "data_extraction.map_wd_response",
    Path(__file__).parent.parent.absolute().joinpath(
        "logs", WIKIDATA_MAP_RESPONSE_LOG_FILENAME
    ),
)

# First field of every language-config entry (presumably the language key;
# confirm against shared.utils.language_config_to_list).
lang_keys = [entry[0] for entry in language_config_to_list()]


def try_map_response_to_subject(
    response: Dict, type_name: str, language_keys: Optional[List[str]] = lang_keys,
) -> Dict:
    """Maps the default attributes which every subject has:
    qid, image, label, description, classes, wikipediaLink (including language specific attributes)

    Args:
        response: The wikidata entity which should be mapped to an openArtBrowser entity
        type_name: Type name of the entity
        language_keys: All language keys which should be extracted. Defaults to languageconfig.csv
import json
import sys
from pathlib import Path
from typing import Dict, List, Optional

import requests

from shared.constants import (JSON, ARTWORK, ARTIST, MOVEMENT, PLURAL, ID,
                              VIDEOS, YOUTUBE_VIDEOS_FILE, ETL_STATES,
                              ADD_YOUTUBE_VIDEOS_LOG_FILENAME)
from shared.utils import create_new_path, write_state, check_state, setup_logger

# Module-level logger; the log file path points into the "logs" directory that
# sits one level above the directory containing this file.
logger = setup_logger(
    "data_enhancement.add_youtube_videos",
    Path(__file__).parent.parent.absolute().joinpath(
        "logs", ADD_YOUTUBE_VIDEOS_LOG_FILENAME
    ),
)

# Load the Google developer key from "google_dev_key.txt" in the current
# working directory. A missing key file is tolerated: the key then falls back
# to an empty string.
try:
    # Path.read_text() opens, reads and closes the file, so no file handle is
    # left open (the previous open(...).read() never closed it).
    # NOTE(review): the content is used verbatim, including any trailing
    # newline in the key file - confirm whether it should be stripped.
    GOOGLE_DEV_KEY = Path("google_dev_key.txt").read_text()
except FileNotFoundError:
    GOOGLE_DEV_KEY = ""

# Module-level flag, disabled by default; its consumers are outside this excerpt.
RECOVER_MODE = False


def check_yt_id_valid(id: str) -> bool:
    """Connects to the YT API and checks if the is valid

    Args:
Returns:
    The ranked entities which means the attributes absolute rank and relative rank.
"""
import json
import datetime
from numbers import Number
from typing import List, Dict
from shared.constants import *
from shared.utils import create_new_path, write_state, check_state, setup_logger
from pathlib import Path
import sys

# Module-level logger; the log file path points into the "logs" directory that
# sits one level above the directory containing this file.
logger = setup_logger(
    "data_enhancement.ranking",
    Path(__file__).parent.parent.absolute().joinpath(
        "logs", RANKING_LOG_FILENAME
    ),
)

# Module-level flag, disabled by default; its consumers are outside this excerpt.
RECOVER_MODE = False


def rank_artworks(
        artworks: List[Dict],
        ignore_keys: List[str] = [ABSOLUTE_RANK, RELATIVE_RANK]) -> List[Dict]:
    """Ranks a list of artwork entities (JSON-Objects)

    Args:
        artworks: List of artworks
        ignore_keys: Keys within the artwork entities which have to be ignored. Defaults to [ABSOLUTE_RANK, RELATIVE_RANK].

    Returns: