Exemple #1
0
import pickle
from wikidatasets.processFunctions import get_subclasses, query_wikidata_dump, build_dataset

# Build the "animals" dataset from a Wikidata JSON dump.
# Change the 4 following values to match your installation.
path = '../animals/'  # this will contain the files output through the process
dump_path = 'latest-all.json.bz2'  # path to the bz2 dump file
n_lines = 56208653  # this can be an upper bound
# Q16521: group of one or more organism(s), which a taxonomist adjudges to
# be a unit
test_entities = get_subclasses('Q16521')

query_wikidata_dump(dump_path,
                    path,
                    n_lines,
                    test_entities=test_entities,
                    collect_labels=False)

# Load the labels through a context manager so the file handle is closed
# deterministically instead of being left to the garbage collector.
# NOTE(review): collect_labels=False above, so labels.pkl presumably has to
# exist in `path` already -- confirm against the wikidatasets documentation.
with open(path + 'labels.pkl', 'rb') as labels_file:
    labels = pickle.load(labels_file)
build_dataset(path, labels)
Exemple #2
0
import pickle
from wikidatasets.processFunctions import get_subclasses, query_wikidata_dump, build_dataset

# Build the "films" dataset from a Wikidata JSON dump.
# Change the 4 following values to match your installation.
path = '../films/'  # this will contain the files output through the process
dump_path = 'latest-all.json.bz2'  # path to the bz2 dump file
n_lines = 81933324  # this can be an upper bound
# Q11424: sequence of images that give the impression of movement
test_entities = get_subclasses('Q11424')

query_wikidata_dump(dump_path,
                    path,
                    n_lines,
                    test_entities=test_entities,
                    collect_labels=False)

# Load the labels through a context manager so the file handle is closed
# deterministically instead of being left to the garbage collector.
# NOTE(review): collect_labels=False above, so labels.pkl presumably has to
# exist in `path` already -- confirm against the wikidatasets documentation.
with open(path + 'labels.pkl', 'rb') as labels_file:
    labels = pickle.load(labels_file)
build_dataset(path, labels, dump_date='April 15, 2020')
Exemple #3
0
import pickle
from wikidatasets.processFunctions import get_subclasses, query_wikidata_dump, build_dataset

# Build the "companies" dataset from a Wikidata JSON dump.
# Change the 4 following values to match your installation.
path = '../companies/'  # this will contain the files output through the process
dump_path = 'latest-all.json.bz2'  # path to the bz2 dump file
n_lines = 56208653  # this can be an upper bound
# Q4830453: organization involved in commercial, industrial, or professional
# activity
test_entities = get_subclasses('Q4830453')

query_wikidata_dump(dump_path,
                    path,
                    n_lines,
                    test_entities=test_entities,
                    collect_labels=False)

# Load the labels through a context manager so the file handle is closed
# deterministically instead of being left to the garbage collector.
# NOTE(review): collect_labels=False above, so labels.pkl presumably has to
# exist in `path` already -- confirm against the wikidatasets documentation.
with open(path + 'labels.pkl', 'rb') as labels_file:
    labels = pickle.load(labels_file)
build_dataset(path, labels)
Exemple #4
0
import pickle
from wikidatasets.processFunctions import get_subclasses, query_wikidata_dump, build_dataset

# Build the "humans" dataset from a Wikidata JSON dump.
# Change the 4 following values to match your installation.
path = '../humans/'  # this will contain the files output through the process
dump_path = 'latest-all.json.bz2'  # path to the bz2 dump file
n_lines = 81933324  # this can be an upper bound
# Q5: common name of Homo sapiens, unique extant species of the genus Homo
test_entities = get_subclasses('Q5')

query_wikidata_dump(dump_path,
                    path,
                    n_lines,
                    test_entities=test_entities,
                    collect_labels=False)

# Load the labels through a context manager so the file handle is closed
# deterministically instead of being left to the garbage collector.
# NOTE(review): collect_labels=False above, so labels.pkl presumably has to
# exist in `path` already -- confirm against the wikidatasets documentation.
with open(path + 'labels.pkl', 'rb') as labels_file:
    labels = pickle.load(labels_file)
build_dataset(path, labels, dump_date='April 15, 2020')