def index_stream(collection_name, profile, shards, after, solr='http://localhost:8983/solr/'):
    """
    Listens to the Wikidata edit stream and updates a collection
    according to the given indexing profile.
    """
    tagger = TaggerFactory(solr)
    indexing_profile = IndexingProfile.load(profile)
    try:
        tagger.create_collection(collection_name,
                                 num_shards=shards,
                                 configset=indexing_profile.solrconfig)
    except CollectionAlreadyExists:
        pass
    if after is not None:
        after = dateutil.parser.parse(after)
    stream = WikidataStreamReader(from_time=after)
    tagger.index_stream(collection_name,
                        stream,
                        indexing_profile,
                        batch_size=50,
                        commit_time=1,
                        delete_excluded=True)
def index_dump(collection_name, filename, profile, shards, skip, solr='http://localhost:8983/solr/'):
    """
    Indexes a Wikidata dump in a new Solr collection with the given name.
    """
    tagger = TaggerFactory(solr)
    indexing_profile = IndexingProfile.load(profile)
    try:
        tagger.create_collection(collection_name,
                                 num_shards=shards,
                                 configset=indexing_profile.solrconfig)
    except CollectionAlreadyExists:
        pass
    dump = WikidataDumpReader(filename)
    tagger.index_stream(collection_name,
                        dump,
                        indexing_profile,
                        batch_size=2000,
                        commit_time=10,
                        delete_excluded=False,
                        skip_docs=skip)
def index_sparql(collection_name, sparql_query_file, profile, shards, solr='http://localhost:8983/solr/'):
    """
    Indexes the results of a SPARQL query which contains an "item" variable
    pointing to the items to index.
    """
    tagger = TaggerFactory(solr)
    indexing_profile = IndexingProfile.load(profile)
    try:
        tagger.create_collection(collection_name,
                                 num_shards=shards,
                                 configset=indexing_profile.solrconfig)
    except CollectionAlreadyExists:
        pass
    with open(sparql_query_file, 'r') as f:
        query = f.read()
    query_results = SparqlReader(query)
    tagger.index_stream(collection_name,
                        query_results,
                        indexing_profile,
                        batch_size=50,
                        commit_time=10,
                        delete_excluded=False)
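# The three indexing helpers above share the same pattern: load an indexing
# profile, create the Solr collection if it does not exist yet, then feed a
# reader (edit stream, dump, or SPARQL results) into TaggerFactory.index_stream.
# A minimal, hedged usage sketch follows; the file names, collection name and
# shard count are placeholders, not values taken from this repository, and
# running it requires a Solr instance at the default endpoint
# (http://localhost:8983/solr/):
#
#   index_dump('wd_sample',
#              'data/sample_wikidata_items.json.bz2',
#              'data/all_items_profile.json',
#              shards=1,
#              skip=0)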
def test_load_indexing_profile(testdir, expected_json):
    indexing_profile = IndexingProfile.load(
        os.path.join(testdir, 'data', 'indexing_profile.json'))
    assert indexing_profile.language == 'en'
    assert indexing_profile.name == 'affiliations'
    assert indexing_profile.restrict_properties == ['P2427', 'P1566', 'P496']
    assert indexing_profile.json() == expected_json
def test_all_items_profile(testdir):
    profile_filename = os.path.join(testdir, 'data/all_items_profile.json')
    profile = IndexingProfile.load(profile_filename)
    type_matcher = TypeMatcherStub()
    dump_filename = os.path.join(testdir, 'data/sample_wikidata_items.json.bz2')
    with WikidataDumpReader(dump_filename) as reader:
        for item in reader:
            assert profile.entity_to_document(item, type_matcher) is not None
def setUpClass(cls):
    cls.testdir = os.path.dirname(os.path.abspath(__file__))
    cls.solr_endpoint = 'http://localhost:8983/solr/'
    cls.tf = TaggerFactory(cls.solr_endpoint)
    # Load dummy profile
    cls.profile = IndexingProfile.load(
        os.path.join(cls.testdir, 'data/all_items_profile.json'))
    # Skip entire test if Solr is not running
    try:
        requests.get(cls.solr_endpoint)
    except requests.exceptions.RequestException:
        raise unittest.SkipTest('Solr is not running')
def setUpClass(cls):
    cls.testdir = os.path.dirname(os.path.abspath(__file__))
    # Load dummy bow
    bow_fname = os.path.join(cls.testdir, 'data/sample_bow.pkl')
    cls.bow = BOWLanguageModel()
    cls.bow.load(bow_fname)
    # Load dummy graph
    graph_fname = os.path.join(cls.testdir, 'data/sample_wikidata_items.npz')
    pagerank_fname = os.path.join(cls.testdir, 'data/sample_wikidata_items.pgrank.npy')
    cls.graph = WikidataGraph()
    cls.graph.load_from_matrix(graph_fname)
    cls.graph.load_pagerank(pagerank_fname)
    # Load dummy profile
    cls.profile = IndexingProfile.load(
        os.path.join(cls.testdir, 'data/all_items_profile.json'))
    # Setup solr index (TODO delete this) and tagger
    cls.tf = TaggerFactory()
    cls.collection_name = 'wd_test_collection'
    try:
        cls.tf.create_collection(cls.collection_name)
    except CollectionAlreadyExists:
        pass
    cls.tf.index_stream(
        cls.collection_name,
        WikidataDumpReader(
            os.path.join(cls.testdir, 'data/sample_wikidata_items.json.bz2')),
        cls.profile)
    cls.tagger = Tagger(cls.collection_name, cls.bow, cls.graph)
    # Load NIF dataset
    cls.nif = NIFCollection.load(
        os.path.join(cls.testdir, 'data/five-affiliations.ttl'))
    cls.classifier = SimpleTagClassifier(cls.tagger,
                                         max_similarity_distance=10,
                                         similarity_smoothing=2)
def setUpClass(cls):
    super(TaggerTest, cls).setUpClass()
    testdir = os.path.dirname(os.path.abspath(__file__))
    # Load dummy bow
    bow_fname = os.path.join(testdir, 'data/sample_bow.pkl')
    cls.bow = BOWLanguageModel()
    cls.bow.load(bow_fname)
    # Load dummy graph
    graph_fname = os.path.join(testdir, 'data/sample_wikidata_items.npz')
    pagerank_fname = os.path.join(testdir, 'data/sample_wikidata_items.pgrank.npy')
    cls.graph = WikidataGraph()
    cls.graph.load_from_matrix(graph_fname)
    cls.graph.load_pagerank(pagerank_fname)
    # Load indexing profile
    cls.profile = IndexingProfile.load(
        os.path.join(testdir, 'data/all_items_profile.json'))
    # Setup solr index
    cls.tf = TaggerFactory()
    cls.collection_name = 'wd_test_collection'
    try:
        cls.tf.delete_collection(cls.collection_name)
    except requests.exceptions.RequestException:
        pass
    cls.tf.create_collection(cls.collection_name)
    cls.tf.index_stream(
        cls.collection_name,
        WikidataDumpReader(
            os.path.join(testdir, 'data/sample_wikidata_items.json.bz2')),
        cls.profile)
    cls.sut = Tagger(cls.collection_name, cls.bow, cls.graph)
def sample_profile(testdir):
    return IndexingProfile.load(
        os.path.join(testdir, 'data', 'indexing_profile.json'))
def fallback_if_unsupported_language(language):
    if IndexingProfile.is_language_supported(language):
        return language
    else:
        return 'en'
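# Hedged usage sketch for the helper above; 'fr' is an illustrative input, not a
# value taken from this repository:
#
#   profile_language = fallback_if_unsupported_language('fr')  # 'fr' if supported, else 'en'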