def main() -> None:
    """
    Aggregate the stored signals and index the results.

    All documents are read from the signals collection and from the
    aggregated signals collection. Three sets of aggregations are then
    computed and indexed into the aggregated collection, keyed on 'query',
    'search_timestamp' and 'filters' respectively (each together with
    'handler'). Afterwards every document in the signals collection is
    deleted.
    """
    utils.setup_logger(__file__)

    logging.info('aggregate_signals.py started')

    raw_collection = SolrCollection(os.getenv('SOLR_COLLECTION_SIGNALS'))
    aggregated_collection = SolrCollection(
        os.getenv('SOLR_COLLECTION_SIGNALS_AGGREGATED'))

    signals = raw_collection.select_all_documents()
    known_aggregations = aggregated_collection.select_all_documents()

    # Aggregations on the 'query' field.
    by_query = aggregate_fields(signals, ['query', 'handler'])
    aggregated_collection.index_documents(
        get_aggregations(by_query, known_aggregations, 'query'))

    # Aggregations on the 'search_timestamp' field; the signals are first
    # passed through reduce_date_field_to_hours().
    hourly_signals = reduce_date_field_to_hours(signals, 'search_timestamp')
    by_timestamp = aggregate_fields(hourly_signals,
                                    ['search_timestamp', 'handler'])
    aggregated_collection.index_documents(
        get_aggregations(by_timestamp, known_aggregations,
                         'search_timestamp'))

    # Aggregations on the 'filters' field; the signals are first passed
    # through preprocess_filters().
    filter_signals = preprocess_filters(signals)
    by_filters = aggregate_fields(filter_signals, ['filters', 'handler'])
    aggregated_collection.index_documents(
        get_aggregations(by_filters, known_aggregations, 'filters'))

    # Remove every document from the signals collection.
    raw_collection.delete_documents('*:*')

    logging.info('aggregate_signals.py finished')
def main():
    """
    Collect relation updates for every configured object type and index them.

    The 'property_to_relation' resource maps an object type to a mapping
    holding a 'source' and a list of 'relations'. For each relation the
    documents of the relation's type that have a value in the relation's
    'match' field are selected from the search collection, and the updates
    returned by get_relation_updates() are gathered. All gathered updates are
    indexed in one call at the end.
    """
    utils.setup_logger(__file__)

    logging.info('update_relations_with_object_property.py -- starting')

    search_collection = SolrCollection(os.getenv('SOLR_COLLECTION_SEARCH'))
    property_to_relation = utils.load_resource('property_to_relation')

    pending_updates = []

    for object_type, mapping in property_to_relation.items():
        uri_to_source = get_object_uri_to_source_mapping(
            search_collection, object_type, mapping['source'])

        for relation in mapping['relations']:
            relation_type = relation['type']
            match_field = relation['match']

            # Only documents of the relation type that have any value in the
            # match field.
            query = 'sys_type:{0} AND {1}:[* TO *]'.format(relation_type,
                                                           match_field)
            relation_objects = search_collection.select_all_documents(
                query, ['sys_uri', match_field], id_field='sys_id')

            pending_updates.extend(
                get_relation_updates(relation, mapping, uri_to_source,
                                     relation_objects))

    search_collection.index_documents(pending_updates)

    logging.info('update_relations_with_object_property.py -- finished')
Beispiel #3
0
def main() -> None:
    """
    Delete signals older than a configurable number of days.

    Command line arguments:
        --number_of_days: non-negative integer (default 30); signals whose
            'search_timestamp' lies at or before NOW minus this many days are
            deleted.

    The number of matching signals is logged before they are deleted from the
    collection named by the SOLR_COLLECTION_SIGNALS environment variable.
    A negative --number_of_days makes the argument parser exit with a usage
    error before the collection is touched.
    """
    utils.setup_logger(__file__)

    parser = argparse.ArgumentParser(description='Deletes old donl signals')
    parser.add_argument('--number_of_days',
                        type=int,
                        default=30,
                        help='Specify the number of days after which signals '
                        'are considered old')

    input_arguments = vars(parser.parse_args())
    number_of_days = input_arguments['number_of_days']

    # A negative value would be rendered as e.g. 'NOW--5DAYS' in the query
    # below, which is not the 'NOW-<n>DAYS' expression this script intends to
    # send; reject it up front with a clear usage error.
    if number_of_days < 0:
        parser.error('--number_of_days must not be negative')

    logging.info('rotate_signals.py started')

    collection = SolrCollection(os.getenv('SOLR_COLLECTION_SIGNALS'))

    old_signals_query = 'search_timestamp:[* TO NOW-{0}DAYS]'.format(
        number_of_days)

    # Lazy %-style arguments; the rendered message is unchanged.
    logging.info('deleting %s signals that are older than %s days',
                 collection.document_count(old_signals_query),
                 number_of_days)

    collection.delete_documents(old_signals_query)

    logging.info('rotate_signals.py finished')
Beispiel #4
0
def main():
    """
    Run every relation update step against the search collection.

    The steps run in a fixed order: reverse relations, relations, authority
    kind and popularity. After each step the index changes are committed by
    indexing an empty document list with commit=True.
    """
    utils.setup_logger(__file__)

    logging.info('generate_relations.py -- starting')

    collection = SolrCollection(os.getenv('SOLR_COLLECTION_SEARCH'))

    update_steps = (
        update_reverse_relations,
        update_relations,
        update_authority_kind,
        update_popularity,
    )

    for run_step in update_steps:
        run_step(collection)

        logging.info('committing index changes')
        collection.index_documents([], commit=True)

    logging.info('generate_relations.py -- finished')
def main() -> None:
    """
    Update the vocabularies and taxonomies listed in the 'lists' resource.

    The resource is loaded once; its 'vocabularies' entry is handed to
    update_vocabularies() and its 'taxonomies' entry to update_taxonomies(),
    in that order.
    """
    utils.setup_logger(__file__)

    logging.info('list_downloader.py started')

    list_config = utils.load_resource('lists')

    update_vocabularies(list_config['vocabularies'])
    update_taxonomies(list_config['taxonomies'])

    logging.info('list_downloader.py finished')
def _parse_bool_flag(value: str) -> bool:
    """
    Convert a command line string into a boolean.

    Accepts (case-insensitively) '1', 'true', 't', 'yes', 'y', 'on' as True
    and '', '0', 'false', 'f', 'no', 'n', 'off' as False.

    Raises:
        argparse.ArgumentTypeError: if the value is not one of the above.
    """
    normalized = value.strip().lower()

    if normalized in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True

    if normalized in ('', '0', 'false', 'f', 'no', 'n', 'off'):
        return False

    raise argparse.ArgumentTypeError(
        'expected a boolean value, got {0!r}'.format(value))


def main() -> None:
    """
    Manage one Solr managed resource for one collection.

    Command line arguments:
        --collection: required; one of the names returned by
            solr_collections().
        --resource: required; selects which manage_* function is invoked
            with the collection.
        --reload: optional; a bare '--reload' or '--reload <truthy value>'
            reloads the collection afterwards, '--reload false' (or any other
            accepted falsy value) does not. Defaults to no reload.
    """
    resources = {
        'stopwords_nl': manage_stopwords_nl,
        'stopwords_en': manage_stopwords_en,
        'labels_nl': manage_label_synonyms_nl,
        'labels_en': manage_label_synonyms_en,
        'uri_synonyms': manage_uri_synonyms,
        'hierarchy_theme': manage_hierarchy_theme
    }

    utils.setup_logger(__file__)

    logging.info('managed_resource.py -- starting')

    parser = argparse.ArgumentParser(description='Maintain the Solr managed '
                                     'resources.')
    parser.add_argument('--collection',
                        type=str,
                        required=True,
                        choices=solr_collections(),
                        help='Which collection to manage the resource for')
    parser.add_argument('--resource',
                        type=str,
                        choices=list(resources),
                        help='Which resource to manage',
                        required=True)
    # type=bool would turn every non-empty string, including 'false' and
    # '0', into True. An explicit parser makes '--reload false' behave as
    # written while a bare '--reload' still yields the const value True.
    parser.add_argument('--reload',
                        type=_parse_bool_flag,
                        nargs='?',
                        default=False,
                        const=True,
                        help='To reload the collection afterwards')

    input_arguments = vars(parser.parse_args())
    collection = SolrCollection(input_arguments['collection'])

    # argparse has already restricted --resource to the keys of `resources`,
    # so direct indexing cannot miss.
    manage_resource = resources[input_arguments['resource']]
    manage_resource(collection)

    if input_arguments['reload']:
        logging.info('reloading Solr collection')
        collection.reload()

    logging.info('managed_resource.py -- finished')
def main() -> None:
    """
    Reload the Solr collection given on the command line.

    Command line arguments:
        --collection: required; one of the names returned by
            solr_collections().
    """
    utils.setup_logger(__file__)

    argument_parser = argparse.ArgumentParser(
        description='Reloads a Solr collection')
    argument_parser.add_argument(
        '--collection',
        type=str,
        required=True,
        choices=solr_collections(),
        help='Which collection to reload')

    collection_name = vars(argument_parser.parse_args())['collection']

    logging.info('reload_collection.py -- starting')
    logging.info(' > collection: %s', collection_name)

    SolrCollection(collection_name).reload()

    logging.info('reload_collection.py -- finished')
def main() -> None:
    """
    Rebuild the suggester collection from the search collection.

    Steps, all indexed without committing until the end:
        1. Delete every document from the suggester collection.
        2. Index title suggestions per document type, using each type's
           'mapping' from the 'suggestions' resource.
        3. Index user defined synonym suggestions for the document types that
           configure 'user_defined_synonyms'.
        4. Index context suggestions for every relation of every document
           type.
        5. Index theme suggestions in the context of 'dataset'.
        6. Commit and build the Solr suggester ('build_suggest').
    """
    utils.setup_logger(__file__)

    logging.info('generate_suggestions.py -- starting')

    suggest = SolrCollection(os.getenv('SOLR_COLLECTION_SUGGESTER'))
    search = SolrCollection(os.getenv('SOLR_COLLECTION_SEARCH'))

    logging.info('clearing suggestions')
    suggest.delete_documents('*:*', commit=False)

    relation_counts = search.get_facet_counts('relation')

    communities = search.select_all_documents(
        fq='sys_type:community',
        fl=['sys_uri', 'sys_name'],
        id_field='sys_id'
    )

    community_uri_to_name = {community['sys_uri']: community['sys_name']
                             for community in communities}

    suggestion_types = utils.load_resource('suggestions')
    doc_suggestions = {doc_type: get_doc_suggestions(
        search, doc_type, config['mapping'], relation_counts,
        community_uri_to_name)
        for doc_type, config in suggestion_types.items()}

    logging.info('adding title suggestions:')

    for doc_type, doc_type_suggestions in doc_suggestions.items():
        suggest.index_documents(doc_type_suggestions, commit=False)
        logging.info(' titles: %s of type %s',
                     len(doc_type_suggestions), doc_type)

    user_defined_synonym_suggestions = {doc_type: get_doc_suggestions(
        search, doc_type, config['user_defined_synonyms'], relation_counts,
        community_uri_to_name, 'user_defined_synonyms:[* TO *]')
        for doc_type, config in suggestion_types.items()
        if 'user_defined_synonyms' in config}

    logging.info('adding user defined synonym suggestions:')

    for doc_type, doc_type_suggestions in \
            user_defined_synonym_suggestions.items():
        suggest.index_documents(doc_type_suggestions, commit=False)
        logging.info(' user defined synonyms: %s of type %s',
                     len(doc_type_suggestions), doc_type)

    # The relation loop belongs inside the inner dict. With both `for`
    # clauses on the outer comprehension, every relation of a doc_type would
    # replace the previous {relation: ...} value, so only the last relation's
    # suggestions would survive and be indexed.
    context_suggestions = {
        doc_type: {
            relation: get_suggestions(search, doc_type, relation,
                                      suggestion_types[relation]['mapping'],
                                      community_uri_to_name)
            for relation in config['relations']
        }
        for doc_type, config in suggestion_types.items()
    }

    logging.info('adding context suggestions:')

    for doc_type, relations in context_suggestions.items():
        for relation, suggestions in relations.items():
            suggest.index_documents(suggestions, commit=False)
            logging.info(' titles: %s of type %s in context of %s',
                         len(suggestions), relation, doc_type)

    logging.info('adding theme suggestions:')
    theme_suggestions = get_theme_suggestions(search, 'dataset')
    suggest.index_documents(theme_suggestions, commit=False)
    logging.info(' themes: %s in context of %s',
                 len(theme_suggestions), 'dataset')

    # Everything above was indexed with commit=False; commit once here.
    logging.info('committing changes to index')
    suggest.index_documents([], commit=True)

    logging.info('building Solr suggester')
    suggest.build_suggestions('build_suggest')

    logging.info('generate_suggestions.py -- finished')