Exemple #1
0
def watch_registry_harvest_task():
    """Queue a harvest task for every activated registry that needs one.

    Runs every WATCH_REGISTRY_HARVEST_RATE seconds. Each activated registry
    that is flagged for harvest and not already queued gets a ``harvest_task``
    dispatched with its id, is marked as queued and is passed to
    ``oai_registry_api.upsert``.

    Any exception is logged instead of being propagated, and the task always
    re-schedules itself, so one failing run does not stop the periodic watch.
    """
    try:
        logger.info('START watching registries.')
        registries = oai_registry_api.get_all_activated_registry()
        # We launch the background task for each registry
        for registry in registries:
            # If we need to harvest and a task doesn't already exist for this registry.
            if registry.harvest and not registry.is_queued:
                harvest_task.apply_async((str(registry.id), ))
                registry.is_queued = True
                oai_registry_api.upsert(registry)
                # Format the name as text: encoding it to bytes first would
                # log the "b'...'" repr on Python 3.
                logger.info(
                    'Registry {0} has been queued and will be harvested.'.
                    format(registry.name))
        logger.info('FINISH watching registries.')
    except Exception as e:
        # Python 3 exceptions have no ``message`` attribute; reading it here
        # would raise AttributeError from inside the handler.
        logger.error(
            'ERROR : Error while watching new registries to harvest: {0}'.
            format(str(e)))
    finally:
        # Periodic call every WATCH_REGISTRY_HARVEST_RATE seconds
        watch_registry_harvest_task.apply_async(
            countdown=WATCH_REGISTRY_HARVEST_RATE)
Exemple #2
0
def watch_registry_harvest_task():
    """Queue harvest tasks for activated registries that are waiting for one.

    A registry is queued when it is flagged for harvest and is not already
    marked as queued. Errors are logged rather than raised, and the task
    re-schedules itself WATCH_REGISTRY_HARVEST_RATE seconds later in every
    case.
    """
    from core_oaipmh_harvester_app.components.oai_registry import (
        api as oai_registry_api, )

    try:
        logger.info("START watching registries.")
        for candidate in oai_registry_api.get_all_activated_registry():
            # Skip registries that do not need harvesting or that already
            # have a task waiting.
            if not candidate.harvest or candidate.is_queued:
                continue

            # Dispatch the background task, then record that it is queued.
            harvest_task.apply_async((str(candidate.id), ))
            candidate.is_queued = True
            oai_registry_api.upsert(candidate)
            logger.info(
                f"Registry {candidate.name} has been queued and will be "
                f"harvested.")
        logger.info("FINISH watching registries.")
    except Exception as error:
        logger.error(
            f"ERROR : Error while watching new registries to harvest: {str(error)}"
        )
    finally:
        # Re-arm the watcher so it runs again after the configured delay.
        watch_registry_harvest_task.apply_async(
            countdown=WATCH_REGISTRY_HARVEST_RATE)
Exemple #3
0
def get_data_source_list_oaipmh(request):
    """Render the list of OAI-PMH data sources for a query (Ajax view).

    Args:
        request: HTTP request; the query id is read from its ``id_query``
            GET parameter and the query is fetched on behalf of
            ``request.user``.

    Returns:
        The rendered ``list-content.html`` template on success,
        HttpResponseForbidden when an AccessControlError is raised, and
        HttpResponseBadRequest when ``id_query`` is missing or any other
        exception occurs.

    """
    try:
        id_query = request.GET.get("id_query", None)

        # Without a query id there is nothing to load.
        if id_query is None:
            return HttpResponseBadRequest(
                "Error during loading data sources from oaipmh search.")

        query = api_query.get_by_id(id_query, request.user)
        registries = oai_registry_api.get_all_activated_registry(
            order_by_field="name")

        instances = []
        for registry in registries:
            # A registry is checked when one of the query's data sources
            # references it through its "instance_id" query option.
            matches = [
                "instance_id" in source.query_options
                and source.query_options["instance_id"] == str(registry.id)
                for source in query.data_sources
            ]
            instances.append({
                "instance_id": registry.id,
                "instance_name": registry.name,
                "is_checked": any(matches),
            })

        # Here, data sources are instances.
        context = {}
        # NOTE(review): updating a dict from the request object looks
        # unintended; kept as-is to preserve behavior - confirm.
        context.update(request)
        context.update({"instances": instances})
        return render(
            request,
            "core_explore_oaipmh_app/user/data_sources/list-content.html",
            context,
        )
    except AccessControlError:
        return HttpResponseForbidden()
    except Exception as error:
        return HttpResponseBadRequest(
            "Error during loading data sources from oaipmh search: %s" %
            escape(str(error)))
Exemple #4
0
 def __init__(self):
     """Populate the choice lists of the form from the activated registries.

     ``metadata_prefix`` and ``set`` are assigned the list while it only
     holds the "Pick one" placeholder; ``data_provider`` is assigned it
     after one "<id>|<url>" entry per activated registry has been added.
     NOTE(review): whether the first two fields see the later additions
     depends on how the field stores its choices - confirm.
     """
     super(RequestForm, self).__init__()
     self.data_providers = [('0', 'Pick one')]
     self.fields['metadata_prefix'].choices = self.data_providers
     self.fields['set'].choices = self.data_providers
     # Value is "<registry id>|<registry url>", label is the registry name.
     self.data_providers.extend(
         ('|'.join((str(registry.id), registry.url)), str(registry.name))
         for registry in oai_registry_api.get_all_activated_registry()
     )
     self.fields['data_provider'].choices = self.data_providers
    def build_query(self, query, templates, registries):
        """Build the raw OAI-PMH query.

        Args:

            query: Query handed to ``OaiPmhQueryBuilder``.
            templates: List of template dicts (each with an ``"id"`` key),
                or a JSON string encoding such a list.
            registries: List of registry ids, or a JSON string encoding
                such a list. When empty, all activated registries are used.

        Returns:

            The raw query produced by the query builder.
        """
        builder = OaiPmhQueryBuilder(query, self.sub_document_root)

        # Both arguments may arrive JSON-encoded.
        if type(templates) is str:
            templates = json.loads(templates)

        if type(registries) is str:
            registries = json.loads(registries)

        # Restrict the requested registries to the activated ones; with no
        # explicit request, fall back to every activated registry.
        activated_ids = (
            oai_registry_api.get_all_activated_registry().values_list("id"))
        if len(registries) > 0:
            selected_registries = []
            for registry_id in registries:
                if ObjectId(registry_id) in activated_ids:
                    selected_registries.append(str(registry_id))
        else:
            selected_registries = activated_ids

        if len(templates) > 0:
            template_ids = [template["id"] for template in templates]
            # Metadata formats used by the selected registries.
            metadata_formats = (
                oai_harvester_metadata_format_api.get_all_by_list_registry_ids(
                    selected_registries))
            # Keep only the formats bound to one of the given templates.
            metadata_format_ids = []
            for metadata_format in metadata_formats:
                if metadata_format.template is None:
                    continue
                if str(metadata_format.template.id) in template_ids:
                    metadata_format_ids.append(str(metadata_format.id))
            builder.add_list_metadata_formats_criteria(metadata_format_ids)
        else:
            # No template filter: restrict by registry only.
            builder.add_list_registries_criteria(selected_registries)

        # Deleted records are never part of the result.
        builder.add_not_deleted_criteria()
        return builder.get_raw_query()
Exemple #6
0
    def test_get_all_contains_only_oai_registry(self, mock_get_all):
        """Run the shared get-all check on ``get_all_activated_registry``.

        Args:
            mock_get_all: Mock handed through to ``_generic_get_all_test``
                (presumably injected by a patch decorator that is not
                visible here - TODO confirm).

        Returns:
            None.

        """
        activated_registries = registry_api.get_all_activated_registry()
        _generic_get_all_test(self, mock_get_all, activated_registries)
Exemple #7
0
    def __init__(self):
        """Fill the form's choice fields from the activated registries.

        ``metadata_prefix`` and ``set`` are assigned the list while it only
        contains the "Pick one" placeholder; ``data_provider`` is assigned
        it after one "<id>|<url>" entry per activated registry was added.
        """
        super(RequestForm, self).__init__()

        choices = [("0", "Pick one")]

        self.fields["metadata_prefix"].choices = choices
        self.fields["set"].choices = choices

        # Value is "<registry id>|<registry url>", label is the registry name.
        choices.extend(
            ("{!s}|{!s}".format(registry.id, registry.url), str(registry.name))
            for registry in oai_registry_api.get_all_activated_registry()
        )

        self.fields["data_provider"].choices = choices
Exemple #8
0
def init_harvest():
    """Reset the harvest state and start the registry watcher.

    Revokes all scheduled tasks, clears the ``is_queued`` flag of every
    activated registry and then dispatches ``watch_registry_harvest_task``.
    """
    # Start from a clean slate: nothing left scheduled.
    _revoke_all_scheduled_tasks()

    # A server reboot after an issue may leave registries flagged as queued,
    # so reset the flag on every activated registry.
    for activated_registry in oai_registry_api.get_all_activated_registry():
        activated_registry.is_queued = False
        oai_registry_api.upsert(activated_registry)

    # Kick off the registry watcher.
    watch_registry_harvest_task.apply_async()
def get_data_source_list_oaipmh(request):
    """Ajax method to fill the list of data sources.

    Args:
        request: HTTP request; the query id is read from its ``id_query``
            GET parameter.

    Returns:
        The rendered ``list-content.html`` template on success, or an
        HttpResponseBadRequest when ``id_query`` is missing or any exception
        is raised while building the list.

    """
    # Function-scope import (aliased to avoid shadowing any module-level
    # ``escape``) so this block does not rely on the file's import list.
    from html import escape as html_escape

    try:
        id_query = request.GET.get('id_query', None)

        if id_query is not None:
            # Get query from id
            query = api_query.get_by_id(id_query)
            instance_list = oai_registry_api.get_all_activated_registry(order_by_field='name')
            item_list = []
            url_instance = request.build_absolute_uri(reverse("core_explore_oaipmh_rest_execute_query"))
            for instance_item in instance_list:
                checked = False
                # compare instance with existing data source in query
                # in order to know if they have to be checked
                for data_source_item in query.data_sources:
                    if data_source_item.name == instance_item.name\
                       and data_source_item.url_query == url_instance:
                        checked = True
                        break

                # update the result item list for the context
                item_list.append({'instance_id': instance_item.id,
                                  'instance_name': instance_item.name,
                                  'is_checked': checked})

            # Here, data sources are instances
            context_params = dict()
            context_params['instances'] = item_list

            # return context
            context = {}
            # NOTE(review): updating a dict from the request object looks
            # unintended; kept as-is to preserve behavior - confirm.
            context.update(request)
            context.update(context_params)
            return render(request, 'core_explore_oaipmh_app/user/data_sources/list-content.html', context)
        else:
            return HttpResponseBadRequest("Error during loading data sources from oaipmh search.")
    except Exception as e:
        # Python 3 exceptions have no ``message`` attribute, so use str(e).
        # The text is escaped because it is returned in an HTML response.
        return HttpResponseBadRequest(
            "Error during loading data sources from oaipmh search: %s" % html_escape(str(e)))