Beispiel #1
0
 def moderate_change_apply(self, change):
     """Interactively ask whether ``change`` should be applied.

     A summary of the change is printed and the operator is prompted on
     stdin.  An empty reply, or one starting with "y"/"Y", counts as yes.

     Returns True when the change is to be applied.  Otherwise the operator
     is asked whether changes to ``change.ref`` should be masked: on yes a
     changemask entity is created and committed, on no a warning is printed
     followed by a five second pause.  Nothing is returned explicitly in
     either of those cases.
     """
     def confirmed(question):
         # Empty input falls back to the default answer "y".
         reply = raw_input(question).strip() or "y"
         return reply[0].lower() == 'y'

     print("Change summary:")
     self.print_changes([change])
     print("")
     apply_it = confirmed("Do you want to apply this change? [Y/n] ")
     print("")
     if apply_it:
         return True

     print("")
     mask_it = confirmed("Do you want to mask changes to this ref? [Y/n] ")
     print("")
     if not mask_it:
         print("Warning: Not setting a mask after not applying changes may lead to conflicts.")
         import time
         time.sleep(5)
         print("")
         return

     # Record a mask for this ref.
     from ckan.model.changeset import ChangemaskRegister, Session
     changemasks = ChangemaskRegister()
     new_mask = changemasks.create_entity(change.ref)
     Session.add(new_mask)
     Session.commit()
     print("Mask has been set for ref: %s" % change.ref)
     print("")
Beispiel #2
0
def get_or_create_doi(package_id):
    '''Create or retrieve the unique identifier for this package_id.

    If ``get_doi`` already knows a DOI for the package it is returned as is.
    Otherwise random identifiers of the form ``<prefix>/<7-digit number>``
    are drawn until one is found that is neither in the local DOI table nor
    known to the DataCite service; that one is stored and committed.

    :param package_id: id of the package that needs a DOI
    :returns: the existing or the newly committed DOI record

    '''
    # DOI names always separate prefix and suffix with "/", so join with
    # posixpath instead of the platform-dependent os.path (which would use a
    # backslash on Windows).
    import posixpath

    datacite_api = DOIDataCiteAPI()

    doi = get_doi(package_id)
    if doi is not None:
        return doi

    while True:
        identifier = posixpath.join(
            get_prefix(), u'{0:07}'.format(random.randint(1, 100000)))

        # Check this identifier doesn't exist in the table
        if Session.query(DOI).filter(DOI.identifier == identifier).count():
            continue

        # And check against the datacite service
        try:
            datacite_doi = datacite_api.get(identifier)
        except HTTPError:
            # An HTTP error from the lookup is treated as "identifier not
            # registered", so the candidate is accepted.
            pass
        else:
            if datacite_doi.text:
                # DataCite returned content for this identifier: draw again
                continue

        doi = DOI(package_id=package_id, identifier=identifier)
        Session.add(doi)
        Session.commit()
        return doi
 def create(cls, term, label, lang):
     """Build a ``cls`` instance from the given values and flush it.

     The new object is added to the session and flushed (not committed),
     then returned.
     """
     field_values = {'term': term, 'label': label, 'lang': lang}
     new_obj = cls(**field_values)
     Session.add(new_obj)
     Session.flush()
     return new_obj
    def test_auth_publisher_profile_different_publisher(self):
        """Check what a publisher user may do with another publisher's source."""
        # Harvest source owned by publisher 1
        foreign_source = HarvestSource(url=u'http://test-source.com', type='ckan',
                                       publisher_id=self.publisher1.id)
        Session.add(foreign_source)
        Session.commit()

        # Every request below is made as the publisher 2 user
        env = {'REMOTE_USER': self.publisher2_user.name.encode('utf8')}

        # List (publishers can see the sources list)
        listing = self.app.get('/harvest', extra_environ=env)
        assert 'Harvesting Sources' in listing
        # Create
        new_form = self.app.get('/harvest/new', extra_environ=env)
        assert 'New harvest source' in new_form
        assert 'publisher_id' in new_form

        # Read, edit and refresh of a source from another publisher must be
        # answered with 401
        refused = 401
        for url_template in ('/harvest/%s', '/harvest/edit/%s', '/harvest/refresh/%s'):
            self.app.get(url_template % foreign_source.id,
                         status=refused, extra_environ=env)
Beispiel #5
0
 def moderate_change_apply(self, change):
     """Prompt the operator about applying ``change``.

     Prints the change summary, then asks on stdin whether to apply it
     (empty input means yes).  Returns True for yes.  For no, a second
     prompt offers to mask future changes to ``change.ref``; accepting
     creates and commits a changemask, declining prints a warning and
     sleeps for five seconds.  The no-path has no explicit return value.
     """
     print("Change summary:")
     self.print_changes([change])
     print("")
     first_reply = raw_input("Do you want to apply this change? [Y/n] ").strip() or "y"
     wants_apply = first_reply[0].lower() == 'y'
     print("")
     if wants_apply:
         return True

     print("")
     second_reply = raw_input("Do you want to mask changes to this ref? [Y/n] ").strip() or "y"
     wants_mask = second_reply[0].lower() == 'y'
     print("")
     if wants_mask:
         from ckan.model.changeset import ChangemaskRegister, Session
         mask = ChangemaskRegister().create_entity(change.ref)
         Session.add(mask)
         Session.commit()
         print("Mask has been set for ref: %s" % change.ref)
     else:
         print("Warning: Not setting a mask after not applying changes may lead to conflicts.")
         import time
         time.sleep(5)
     # Both branches end with a blank line.
     print("")
 def _refresh_harvest_objects(self, harvest_object, package_id):
     """
     Perform harvester housekeeping:
         - Flag the harvest objects that reference this package as not current
         - Set a reference to the package in the harvest object
         - Flag it as current
         - And save the changes

     :param harvest_object: the harvest object to mark as the current one
     :param package_id: id of the package the harvest object belongs to
     """
     # Flag every harvest object row with this package_id as not current
     from ckanext.harvest.model import harvest_object_table
     u = update(harvest_object_table) \
         .where(harvest_object_table.c.package_id == bindparam('pkg_id')) \
         .values(current=False)
     Session.execute(u, params={'pkg_id': package_id})
     Session.commit()
     # Refresh current object from session, otherwise the
     # import paster command fails
     # (Copied from the Gemini harvester--not sure if necessary)
     Session.remove()
     Session.add(harvest_object)
     Session.refresh(harvest_object)
     # Set reference to package in the HarvestObject and flag it as
     # the current one. An already set package_id is left untouched.
     if not harvest_object.package_id:
         harvest_object.package_id = package_id
     harvest_object.current = True
     harvest_object.save()
Beispiel #7
0
    def handle_submit(self, id):
        """Process a submitted DOI request for the dataset ``id``.

        The request parameters are parsed into a dict, every address listed
        in ``ckanext.ands.support_emails`` is mailed, a ``DoiRequest`` row is
        committed and the user is redirected to the dataset page with a
        success flash message.
        """
        raw_params = parse_params(request.params)
        data = clean_dict(dict_fns.unflatten(tuplize_dict(raw_params)))

        dataset_url = toolkit.url_for(controller='package', action='read',
                                      id=id, qualified=True)
        data['dataset_url'] = dataset_url

        package = get_action('package_show')(None, {'id': id})
        self.fail_if_private(package, dataset_url)

        # Comma separated config var
        recipients = config['ckanext.ands.support_emails'].split(',')
        subject = 'DataPortal Support: Request to publish dataset'
        message = base.render('package/doi_email.text', extra_vars=data)
        for address in recipients:
            mail_recipient('Dataportal support', address, subject, message)

        # Everything collected in ``data`` becomes the DoiRequest columns
        data['package_id'] = package['id']
        data['user_id'] = c.userobj.id
        Session.add(DoiRequest(**data))
        Session.commit()

        h.flash_success("DOI Request sent")
        return toolkit.redirect_to(dataset_url)
    def test_harvest_update_records(self):
        """Run three harvest jobs against the same single-document source.

        The first job creates the package, the second must leave it
        untouched, and the third (run with ``force_import=True``) must
        update it and become the current harvest object.
        """
        # Create source
        source_fixture = {"url": u"http://127.0.0.1:8999/single/dataset1.xml", "type": u"gemini-single"}

        source, first_job = self._create_source_and_job(source_fixture)

        first_obj = self._run_job_for_single_document(first_job)

        first_package_dict = get_action("package_show_rest")(self.context, {"id": first_obj.package_id})

        # Package was created
        assert first_package_dict
        assert first_obj.current == True
        assert first_obj.package

        # Create and run a second job, the package should not be updated
        second_job = self._create_job(source.id)

        second_obj = self._run_job_for_single_document(second_job)

        Session.remove()
        Session.add(first_obj)
        Session.add(second_obj)

        Session.refresh(first_obj)
        Session.refresh(second_obj)

        second_package_dict = get_action("package_show_rest")(self.context, {"id": first_obj.package_id})

        # Package was not updated.
        # Each condition gets its own assert: in ``assert a, b`` the second
        # expression is only the failure message and is never checked.
        assert second_package_dict
        assert first_package_dict["id"] == second_package_dict["id"]
        assert first_package_dict["metadata_modified"] == second_package_dict["metadata_modified"]
        assert not second_obj.package
        assert not second_obj.package_id
        assert second_obj.current == False
        assert first_obj.current == True

        # Create and run a third job, forcing the importing to simulate an update in the package
        third_job = self._create_job(source.id)
        third_obj = self._run_job_for_single_document(third_job, force_import=True)

        # For some reason first_obj does not get updated after the import_stage,
        # and we have to force a refresh to get the actual DB values.
        Session.remove()
        Session.add(first_obj)
        Session.add(second_obj)
        Session.add(third_obj)

        Session.refresh(first_obj)
        Session.refresh(second_obj)
        Session.refresh(third_obj)

        third_package_dict = get_action("package_show_rest")(self.context, {"id": third_obj.package_id})

        # Package was updated
        assert third_package_dict
        assert first_package_dict["id"] == third_package_dict["id"]
        assert third_package_dict["metadata_modified"] > second_package_dict["metadata_modified"]
        assert third_obj.package
        assert third_obj.package_id == first_package_dict["id"]
        assert third_obj.current == True
        assert second_obj.current == False
        assert first_obj.current == False
Beispiel #9
0
def create_doi_from_identifier(package_id, identifier):
    """Store a DOI record for an identifier that was created elsewhere.

    No uniqueness check is made on ``identifier``; the record is added to
    the session and committed straight away.

    :param package_id: id of the package the DOI belongs to
    :param identifier: the already existing DOI string
    :return: the committed DOI object
    """
    record = DOI(identifier=identifier, package_id=package_id)
    Session.add(record)
    Session.commit()
    return record
Beispiel #10
0
def create_unique_identifier(package_id):
    """
    Create a unique identifier, using the prefix and a random number: 10.5072/0044634
    Checks the random number doesn't exist in the table or the datacite repository.
    Candidates are redrawn until both checks pass; the accepted identifier
    is stored for the package and committed.

    :param package_id: id of the package the new DOI is created for
    @return: the committed DOI object
    """
    # DOI names always separate prefix and suffix with "/", so join with
    # posixpath instead of the platform-dependent os.path.
    import posixpath

    datacite_api = DOIDataCiteAPI()

    while True:

        identifier = posixpath.join(get_prefix(), '{0:07}'.format(random.randint(1, 100000)))

        # Check this identifier doesn't exist in the table. A clash must
        # trigger a redraw; previously the code fell through and stored the
        # duplicate identifier.
        if Session.query(DOI).filter(DOI.identifier == identifier).count():
            continue

        # And check against the datacite service
        try:
            datacite_doi = datacite_api.get(identifier)
        except HTTPError:
            # An HTTP error from the lookup is treated as "identifier not
            # registered", so the candidate is accepted.
            pass
        else:
            if datacite_doi.text:
                continue

        doi = DOI(package_id=package_id, identifier=identifier)
        Session.add(doi)
        Session.commit()

        return doi
def create_application(data_dict, image):
    """Validate ``data_dict`` and save a new Application built from it.

    Any ``image`` key is removed from ``data_dict`` (the dict is modified in
    place) before validation against ``application_schema()``.  The
    space-separated ``tags`` value is split and handed to ``update_tags``.
    When ``image`` has both a filename and a file, its content is stored as
    an ``ApplicationImage`` linked to the application.

    :param data_dict: submitted application fields
    :param image: upload object exposing ``filename`` and ``file``, or a
        falsy value when there is no image
    :raises ValidationError: if validation reports errors
    :return: the saved Application
    """
    # The upload is passed separately, so drop it before validation
    data_dict.pop('image', None)
    data, errors = validate(data_dict, application_schema())
    if errors:
        raise ValidationError(errors, error_summary(errors))

    # Fields copied verbatim from the validated data
    copied_fields = ('title', 'url', 'description', 'featured', 'submitter',
                     'developer', 'developer_url', 'license', 'code_url',
                     'api_url')
    attrs = dict((field, data.get(field)) for field in copied_fields)
    application = Application(
        name=Application.generate_name(data.get('title')), **attrs)

    application.update_tags(data.get('tags', '').split(' '))

    has_upload = image and image.filename and image.file
    if has_upload:
        stored_image = ApplicationImage(name=image.filename,
                                        data=image.file.read(),
                                        application=application)
        Session.add(stored_image)
    application.save()
    return application
Beispiel #12
0
def record_existing_unique_identifier(package_id, identifier):
    """
    Based on a provided identifier, checks datacite for an existing DOI
    and saves it to the local CKAN database.

    Nothing is saved when the identifier is already present in the local
    DOI table, when the datacite lookup raises ``HTTPError``, or when the
    datacite response has no text.

    :param package_id: string
    :param identifier: string
    :return: the saved DOI object, or None if nothing was saved
    """
    datacite_api = DOIDataCiteAPI()

    # Check this identifier doesn't exist in the table
    if Session.query(DOI).filter(DOI.identifier == identifier).first():
        return None

    # And check against the datacite service. Only the lookup itself is
    # guarded, so errors from the database calls below are not swallowed.
    try:
        datacite_doi = datacite_api.get(identifier)
    except HTTPError:
        return None

    if not datacite_doi.text:
        return None

    # NOTE(review): an earlier revision looked up an existing DOI row for
    # this package at this point but then did nothing with it (the branch
    # body was a bare ``datacite_api`` expression). Presumably a DOI that
    # already points to this package was meant to be removed first - confirm
    # and implement if required.
    doi = DOI(package_id=package_id, identifier=identifier)
    Session.add(doi)
    Session.commit()
    return doi
Beispiel #13
0
def create_unique_identifier(package_id):
    """
    Create a unique identifier, using the prefix and a random number: 10.5072/0044634
    Checks the random number doesn't exist in the table or the datacite repository.
    A new random number is drawn whenever either check finds a clash; the
    first free identifier is saved for the package and committed.

    :param package_id: id of the package the new DOI is created for
    @return: the committed DOI object
    """
    # Join prefix and suffix with "/" on every platform (os.path.join would
    # use the OS-specific separator).
    import posixpath

    datacite_api = DOIDataCiteAPI()

    while True:

        suffix = '{0:07}'.format(random.randint(1, 100000))
        identifier = posixpath.join(get_prefix(), suffix)

        # Check this identifier doesn't exist in the table. If it does,
        # try another one instead of falling through and storing a
        # duplicate (which is what the old ``if not ...:`` nesting did).
        already_stored = Session.query(DOI).filter(DOI.identifier == identifier).count()
        if already_stored:
            continue

        # And check against the datacite service
        try:
            datacite_doi = datacite_api.get(identifier)
        except HTTPError:
            # Lookup failed with an HTTP error: treated as "not registered"
            pass
        else:
            if datacite_doi.text:
                continue

        doi = DOI(package_id=package_id, identifier=identifier)
        Session.add(doi)
        Session.commit()

        return doi
Beispiel #14
0
    def test_auth_publisher_profile_different_publisher(self):
        """Publisher 2 may list and create sources but gets 401 on publisher 1's source."""
        # Create a source for publisher 1
        other_source = HarvestSource(
            url=u'http://test-source.com',
            type='ckan',
            publisher_id=self.publisher1.id,
        )
        Session.add(other_source)
        Session.commit()

        as_publisher2 = {
            'REMOTE_USER': self.publisher2_user.name.encode('utf8')
        }

        # List (publishers can see the sources list)
        page = self.app.get('/harvest', extra_environ=as_publisher2)
        assert 'Harvesting Sources' in page
        # Create
        page = self.app.get('/harvest/new', extra_environ=as_publisher2)
        assert 'New harvest source' in page
        assert 'publisher_id' in page

        # Check that this publisher is not allowed to manage sources from
        # other publishers: read, edit and refresh, in that order
        status = 401
        managed_urls = ('/harvest/%s', '/harvest/edit/%s', '/harvest/refresh/%s')
        for pattern in managed_urls:
            self.app.get(pattern % other_source.id,
                         status=status,
                         extra_environ=as_publisher2)
Beispiel #15
0
    def _test_auth_not_allowed(self, user_name=None, source=None, status=401):
        """Request every harvest page and expect ``status`` for each of them.

        :param user_name: sent as REMOTE_USER; no user is sent when empty
        :param source: harvest source to address; a new one is created and
            committed when omitted
        :param status: HTTP status expected from all requests (default 401)
        """
        if not source:
            # Create harvest source
            source = HarvestSource(url=u'http://test-source.com', type='ckan')
            Session.add(source)
            Session.commit()

        extra_environ = {'REMOTE_USER': user_name.encode('utf8')} if user_name else {}

        # List, then Create
        for path in ('/harvest', '/harvest/new'):
            self.app.get(path, status=status, extra_environ=extra_environ)

        # Read, Edit, then Refresh
        for pattern in ('/harvest/%s', '/harvest/edit/%s', '/harvest/refresh/%s'):
            self.app.get(pattern % source.id,
                         status=status,
                         extra_environ=extra_environ)
Beispiel #16
0
    def handle_submit(self, id):
        """Handle the DOI request form submitted for dataset ``id``.

        Parses the request parameters, mails the addresses configured in
        ``ckanext.ands.support_emails``, commits a ``DoiRequest`` built from
        the collected data and redirects to the dataset page after flashing
        a success message.
        """
        params_as_tuples = tuplize_dict(parse_params(request.params))
        data = clean_dict(dict_fns.unflatten(params_as_tuples))

        url_kwargs = dict(controller='package', action='read', id=id,
                          qualified=True)
        data['dataset_url'] = toolkit.url_for(**url_kwargs)

        show_package = get_action('package_show')
        package = show_package(None, {'id': id})
        self.fail_if_private(package, data['dataset_url'])

        mail_subject = 'DataPortal Support: Request to publish dataset'
        mail_body = base.render('package/doi_email.text', extra_vars=data)

        # Comma separated config var
        for support_address in config['ckanext.ands.support_emails'].split(','):
            mail_recipient('Dataportal support', support_address,
                           mail_subject, mail_body)

        data['package_id'] = package['id']
        data['user_id'] = c.userobj.id

        request_row = DoiRequest(**data)
        Session.add(request_row)
        Session.commit()

        h.flash_success("DOI Request sent")
        return toolkit.redirect_to(data['dataset_url'])
 def _create_pkg(self):
     """Rename the "annakarenina" package, give it a resource and commit.

     Returns the modified package.
     """
     model.repo.new_revision()
     package = Package.get("annakarenina")
     package.name = "fookarenina"
     package.add_resource("www.google.com", name="foo", description="foo")
     Session.add(package)
     Session.commit()
     return package
Beispiel #18
0
 def _create_pkg(self):
     """Return the 'annakarenina' package after renaming it and adding a resource.

     A new revision is opened first; the change is committed before returning.
     """
     model.repo.new_revision()
     renamed = Package.get('annakarenina')
     renamed.name = "fookarenina"
     resource_fields = {'description': 'foo', 'name': "foo"}
     renamed.add_resource('www.google.com', **resource_fields)
     Session.add(renamed)
     Session.commit()
     return renamed
Beispiel #19
0
 def save(self, commit=True):
     """Add this object to a session, deriving ``slug`` from ``name`` if unset.

     :param commit: commit the session after adding (default True)
     """
     slug_is_set = hasattr(self, 'slug') and self.slug
     if not slug_is_set:
         self.slug = slugify(self.name)
     db = Session()
     log.debug(self)
     db.add(self)
     if not commit:
         return
     db.commit()
 def create(cls, **kwargs):
     '''
     Create, commit and return a new record of type :class:`~m`.

     The keyword arguments are passed through ``cls._columns`` and the
     result is used to construct the record.
     '''
     column_values = cls._columns(**kwargs)
     record = cls.m(**column_values)
     Session.add(record)
     Session.commit()
     return record
Beispiel #21
0
    def set_names(self, langs):
        """
        Set translated license names.

        Resets ``self.names`` to an empty list and adds one
        ``LocalizedLicenseName`` to the session for each entry of ``langs``.
        Nothing is flushed or committed here.

        :param langs: mapping of language name to translated label
        """
        self.names = []
        for language, translated_label in langs.items():
            Session.add(
                LocalizedLicenseName(license_id=self.id,
                                     lang=language,
                                     label=translated_label))
Beispiel #22
0
def save_package_extent(package_id, geometry=None, srid=None):
    '''Adds, updates or deletes the package extent geometry.

    Without a geometry, an existing extent for the package is deleted.
    With a geometry, the extent is inserted when none is stored yet, or
    updated when it differs from the stored one.

    Will throw ValueError if the geometry object does not provide a geo
    interface.

    The responsibility for calling model.Session.commit() is left to the
    caller.

    :param package_id: Package unique identifier
    :param geometry: a Python object implementing the Python Geo Interface
        (i.e a loaded GeoJSON object) (Default value = None)
    :param srid: The spatial reference in which the geometry is provided.
        If None, it defaults to the DB srid. (Default value = None)

    '''
    default_srid = int(toolkit.config.get(u'ckan.spatial.srid', u'4326'))

    stored = Session.query(PackageExtent).filter(
        PackageExtent.package_id == package_id).first()

    if not geometry:
        # No geometry received: delete the stored extent, if there is one
        if stored:
            stored.delete()
            log.debug(u'Deleted extent for package %s' % package_id)
        return

    shape = asShape(geometry)
    candidate = PackageExtent(
        package_id=package_id,
        the_geom=WKTElement(shape.wkt, srid or default_srid))

    if not stored:
        # Insert extent
        Session.add(candidate)
        log.debug(u'Created new extent for package %s' % package_id)
    elif compare_geometry_fields(candidate.the_geom, stored.the_geom):
        log.debug(u'Extent for package %s unchanged' % package_id)
    else:
        # Update extent
        stored.the_geom = candidate.the_geom
        stored.save()
        log.debug(u'Updated extent for package %s' % package_id)
Beispiel #23
0
    def setup(self):
        """Create the sysadmin user ``harvest`` and the action context for the tests."""
        # Add sysadmin user
        sysadmin = model.User(name=u'harvest', password=u'test', sysadmin=True)
        Session.add(sysadmin)
        Session.commit()

        self.context = {
            'model': model,
            'session': Session,
            'user': u'harvest',
            'schema': default_update_package_schema(),
            'api_version': '2',
        }
Beispiel #24
0
 def _create_harvester_info(self, config=True):
     """Build an OAI-PMH harvester and a harvest job with a test source.

     A new revision is opened and the job is added to the session (no
     commit).

     :param config: when true the source config is set to ``{"query": ""}``
     :return: ``(harvest_job, harvester)``
     """
     model.repo.new_revision()
     harvester = OAIPMHHarvester()
     job = HarvestJob()
     job.source = HarvestSource()
     source = job.source
     source.title = "Test"
     source.url = "http://helda.helsinki.fi/oai/request"
     if config:
         source.config = '{"query": ""}'
     source.type = "OAI-PMH"
     Session.add(job)
     return job, harvester
Beispiel #25
0
    def setup(self):
        """Commit a sysadmin user named ``harvest`` and build ``self.context``."""
        # Add sysadmin user
        user_fields = dict(name=u'harvest', password=u'test', sysadmin=True)
        Session.add(model.User(**user_fields))
        Session.commit()

        update_schema = default_update_package_schema()
        context = {}
        context['model'] = model
        context['session'] = Session
        context['user'] = u'harvest'
        context['schema'] = update_schema
        context['api_version'] = '2'
        self.context = context
Beispiel #26
0
 def test_00duplicate_authz_group(self):
     """``identify`` must raise AssertionError with two same-named groups present."""
     duplicates = []
     for _ in range(2):
         group = AuthorizationGroup(name="1234567 My group")
         Session.add(group)
         Session.commit()
         duplicates.append(group)

     plugin = self._makeOne()
     environ = {
         "REQUEST_METHOD": "GET",
         "QUERY_STRING": "oauth_token=foo",
         "ckan.who.oauth.challenge": "1",
     }
     self.assertRaises(AssertionError, plugin.identify, environ)

     # Clean up both groups again
     for group in duplicates:
         group.delete()
     Session.commit()
 def create(cls, identifier, package_id, published=None):
     '''
     Add a new row to the DOI table and commit it.

     :param identifier: a new DOI string
     :param package_id: the id of the package this DOI represents
     :param published: when this DOI was published (datetime, nullable)
     :return: the newly created record object
     '''
     columns = {
         'identifier': identifier,
         'package_id': package_id,
         'published': published,
     }
     doi_row = DOI(**columns)
     Session.add(doi_row)
     Session.commit()
     return doi_row
Beispiel #28
0
    def setup(self):
        """Create the publisher-profile test data plus two publisher groups.

        ``russianfan`` becomes an admin member of publisher 1 and ``tester``
        an admin member of publisher 2; everything is committed at the end.
        """
        model.Session.remove()
        CreateTestData.create(auth_profile='publisher')
        self.sysadmin_user = model.User.get('testsysadmin')
        # Does not belong to a publisher
        self.normal_user = model.User.get('annafan')
        self.publisher1_user = model.User.by_name('russianfan')
        self.publisher2_user = model.User.by_name('tester')

        # Create two Publishers
        model.repo.new_revision()
        self.publisher1 = model.Group(name=u'test-publisher1',
                                      title=u'Test Publihser 1',
                                      type=u'publisher')
        Session.add(self.publisher1)
        self.publisher2 = model.Group(name=u'test-publisher2',
                                      title=u'Test Publihser 2',
                                      type=u'publisher')
        Session.add(self.publisher2)

        # Make each user an admin member of its own publisher
        memberships = (
            (self.publisher1_user, self.publisher1),
            (self.publisher2_user, self.publisher2),
        )
        for user, publisher in memberships:
            Session.add(model.Member(table_name='user',
                                     table_id=user.id,
                                     group=publisher,
                                     capacity='admin'))

        Session.commit()
Beispiel #29
0
def update_resource_stats(resource_id, total_visits, total_downloads):
    """Store the visit and download totals for a resource and commit.

    An existing ``PiwikResource`` row for ``resource_id`` is updated;
    otherwise a new row is created and added to the session.
    """
    row = Session.query(PiwikResource).filter(
        PiwikResource.resource_id == resource_id).first()
    is_new = not row
    if is_new:
        # add new entry for resource
        row = PiwikResource()
        row.resource_id = resource_id
    row.total_visits = total_visits
    row.total_downloads = total_downloads
    if is_new:
        Session.add(row)
    Session.commit()
Beispiel #30
0
def update_resource_stats(resource_id, total_visits, total_downloads):
    """Insert or update the ``PiwikResource`` totals for ``resource_id``.

    The change is committed before returning.
    """
    matching = Session.query(PiwikResource).filter(PiwikResource.resource_id == resource_id)
    existing = matching.first()
    if not existing:
        # resource not in table yet: add new entry
        fresh = PiwikResource()
        fresh.resource_id = resource_id
        fresh.total_visits = total_visits
        fresh.total_downloads = total_downloads
        Session.add(fresh)
    else:
        # update the row that is already there
        existing.total_visits = total_visits
        existing.total_downloads = total_downloads
    Session.commit()
Beispiel #31
0
    def setup(self):
        """Create the ``harvest`` admin user and the action context.

        When ``ckan.harvest.auth.profile`` is ``publisher``, a publisher
        user and group are created too and the user is made an admin member
        of the group.
        """
        # Add sysadmin user
        admin = model.User(name=u"harvest", password=u"test")
        model.add_user_to_role(admin, model.Role.ADMIN, model.System())
        Session.add(admin)
        Session.commit()

        update_schema = default_update_package_schema()
        self.context = dict(
            model=model,
            session=Session,
            user=u"harvest",
            schema=update_schema,
            api_version="2",
        )

        if config.get("ckan.harvest.auth.profile") != u"publisher":
            return

        # Create a publisher user
        model.repo.new_revision()
        self.publisher_user = model.User(name=u"test-publisher-user", password=u"test")
        self.publisher = model.Group(name=u"test-publisher", title=u"Test Publihser", type=u"publisher")
        for created in (self.publisher_user, self.publisher):
            Session.add(created)
        Session.commit()

        # The membership is created after the commit above
        Session.add(model.Member(
            table_name="user",
            table_id=self.publisher_user.id,
            group=self.publisher,
            capacity="admin",
        ))
        Session.commit()
    def setup(self):
        """Prepare the admin user, the action context and, if configured, a publisher.

        The publisher user, group and admin membership are only created when
        the ``ckan.harvest.auth.profile`` setting equals ``publisher``.
        """
        # Add sysadmin user
        harvest_admin = model.User(name=u'harvest', password=u'test')
        model.add_user_to_role(harvest_admin, model.Role.ADMIN, model.System())
        Session.add(harvest_admin)
        Session.commit()

        self.context = {
            'model': model,
            'session': Session,
            'user': u'harvest',
            'schema': default_update_package_schema(),
            'api_version': '2',
        }

        publisher_profile = config.get('ckan.harvest.auth.profile') == u'publisher'
        if publisher_profile:
            # Create a publisher user
            model.repo.new_revision()
            self.publisher_user = model.User(name=u'test-publisher-user',
                                             password=u'test')
            self.publisher = model.Group(name=u'test-publisher',
                                         title=u'Test Publihser',
                                         type=u'publisher')
            Session.add(self.publisher_user)
            Session.add(self.publisher)
            Session.commit()

            membership = model.Member(table_name='user',
                                      table_id=self.publisher_user.id,
                                      group=self.publisher,
                                      capacity='admin')
            Session.add(membership)
            Session.commit()
Beispiel #33
0
 def set_parent(self, parent_uri):
     """
     Set parent for given license.

     The parent is looked up with ``License.get(parent_uri)``; its id is
     stored in ``parent_id``, this license is added to the session and the
     session is flushed (not committed).

     :raises ValueError: if no parent is found for ``parent_uri``
     """
     parent_license = License.get(parent_uri)
     if not parent_license:
         raise ValueError("No parent %s object" % parent_uri)
     self.parent_id = parent_license.id
     Session.add(self)
     # NOTE(review): the looked-up revision is never used afterwards; a
     # missing ``revision`` attribute is tolerated.
     _unused_rev = getattr(Session, 'revision', None)
     Session.flush()
Beispiel #34
0
def create_package_doi(pkg_dict):
    """
    Create and commit a DOI record for the package described by ``pkg_dict``.

    The package dict is first passed to ``_validate_package``; the
    identifier comes from ``_create_unique_identifier``.

    @return: the committed CeonPackageDOI object
    """
    _validate_package(pkg_dict)
    new_identifier = _create_unique_identifier()
    package_id = pkg_dict['id']
    doi_record = CeonPackageDOI(identifier=new_identifier, package_id=package_id)
    Session.add(doi_record)
    Session.commit()
    message = u"Created DOI {} for package {}".format(doi_record.identifier, package_id)
    log.debug(message)
    return doi_record
    def setup(self):
        """Load publisher-profile test data and create two publishers with one admin each."""
        model.Session.remove()
        CreateTestData.create(auth_profile='publisher')

        self.sysadmin_user = model.User.get('testsysadmin')
        self.normal_user = model.User.get('annafan')  # Does not belong to a publisher
        self.publisher1_user = model.User.by_name('russianfan')
        self.publisher2_user = model.User.by_name('tester')

        # Create two Publishers
        model.repo.new_revision()
        first_publisher = model.Group(name=u'test-publisher1',
                                      title=u'Test Publihser 1',
                                      type=u'publisher')
        self.publisher1 = first_publisher
        Session.add(first_publisher)
        second_publisher = model.Group(name=u'test-publisher2',
                                       title=u'Test Publihser 2',
                                       type=u'publisher')
        self.publisher2 = second_publisher
        Session.add(second_publisher)

        # Admin membership of each user in its own publisher
        admin_of_first = model.Member(table_name='user',
                                      table_id=self.publisher1_user.id,
                                      group=first_publisher,
                                      capacity='admin')
        Session.add(admin_of_first)
        admin_of_second = model.Member(table_name='user',
                                       table_id=self.publisher2_user.id,
                                       group=second_publisher,
                                       capacity='admin')
        Session.add(admin_of_second)

        Session.commit()
 def _create_harvester(self, config=True):
     """Build a DDI harvester and a harvest job with a test source.

     The job is added to the session but not committed.

     :param config: when true the source config is the empty string,
         otherwise None
     :return: ``(harvester, harvest_job)``
     """
     harvester = DDIHarvester()
     harvester.config = "{}"
     job = HarvestJob()
     job.source = HarvestSource()
     job.source.title = "Test"
     job.source.url = "http://foo"
     job.source.config = '' if config else None
     job.source.type = "DDI"
     Session.add(job)
     return harvester, job
Beispiel #37
0
def save_package_extent(package_id, geometry=None, srid=None):
    '''Add, update or delete the extent geometry stored for a package.

       package_id: Package unique identifier
       geometry: a Python object implementing the Python Geo Interface
                (i.e a loaded GeoJSON object). A falsy value means
                "no geometry": an existing extent is deleted.
       srid: The spatial reference in which the geometry is provided.
             If falsy, it defaults to the DB srid
             ('ckan.spatial.srid', 4326 when unset).

       Will throw ValueError if the geometry object does not provide a geo interface.

       The responsibility for calling model.Session.commit() is left to the
       caller.
    '''
    db_srid = int(config.get('ckan.spatial.srid', '4326'))

    stored_extent = Session.query(PackageExtent) \
        .filter(PackageExtent.package_id==package_id) \
        .first()

    if geometry:
        shape = asShape(geometry)
        srid = srid or db_srid
        new_extent = PackageExtent(package_id=package_id,
                                   the_geom=WKTElement(shape.wkt, srid))

    # Nothing stored yet: insert when a geometry was supplied, else no-op.
    if not stored_extent:
        if geometry:
            Session.add(new_extent)
            log.debug('Created new extent for package %s' % package_id)
        return

    # An extent is stored but no geometry was received: drop the stored one.
    if not geometry:
        stored_extent.delete()
        log.debug('Deleted extent for package %s' % package_id)
        return

    # Both exist: only write when the geometry actually changed.
    if compare_geometry_fields(new_extent.the_geom, stored_extent.the_geom):
        log.debug('Extent for package %s unchanged' % package_id)
        return

    stored_extent.the_geom = new_extent.the_geom
    stored_extent.save()
    log.debug('Updated extent for package %s' % package_id)
Beispiel #38
0
def update_package_stats(package_name, total_visits, recent_visits):
    """Store the visit counters for ``package_name`` and commit.

    Updates the existing ``PiwikPackage`` row for the package when the query
    finds one; otherwise a new row is created and added to the session.
    """
    record = Session.query(PiwikPackage).filter(
        PiwikPackage.package_name == package_name).first()

    is_new = not record
    if is_new:
        # no entry for this package yet
        record = PiwikPackage()
        record.package_name = package_name

    record.total_visits = total_visits
    record.recent_visits = recent_visits

    if is_new:
        Session.add(record)
    Session.commit()
Beispiel #39
0
 def setup(self):
     """Reset the mock server, create the 'harvest' sysadmin, build the context.

     After the parent ``setup`` runs, a sysadmin user named 'harvest' is
     committed and ``self.context`` is populated with the model, session,
     user name, the default update-package schema and API version '2'.
     """
     super(FisbrokerTestBase, self).setup()
     reset_mock_server()

     # Add sysadmin user
     user_name = u'harvest'
     Session.add(
         model.User(name=user_name, password=u'test', sysadmin=True))
     Session.commit()

     self.context = {
         'model': model,
         'session': Session,
         'user': user_name,
         'schema': default_update_package_schema(),
         'api_version': '2'
     }
Beispiel #40
0
def create_package_doi(pkg_dict):
    """
    Create and persist a DOI record for a package.

    The package dict is validated first, then an identifier is obtained
    from ``_create_unique_identifier()`` (per the original notes: the prefix
    plus a random number, e.g. 10.5072/0044634, checked against the table
    and the datacite repository -- NOTE(review): that helper is not visible
    here, confirm).

    @param pkg_dict: package dictionary; its 'id' key is used
    @return: the committed CeonPackageDOI instance
    """
    _validate_package(pkg_dict)
    new_identifier = _create_unique_identifier()
    package_id = pkg_dict['id']

    doi_record = CeonPackageDOI(package_id=package_id,
                                identifier=new_identifier)
    Session.add(doi_record)
    Session.commit()

    message = u"Created DOI {} for package {}".format(doi_record.identifier,
                                                      package_id)
    log.info(message)
    return doi_record
Beispiel #41
0
def update_package_stats(package_name, total_visits, recent_visits):
    """Store the visit counters for ``package_name`` and commit.

    Updates the existing ``PiwikPackage`` row for the package when the query
    finds one; otherwise a new row is created and added to the session.
    """
    record = Session.query(PiwikPackage).filter(
        PiwikPackage.package_name == package_name).first()

    is_new = not record
    if is_new:
        # no entry for this package yet
        record = PiwikPackage()
        record.package_name = package_name

    record.total_visits = total_visits
    record.recent_visits = recent_visits

    if is_new:
        Session.add(record)
    Session.commit()
    def create(cls, vocab, name, labels=None, parent=None, properties=None):
        """Create an instance in a vocabulary, add it to the session and flush.

        :param vocab: a ``Vocabulary`` instance, or a value that
            ``Vocabulary.get`` can resolve to one.
        :param name: name of the new instance.
        :param labels: optional labels, passed to ``set_labels`` when truthy.
        :param parent: optional parent; the new depth is ``parent.depth + 1``,
            or 0 when there is no parent.
        :param properties: optional properties; defaults to an empty dict.
        :returns: the new instance (flushed, not committed).
        """
        if isinstance(vocab, Vocabulary):
            vocabulary = vocab
        else:
            vocabulary = Vocabulary.get(vocab)

        if parent:
            depth = parent.depth + 1
        else:
            depth = 0

        term = cls(vocabulary=vocabulary,
                   name=name,
                   depth=depth,
                   parent=parent)
        term.properties = properties or {}
        if labels:
            term.set_labels(labels)
        term.update_path()

        Session.add(term)
        Session.flush()
        return term
def save_package_extent(package_id, geometry = None, srid = None):
    '''Adds, updates or deletes the package extent geometry.

       package_id: Package unique identifier
       geometry: a Python object implementing the Python Geo Interface
                (i.e a loaded GeoJSON object). A falsy value means
                "no geometry": an existing extent is deleted.
       srid: The spatial reference in which the geometry is provided.
             If falsy, it defaults to the DB srid
             ('ckan.spatial.srid', 4326 when unset).

       Will throw ValueError if the geometry object does not provide a geo interface.

       A new extent is only added to the session here; no commit is issued
       by this function itself.
    '''
    db_srid = int(config.get('ckan.spatial.srid', '4326'))


    existing_package_extent = Session.query(PackageExtent).filter(PackageExtent.package_id==package_id).first()

    if geometry:
        shape = asShape(geometry)

        if not srid:
            srid = db_srid

        package_extent = PackageExtent(package_id=package_id,the_geom=WKTSpatialElement(shape.wkt, srid))

    # Check if extent exists
    if existing_package_extent:

        # If extent exists but we received no geometry, we'll delete the existing one
        if not geometry:
            existing_package_extent.delete()
            log.debug('Deleted extent for package %s' % package_id)
        else:
            # Check if extent changed by comparing the WKT of both geometries.
            # `!=` replaces the obsolete `<>` spelling, which is a syntax
            # error on Python 3.
            new_wkt = Session.scalar(package_extent.the_geom.wkt)
            old_wkt = Session.scalar(existing_package_extent.the_geom.wkt)
            if new_wkt != old_wkt:
                # Update extent
                existing_package_extent.the_geom = package_extent.the_geom
                existing_package_extent.save()
                log.debug('Updated extent for package %s' % package_id)
            else:
                log.debug('Extent for package %s unchanged' % package_id)
    elif geometry:
        # Insert extent
        Session.add(package_extent)
        log.debug('Created new extent for package %s' % package_id)
Beispiel #44
0
def create_resource_doi(pkg_dict, res_dict):
    """
    Create and persist a DOI record for a resource.

    The resource dict is validated first. The DOI of the owning package is
    looked up (and created when missing), and its identifier is passed to
    ``_create_unique_identifier`` to obtain the resource identifier.

    @param pkg_dict: package dictionary; its 'id' key is used
    @param res_dict: resource dictionary; its 'id' key is used
    @return: the committed CeonResourceDOI instance
    """
    _validate_resource(res_dict)
    resource_id = res_dict['id']
    package_doi = CeonPackageDOI.get(pkg_dict['id'])
    if not package_doi:
        # Keep the freshly created record: previously the return value was
        # dropped, so the `.identifier` access below failed on the empty
        # lookup result.
        package_doi = create_package_doi(pkg_dict)
    identifier = _create_unique_identifier(package_doi.identifier)
    resource_doi = CeonResourceDOI(resource_id=resource_id, identifier=identifier)
    Session.add(resource_doi)
    Session.commit()
    log.debug(u"Created DOI {} for resource {}".format(resource_doi.identifier, resource_id))
    return resource_doi
Beispiel #45
0
def map_nonconformant_groups(harvest_object):
    """
    Adds themes to fetched data.

    The harvest object's JSON content is read, its group names (and display
    names, where present) are mapped to themes, and the result is written
    back both as a 'theme' extra and as a top-level 'theme' key. Returns
    without changes when there is no mapping data, no groups, or no new
    themes.
    """
    themes_data = _load_mapping_data()
    if not themes_data:
        return

    data = json.loads(harvest_object.content)
    raw_groups = data.get('groups')
    if not raw_groups:
        return

    candidate_names = [grp['name'] for grp in raw_groups]
    candidate_names += [grp['display_name']
                        for grp in raw_groups if 'display_name' in grp]

    new_themes = _get_new_themes(candidate_names, themes_data,
                                 add_existing=False)
    if not new_themes:
        return

    # ensure themes are upper-case, otherwise will be discarded
    # by validators
    theme_value = _encode_list(new_themes).upper()

    extras = data.get('extras') or []
    for item in extras:
        if item['key'] == 'theme':
            # overwrite the first existing theme extra
            item['value'] = theme_value
            break
    else:
        # no theme extra yet
        extras.append({'key': 'theme', 'value': theme_value})
    data['extras'] = extras
    data['theme'] = theme_value

    harvest_object.content = json.dumps(data)
    Session.add(harvest_object)

    # Remember the session's revision (if any) and put it back after flush.
    saved_revision = getattr(Session, 'revision', None)
    Session.flush()
    Session.revision = saved_revision
    def updateValidationJobStatus(self,
                                  session=None,
                                  resource_id=None,
                                  status=None,
                                  report=None,
                                  error=None,
                                  validationRecord=None):
        # type: (object, Session, str, str, object, object) -> model.Validation
        """
        Set status, report and error on a validation job record and commit.

        When the new status is success, failure or error, ``finished`` is
        set to the current UTC time.

        :param self:
        :param session: passed to ``getValidationJob`` for the lookup
        :param resource_id: resource whose job is looked up
        :param status: new status value
        :param report: report to store
        :param error: error to store
        :param validationRecord: record to update; looked up when None
        :return: the updated record
        :raises ValidationJobDoesNotExist: no record could be found
        :raises ValidationJobAlreadyRunning: the record is already running
            and ``getHoursSince(created)`` is below 1
        """
        log.debug("updateValidationJobStatus: %s status: %s", resource_id,
                  status)

        record = validationRecord
        if record is None:
            record = self.getValidationJob(session, resource_id)
            if record is None:
                log.error("record not found to update statues: %s",
                          resource_id)
                raise ValidationJobDoesNotExist()

        # Handle already running status in the last hour
        already_running = (status == StatusTypes.running
                           and record.status == status)
        if already_running and self.getHoursSince(record.created) < 1:
            raise ValidationJobAlreadyRunning()

        record.status = status
        record.report = report
        record.error = error

        finished_states = (StatusTypes.success, StatusTypes.failure,
                           StatusTypes.error)
        if status in finished_states:
            record.finished = datetime.datetime.utcnow()

        Session.add(record)
        Session.commit()
        # Flush so other transactions are not waiting
        Session.flush()
        return record
Beispiel #47
0
def init_db():
    """
    Create board, app, mark tables in the database.
    Prepopulate the board table with the entries of DEFAULT_BOARDS, then
    make sure the migration table exists and run pending migrations.

    Does nothing when the CKAN package table does not exist.
    """
    if not model.package_table.exists():
        # during tests?
        return

    session = Session()

    for table in (board_table, app_table, mark_table):
        if table.exists():
            continue
        table.create(checkfirst=True)
        log.debug("Apps {} have been created".format(table.name))

    for board_name, board_desc in DEFAULT_BOARDS.iteritems():
        if Board.get_by_slug(slugify(board_name)):
            # board already present
            continue
        board = Board()
        board.name = board_name
        board.slug = slugify(board_name)
        board.description = board_desc
        session.add(board)
        log.debug("Add {0} to {1} table".format(board_name,
                                                board_table.name))
        session.commit()

    if not migration_table.exists():
        migration_table.create(checkfirst=True)
        session.commit()

    # One row in the migration table per migration already recorded.
    migration_number = session.query(migration_table).count()
    log.debug('Migration number: %s', migration_number)

    # No migrations are defined at the moment, so the loop below is a no-op.
    migration_sql_list = []
    for position, statement in enumerate(migration_sql_list, start=1):
        if migration_number >= position:
            continue
        try:
            session.execute(statement)
        except ProgrammingError:
            session.rollback()
        finally:
            # The migration is recorded whether or not the statement worked.
            session.execute(migration_table.insert())
            session.commit()

    session.close()
Beispiel #48
0
 def authenticate(self, environ, identity):
     """Return the user name for an OpenID identity, creating the user if needed.

     Returns ``None`` when the identity carries no OpenID user id. For an
     unknown OpenID a user is created; the nickname is used as name unless
     it is invalid or unavailable, in which case the OpenID itself is used.
     """
     userid_key = 'repoze.who.plugins.openid.userid'
     if userid_key not in identity:
         return None

     openid = identity.get(userid_key)
     user = User.by_openid(openid)
     if user is not None:
         return user.name

     # TODO: Implement a mask to ask for an alternative user
     # name instead of just using the OpenID identifier.
     name = identity.get('repoze.who.plugins.openid.nickname')
     if not User.check_name_valid(name):
         name = openid
     if not User.check_name_available(name):
         name = openid

     fullname = identity.get('repoze.who.plugins.openid.fullname')
     email = identity.get('repoze.who.plugins.openid.email')
     user = User(openid=openid, name=name, fullname=fullname, email=email)
     Session.add(user)
     Session.commit()
     Session.remove()
     return user.name
Beispiel #49
0
def create_resource_doi(pkg_dict, res_dict):
    """
    Create and persist a DOI record for a resource.

    The resource dict is validated first. The DOI of the owning package is
    looked up (and created when missing), and its identifier is passed to
    ``_create_unique_identifier`` to obtain the resource identifier.

    @param pkg_dict: package dictionary; its 'id' key is used
    @param res_dict: resource dictionary; its 'id' key is used
    @return: the committed CeonResourceDOI instance
    """
    _validate_resource(res_dict)
    resource_id = res_dict['id']
    package_doi = CeonPackageDOI.get(pkg_dict['id'])
    if not package_doi:
        # Keep the freshly created record: previously the return value was
        # dropped, so the `.identifier` access below failed on the empty
        # lookup result.
        package_doi = create_package_doi(pkg_dict)
    identifier = _create_unique_identifier(package_doi.identifier)
    resource_doi = CeonResourceDOI(resource_id=resource_id,
                                   identifier=identifier)
    Session.add(resource_doi)
    Session.commit()
    log.info(u"Created DOI {} for resource {}".format(resource_doi.identifier,
                                                      resource_id))
    return resource_doi
    def _test_auth_not_allowed(self, user_name=None, source=None, status=401):
        """Request each harvest page and expect ``status`` for every one.

        :param user_name: when given, sent as ``REMOTE_USER`` (utf8-encoded).
        :param source: harvest source to address; a 'ckan' source is created
            and committed when none is passed.
        :param status: HTTP status every request is expected to return.
        """
        if not source:
            # Create harvest source
            source = HarvestSource(url=u'http://test-source.com', type='ckan')
            Session.add(source)
            Session.commit()

        extra_environ = {}
        if user_name:
            extra_environ['REMOTE_USER'] = user_name.encode('utf8')

        # (url template, whether the template takes the source id)
        pages = (
            ('/harvest', False),             # List
            ('/harvest/new', False),         # Create
            ('/harvest/%s', True),           # Read
            ('/harvest/edit/%s', True),      # Edit
            ('/harvest/refresh/%s', True),   # Refresh
        )
        for template, needs_id in pages:
            url = template % source.id if needs_id else template
            self.app.get(url, status=status, extra_environ=extra_environ)
Beispiel #51
0
    def from_data(cls,
                  license_type,
                  version,
                  uri,
                  path,
                  document_uri,
                  rank_order,
                  names,
                  default_lang=None,
                  parent=None):
        """Create an instance from raw values, flush it and set its names.

        :param names: mapping of language code to name; the entry for
            ``default_lang`` ('it' when not given) becomes ``default_name``.
        :param parent: optional parent reference. When a ``License`` with
            that uri exists its id is stored as ``parent_id``; otherwise the
            value is stored unchanged.
        :returns: the new instance (flushed, not committed).
        """
        lang = 'it' if default_lang is None else default_lang
        default_name = names[lang]

        parent_id = parent
        if parent is not None:
            parent_license = Session.query(License).filter_by(
                uri=str(parent)).first()
            if parent_license:
                parent_id = parent_license.id

        inst = cls(license_type=license_type,
                   version=version,
                   uri=uri,
                   path=path,
                   document_uri=document_uri,
                   rank_order=rank_order,
                   parent_id=parent_id,
                   default_name=default_name)
        Session.add(inst)

        # Remember the session's revision (if any) and put it back after
        # each flush.
        saved_revision = getattr(Session, 'revision', None)
        Session.flush()
        Session.revision = saved_revision

        inst.set_names(names)
        Session.flush()
        Session.revision = saved_revision
        return inst
Beispiel #52
0
    def test_zaincremental_harvester(self):
        """Run gather and fetch for an incremental OAI-PMH job and expect records.

        The OAI-PMH client class is replaced by a mock that serves data from
        a local ``CKANServer``. The replacement is not undone here.
        """
        registry = metadata.MetadataRegistry()
        ckan_server = CKANServer()
        registry.registerReader('oai_dc', oai_dc_reader)
        registry.registerWriter('oai_dc', oai_dc_writer)
        batching = BatchingServer(ckan_server, metadata_registry=registry)
        oaipmh.client.Client = mock.Mock(
            return_value=ServerClient(batching, registry))

        harvester = OAIPMHHarvester()
        job = HarvestJob()
        job.source = HarvestSource()
        job.source.title = "Test"
        job.source.url = "http://helda.helsinki.fi/oai/request"
        # gather start is placed one day in the future
        job.gather_started = datetime.now() + timedelta(days=1)
        job.source.config = '{"incremental":"True"}'
        job.source.type = "OAI-PMH"
        Session.add(job)

        # Package whose revision is stamped two days in the future.
        revision = model.repo.new_revision()
        revision.timestamp = datetime.now() + timedelta(days=2)
        package = Package(name='footest', revision=revision)
        Session.add(package)
        package.save()

        group = Group.get('roger')
        group.add_package_by_name('footest')
        Session.add(group)
        group.save()

        gathered_ids = harvester.gather_stage(job)
        harvest_object = HarvestObject.get(gathered_ids[0])
        harvester.fetch_stage(harvest_object)
        fetched = json.loads(harvest_object.content)
        self.assert_(fetched['records'])
Beispiel #53
0
    def _get_or_create_user(self, env):
        """Return the user matching the environ's mail attribute, creating it if needed.

        The environ keys to read are taken from ``self.mail`` and
        ``self.name``. Returns ``None`` when either value is missing. A new
        user gets the email as openid and a name derived from the email with
        '.' and '@' replaced by '_'.
        """
        # WSGI Variables
        # Shib-Application-ID            'default'
        # Shib-Authentication-Instant    '2012-08-13T12:04:22.492Z'
        # Shib-Authentication-Method     'urn:oasis:names:tc:SAML:2.0:ac:classes:PasswordProtectedTransport'
        # Shib-AuthnContext-Class        'urn:oasis:names:tc:SAML:2.0:ac:classes:PasswordProtectedTransport'
        # Shib-Identity-Provider         'https://idp.example.com/idp/shibboleth'
        # Shib-Session-ID                '_7ec5a681e6dbae627c1cefcc7cb4d56a'
        # Shib-Session-Index             '39dafd8477850f5e0b968e3561570197f2109948c1d374a7a2b4c9a7adbf8628'
        # cn                             'My Other Self'
        # givenName                      'My Other Self'
        # mail                           '*****@*****.**'

        email = env.get(self.mail)
        fullname = env.get(self.name)

        if not (email and fullname):
            log.debug("Environ does not contain mail or cn attributes, user not loaded.")
            return None

        lookup = meta.Session.query(User).autoflush(False)
        user = lookup.filter_by(openid=email).first()
        if user is not None:
            return user

        log.debug("User does not exists, creating new one.")

        import re

        username = re.sub("[.@]", "_", email)
        user = User(name=username, fullname=fullname, email=email, openid=email)

        Session.add(user)
        Session.commit()
        Session.remove()

        log.debug("Created new user %s" % fullname)
        return user
Beispiel #54
0
    def preauthenticate(self, environ, identity):
        """Turn an OAuth identity into a CKAN one and store it in ``identity``.

        The OAuth token is read from ``identity['userdata']``; ``None`` is
        returned when the token parts are missing. Otherwise the user data
        is fetched from ``self.user_url``, the matching user is created or
        has its full name updated, its groups are synchronised, and
        ``identity['repoze.who.userid']`` is set to the utf8-encoded name.
        """
        import oauth2 as oauth

        try:
            token_fields = dict(urlparse.parse_qsl(identity['userdata']))
            token_key = token_fields['oauth_token']
            token_secret = token_fields['oauth_token_secret']
        except KeyError:
            return None

        client = oauth.Client(self.consumer,
                              oauth.Token(token_key, token_secret))
        _resp, content = client.request(self.user_url, "GET")
        data = json.loads(content)

        user_id = data['id']
        logging.info("Preauth: Got oauth user data for user %s" % user_id)

        user = User.by_openid(user_id)
        if user is not None:
            user.fullname = data['name'] # if the name is updated
        else:
            user = User(openid=user_id,
                        name=data['id'],
                        fullname=data['name'],
                        email=data['mail'])
            Session.add(user)
        Session.commit()
        Session.remove()
        logging.info("Preauth: Created new/updated user %s" % user_id)

        # deal with groups
        _sync_auth_groups(user, data['groups'])

        name = user.name.encode("utf8")
        logging.info("Preauth: Returning user identifier %s" % name)
        identity['repoze.who.userid'] = name
        return identity
Beispiel #55
0
    def test_harvest_different_sources_same_document(self):
        """Harvest one document from two sources and check which object is current.

        Steps: harvest from source1 (package created), harvest the unchanged
        document from source2 (package untouched), then re-harvest from
        source2 with ``force_import=True`` (package updated).

        Note: conditions are asserted one per statement. In
        ``assert a, b`` the second expression is only the failure message
        and is never checked.
        """

        # Create source1
        source1_fixture = {"url": u"http://127.0.0.1:8999/single/source1/same_dataset.xml", "type": u"gemini-single"}

        source1, first_job = self._create_source_and_job(source1_fixture)

        first_obj = self._run_job_for_single_document(first_job)

        first_package_dict = get_action("package_show_rest")(self.context, {"id": first_obj.package_id})

        # Package was created
        assert first_package_dict
        assert first_package_dict["state"] == u"active"
        assert first_obj.current == True

        # Harvest the same document, unchanged, from another source, the package
        # is not updated.
        # (As of https://github.com/okfn/ckanext-inspire/commit/9fb67
        # we are no longer throwing an exception when this happens)
        source2_fixture = {"url": u"http://127.0.0.1:8999/single/source2/same_dataset.xml", "type": u"gemini-single"}

        source2, second_job = self._create_source_and_job(source2_fixture)

        second_obj = self._run_job_for_single_document(second_job)

        second_package_dict = get_action("package_show_rest")(self.context, {"id": first_obj.package_id})

        # Package was not updated
        assert second_package_dict
        assert first_package_dict["id"] == second_package_dict["id"]
        assert first_package_dict["metadata_modified"] == second_package_dict["metadata_modified"]
        assert not second_obj.package
        assert not second_obj.package_id
        assert second_obj.current == False
        assert first_obj.current == True

        # Inactivate source1 and reharvest from source2, package should be updated
        third_job = self._create_job(source2.id)
        third_obj = self._run_job_for_single_document(third_job, force_import=True)

        # Re-attach the objects to a fresh session and reload their state.
        Session.remove()
        Session.add(first_obj)
        Session.add(second_obj)
        Session.add(third_obj)

        Session.refresh(first_obj)
        Session.refresh(second_obj)
        Session.refresh(third_obj)

        third_package_dict = get_action("package_show_rest")(self.context, {"id": first_obj.package_id})

        # Package was updated
        assert third_package_dict
        assert first_package_dict["id"] == third_package_dict["id"]
        assert third_package_dict["metadata_modified"] > second_package_dict["metadata_modified"]
        assert third_obj.package
        assert third_obj.package_id == first_package_dict["id"]
        assert third_obj.current == True
        assert second_obj.current == False
        assert first_obj.current == False
 def test_zzcomplete(self):
     """Harvest the complete FSD DDI record list and print timings (skipped).

     The test is skipped unconditionally because it takes very long; the
     code below the ``raise`` documents the intended run: gather, then
     fetch and import every object while timing each one.
     """
     raise SkipTest('Takes ages, do not run')
     urllib2.urlopen = realopen
     harv = DDIHarvester()
     harv.config = "{}"
     harvest_job = HarvestJob()
     harvest_job.source = HarvestSource()
     harvest_job.source.title = "Test"
     harvest_job.source.url = "http://www.fsd.uta.fi/fi/aineistot/luettelo/fsd-ddi-records-uris-fi.txt"
     harvest_job.source.config = ''
     harvest_job.source.type = "DDI"
     Session.add(harvest_job)
     gathered = harv.gather_stage(harvest_job)
     diffs = []
     for gath in gathered:
         harvest_object = HarvestObject.get(gath)
         print(json.loads(harvest_object.content)['url'])
         before = datetime.now()
         harv.fetch_stage(harvest_object)
         harv.import_stage(harvest_object)
         diff = datetime.now() - before
         print(diff)
         diffs.append(diff)
     # The start value must be a timedelta instance; passing the class
     # itself makes the first addition fail.
     print(sum(diffs, timedelta()))
Beispiel #57
0
    def test_harvest_import_command(self):
        """Run three harvest jobs, then the import action, and check the result.

        After the manual import the package must have a newer
        ``metadata_modified`` while the first harvest object stays the
        current one.

        Note: conditions are asserted one per statement. In
        ``assert a, b`` the second expression is only the failure message
        and is never checked.
        """

        # Create source
        source_fixture = {
            'title': 'Test Source',
            'name': 'test-source',
            'url': u'http://127.0.0.1:8999/gemini2.1/dataset1.xml',
            'source_type': u'gemini-single'
        }

        source, first_job = self._create_source_and_job(source_fixture)

        first_obj = self._run_job_for_single_document(first_job)

        before_package_dict = get_action('package_show_rest')(self.context,{'id':first_obj.package_id})

        # Package was created
        assert before_package_dict
        assert first_obj.current == True
        assert first_obj.package

        # Create and run two more jobs, the package should not be updated
        second_job = self._create_job(source.id)
        second_obj = self._run_job_for_single_document(second_job)
        third_job = self._create_job(source.id)
        third_obj = self._run_job_for_single_document(third_job)

        # Run the import command manually
        imported_objects = get_action('harvest_objects_import')(self.context,{'source_id':source.id})

        # Re-attach the objects to a fresh session and reload their state.
        Session.remove()
        Session.add(first_obj)
        Session.add(second_obj)
        Session.add(third_obj)

        Session.refresh(first_obj)
        Session.refresh(second_obj)
        Session.refresh(third_obj)

        after_package_dict = get_action('package_show_rest')(self.context,{'id':first_obj.package_id})

        # Package was updated, and the current object remains the same
        assert after_package_dict
        assert before_package_dict['id'] == after_package_dict['id']
        assert after_package_dict['metadata_modified'] > before_package_dict['metadata_modified']
        assert third_obj.current == False
        assert second_obj.current == False
        assert first_obj.current == True


        source_dict = get_action('harvest_source_show')(self.context,{'id':source.id})
        assert source_dict['status']['total_datasets'] == 1
Beispiel #58
0
    def test_harvest_import_command(self):
        """Run three harvest jobs, then the import action, and check the result.

        After the manual import the package must have a newer
        ``metadata_modified`` while the first harvest object stays the
        current one.

        Note: conditions are asserted one per statement. In
        ``assert a, b`` the second expression is only the failure message
        and is never checked.
        """

        # Create source
        source_fixture = {"url": u"http://127.0.0.1:8999/single/dataset1.xml", "type": u"gemini-single"}

        source, first_job = self._create_source_and_job(source_fixture)

        first_obj = self._run_job_for_single_document(first_job)

        before_package_dict = get_action("package_show_rest")(self.context, {"id": first_obj.package_id})

        # Package was created
        assert before_package_dict
        assert first_obj.current == True
        assert first_obj.package

        # Create and run two more jobs, the package should not be updated
        second_job = self._create_job(source.id)
        second_obj = self._run_job_for_single_document(second_job)
        third_job = self._create_job(source.id)
        third_obj = self._run_job_for_single_document(third_job)

        # Run the import command manually
        imported_objects = get_action("harvest_objects_import")(self.context, {"source_id": source.id})

        # Re-attach the objects to a fresh session and reload their state.
        Session.remove()
        Session.add(first_obj)
        Session.add(second_obj)
        Session.add(third_obj)

        Session.refresh(first_obj)
        Session.refresh(second_obj)
        Session.refresh(third_obj)

        after_package_dict = get_action("package_show_rest")(self.context, {"id": imported_objects[0]["package_id"]})

        # Package was updated, and the current object remains the same
        assert after_package_dict
        assert before_package_dict["id"] == after_package_dict["id"]
        assert after_package_dict["metadata_modified"] > before_package_dict["metadata_modified"]
        assert third_obj.current == False
        assert second_obj.current == False
        assert first_obj.current == True

        source_dict = get_action("harvest_source_show")(self.context, {"id": source.id})
        assert len(source_dict["status"]["packages"]) == 1
Beispiel #59
0
    def test_harvest_deleted_record(self):
        """Check that a deleted package stays deleted until a newer document arrives.

        Steps: harvest (package created), delete the package, re-harvest the
        unchanged document (package untouched), then harvest a newer
        document (package updated and active again).

        Note: conditions are asserted one per statement. In
        ``assert a, b`` the second expression is only the failure message
        and is never checked.
        """

        # Create source
        source_fixture = {"url": u"http://127.0.0.1:8999/single/service1.xml", "type": u"gemini-single"}

        source, first_job = self._create_source_and_job(source_fixture)

        first_obj = self._run_job_for_single_document(first_job)

        first_package_dict = get_action("package_show_rest")(self.context, {"id": first_obj.package_id})

        # Package was created
        assert first_package_dict
        assert first_package_dict["state"] == u"active"
        assert first_obj.current == True

        # Delete package
        first_package_dict["state"] = u"deleted"
        self.context.update({"id": first_package_dict["id"]})
        updated_package_dict = get_action("package_update_rest")(self.context, first_package_dict)

        # Create and run a second job, the date has not changed, so the package should not be updated
        # and remain deleted
        first_job.status = u"Finished"
        first_job.save()
        second_job = self._create_job(source.id)

        second_obj = self._run_job_for_single_document(second_job)

        second_package_dict = get_action("package_show_rest")(self.context, {"id": first_obj.package_id})

        # Package was not updated
        assert second_package_dict
        assert updated_package_dict["id"] == second_package_dict["id"]
        assert not second_obj.package
        assert not second_obj.package_id
        assert second_obj.current == False
        assert first_obj.current == True

        # Harvest an updated document, with a more recent modified date, package should be
        # updated and reactivated
        source.url = u"http://127.0.0.1:8999/single/service1_newer.xml"
        source.save()

        third_job = self._create_job(source.id)

        third_obj = self._run_job_for_single_document(third_job)

        third_package_dict = get_action("package_show_rest")(self.context, {"id": first_obj.package_id})

        # Re-attach the objects to a fresh session and reload their state.
        Session.remove()
        Session.add(first_obj)
        Session.add(second_obj)
        Session.add(third_obj)

        Session.refresh(first_obj)
        Session.refresh(second_obj)
        Session.refresh(third_obj)

        # Package was updated
        assert third_package_dict
        assert third_package_dict["id"] == second_package_dict["id"]
        assert third_obj.package
        assert third_obj.current == True
        assert second_obj.current == False
        assert first_obj.current == False

        assert "NEWER" in third_package_dict["title"]
        assert third_package_dict["state"] == u"active"