Example #1
0
    def crawl(self):
        """Sync the dashboard's closed and open tickets into one collection.

        Every request listed by the ``all_closed`` and ``all_open`` endpoints
        is handed to ``self.update_entity``. Entities of the collection that
        were not produced during this run are deleted afterwards, and the
        collection is emitted together with the terms of the synced entities.
        """
        # The ``all_closed`` URL doubles as the collection's foreign ID.
        url = urljoin(self.host, '/ticket/all_closed/?format=json')
        collection = Collection.by_foreign_id(url, {
            'label': 'Investigative Dashboard Requests'
        })
        Permission.grant_foreign(collection, 'idashboard:occrp_staff',
                                 True, False)
        # A set keeps the stale-entity check below at O(1) per entity.
        existing_entities = set()
        terms = set()
        db.session.flush()
        for endpoint in ['all_closed', 'all_open']:
            url = urljoin(self.host, '/ticket/%s/?format=json' % endpoint)
            data = self.session.get(url).json()
            # A response lacking paginator/object_list is treated as empty
            # instead of failing when iterating ``None``.
            tickets = data.get('paginator', {}).get('object_list') or []
            for req in tickets:
                ent = self.update_entity(req, collection)
                if ent is not None:
                    terms.update(ent.terms)
                    existing_entities.add(ent.id)
                    log.info("  # %s", ent.name)

        # Drop entities that none of the endpoints returned in this run.
        for entity in collection.entities:
            if entity.id not in existing_entities:
                entity.delete()
        self.emit_collection(collection, terms)
Example #2
0
File: spindle.py Project: 01-/aleph
    def crawl_collection(self, collection):
        """Mirror one Spindle collection, its permissions and its entities.

        ``collection`` is the remote collection record (a mapping). Records
        without any ``subjects`` are skipped. For the others, the local
        collection is looked up via ``Collection.by_foreign_id`` using the
        remote API URL as foreign ID, the remote permissions are applied with
        ``Permission.grant_foreign``, every named remote entity is passed to
        ``Entity.by_foreign_id``, and local entities that were not seen in
        this run are deleted before the collection is emitted.
        """
        # Truthiness also covers a missing or ``None`` subjects value.
        if not collection.get('subjects'):
            return
        url = urljoin(self.URL, '/api/collections/%s' % collection.get('id'))
        collection = Collection.by_foreign_id(url, {
            'label': collection.get('title')
        })
        res = requests.get('%s/permissions' % url, headers=self.HEADERS)
        for perm in res.json().get('results', []):
            Permission.grant_foreign(collection, perm.get('role'),
                                     perm.get('read'), perm.get('write'))

        log.info(" > Spindle collection: %s", collection.label)
        res = requests.get('%s/entities' % url, headers=self.HEADERS)
        terms = set()
        # A set keeps the stale-entity check below at O(1) per entity.
        existing_entities = set()
        for entity in res.json().get('results', []):
            # Entities without a name are not imported.
            if entity.get('name') is None:
                continue
            aliases = [on.get('alias') for on in entity.get('other_names', [])]
            ent = Entity.by_foreign_id(entity.get('id'), collection, {
                'name': entity.get('name'),
                'category': SCHEMATA.get(entity.get('$schema'), OTHER),
                'data': entity,
                'selectors': aliases
            })
            terms.update(ent.terms)
            existing_entities.add(ent.id)
            log.info("  # %s (%s)", ent.name, ent.category)

        # Remove local entities that the remote listing no longer contains.
        for entity in collection.entities:
            if entity.id not in existing_entities:
                entity.delete()
        self.emit_collection(collection, terms)
Example #3
0
    def crawl(self):
        """Import dashboard requests from both ticket listings.

        Requests from the ``all_closed`` and ``all_open`` endpoints are passed
        to ``self.update_entity``; entities of the collection that were not
        produced during this run are deleted, and the collection is emitted
        with the terms gathered from the synced entities.
        """
        # The ``all_closed`` URL is used as the collection's foreign ID.
        url = urljoin(self.host, '/ticket/all_closed/?format=json')
        collection = Collection.by_foreign_id(
            url, {'label': 'Investigative Dashboard Requests'})
        Permission.grant_foreign(collection, 'idashboard:occrp_staff', True,
                                 False)
        # Set membership keeps the clean-up pass linear.
        existing_entities = set()
        terms = set()
        db.session.flush()
        for endpoint in ['all_closed', 'all_open']:
            url = urljoin(self.host, '/ticket/%s/?format=json' % endpoint)
            data = self.session.get(url).json()
            # Treat an absent paginator/object_list as an empty listing.
            for req in data.get('paginator', {}).get('object_list') or []:
                ent = self.update_entity(req, collection)
                if ent is not None:
                    terms.update(ent.terms)
                    existing_entities.add(ent.id)
                    log.info("  # %s", ent.name)

        # Delete entities that were not returned by any endpoint.
        for entity in collection.entities:
            if entity.id not in existing_entities:
                entity.delete()
        self.emit_collection(collection, terms)
Example #4
0
def delete_collection(collection, keep_metadata=False, sync=False):
    """Remove a collection's contents and, optionally, its metadata.

    When ``keep_metadata`` is true, the linkages, permissions and the
    collection record itself are left in place. ``sync`` is forwarded to the
    notification flush and to the entity and xref index deletions; the
    collection index deletion and the final refresh always use ``sync=True``.
    """
    cancel_queue(collection)
    store = get_aggregator(collection)
    try:
        store.drop()
    finally:
        # Close the aggregator even if dropping it raised.
        store.close()
    flush_notifications(collection, sync=sync)
    index.delete_entities(collection.id, sync=sync)
    xref_index.delete_xref(collection, sync=sync)
    # Reuse the collection's own deletion timestamp when it has one.
    timestamp = collection.deleted_at or datetime.utcnow()
    for model in (Entity, Mapping, Diagram):
        model.delete_by_collection(collection.id, deleted_at=timestamp)
    Document.delete_by_collection(collection.id)
    drop_metadata = not keep_metadata
    if drop_metadata:
        # Considering linkages metadata for now, might be wrong:
        Linkage.delete_by_collection(collection.id)
        Permission.delete_by_collection(collection.id, deleted_at=timestamp)
        collection.delete(deleted_at=timestamp)
    db.session.commit()
    if drop_metadata:
        index.delete_collection(collection.id, sync=True)
        Authz.flush()
    refresh_collection(collection.id, sync=True)
Example #5
0
    def crawl_source(self, source):
        """Fetch one OpenNames source and emit its entities as a collection.

        Sources whose ``source_id`` is listed in ``IGNORE_SOURCES`` are
        skipped. Each entity of the source's JSON file is converted to an
        entity payload and passed to ``self.emit_entity``.
        """
        if source.get('source_id') in IGNORE_SOURCES:
            return

        url = urljoin(JSON_PATH, source.get('data', {}).get('json'))
        # Fall back to the source ID when the source has no display name.
        source_name = source.get('source') or source.get('source_id')
        collection = self.find_collection(url, {
            'label': '%s - %s' % (source.get('publisher'), source_name)
        })
        Permission.grant_foreign(collection, Role.SYSTEM_GUEST, True, False)
        log.info(" > OpenNames collection: %s", collection.label)
        payload = requests.get(url).json()
        for record in payload.get('entities', []):
            identifier = {
                'scheme': 'opennames:%s' % source.get('source_id'),
                'identifier': record.get('uid')
            }
            data = {
                'identifiers': [identifier],
                'other_names': [],
                'name': record.get('name'),
                '$schema': SCHEMA.get(record.get('type'),
                                      '/entity/entity.json#')
            }
            for alias in record.get('other_names', []):
                # Move the value stored under 'other_name' to 'name'.
                alias['name'] = alias.pop('other_name', None)
                data['other_names'].append(alias)
            self.emit_entity(collection, data)
        self.emit_collection(collection)
Example #6
0
 def load_fixtures(self):
     """Create a private and a public collection with one entity each.

     The private collection is granted to the ``Role.SYSTEM_USER`` role and
     the public one to ``Role.SYSTEM_GUEST``. After committing, the sample
     entities from 'samples.ijson' are indexed into the private collection
     and both collections are processed.
     """
     private = Collection.create({
         'foreign_id': 'test_private',
         'label': "Private Collection",
         'category': 'grey'
     })
     self.private_coll = private
     banana = {'schema': 'Person', 'properties': {'name': ['Banana']}}
     self._banana = Entity.create(banana, private)
     Permission.grant(private, Role.by_foreign_id(Role.SYSTEM_USER),
                      True, False)

     public = Collection.create({
         'foreign_id': 'test_public',
         'label': "Public Collection",
         'category': 'news'
     })
     self.public_coll = public
     kwazulu = {
         'schema': 'Company',
         'properties': {'name': ['KwaZulu'], 'alias': ['kwazulu']}
     }
     self._kwazulu = Entity.create(kwazulu, public)
     Permission.grant(public, Role.by_foreign_id(Role.SYSTEM_GUEST),
                      True, False)
     db.session.commit()

     fixture_path = self.get_fixture_path('samples.ijson')
     index_entities(private, read_entities(fixture_path))
     # The public collection is processed first, then the private one.
     for coll in (public, private):
         process_collection(coll, ingest=False, reset=True)
Example #7
0
    def crawl_source(self, source):
        """Import the entities of a single OpenNames source.

        Nothing happens for sources listed in ``IGNORE_SOURCES``. Otherwise
        the source's JSON file is downloaded and every entity in it is
        emitted into a collection readable by ``Role.SYSTEM_GUEST``.
        """
        if source.get('source_id') in IGNORE_SOURCES:
            return

        json_file = source.get('data', {}).get('json')
        url = urljoin(JSON_PATH, json_file)
        # Prefer the human-readable source name, else use the source ID.
        name = source.get('source') or source.get('source_id')
        meta = {'label': '%s - %s' % (source.get('publisher'), name)}
        collection = self.find_collection(url, meta)
        Permission.grant_foreign(collection, Role.SYSTEM_GUEST, True, False)
        log.info(" > OpenNames collection: %s", collection.label)
        for item in requests.get(url).json().get('entities', []):
            data = {
                'identifiers': [{
                    'scheme': 'opennames:%s' % source.get('source_id'),
                    'identifier': item.get('uid')
                }],
                'other_names': [],
                'name': item.get('name'),
                '$schema': SCHEMA.get(item.get('type'),
                                      '/entity/entity.json#')
            }
            other_names = data['other_names']
            for other in item.get('other_names', []):
                # Rename the 'other_name' key to 'name' in place.
                other['name'] = other.pop('other_name', None)
                other_names.append(other)
            self.emit_entity(collection, data)
        self.emit_collection(collection)
Example #8
0
def update_permission(role, collection, read, write, editor_id=None):
    """Update a role's permission to access a given collection.

    The change is committed and the role refreshed. If the role previously
    had no permission, or one without read access, an event is published:
    ``PUBLISH_COLLECTION`` on the ``GLOBAL`` channel for the
    ``Role.SYSTEM_GUEST`` role, ``GRANT_COLLECTION`` on the role's own
    channel otherwise. Returns whatever ``Permission.grant`` returned.
    """
    before = Permission.by_collection_role(collection, role)
    after = Permission.grant(collection, role, read, write)
    db.session.commit()
    refresh_role(role)
    if after is None:
        return None
    params = {"role": role, "collection": collection}
    # No event when the role could already read the collection.
    if before is not None and before.read:
        return after
    if role.foreign_id == Role.SYSTEM_GUEST:
        event, channels = Events.PUBLISH_COLLECTION, [GLOBAL]
    else:
        event, channels = Events.GRANT_COLLECTION, [role]
    publish(event, actor_id=editor_id, params=params, channels=channels)
    return after
Example #9
0
def delete_collection(collection_id, wait=False):
    """Delete the collection with the given ID and its associated objects.

    Deleting a collection affects many associated objects and requires
    checks, so each step is carried out explicitly here. An unknown ID is
    logged as an error and nothing is deleted.
    """
    collection = (db.session.query(Collection)
                  .filter(Collection.id == collection_id)
                  .first())
    if collection is None:
        log.error("No collection with ID: %r", collection_id)
        return

    log.info("Deleting collection [%r]: %r", collection.id, collection.label)
    # One timestamp is shared by the permission and collection deletions.
    timestamp = datetime.utcnow()
    index_delete(collection_id, wait=wait)

    log.info("Deleting cross-referencing matches...")
    Match.delete_by_collection(collection_id)

    log.info("Deleting permissions...")
    Permission.delete_by_collection(collection_id, deleted_at=timestamp)

    for purge in (delete_documents, delete_entities):
        purge(collection_id, wait=wait)

    collection.delete(deleted_at=timestamp)
    db.session.commit()
Example #10
0
 def crawl(self):
     """Sync dashboard tickets into a watchlist and emit changed terms.

     Tickets from the ``all_closed`` and ``all_open`` endpoints whose
     ``ticket_type`` maps to a category in ``REQUEST_TYPES`` are passed to
     ``Entity.by_foreign_id``. Entities not seen in this run are removed
     from the watchlist, and the symmetric difference between the previous
     and the updated terms is emitted.
     """
     url = urljoin(self.host, '/ticket/all_closed/?format=json')
     watchlist = Watchlist.by_foreign_id(url, {
         'label': 'Investigative Dashboard Requests'
     })
     Permission.grant_foreign(watchlist, 'idashboard:occrp_staff',
                              True, False)
     kept_ids = []
     terms_before = watchlist.terms
     terms_after = set()
     db.session.flush()
     for endpoint in ('all_closed', 'all_open'):
         url = urljoin(self.host, '/ticket/%s/?format=json' % endpoint)
         listing = self.session.get(url).json()
         for ticket in listing.get('paginator', {}).get('object_list'):
             category = REQUEST_TYPES.get(ticket.get('ticket_type'))
             # Only ticket types with a known category are imported.
             if category is not None:
                 name = ticket.get('name')
                 ent = Entity.by_foreign_id(str(ticket.get('id')), watchlist, {
                     'name': name,
                     'category': category,
                     'data': ticket,
                     'selectors': [name]
                 })
                 terms_after.update(ent.terms)
                 kept_ids.append(ent.id)
                 log.info("  # %s (%s)", ent.name, ent.category)
     watchlist.delete_entities(spare=kept_ids)
     changed = terms_before.symmetric_difference(terms_after)
     self.emit_watchlist(watchlist, changed)
Example #11
0
    def crawl_collection(self, collection):
        """Mirror one Spindle collection into a local watchlist.

        Collections without ``subjects`` are ignored. Remote permissions are
        applied to the watchlist, every named remote entity is passed to
        ``Entity.by_foreign_id``, entities not seen in this run are removed,
        and the symmetric difference between the previous and the updated
        terms is emitted.
        """
        if len(collection.get('subjects', [])) == 0:
            return
        url = urljoin(self.URL, '/api/collections/%s' % collection.get('id'))
        watchlist = Watchlist.by_foreign_id(url, {
            'label': collection.get('title')
        })
        perms = requests.get('%s/permissions' % url, headers=self.HEADERS)
        for perm in perms.json().get('results', []):
            Permission.grant_foreign(watchlist, perm.get('role'),
                                     perm.get('read'), perm.get('write'))

        log.info(" > Spindle collection: %s", watchlist.label)
        listing = requests.get('%s/entities' % url, headers=self.HEADERS)
        terms_before = watchlist.terms
        terms_after = set()
        kept_ids = []
        for record in listing.json().get('results', []):
            # Unnamed entities are not imported.
            if record.get('name') is not None:
                aliases = []
                for other in record.get('other_names', []):
                    aliases.append(other.get('alias'))
                ent = Entity.by_foreign_id(record.get('id'), watchlist, {
                    'name': record.get('name'),
                    'category': SCHEMATA.get(record.get('$schema'), OTHER),
                    'data': record,
                    'selectors': aliases
                })
                terms_after.update(ent.terms)
                kept_ids.append(ent.id)
                log.info("  # %s (%s)", ent.name, ent.category)
        watchlist.delete_entities(spare=kept_ids)
        changed = terms_before.symmetric_difference(terms_after)
        self.emit_watchlist(watchlist, changed)
Example #12
0
    def test_update_collections_via_doc_update(self):
        """Check collection changes made by posting a document update.

        Adding a collection created by the logged-in user is accepted; an
        update naming only a collection created without that user returns
        200 but keeps collection 1000 and omits the named one; a
        non-numeric collection ID is rejected with a 400.
        """
        url = '/api/1/documents/1000'
        # The document is fetched before logging in.
        ores = self.client.get(url)
        user = self.login()
        Permission.grant_collection(1000, user, True, True)

        can_write = Collection.create({'label': "Write"}, user)
        no_write = Collection.create({'label': "No-write"})
        db.session.commit()

        def submit(payload):
            # Post the payload as a JSON document update.
            return self.client.post(url,
                                    data=json.dumps(payload),
                                    content_type='application/json')

        payload = ores.json.copy()
        payload['collection_id'].append(can_write.id)
        res = submit(payload)
        assert res.status_code == 200, res
        assert can_write.id in res.json['collection_id'], res.json

        payload = ores.json.copy()
        payload['collection_id'] = [no_write.id]
        res = submit(payload)
        assert res.status_code == 200, res
        assert no_write.id not in res.json['collection_id'], res.json
        assert 1000 in res.json['collection_id'], res.json

        payload = ores.json.copy()
        payload['collection_id'] = ['foo']
        res = submit(payload)
        assert res.status_code == 400, res
Example #13
0
    def crawl(self):
        """Sync dashboard tickets into the requests collection.

        Tickets from the ``all_closed`` and ``all_open`` endpoints whose
        ``ticket_type`` maps to a category in ``REQUEST_TYPES`` are passed to
        ``Entity.by_foreign_id``. Entities of the collection that were not
        seen in this run are deleted, and the collection is emitted with the
        terms of the synced entities.
        """
        # The ``all_closed`` URL doubles as the collection's foreign ID.
        url = urljoin(self.host, '/ticket/all_closed/?format=json')
        collection = Collection.by_foreign_id(url, {
            'label': 'Investigative Dashboard Requests'
        })
        Permission.grant_foreign(collection, 'idashboard:occrp_staff',
                                 True, False)
        # A set keeps the stale-entity check below at O(1) per entity.
        existing_entities = set()
        terms = set()
        db.session.flush()
        for endpoint in ['all_closed', 'all_open']:
            url = urljoin(self.host, '/ticket/%s/?format=json' % endpoint)
            data = self.session.get(url).json()
            # Treat an absent paginator/object_list as an empty listing
            # instead of failing when iterating ``None``.
            for req in data.get('paginator', {}).get('object_list') or []:
                category = REQUEST_TYPES.get(req.get('ticket_type'))
                # Ticket types without a known category are skipped.
                if category is None:
                    continue
                ent = Entity.by_foreign_id(str(req.get('id')), collection, {
                    'name': req.get('name'),
                    'category': category,
                    'data': req,
                    'selectors': [req.get('name')]
                })
                terms.update(ent.terms)
                existing_entities.add(ent.id)
                log.info("  # %s (%s)", ent.name, ent.category)

        # Delete entities that were not returned by any endpoint.
        for entity in collection.entities:
            if entity.id not in existing_entities:
                entity.delete()
        self.emit_collection(collection, terms)
Example #14
0
    def test_update_collections_via_doc_update(self):
        """Exercise collection membership changes via a document update.

        Covers three posts: adding a collection created by the logged-in
        user, replacing the list with a collection created without that
        user, and sending a non-numeric collection ID.
        """
        doc_url = '/api/1/documents/1000'
        # Snapshot of the document, taken before logging in.
        ores = self.client.get(doc_url)
        user = self.login()
        Permission.grant_collection(1000, user, True, True)

        can_write = Collection.create({'label': "Write"}, user)
        no_write = Collection.create({'label': "No-write"})
        db.session.commit()

        post_kwargs = {'content_type': 'application/json'}

        # Adding a collection created by this user is accepted.
        body = ores.json.copy()
        body['collection_id'].append(can_write.id)
        res = self.client.post(doc_url, data=json.dumps(body), **post_kwargs)
        assert res.status_code == 200, res
        assert can_write.id in res.json['collection_id'], res.json

        # The other collection is not applied; 1000 stays.
        body = ores.json.copy()
        body['collection_id'] = [no_write.id]
        res = self.client.post(doc_url, data=json.dumps(body), **post_kwargs)
        assert res.status_code == 200, res
        assert no_write.id not in res.json['collection_id'], res.json
        assert 1000 in res.json['collection_id'], res.json

        # A non-numeric collection ID is rejected.
        body = ores.json.copy()
        body['collection_id'] = ['foo']
        res = self.client.post(doc_url, data=json.dumps(body), **post_kwargs)
        assert res.status_code == 400, res
Example #15
0
def bulk_load(config):
    """Bulk load entities from a CSV file or SQL database.

    ``config`` maps collection foreign IDs to their loading specification.
    For each one the collection is created if it does not exist, read access
    is granted to the listed roles, and every configured query is run to map
    source rows to entities and links understood by the entity index.
    """
    for foreign_id, spec in config.items():
        collection = Collection.by_foreign_id(foreign_id)
        if collection is None:
            meta = {
                'foreign_id': foreign_id,
                'managed': True,
                # Fall back to the foreign ID when no label is configured.
                'label': spec.get('label') or foreign_id,
                'summary': spec.get('summary'),
                'category': spec.get('category'),
            }
            collection = Collection.create(meta)

        for role_fk in dict_list(spec, 'roles', 'role'):
            role = Role.by_foreign_id(role_fk)
            if role is None:
                log.warning("Could not find role: %s", role_fk)
                continue
            Permission.grant(collection, role, True, False)

        db.session.commit()
        update_collection(collection)

        for query in dict_list(spec, 'queries', 'query'):
            load_query(collection, query)
Example #16
0
    def load_fixtures(self):
        """Set up an admin, two collections and their sample entities.

        The private collection receives three 'Banana' persons with different
        birth dates plus the entities from 'samples.ijson'; the public one
        receives the 'KwaZulu' company. Both collections are reindexed
        synchronously at the end.
        """
        self.admin = self.create_user(foreign_id='admin', is_admin=True)
        private = self.create_collection(
            foreign_id='test_private',
            label="Private Collection",
            category='grey',
            creator=self.admin
        )
        self.private_coll = private

        def make_banana(birth_date):
            # Persons share the name and differ only in their birth date.
            return self.create_entity({
                'schema': 'Person',
                'properties': {
                    'name': ['Banana'],
                    'birthDate': birth_date
                }
            }, private)

        self._banana = make_banana('1970-08-21')
        self._banana2 = make_banana('1970-03-21')
        self._banana3 = make_banana('1970-05-21')
        Permission.grant(private, Role.by_foreign_id(Role.SYSTEM_USER),
                         True, False)

        public = self.create_collection(
            foreign_id='test_public',
            label="Public Collection",
            category='news',
            creator=self.admin
        )
        self.public_coll = public
        self._kwazulu = self.create_entity({
            'schema': 'Company',
            'properties': {
                'name': ['KwaZulu'],
                'alias': ['kwazulu']
            }
        }, public)
        Permission.grant(public, Role.by_foreign_id(Role.SYSTEM_GUEST),
                         True, False)
        db.session.commit()

        public_store = get_aggregator(public)
        public_store.delete()
        public_store.close()
        reindex_collection(public, sync=True)

        private_store = get_aggregator(private)
        private_store.delete()
        samples_path = self.get_fixture_path('samples.ijson')
        for sample in read_entities(samples_path):
            private_store.put(sample, fragment='sample')
        private_store.close()
        reindex_collection(private, sync=True)
Example #17
0
def cleanup_deleted():
    """Run ``cleanup_deleted`` on each model in turn, then commit."""
    from aleph.model import Alert, Entity, Collection
    from aleph.model import Permission, Role
    # The original call order is kept.
    for model in (Alert, Permission, Entity, Collection, Role):
        model.cleanup_deleted()
    db.session.commit()
Example #18
0
    def crawl_collection(self, collection):
        """Mirror one Spindle collection, its permissions and its entities.

        ``collection`` is the remote collection record (a mapping). Records
        without any ``subjects`` are skipped. Each named remote entity is
        normalised in place (schema mapped through ``SCHEMATA``, relation
        fields dropped, ``birth_date`` cut at the 'T', aliases renamed to
        ``name``, remote ID moved into ``identifiers``) and saved with
        ``Entity.save(..., merge=True)``. Local entities not seen in this run
        are deleted before the collection is emitted.
        """
        # Truthiness also covers a missing or ``None`` subjects value.
        if not collection.get('subjects'):
            return
        url = urljoin(self.URL, '/api/collections/%s' % collection.get('id'))
        collection = Collection.by_foreign_id(url, {
            'label': collection.get('title')
        })
        res = requests.get('%s/permissions' % url, headers=self.HEADERS)
        for perm in res.json().get('results', []):
            Permission.grant_foreign(collection, perm.get('role'),
                                     perm.get('read'), perm.get('write'))

        log.info(" > Spindle collection: %s", collection.label)
        res = requests.get('%s/entities' % url, headers=self.HEADERS)
        terms = set()
        # A set keeps the stale-entity check below at O(1) per entity.
        existing_entities = set()
        # Fields removed from the remote record before saving.
        dropped_fields = ('members', 'memberships', 'assets', 'owners',
                          'family_first', 'family_second',
                          'social_first', 'social_second')
        for entity in res.json().get('results', []):
            # Entities without a name are not imported.
            if entity.get('name') is None:
                continue
            entity['$schema'] = SCHEMATA.get(entity.get('$schema'), OTHER)
            if 'jurisdiction_code' in entity:
                entity['jurisdiction_code'] = \
                    entity['jurisdiction_code'].lower()
            for field in dropped_fields:
                entity.pop(field, None)

            # Keep only the part before the 'T' of a timestamp value.
            for date_field in ['birth_date']:
                if date_field in entity and 'T' in entity[date_field]:
                    entity[date_field], _ = entity[date_field].split('T', 1)

            # Store each alias under the 'name' key.
            for on in entity.get('other_names', []):
                name = on.pop('alias', None)
                if name is not None:
                    on['name'] = name

            entity['identifiers'] = [{
                'scheme': 'spindle',
                'identifier': entity.pop('id', None)
            }]
            ent = Entity.save(entity, collection_id=collection.id, merge=True)
            db.session.flush()
            terms.update(ent.terms)
            existing_entities.add(ent.id)
            log.info("  # %s", ent.name)

        # Remove local entities that the remote listing no longer contains.
        for entity in collection.entities:
            if entity.id not in existing_entities:
                entity.delete()
        self.emit_collection(collection, terms)
Example #19
0
def delete_collection(collection, sync=False):
    """Delete a collection together with its entities, documents and permissions.

    ``sync`` is only forwarded to ``index.delete_collection``; the initial
    ``reset_collection`` call always runs with ``sync=False``.
    """
    reset_collection(collection, sync=False)
    flush_notifications(collection)
    # Reuse the collection's existing deletion timestamp when it has one, so
    # that all related records share the same ``deleted_at`` value.
    deleted_at = collection.deleted_at or datetime.utcnow()
    Entity.delete_by_collection(collection.id, deleted_at=deleted_at)
    # Documents are deleted without a ``deleted_at`` timestamp.
    Document.delete_by_collection(collection.id)
    Permission.delete_by_collection(collection.id, deleted_at=deleted_at)
    collection.delete(deleted_at=deleted_at)
    db.session.commit()
    # The index entry is removed only after the database commit.
    index.delete_collection(collection.id, sync=sync)
    Authz.flush()
Example #20
0
def update_permission(role, collection, read, write):
    """Update a role's permission to access a given collection.

    The change is committed, then the role is notified through the
    'email/permission.html' template, which receives both the permission
    found before the change and the one returned by ``Permission.grant``.
    Returns the latter.
    """
    before = Permission.by_collection_role(collection, role)
    after = Permission.grant(collection, role, read, write)
    db.session.commit()

    notify_role_template(
        role,
        collection.label,
        'email/permission.html',
        url=collection_url(collection.id),
        pre=before,
        post=after,
        collection=collection,
    )
    return after
Example #21
0
def delete_collection(collection, sync=False):
    """Delete a collection, its entities, matches, permissions and index data.

    ``sync`` is forwarded to ``index.delete_collection`` only; the entity
    index deletion always runs with ``sync=False``.
    """
    flush_notifications(collection)
    drop_aggregator(collection)
    # Reuse the collection's own deletion timestamp when it has one.
    timestamp = collection.deleted_at or datetime.utcnow()
    for model in (Entity, Match, Permission):
        model.delete_by_collection(collection.id, deleted_at=timestamp)
    collection.delete(deleted_at=timestamp)
    db.session.commit()
    # Index clean-up happens after the database commit.
    index.delete_collection(collection.id, sync=sync)
    index.delete_entities(collection.id, sync=False)
    refresh_collection(collection.id)
    Authz.flush()
Example #22
0
def cleanup_deleted():
    """Run ``cleanup_deleted`` on each model in turn, then commit."""
    from aleph.model import Alert, Entity, Collection
    from aleph.model import Permission, Role, Document
    from aleph.model import Diagram, Mapping
    # The original call order is kept.
    models = (Mapping, Diagram, Document, Alert,
              Permission, Entity, Collection, Role)
    for model in models:
        model.cleanup_deleted()
    db.session.commit()
Example #23
0
    def crawl(self):
        """Feed all closed and open dashboard tickets to ``update_entity``.

        The collection is looked up with ``self.find_collection`` using the
        ``all_closed`` URL and emitted once both endpoints are processed.
        """
        closed_url = urljoin(self.host, '/ticket/all_closed/?format=json')
        meta = {'label': 'Investigative Dashboard Requests'}
        coll = self.find_collection(closed_url, meta)
        Permission.grant_foreign(coll, 'idashboard:occrp_staff', True, False)
        for endpoint in ('all_closed', 'all_open'):
            path = '/ticket/%s/?format=json' % endpoint
            listing = self.session.get(urljoin(self.host, path)).json()
            tickets = listing.get('paginator', {}).get('object_list')
            for ticket in tickets:
                # TODO: get the ID API fixed.
                self.update_entity(ticket, coll)

        self.emit_collection(coll)
Example #24
0
def delete_collection(collection, keep_metadata=False, sync=False):
    """Delete a collection's contents and, optionally, its metadata.

    With ``keep_metadata`` set, the permissions, the collection record and
    its index entry are kept. ``sync`` is forwarded to
    ``index.delete_collection`` only; the initial reset runs with
    ``sync=False`` and the final refresh with ``sync=True``.
    """
    reset_collection(collection, sync=False)
    # Reuse the collection's own deletion timestamp when it has one.
    timestamp = collection.deleted_at or datetime.utcnow()
    for model in (Entity, Mapping, Diagram):
        model.delete_by_collection(collection.id, deleted_at=timestamp)
    Document.delete_by_collection(collection.id)
    drop_metadata = not keep_metadata
    if drop_metadata:
        Permission.delete_by_collection(collection.id, deleted_at=timestamp)
        collection.delete(deleted_at=timestamp)
    db.session.commit()
    if drop_metadata:
        index.delete_collection(collection.id, sync=sync)
        Authz.flush()
    refresh_collection(collection.id, sync=True)
Example #25
0
    def load_fixtures(self):
        """Set up an admin, a private and a public collection with data.

        The private collection holds a 'Banana' person and the entities from
        'samples.ijson'; the public one holds the 'KwaZulu' company. Both
        collections are processed synchronously at the end.
        """
        self.admin = self.create_user(foreign_id='admin', is_admin=True)
        private = self.create_collection(foreign_id='test_private',
                                         label="Private Collection",
                                         category='grey',
                                         casefile=False,
                                         creator=self.admin)
        self.private_coll = private
        banana = {'schema': 'Person', 'properties': {'name': ['Banana']}}
        self._banana = Entity.create(banana, private)
        Permission.grant(private, Role.by_foreign_id(Role.SYSTEM_USER),
                         True, False)

        public = self.create_collection(foreign_id='test_public',
                                        label="Public Collection",
                                        category='news',
                                        casefile=False,
                                        creator=self.admin)
        self.public_coll = public
        kwazulu = {
            'schema': 'Company',
            'properties': {'name': ['KwaZulu'], 'alias': ['kwazulu']}
        }
        self._kwazulu = Entity.create(kwazulu, public)
        Permission.grant(public, Role.by_foreign_id(Role.SYSTEM_GUEST),
                         True, False)
        db.session.commit()

        drop_aggregator(public)
        public_stage = get_stage(public, OP_PROCESS)
        process_collection(public_stage, public, ingest=False, sync=True)

        store = get_aggregator(private)
        store.delete()
        private_stage = get_stage(private, OP_PROCESS)
        samples_path = self.get_fixture_path('samples.ijson')
        for sample in read_entities(samples_path):
            # Each sample is stored and then indexed right away.
            store.put(sample, fragment='sample')
            index_aggregate(private_stage, private,
                            entity_id=sample.id, sync=True)
        store.close()
        process_collection(private_stage, private, ingest=False, sync=True)
Example #26
0
def permissions_index(id):
    """List the visible permissions of a collection the caller can write.

    Visible group roles that have no permission on the collection yet are
    appended as placeholder entries with read and write both ``False``.
    """
    collection = get_db_collection(id, request.authz.WRITE)
    query = Permission.all().filter(Permission.collection_id == collection.id)
    permissions = []
    # Group roles still lacking a permission entry; shrinks in the loop below.
    pending_roles = []
    for group in Role.all_groups():
        if check_visible(group, request.authz):
            pending_roles.append(group)
    for permission in query.all():
        if check_visible(permission.role, request.authz):
            permissions.append(permission)
            if permission.role in pending_roles:
                pending_roles.remove(permission.role)

    # this workaround ensures that all groups are visible for the user to
    # select in the UI even if they are not currently associated with the
    # collection.
    permissions.extend({
        'collection_id': collection.id,
        'write': False,
        'read': False,
        'role': role
    } for role in pending_roles)

    results = PermissionSchema().dump(permissions, many=True)
    return jsonify({'total': len(permissions), 'results': results})
Example #27
0
def index(id):
    """List the visible permissions of a collection the caller can write.

    Candidate roles are the groups returned for the caller, plus the system
    roles for admins. Candidates without a permission on the collection are
    appended as placeholders with read and write ``False``, except public
    roles when the collection is a casefile.
    """
    collection = get_db_collection(id, request.authz.WRITE)
    candidates = Role.all_groups(request.authz).all()
    if request.authz.is_admin:
        candidates.extend(Role.all_system())
    query = Permission.all().filter(Permission.collection_id == collection.id)
    permissions = []
    for permission in query.all():
        if check_visible(permission.role, request.authz):
            permissions.append(permission)
            # Roles with an existing permission need no placeholder.
            if permission.role in candidates:
                candidates.remove(permission.role)

    # this workaround ensures that all groups are visible for the user to
    # select in the UI even if they are not currently associated with the
    # collection.
    permissions.extend({
        'collection_id': collection.id,
        'write': False,
        'read': False,
        'role_id': str(role.id)
    } for role in candidates if not (collection.casefile and role.is_public))

    serialized = PermissionSerializer().serialize_many(permissions)
    return jsonify({'total': len(serialized), 'results': serialized})
Example #28
0
def permissions_index(collection):
    """Return the visible permissions of a collection as JSON.

    Requires ``collection_write`` on the collection. Group roles that pass
    ``check_visible`` but have no stored permission are appended as
    placeholder dicts with read and write set to False.
    """
    request.authz.require(request.authz.collection_write(collection))
    query = Permission.all().filter(Permission.collection_id == collection)
    results = []
    known_role_ids = set()
    for perm in query.all():
        if not check_visible(perm.role):
            continue
        results.append(perm)
        known_role_ids.add(perm.role.id)

    # this workaround ensures that all groups are visible for the user to
    # select in the UI even if they are not currently associated with the
    # collection.
    for group in Role.all_groups():
        if not check_visible(group) or group.id in known_role_ids:
            continue
        known_role_ids.add(group.id)
        results.append({
            'write': False,
            'read': False,
            'role': group,
            'role_id': group.id
        })

    return jsonify({'total': len(results), 'results': results})
Example #29
0
def index(id):
    """Return the permission listing for one collection.

    Records an ``Audit.ACT_COLLECTION`` audit entry, then serializes the
    stored permissions whose role is visible to the requester, followed by
    read=False/write=False placeholders for visible groups that have no
    permission yet.
    """
    collection = get_db_collection(id, request.authz.WRITE)
    record_audit(Audit.ACT_COLLECTION, id=id)
    remaining = []
    for group in Role.all_groups():
        if check_visible(group, request.authz):
            remaining.append(group)

    stored = Permission.all()
    stored = stored.filter(Permission.collection_id == collection.id)
    listing = []
    for perm in stored.all():
        perm_role = perm.role
        if not check_visible(perm_role, request.authz):
            continue
        listing.append(perm)
        if perm_role in remaining:
            remaining.remove(perm_role)

    # this workaround ensures that all groups are visible for the user to
    # select in the UI even if they are not currently associated with the
    # collection.
    for group in remaining:
        # Public roles are not offered on casefile collections.
        if collection.casefile and group.is_public:
            continue
        placeholder = {
            'collection_id': collection.id,
            'write': False,
            'read': False,
            'role_id': str(group.id)
        }
        listing.append(placeholder)

    listing = PermissionSerializer().serialize_many(listing)
    response = {
        'total': len(listing),
        'results': listing
    }
    return jsonify(response)
Example #30
0
    def crawl(self):
        """Crawl closed and open tickets into a single collection.

        The collection is looked up via ``self.find_collection`` using the
        ``all_closed`` URL, read access is granted to the
        ``idashboard:occrp_staff`` foreign role, and every ticket listed by
        the ``all_closed`` and ``all_open`` endpoints is handed to
        ``self.update_entity``. The collection is emitted at the end.
        """
        url = urljoin(self.host, '/ticket/all_closed/?format=json')
        coll = self.find_collection(url, {
            'label': 'Investigative Dashboard Requests'
        })
        Permission.grant_foreign(coll, 'idashboard:occrp_staff',
                                 True, False)
        for endpoint in ['all_closed', 'all_open']:
            url = urljoin(self.host, '/ticket/%s/?format=json' % endpoint)
            data = self.session.get(url).json()

            # ``.get('object_list')`` yields None when the key is absent (or
            # null), which would make the loop raise TypeError; treat that
            # the same way a missing ``paginator`` is already treated.
            tickets = data.get('paginator', {}).get('object_list') or []
            for req in tickets:
                # TODO: get the ID API fixed.
                self.update_entity(req, coll)

        self.emit_collection(coll)
Example #31
0
def permissions_index(collection):
    """Return all permissions stored for ``collection`` as JSON.

    Requires ``collection_write`` on the collection. The response carries
    the row count under ``total`` and the query itself under ``results``.
    """
    authz.require(authz.collection_write(collection))
    query = Permission.all().filter(Permission.collection_id == collection)
    payload = {
        'total': query.count(),
        'results': query
    }
    return jsonify(payload)
Example #32
0
def source_permissions_index(source=None):
    """Return the permissions attached to a source as JSON.

    Requires ``source_write`` on the source. Only permissions whose
    resource type is ``Permission.SOURCE`` and whose resource id equals
    ``source`` are included.
    """
    authz.require(authz.source_write(source))
    query = Permission.all() \
        .filter(Permission.resource_type == Permission.SOURCE) \
        .filter(Permission.resource_id == source)
    payload = {
        'total': query.count(),
        'results': query
    }
    return jsonify(payload)
Example #33
0
def collection_permissions_index(collection=None):
    """Return the permissions attached to a collection as JSON.

    Requires ``collection_write`` on the collection. Only permissions whose
    resource type is ``Permission.COLLECTION`` and whose resource id equals
    ``collection`` are included.
    """
    authz.require(authz.collection_write(collection))
    by_type = Permission.all().filter(
        Permission.resource_type == Permission.COLLECTION)
    matching = by_type.filter(Permission.resource_id == collection)
    total = matching.count()
    return jsonify({'total': total, 'results': matching})
Example #34
0
def permissions_index(collection):
    """Return the collection's permissions whose role is visible.

    Requires ``collection_write`` on the collection; permissions whose role
    fails ``check_visible`` are left out of the response.
    """
    request.authz.require(request.authz.collection_write(collection))
    query = Permission.all().filter(Permission.collection_id == collection)
    visible = [perm for perm in query.all() if check_visible(perm.role)]
    return jsonify({'total': len(visible), 'results': visible})
Example #35
0
    def crawl_source(self, source):
        """Import one OpenNames source into its own collection.

        Sources whose ``source_id`` is in ``IGNORE_SOURCES`` are skipped.
        Otherwise the source's JSON file is fetched, each entity in it is
        saved into the collection with ``merge=True``, collection entities
        that were not saved during this run have ``delete()`` called on
        them, and the collection is emitted together with the gathered
        terms.
        """
        if source.get('source_id') in IGNORE_SOURCES:
            return

        json_file = source.get('data', {}).get('json')
        url = urljoin(JSON_PATH, json_file)
        source_name = source.get('source') or source.get('source_id')
        label = '%s - %s' % (source.get('publisher'), source_name)
        collection = Collection.by_foreign_id(url, {'label': label})
        Permission.grant_foreign(collection, Role.SYSTEM_GUEST, True, False)
        log.info(" > OpenNames collection: %s", collection.label)
        terms = set()
        # A set makes the stale-entity check below a constant-time lookup
        # instead of a list scan per collection entity.
        existing_entities = set()
        db.session.flush()
        entities = requests.get(url).json().get('entities', [])
        scheme = 'opennames:%s' % source.get('source_id')
        for entity in entities:
            data = {
                'identifiers': [{
                    'scheme': scheme,
                    'identifier': entity.get('uid')
                }],
                'other_names': [],
                'name': entity.get('name'),
                '$schema': SCHEMA.get(entity.get('type'),
                                      '/entity/entity.json#')
            }
            for on in entity.get('other_names', []):
                # Rename the upstream ``other_name`` key to ``name``.
                on['name'] = on.pop('other_name', None)
                data['other_names'].append(on)

            ent = Entity.save(data, collection_id=collection.id, merge=True)
            db.session.flush()
            terms.update(ent.terms)
            existing_entities.add(ent.id)
            log.info("  # %s", ent.name)

        # Drop entities that are no longer present in the upstream source.
        for entity in collection.entities:
            if entity.id not in existing_entities:
                entity.delete()

        self.emit_collection(collection, terms)
Example #36
0
def delete_collection_content(collection_id):
    """Delete a collection together with its dependent records.

    Logs an error and returns when no collection has the given ID.
    Otherwise entities, matches and permissions of the collection are
    deleted with a shared ``deleted_at`` timestamp, the index entries are
    removed, the collection itself is deleted and the session committed.
    """
    # Deleting a collection affects many associated objects and requires
    # checks, so this is done manually and in detail here.
    collection = db.session.query(Collection) \
        .filter(Collection.id == collection_id) \
        .first()
    if collection is None:
        log.error("No collection with ID: %r", collection_id)
        return

    log.info("Deleting collection [%r]: %r", collection.id, collection.label)
    # Reuse an existing deletion timestamp if the collection already has one.
    deleted_at = collection.deleted_at or datetime.utcnow()
    for model in (Entity, Match, Permission):
        model.delete_by_collection(collection_id, deleted_at=deleted_at)
    index.delete_collection(collection_id)
    index.delete_entities(collection_id)
    collection.delete(deleted_at=deleted_at)
    db.session.commit()
Example #37
0
def delete_collection(collection, keep_metadata=False,
                      sync=False, reset_sync=False):
    """Delete the contents of a collection and, optionally, the collection.

    :param collection: the collection object to delete.
    :param keep_metadata: when true, linkages, permissions, the collection
        row and its index entry are kept; only the contents are removed.
    :param sync: forwarded to ``index.delete_collection``.
    :param reset_sync: forwarded to ``reset_collection``.
    """
    reset_collection(collection, sync=reset_sync)
    # Reuse an existing deletion timestamp if the collection already has one.
    deleted_at = collection.deleted_at or datetime.utcnow()
    Entity.delete_by_collection(collection.id, deleted_at=deleted_at)
    Mapping.delete_by_collection(collection.id, deleted_at=deleted_at)
    Diagram.delete_by_collection(collection.id, deleted_at=deleted_at)
    Document.delete_by_collection(collection.id)
    if not keep_metadata:
        # Considering this metadata for now, might be wrong:
        Linkage.delete_by_collection(collection.id)

        Permission.delete_by_collection(collection.id, deleted_at=deleted_at)
        collection.delete(deleted_at=deleted_at)
    db.session.commit()
    # Index and authz cleanup happen only after the database commit.
    if not keep_metadata:
        index.delete_collection(collection.id, sync=sync)
        Authz.flush()
    refresh_collection(collection.id, sync=True)
Example #38
0
    def crawl_item(self, item, sources, source):
        """Emit one item into the source named by its metadata.

        The source's foreign id comes from ``item.meta['source']`` and falls
        back to the ``source`` argument. Sources not yet in ``sources`` are
        created and cached there; guest and/or user read access is granted
        when the metadata sets ``public`` / ``users``.

        Raises ValueError when no foreign id can be determined.
        """
        info = item.meta.get('source', {})
        foreign_id = info.pop('foreign_id', source)
        if foreign_id is None:
            raise ValueError("No foreign_id for source given: %r" % item)

        if foreign_id not in sources:
            sources[foreign_id] = self.create_source(
                foreign_id=foreign_id,
                label=info.get('label', foreign_id))
            target = sources[foreign_id]
            grants = (('public', Role.SYSTEM_GUEST),
                      ('users', Role.SYSTEM_USER))
            for flag, role in grants:
                if info.get(flag):
                    Permission.grant_foreign(target, role, True, False)

        log.info('Import: %r', item.identifier)
        self.emit_file(sources[foreign_id],
                       self.normalize_metadata(item),
                       item.data_path)
Example #39
0
def delete_collection_content(collection_id):
    """Remove a collection and the objects that hang off it.

    When the ID does not match any collection an error is logged and
    nothing else happens. Otherwise entities, matches and permissions are
    deleted with one shared timestamp, the collection and its entities are
    dropped from the index, the collection is deleted and the session is
    committed.
    """
    # Deleting a collection affects many associated objects and requires
    # checks, so this is done manually and in detail here.
    lookup = db.session.query(Collection)
    collection = lookup.filter(Collection.id == collection_id).first()
    if collection is not None:
        log.info("Deleting collection [%r]: %r",
                 collection.id, collection.label)
        # Keep a previously recorded deletion time, otherwise stamp now.
        stamp = collection.deleted_at or datetime.utcnow()
        Entity.delete_by_collection(collection_id, deleted_at=stamp)
        Match.delete_by_collection(collection_id, deleted_at=stamp)
        Permission.delete_by_collection(collection_id, deleted_at=stamp)
        index.delete_collection(collection_id)
        index.delete_entities(collection_id)
        collection.delete(deleted_at=stamp)
        db.session.commit()
    else:
        log.error("No collection with ID: %r", collection_id)
Example #40
0
def update_permission(role, collection, read, write, editor_id=None):
    """Update a roles permission to access a given collection.

    Publishes a publish/grant event when read access is newly given and a
    revoke event when it is taken away, then commits and flushes ``Authz``.
    Returns the permission produced by ``Permission.grant``.
    """
    previous = Permission.by_collection_role(collection, role)
    current = Permission.grant(collection, role, read, write)

    # NOTE(review): ``previous.read`` is inspected after ``grant()`` ran; if
    # grant() updates the same object in place this reflects the new value.
    # Confirm against Permission.grant.
    could_read = previous is not None and previous.read
    event_args = {'role': role, 'collection': collection}
    if not could_read and current.read:
        if role.is_public:
            publish(Events.PUBLISH_COLLECTION,
                    actor_id=editor_id,
                    params=event_args,
                    channels=[Notification.GLOBAL])
        else:
            publish(Events.GRANT_COLLECTION,
                    actor_id=editor_id,
                    params=event_args)
    elif could_read and not current.read:
        publish(Events.REVOKE_COLLECTION,
                actor_id=editor_id,
                params=event_args)
    db.session.commit()
    Authz.flush()
    return current
Example #41
0
    def crawl_source(self, source):
        """Synchronise a collection with one OpenNames source.

        Does nothing for sources listed in ``IGNORE_SOURCES``. Otherwise the
        JSON file named in the source's ``data`` is downloaded, each entity
        is saved (merged) into the collection, entities of the collection
        that were not touched by this run have ``delete()`` called on them,
        and the collection is emitted with the collected terms.
        """
        if source.get('source_id') in IGNORE_SOURCES:
            return

        json_file = source.get('data', {}).get('json')
        url = urljoin(JSON_PATH, json_file)
        source_name = source.get('source') or source.get('source_id')
        label = '%s - %s' % (source.get('publisher'), source_name)
        collection = Collection.by_foreign_id(url, {
            'label': label
        })
        Permission.grant_foreign(collection, Role.SYSTEM_GUEST, True, False)
        log.info(" > OpenNames collection: %s", collection.label)
        terms = set()
        # Tracked as a set: the cleanup loop below does one membership test
        # per collection entity, which was a linear scan with a list.
        existing_entities = set()
        db.session.flush()
        entities = requests.get(url).json().get('entities', [])
        for entity in entities:
            data = {
                'identifiers': [{
                    'scheme': 'opennames:%s' % source.get('source_id'),
                    'identifier': entity.get('uid')
                }],
                'other_names': [],
                'name': entity.get('name'),
                '$schema': SCHEMA.get(entity.get('type'),
                                      '/entity/entity.json#')
            }
            for on in entity.get('other_names', []):
                # Rename the upstream ``other_name`` key to ``name``.
                on['name'] = on.pop('other_name', None)
                data['other_names'].append(on)

            ent = Entity.save(data, collection_id=collection.id, merge=True)
            db.session.flush()
            terms.update(ent.terms)
            existing_entities.add(ent.id)
            log.info("  # %s", ent.name)

        # Anything not saved in this run is gone upstream.
        for entity in collection.entities:
            if entity.id not in existing_entities:
                entity.delete()

        self.emit_collection(collection, terms)
Example #42
0
    def crawl_source(self, source):
        """Load one OpenNames source into a collection of entities.

        Sources in ``IGNORE_SOURCES`` are skipped, as are entities without
        a name. Each remaining entity is looked up or created via
        ``Entity.by_foreign_id`` with its alternative names and identity
        numbers as selectors. Collection entities not seen in this run have
        ``delete()`` called on them before the collection is emitted.
        """
        if source.get('source_id') in IGNORE_SOURCES:
            return

        json_file = source.get('data', {}).get('json')
        url = urljoin(JSON_PATH, json_file)
        source_name = source.get('source') or source.get('source_id')
        label = '%s - %s' % (source.get('publisher'), source_name)
        collection = Collection.by_foreign_id(url, {
            'label': label
        })
        Permission.grant_foreign(collection, Role.SYSTEM_GUEST, True, False)
        log.info(" > OpenNames collection: %s", collection.label)
        terms = set()
        # A set gives constant-time membership tests in the cleanup loop,
        # where a list was scanned once per collection entity.
        existing_entities = set()
        db.session.flush()
        entities = requests.get(url).json().get('entities', [])
        for entity in entities:
            if entity.get('name') is None:
                continue
            selectors = []
            for on in entity.get('other_names', []):
                selectors.append(on.get('other_name'))

            for iden in entity.get('identities', []):
                if iden.get('number'):
                    selectors.append(iden.get('number'))

            ent = Entity.by_foreign_id(entity.get('uid'), collection, {
                'name': entity.get('name'),
                'category': CATEGORIES.get(entity.get('type'), OTHER),
                'data': entity,
                'selectors': selectors
            })
            terms.update(ent.terms)
            existing_entities.add(ent.id)
            log.info("  # %s (%s)", ent.name, ent.category)

        # Entities that were not refreshed above are removed.
        for entity in collection.entities:
            if entity.id not in existing_entities:
                entity.delete()
        self.emit_collection(collection, terms)
Example #43
0
def delete_collection(collection, keep_metadata=False, sync=False):
    """Delete a collection's contents and, optionally, the collection.

    :param collection: the collection object to delete.
    :param keep_metadata: when true, permissions, the collection row and
        its index entry are kept; only the contents are removed.
    :param sync: forwarded to the notification, entity index and xref
        index cleanup calls.
    """
    cancel_queue(collection)
    aggregator = get_aggregator(collection)
    aggregator.drop()
    flush_notifications(collection, sync=sync)
    index.delete_entities(collection.id, sync=sync)
    xref_index.delete_xref(collection, sync=sync)
    # Reuse an existing deletion timestamp if the collection already has one.
    deleted_at = collection.deleted_at or datetime.utcnow()
    Mapping.delete_by_collection(collection.id)
    EntitySet.delete_by_collection(collection.id, deleted_at)
    Entity.delete_by_collection(collection.id)
    Document.delete_by_collection(collection.id)
    if not keep_metadata:
        Permission.delete_by_collection(collection.id)
        collection.delete(deleted_at=deleted_at)
    db.session.commit()
    # The collection index entry and authz state are cleared after commit.
    if not keep_metadata:
        index.delete_collection(collection.id, sync=True)
        Authz.flush()
    refresh_collection(collection.id)
Example #44
0
def update_permission(role, collection, read, write, editor_id=None):
    """Update a roles permission to access a given collection.

    When read access is newly given, a publish event (guest role, global
    channel) or a grant event (any other role, on the role's own channel)
    is published. The change is committed, ``Authz`` is flushed and the
    role refreshed. Returns the permission from ``Permission.grant``.
    """
    before = Permission.by_collection_role(collection, role)
    after = Permission.grant(collection, role, read, write)
    had_read = before is not None and before.read
    if not had_read and after.read:
        is_guest = role.foreign_id == Role.SYSTEM_GUEST
        if is_guest:
            event = Events.PUBLISH_COLLECTION
            channels = [Notification.GLOBAL]
        else:
            event = Events.GRANT_COLLECTION
            channels = [role]
        publish(event,
                actor_id=editor_id,
                params={'role': role, 'collection': collection},
                channels=channels)
    db.session.commit()
    Authz.flush()
    refresh_role(role)
    return after
Example #45
0
    def crawl_item(self, item):
        """Ingest one item into the collection named by its metadata.

        The collection's foreign id is taken from ``item.meta['source']``.
        A collection not yet cached in ``self.collections`` is created,
        given guest read access when the metadata sets ``public``, and the
        session is committed. The item's file is then passed to
        ``ingest_file`` with ``move=False``.

        Raises:
            ValueError: if the source metadata has no ``foreign_id`` or it
                is None.
        """
        coll_data = item.meta.get('source', {})
        # Default to None so that a missing key reaches the ValueError below
        # instead of escaping as a bare KeyError from ``pop``.
        coll_fk = coll_data.pop('foreign_id', None)
        if coll_fk is None:
            raise ValueError(
                "No foreign_id for collection given: %r" % (item,))
        if coll_fk not in self.collections:
            label = coll_data.get('label', coll_fk)
            self.collections[coll_fk] = Collection.create({
                'foreign_id': coll_fk,
                'label': label
            })
            if coll_data.get('public'):
                Permission.grant_foreign(self.collections[coll_fk],
                                         Role.SYSTEM_GUEST,
                                         True, False)
            db.session.commit()

        log.info('Import: %r', item.identifier)
        meta = self.normalize_metadata(item)
        ingest_file(self.collections[coll_fk].id, meta,
                    item.data_path, move=False)
Example #46
0
File: mf.py Project: 01-/aleph
    def crawl_item(self, item, sources, source):
        """Emit a single item's file into its source.

        ``item.meta['source']`` supplies the foreign id, with the ``source``
        argument as fallback, plus an optional label and the ``public`` /
        ``users`` flags. A source missing from ``sources`` is created,
        stored there, and granted guest / user read access per those flags.

        Raises ValueError when neither the metadata nor ``source`` provides
        a foreign id.
        """
        details = item.meta.get('source', {})
        key = details.pop('foreign_id', source)
        if key is None:
            raise ValueError("No foreign_id for source given: %r" % item)

        already_known = key in sources
        if not already_known:
            title = details.get('label', key)
            sources[key] = self.create_source(foreign_id=key, label=title)
            wants_guest = details.get('public')
            if wants_guest:
                Permission.grant_foreign(sources[key],
                                         Role.SYSTEM_GUEST,
                                         True, False)
            wants_users = details.get('users')
            if wants_users:
                Permission.grant_foreign(sources[key],
                                         Role.SYSTEM_USER,
                                         True, False)

        log.info('Import: %r', item.identifier)
        normalized = self.normalize_metadata(item)
        self.emit_file(sources[key], normalized, item.data_path)
Example #47
0
def permissions_update(collection):
    """Grant a role read/write access to a collection from request data.

    Requires ``collection_write`` on the collection. The request body is
    validated against ``permission.json#``; an unknown role id raises
    ``BadRequest``. The grant is committed, the request is logged and the
    updated permission is returned as JSON.
    """
    authz.require(authz.collection_write(collection))
    payload = request_data()
    validate(payload, "permission.json#")

    role_query = Role.all().filter(Role.id == payload["role"])
    role = role_query.first()
    if role is None:
        raise BadRequest()

    granted = Permission.grant_collection(
        collection, role, payload["read"], payload["write"])
    db.session.commit()
    log_event(request)
    response = {"status": "ok", "updated": granted}
    return jsonify(response)
Example #48
0
    def crawl_source(self, source):
        """Synchronise a watchlist with one OpenNames source.

        Sources in ``IGNORE_SOURCES`` are skipped, as are entities without
        a name. Entities are looked up or created via
        ``Entity.by_foreign_id``; afterwards ``delete_entities`` is called
        sparing the ones seen in this run. The watchlist is emitted with
        the terms that are in exactly one of the previous and updated sets.
        """
        if source.get('source_id') in IGNORE_SOURCES:
            return

        url = urljoin(JSON_PATH, source.get('data', {}).get('json'))

        watchlist = Watchlist.by_foreign_id(url, {
            'label': source.get('source_id')
        })
        Permission.grant_foreign(watchlist, Role.SYSTEM_GUEST, True, False)
        log.info(" > OpenNames collection: %s", watchlist.label)
        terms_before = watchlist.terms
        terms_after = set()
        kept_ids = []
        db.session.flush()
        payload = requests.get(url).json()
        for record in payload.get('entities', []):
            if record.get('name') is None:
                continue
            # Alternative names first, then any non-empty identity numbers.
            selectors = [alias.get('other_name')
                         for alias in record.get('other_names', [])]
            selectors.extend(iden.get('number')
                             for iden in record.get('identities', [])
                             if iden.get('number'))
            ent = Entity.by_foreign_id(record.get('uid'), watchlist, {
                'name': record.get('name'),
                'category': CATEGORIES.get(record.get('type'), OTHER),
                'data': record,
                'selectors': selectors
            })
            terms_after.update(ent.terms)
            kept_ids.append(ent.id)
            log.info("  # %s (%s)", ent.name, ent.category)
        watchlist.delete_entities(spare=kept_ids)
        changed_terms = terms_before.symmetric_difference(terms_after)
        self.emit_watchlist(watchlist, changed_terms)
Example #49
0
def update_permission(role, collection, read, write, editor_id=None):
    """Update a roles permission to access a given collection.

    Picks at most one event to publish: publish (public role) or grant
    when read access is newly given, revoke when it is taken away. The
    change is then committed, ``Authz`` flushed and the role refreshed.
    Returns the permission from ``Permission.grant``.
    """
    pre = Permission.by_collection_role(collection, role)
    post = Permission.grant(collection, role, read, write)

    params = {'role': role, 'collection': collection}
    event = None
    extra = {}
    if (pre is None or not pre.read) and post.read:
        if role.is_public:
            event = Events.PUBLISH_COLLECTION
            extra = {'channels': [Notification.GLOBAL]}
        else:
            event = Events.GRANT_COLLECTION
    elif pre is not None and pre.read and not post.read:
        event = Events.REVOKE_COLLECTION

    if event is not None:
        publish(event, actor_id=editor_id, params=params, **extra)
    db.session.commit()
    Authz.flush()
    refresh_role(role)
    return post
Example #50
0
File: authz.py Project: 01-/aleph
def sources(action):
    """Return the source ids the current request may use for ``action``.

    The READ/WRITE id sets are computed once and stored on ``request`` as
    ``auth_sources``. Admins get every source id for both actions; other
    requests get them from the permissions of ``request.auth_roles``, with
    write access only counted when the request is logged in.
    """
    if hasattr(request, 'auth_sources'):
        return list(request.auth_sources.get(action, []))

    request.auth_sources = {READ: set(), WRITE: set()}
    readable = request.auth_sources[READ]
    writable = request.auth_sources[WRITE]
    if is_admin():
        for (source_id,) in Source.all_ids():
            readable.add(source_id)
            writable.add(source_id)
    else:
        query = Permission.all() \
            .filter(Permission.role_id.in_(request.auth_roles)) \
            .filter(Permission.resource_type == Permission.SOURCE)
        for perm in query:
            if perm.read:
                readable.add(perm.resource_id)
            if perm.write and request.logged_in:
                writable.add(perm.resource_id)
    return list(request.auth_sources.get(action, []))
Example #51
0
def collections(action):
    """Return the collection ids the current request may use for ``action``.

    The READ/WRITE id sets are built on first use and stored on ``request``
    as ``auth_collections``. Admins get every non-deleted collection for
    both actions. For other requests a permission with read or write
    counts towards READ, and write counts towards WRITE only when the
    request is logged in.
    """
    if hasattr(request, 'auth_collections'):
        return list(request.auth_collections.get(action, []))

    request.auth_collections = {READ: set(), WRITE: set()}
    granted = request.auth_collections
    if is_admin():
        live = Collection.all_ids().filter(Collection.deleted_at == None)  # noqa
        for (col_id,) in live:
            granted[READ].add(col_id)
            granted[WRITE].add(col_id)
    else:
        perms = Permission.all()
        perms = perms.filter(Permission.role_id.in_(request.auth_roles))
        perms = perms.filter(Permission.collection_id != None)  # noqa
        for perm in perms:
            if perm.read or perm.write:
                granted[READ].add(perm.collection_id)
            if perm.write and request.logged_in:
                granted[WRITE].add(perm.collection_id)
    return list(request.auth_collections.get(action, []))
Example #52
0
def permissions_update(collection):
    """Grant a role access to a collection based on the request body.

    Requires ``collection_write`` on the collection. The body is validated
    against ``permission.json#``; a role id that matches no role raises
    ``BadRequest``. The grant is committed and returned as JSON.
    """
    authz.require(authz.collection_write(collection))
    body = request_data()
    validate(body, 'permission.json#')

    matching_roles = Role.all().filter(Role.id == body['role'])
    role = matching_roles.first()
    if role is None:
        raise BadRequest()

    updated = Permission.grant_collection(
        collection.id, role, body['read'], body['write'])
    db.session.commit()
    return jsonify({'status': 'ok', 'updated': updated})
Example #53
0
 def setUp(self):
     """Create a source fixture and a read/write permission on it."""
     super(SourcesApiTestCase, self).setUp()
     source = Source()
     source.foreign_id = "test"
     source.label = "Test Collection"
     source.category = "news"
     self.source = source
     db.session.add(source)
     # Flushed before ``source.id`` is read below -- presumably the id is
     # only assigned by the database on flush; confirm against the model.
     db.session.flush()

     grant = Permission()
     grant.role_id = Role.system(Role.SYSTEM_USER)
     grant.read = True
     grant.write = True
     grant.resource_id = source.id
     grant.resource_type = Permission.SOURCE
     db.session.add(grant)
     db.session.commit()
Example #54
0
def permissions_save(watchlist=None, source=None):
    """Grant a role read/write access to a watchlist or a source.

    Write access is required on whichever of ``watchlist`` / ``source`` is
    given. When both are given the permission is stored on the watchlist.
    The request body is validated against ``permissions_schema``.

    Raises:
        BadRequest: if neither a watchlist nor a source is given, or the
            role id in the request body matches no role.
    """
    if watchlist is None and source is None:
        # With no target there is nothing to authorize against; refuse
        # rather than writing a permission for resource id None unchecked.
        raise BadRequest()
    if watchlist is not None:
        authz.require(authz.watchlist_write(watchlist))
    if source is not None:
        authz.require(authz.source_write(source))

    # Pick the resource with the same ``is not None`` test used for the
    # authorization checks, so a falsy id (e.g. 0) cannot be authorized as
    # a watchlist and then stored as a source.
    if watchlist is not None:
        resource_type, resource_id = Permission.WATCHLIST, watchlist
    else:
        resource_type, resource_id = Permission.SOURCE, source
    data = request_data()
    validate(data, permissions_schema)

    role = db.session.query(Role).filter(Role.id == data['role']).first()
    if role is None:
        raise BadRequest()

    permission = Permission.grant_resource(resource_type, resource_id, role,
                                           data['read'], data['write'])
    db.session.commit()
    return jsonify({
        'status': 'ok',
        'updated': permission
    })
Example #55
0
File: util.py Project: pudo/aleph
 def grant(self, collection, role, read, write):
     """Grant ``role`` the given read/write flags on ``collection``.

     Calls ``Permission.grant``, commits the session and then calls
     ``update_collection`` for the collection.
     """
     Permission.grant(collection, role, read, write)
     db.session.commit()
     update_collection(collection)