def update(slug):
    """Apply the submitted request data to the source named by ``slug``.

    Write access is enforced through ``authz.require``; the source is
    resolved with ``obj_or_404``. After the source has been updated with
    the request data and the current user, the change is committed and the
    response of ``view(slug)`` is returned.
    """
    authz.require(authz.source_write(slug))
    target = obj_or_404(Source.by_slug(slug))
    target.update(request_data(), current_user)

    # Stage and persist the modified source in one go.
    session = db.session
    session.add(target)
    session.commit()
    return view(slug)
def crawl_source(slug, ignore_tags=False):
    """Run the crawler attached to the source identified by ``slug``.

    ``Source.sync()`` is called before the lookup. The ``ignore_tags``
    flag is copied onto the source's crawler instance before ``crawl()``
    is invoked on it.

    Raises ``ValueError`` when no source matches ``slug``.
    """
    Source.sync()
    record = Source.by_slug(slug)
    if record is not None:
        record.crawler_instance.ignore_tags = ignore_tags
        record.crawler_instance.crawl()
        return
    raise ValueError("Invalid source: %r" % slug)
def crawl(slug):
    """Queue a crawl of the source identified by ``slug``.

    Write access is enforced through ``authz.require`` and the source is
    resolved with ``obj_or_404``. The crawl itself is handed off via
    ``crawl_source.delay`` with the source's slug; this function only
    returns a JSON ``{'status': 'ok'}`` acknowledgement.
    """
    # Pass ``slug`` as a logging argument instead of pre-formatting with
    # ``%``: the message is only built when debug logging is enabled, and
    # a tuple-valued slug can no longer break the format operation.
    logging.debug('starting a crawl of %s', slug)
    authz.require(authz.source_write(slug))
    source = obj_or_404(Source.by_slug(slug))
    crawl_source.delay(source.slug)
    logging.debug('started crawl')
    return jsonify({'status': 'ok'})
def crawl_source(slug, ignore_tags=False):
    """Crawl the source identified by ``slug``.

    Logs a debug line on entry, calls ``Source.sync()``, then looks the
    source up by slug. The ``ignore_tags`` flag is set on the source's
    crawler instance before its ``crawl()`` method is called.

    Raises ``ValueError`` when the slug does not resolve to a source.
    """
    logging.debug('crawl source -- celery task going')
    Source.sync()

    found = Source.by_slug(slug)
    if found is None:
        message = "Invalid source: %r" % slug
        raise ValueError(message)

    setattr(found.crawler_instance, 'ignore_tags', ignore_tags)
    found.crawler_instance.crawl()
def view(slug):
    """Return the JSON representation of the source named by ``slug``.

    Read access is enforced through ``authz.require``. The payload is the
    source's ``to_dict()`` output plus a ``can_write`` entry; when that
    entry is truthy the ids of the source's users and the source config
    are included as well.
    """
    authz.require(authz.source_read(slug))
    source = obj_or_404(Source.by_slug(slug))
    etag_cache_keygen(source)

    payload = source.to_dict()
    payload['can_write'] = authz.source_write(slug)
    if not payload['can_write']:
        return jsonify(payload)

    # Writers additionally get the member list and the raw config.
    payload['users'] = [member.id for member in source.users]
    payload['config'] = source.config
    return jsonify(payload)
def view(slug):
    """Return the JSON representation of the source named by ``slug``.

    Access requires both ``authz.source_read(slug)`` and
    ``authz.is_admin()`` to be truthy. The payload is the source's
    ``to_dict()`` output plus a ``can_write`` entry; when that entry is
    truthy the ids of the source's users and the source config are added.
    """
    # ``and`` short-circuits: is_admin() is only consulted when the read
    # check passed.
    allowed = authz.source_read(slug) and authz.is_admin()
    authz.require(allowed)

    source = obj_or_404(Source.by_slug(slug))
    etag_cache_keygen(source)

    payload = source.to_dict()
    payload['can_write'] = authz.source_write(slug)
    if not payload['can_write']:
        return jsonify(payload)

    payload['users'] = [member.id for member in source.users]
    payload['config'] = source.config
    return jsonify(payload)
def dc_projects():
    """List the projects available to a set of DocumentCloud credentials.

    The source slug, username and password are all read from
    ``request.args``. Read access to the source is enforced through
    ``authz.require``. The JSON response is ``{'credentials': False}``
    when the source's crawler is not a ``DocumentCloudCrawler`` or when
    ``get_projects`` returns ``False``; otherwise it carries
    ``credentials: True`` together with the returned projects.
    """
    slug = request.args.get('source')
    authz.require(authz.source_read(slug))
    source = obj_or_404(Source.by_slug(slug))

    if not isinstance(source.crawler_instance, DocumentCloudCrawler):
        return jsonify({'credentials': False})

    # NOTE(review): the password comes from request.args -- presumably the
    # URL query string, which tends to end up in access logs; confirm this
    # is intended.
    user = request.args.get('username')
    secret = request.args.get('password')
    projects = source.crawler_instance.get_projects(user, secret)

    # Only the literal False signals a rejection; an empty result is still
    # reported as valid credentials.
    if projects is not False:
        return jsonify({'credentials': True, 'projects': projects})
    return jsonify({'credentials': False})
def process_row(row, attributes):
    """Flatten one result row into a ``{name: value}`` dict.

    For every name in ``attributes`` the value is taken from the row's
    ``_source`` mapping. An entry of ``_source['attributes']`` whose
    ``name`` equals the requested name overrides that value; when several
    entries share a name, the last one wins.

    Two names get special treatment:

    * ``'entities'``: the ``id`` of each listed entity is resolved through
      ``Entity.by_id_set`` and the labels are joined with ``', '``.
    * ``'collection'``: the value is looked up with ``Source.by_slug`` and
      converted with ``unicode``, falling back to the raw value when the
      lookup returns something falsy.

    A row without ``_source`` yields ``None`` for every requested name
    instead of raising.

    :param row: mapping that may carry a ``_source`` mapping.
    :param attributes: iterable of field names to extract.
    :returns: dict keyed by the requested names.
    """
    src = row.get('_source') or {}

    # Index the attribute list once rather than rescanning it for every
    # requested name. Later entries overwrite earlier ones, which keeps
    # the "last match wins" behaviour of a linear scan.
    overrides = {}
    for attr in src.get('attributes') or []:
        overrides[attr.get('name')] = attr.get('value')

    data = {}
    for name in attributes:
        value = overrides[name] if name in overrides else src.get(name)
        if name == 'entities':
            # A missing entity list is treated as empty.
            objs = Entity.by_id_set([e.get('id') for e in value or []])
            value = ', '.join([o.label for o in objs.values()])
        if name == 'collection':
            # WARNING: this performs one Source lookup per row; it should
            # not stay a per-row query.
            value = unicode(Source.by_slug(value) or value)
        data[name] = value
    return data
def process(slug):
    """Queue processing of the collection behind the source ``slug``.

    Write access is enforced through ``authz.require`` and the source is
    resolved with ``obj_or_404``. The work is handed off through
    ``process_collection.delay`` with the source's slug, and a JSON
    ``{'status': 'ok'}`` acknowledgement is returned.
    """
    may_write = authz.source_write(slug)
    authz.require(may_write)

    record = obj_or_404(Source.by_slug(slug))
    process_collection.delay(record.slug)

    acknowledgement = {'status': 'ok'}
    return jsonify(acknowledgement)
def crawl(slug):
    """Queue a crawl of the source identified by ``slug``.

    Write access is enforced through ``authz.require`` and the source is
    resolved with ``obj_or_404``. The crawl is handed off through
    ``crawl_source.delay`` with the source's slug, and a JSON
    ``{'status': 'ok'}`` acknowledgement is returned.
    """
    may_write = authz.source_write(slug)
    authz.require(may_write)

    record = obj_or_404(Source.by_slug(slug))
    crawl_source.delay(record.slug)

    acknowledgement = {'status': 'ok'}
    return jsonify(acknowledgement)