def create():
    """Create a new collection for the logged-in user."""
    authz.require(authz.logged_in())
    coll = Collection.create(request_data(), request.auth_role)
    db.session.commit()
    # Push the new collection into the search index.
    update_collection(coll)
    log_event(request)
    return view(coll.id)
def delete(id):
    """Remove an alert owned by the current session user."""
    request.authz.require(request.authz.session_write())
    alert = obj_or_404(Alert.by_id(id, role=request.authz.role))
    alert.delete()
    db.session.commit()
    log_event(request)
    return jsonify({'status': 'ok'})
def reconcile():
    """
    Reconciliation API, emulates Google Refine API.

    See: http://code.google.com/p/google-refine/wiki/ReconciliationServiceApi
    """
    params = request.args.copy()
    params.update(request.form.copy())
    log_event(request)
    if 'query' in params:
        # Single mode: either a JSON-encoded query object or a bare
        # string, in which case the whole parameter dict is passed on.
        query = params.get('query')
        if query.startswith('{'):
            try:
                query = json.loads(query)
            except ValueError:
                raise BadRequest()
        else:
            query = params
        return jsonify(reconcile_op(query))
    elif 'queries' in params:
        # Batch mode: a JSON object mapping keys to individual queries.
        try:
            batch = json.loads(params.get('queries'))
        except ValueError:
            raise BadRequest()
        return jsonify({key: reconcile_op(q) for key, q in batch.items()})
    else:
        # No query given: describe the reconciliation service.
        return reconcile_index()
def delete(id):
    """Queue asynchronous deletion of a collection."""
    coll = obj_or_404(Collection.by_id(id))
    authz.require(authz.collection_write(id))
    # Deletion runs out-of-band on the user work queue.
    delete_collection.apply_async([coll.id], queue=USER_QUEUE,
                                  routing_key=USER_ROUTING_KEY)
    log_event(request)
    return jsonify({'status': 'ok'})
def process(id):
    """Queue a re-analysis of all documents in a collection."""
    coll = obj_or_404(Collection.by_id(id))
    request.authz.require(request.authz.collection_write(coll))
    # Analysis runs out-of-band on the user work queue.
    analyze_collection.apply_async([coll.id], queue=USER_QUEUE,
                                   routing_key=USER_ROUTING_KEY)
    log_event(request)
    return jsonify({'status': 'ok'})
def create(collection_id):
    """Create a network inside the given collection."""
    coll = obj_or_404(Collection.by_id(collection_id))
    authz.require(authz.collection_write(coll.id))
    net = Network.create(request_data(), coll, request.auth_role)
    db.session.commit()
    log_event(request)
    return view(collection_id, net.id)
def ingest_upload(collection_id):
    """Upload one or more files into a collection and queue their ingest."""
    collection = obj_or_404(Collection.by_id(collection_id))
    request.authz.require(request.authz.collection_write(collection.id))
    log_event(request)
    try:
        # Shared metadata for all uploaded files, sent as a JSON form field.
        meta = json.loads(request.form.get('meta', '{}'))
        meta['crawler_id'] = 'user_upload:%s' % request.authz.role.id
        meta['crawler_run'] = make_textid()
    except Exception as ex:
        raise BadRequest(unicode(ex))
    metas = []
    for storage in request.files.values():
        # Each file gets its own copy of the shared metadata.
        file_meta = meta.copy()
        file_meta['mime_type'] = storage.mimetype
        file_meta['file_name'] = storage.filename
        file_meta['source_path'] = storage.filename
        validate(file_meta, 'metadata.json#')
        file_meta = Metadata.from_data(file_meta)
        # NOTE(review): `upload_folder` is presumably a module-level path;
        # confirm it exists and is writable.
        sec_fn = os.path.join(upload_folder, secure_filename(storage.filename))
        storage.save(sec_fn)
        # move=True presumably hands the temp file over to the ingest
        # task for cleanup — confirm against ingest_file().
        ingest_file(collection_id, file_meta, sec_fn, move=True,
                    queue=USER_QUEUE, routing_key=USER_ROUTING_KEY)
        metas.append(file_meta)
    return jsonify({'status': 'ok', 'metadata': metas})
def password_login():
    """Provides email and password authentication."""
    data = request_data()
    email = data.get('email')
    password = data.get('password')
    if not email or not password:
        # NOTE(review): 404 for missing credentials looks odd — 400 would
        # be conventional; confirm no client relies on this status.
        abort(404)
    log_event(request)
    q = Role.by_email(email)
    # Only roles with a local password digest can do password auth.
    q = q.filter(Role.password_digest != None)  # noqa
    role = q.first()
    # Try a password authentication and an LDAP authentication if it is enabled
    if role and role.check_password(password) is False:
        # NOTE(review): Unauthorized is returned, not raised. Werkzeug
        # HTTPExceptions are WSGI apps, so Flask will still render a 401,
        # but raising would be the conventional form — confirm intent.
        return Unauthorized("Authentication has failed.")
    elif not role:
        role = Role.authenticate_using_ldap(email, password)
    if not role:
        return Unauthorized("Authentication has failed.")
    session['user'] = role.id
    session['next_url'] = extract_next_url(request)
    return jsonify({
        'logout': url_for('.logout'),
        'api_key': role.api_key,
        'role': role
    })
def reconcile(): """ Reconciliation API, emulates Google Refine API. See: http://code.google.com/p/google-refine/wiki/ReconciliationServiceApi """ # authz.require(authz.system_read()) data = request.args.copy() data.update(request.form.copy()) log_event(request) if 'query' in data: # single q = data.get('query') if q.startswith('{'): try: q = json.loads(q) except ValueError: raise BadRequest() else: q = data return jsonify(reconcile_op(q)) elif 'queries' in data: # multiple requests in one query qs = data.get('queries') try: qs = json.loads(qs) except ValueError: raise BadRequest() queries = {} for k, q in qs.items(): queries[k] = reconcile_op(q) return jsonify(queries) else: return reconcile_index()
def ingest_upload(collection_id):
    """Upload one or more files into a collection and queue their ingest.

    Fix: the original parsed the 'meta' form field twice (once up front,
    then again — with validation — for every uploaded file). The field
    does not change between files, so parse and validate it exactly once,
    before any file is written to disk.
    """
    collection = obj_or_404(Collection.by_id(collection_id))
    request.authz.require(request.authz.collection_write(collection.id))
    log_event(request)
    crawler_run = make_textid()
    try:
        # Shared metadata for all uploaded files, sent as a JSON form field.
        meta = json.loads(request.form.get('meta', '{}'))
        validate(meta, 'metadata.json#')
    except Exception as ex:
        raise BadRequest(unicode(ex))
    documents = []
    for storage in request.files.values():
        sec_fn = os.path.join(upload_folder,
                              secure_filename(storage.filename))
        storage.save(sec_fn)
        content_hash = checksum(sec_fn)
        # Re-use an existing document with the same content hash, if any.
        document = Document.by_keys(collection=collection,
                                    content_hash=content_hash)
        document.crawler = 'user_upload:%s' % request.authz.role.id
        document.crawler_run = crawler_run
        document.mime_type = storage.mimetype
        document.file_name = storage.filename
        document.meta.update(meta)
        ingest_document(document, sec_fn, user_queue=True)
        # The archive has its own copy now; drop the upload temp file.
        os.unlink(sec_fn)
        documents.append(document)
    return jsonify({'status': 'ok', 'documents': documents})
def ingest_upload(collection_id):
    """Upload one or more files into a collection and queue their ingest."""
    collection = obj_or_404(Collection.by_id(collection_id))
    authz.require(authz.collection_write(collection.id))
    log_event(request)
    try:
        # Shared metadata for all uploaded files, sent as a JSON form field.
        meta = json.loads(request.form.get('meta', '{}'))
    except Exception as ex:
        raise BadRequest(unicode(ex))
    metas = []
    for storage in request.files.values():
        # Each file gets its own copy of the shared metadata.
        file_meta = meta.copy()
        file_meta['mime_type'] = storage.mimetype
        file_meta['file_name'] = storage.filename
        validate(file_meta, 'metadata.json#')
        file_meta = Metadata.from_data(file_meta)
        file_meta.crawler_id = 'user_upload:%s' % request.auth_role.id
        file_meta.crawler_run = make_textid()
        sec_fn = os.path.join(get_upload_folder(),
                              secure_filename(storage.filename))
        storage.save(sec_fn)
        # move=True presumably hands the temp file over to the ingest
        # task for cleanup — confirm against ingest_file().
        ingest_file(collection.id, file_meta, sec_fn, move=True,
                    queue=USER_QUEUE, routing_key=USER_ROUTING_KEY)
        metas.append(file_meta)
    return jsonify({'status': 'ok', 'metadata': metas})
def create():
    """Create a search alert for the logged-in user."""
    authz.require(authz.logged_in())
    alert = Alert.create(request_data(), request.auth_role)
    db.session.commit()
    log_event(request)
    return view(alert.id)
def delete(id):
    """Delete an entity the user has write access to."""
    entity = obj_or_404(Entity.by_id(id))
    check_authz(entity, authz.WRITE)
    delete_entity(entity)
    db.session.commit()
    log_event(request, entity_id=entity.id)
    return jsonify({'status': 'ok'})
def delete(id):
    """Delete an alert belonging to the current user."""
    authz.require(authz.logged_in())
    alert = obj_or_404(Alert.by_id(id, role=request.auth_role))
    alert.delete()
    db.session.commit()
    log_event(request)
    return jsonify({'status': 'ok'})
def update(document_id):
    """Apply user-submitted changes to a writeable document."""
    document = get_document(document_id, action=request.authz.WRITE)
    document.update(request_data())
    db.session.commit()
    log_event(request, document_id=document.id)
    # Refresh the search index entry for the document.
    update_document(document)
    return view(document_id)
def export():
    """Stream the current search results as an Excel download."""
    state = QueryState(request.args, request.authz, limit=0)
    log_event(request)
    workbook = make_excel(get_results(state, 50000), FIELDS)
    return send_file(workbook, mimetype=XLSX_MIME, as_attachment=True,
                     attachment_filename='export.xlsx')
def update(id):
    """Update collection attributes from the request payload."""
    authz.require(authz.collection_write(id))
    coll = obj_or_404(Collection.by_id(id))
    coll.update(request_data())
    db.session.add(coll)
    db.session.commit()
    log_event(request)
    return view(id)
def update(collection_id, id):
    """Update a network belonging to the given collection."""
    coll = obj_or_404(Collection.by_id(collection_id))
    authz.require(authz.collection_write(collection_id))
    net = obj_or_404(Network.by_id_collection(id, coll))
    net.update(request_data())
    log_event(request)
    db.session.commit()
    return view(collection_id, net.id)
def export():
    """Stream the current document search results as an Excel file."""
    query = documents_query(request.args)
    # Keep only the query clause; drop facets/aggregations for the export.
    query = {'query': query['query']}
    log_event(request)
    # Cap exports at 10k rows regardless of the requested limit.
    limit = min(10000, get_limit(default=50))
    output = make_excel(get_results(query, limit), FIELDS)
    # NOTE(review): no explicit authz check is visible here — presumably
    # documents_query() applies access filtering; confirm.
    return send_file(output, mimetype=XLSX_MIME, as_attachment=True,
                     attachment_filename='export.xlsx')
def delete(collection_id, id):
    """Delete a network from its collection."""
    coll = obj_or_404(Collection.by_id(collection_id))
    authz.require(authz.collection_write(coll.id))
    net = obj_or_404(Network.by_id_collection(id, coll))
    net.delete()
    db.session.commit()
    log_event(request)
    return jsonify({'status': 'ok'})
def update(id):
    """Let a user edit their own role profile."""
    role = obj_or_404(Role.by_id(id))
    request.authz.require(request.authz.session_write())
    # Users may only modify their own role.
    request.authz.require(role.id == request.authz.role.id)
    role.update(request_data())
    db.session.add(role)
    db.session.commit()
    log_event(request)
    return jsonify(role)
def create():
    """Create a new, unmanaged collection for the current role."""
    request.authz.require(request.authz.logged_in)
    data = request_data()
    # Collections created through the API are never marked as managed.
    data['managed'] = False
    coll = Collection.create(data, request.authz.role)
    db.session.commit()
    update_collection(coll)
    log_event(request)
    return jsonify(coll)
def update(id):
    """Allow a logged-in user to edit their own role."""
    role = obj_or_404(Role.by_id(id))
    authz.require(authz.logged_in())
    # Only the owner of the role may change it.
    authz.require(role.id == request.auth_role.id)
    role.update(request_data())
    db.session.add(role)
    db.session.commit()
    log_event(request)
    return jsonify(role)
def query():
    """Run a document search and return one page of results."""
    enable_cache(vary_user=True)
    state = QueryState(request.args, request.authz)
    result = documents_query(state)
    page = next_params(request.args, result)
    log_event(request)
    if page is not None:
        result['next'] = url_for('search_api.query', **page)
    return jsonify(result)
def update(id):
    """Update a collection and refresh its search index entry."""
    coll = obj_or_404(Collection.by_id(id))
    request.authz.require(request.authz.collection_write(coll))
    coll.update(request_data())
    db.session.add(coll)
    db.session.commit()
    update_collection(coll)
    log_event(request)
    return view(id)
def update_collections(document_id):
    """Overwrite the set of collections a document belongs to.

    Fix: replaced the unidiomatic `False in [isinstance(...) for ...]`
    membership test with `not all(...)` over a generator — same result,
    no intermediate list, and short-circuits on the first bad element.
    """
    document = get_document(document_id)
    data = request_data()
    # The payload must be a plain list of integer collection ids.
    if not isinstance(data, list) or \
            not all(isinstance(d, int) for d in data):
        raise BadRequest()
    document.update_collections(data,
                                writeable=authz.collections(authz.WRITE))
    db.session.commit()
    log_event(request, document_id=document.id)
    # Refresh the search index entry for the document.
    update_document(document)
    return view_collections(document_id)
def update(document_id): document = get_document(document_id) # This is a special requirement for documents, so # they cannot escalate privs: authz.require(authz.collection_write(document.source_collection_id)) data = request_data() document.update(data, writeable=authz.collections(authz.WRITE)) db.session.commit() log_event(request, document_id=document.id) update_document(document) return view(document_id)
def merge(id, other_id):
    """Merge one entity into another; both must be writeable."""
    target = obj_or_404(Entity.by_id(id))
    check_authz(target, authz.WRITE)
    source = obj_or_404(Entity.by_id(other_id))
    check_authz(source, authz.WRITE)
    target.merge(source)
    db.session.commit()
    # Re-index both sides of the merge.
    update_entity(target)
    update_entity(source)
    log_event(request, entity_id=target.id)
    return view(target.id)
def file(document_id):
    """Send the original source file for a document."""
    document = get_document(document_id)
    enable_cache(server_side=True)
    log_event(request, document_id=document.id)
    # Prefer a direct archive URL (e.g. a signed object-store link).
    url = get_archive().generate_url(document.meta)
    if url is not None:
        return redirect(url)
    # Fall back to streaming a locally-materialized copy.
    local_path = get_archive().load_file(document.meta)
    # NOTE(review): the handle is left open for send_file / the WSGI
    # layer to close; confirm nothing leaks if send_file raises.
    fh = open(local_path, 'rb')
    return send_file(fh, as_attachment=True,
                     attachment_filename=document.meta.file_name,
                     mimetype=document.meta.mime_type)
def query():
    """Execute a paged document search and return the results."""
    enable_cache(vary_user=True, vary=authz.collections(authz.READ))
    # `search` (not `query`) to avoid shadowing this function's name.
    search = documents_query(request.args)
    search["size"] = get_limit(default=100)
    search["from"] = get_offset()
    result = execute_documents_query(request.args, search)
    params = next_params(request.args, result)
    log_event(request)
    if params is not None:
        result["next"] = url_for("search_api.query", **params)
    return jsonify(result)
def create():
    """Create an entity in one or more writeable collections."""
    data = request_data()
    data.pop('id', None)
    data['collections'] = get_collections(data)
    # Write access is required on every target collection.
    for coll in data['collections']:
        authz.require(authz.collection_write(coll.id))
    entity = Entity.save(data)
    for coll in entity.collections:
        coll.touch()
    db.session.commit()
    log_event(request, entity_id=entity.id)
    update_entity(entity)
    return view(entity.id)
def query():
    """Execute a paged document search and return the results."""
    enable_cache(vary_user=True, vary=authz.collections(authz.READ))
    # `search` (not `query`) to avoid shadowing this function's name.
    search = documents_query(request.args)
    search['size'] = get_limit(default=100)
    search['from'] = get_offset()
    result = execute_documents_query(request.args, search)
    params = next_params(request.args, result)
    log_event(request)
    if params is not None:
        result['next'] = url_for('search_api.query', **params)
    return jsonify(result)
def query():
    """Return the generated graph, as GEXF XML or node-link JSON."""
    graph = generate_graph(request.args)
    # `fmt` rather than `format` to avoid shadowing the builtin.
    fmt = request.args.get('format', '').lower().strip()
    log_event(request)
    if fmt != 'gexf':
        payload = json_graph.node_link_data(graph)
        payload['partial'] = graph.partial
        return jsonify(payload)
    buf = StringIO()
    nx.write_gexf(graph, buf)
    buf.seek(0)
    return send_file(buf, mimetype='application/xml')
def permissions_update(collection):
    """Grant or update a role's permissions on a collection."""
    authz.require(authz.collection_write(collection))
    data = request_data()
    validate(data, "permission.json#")
    role = Role.all().filter(Role.id == data["role"]).first()
    if role is None:
        raise BadRequest()
    perm = Permission.grant_collection(collection, role,
                                       data["read"], data["write"])
    db.session.commit()
    log_event(request)
    return jsonify({"status": "ok", "updated": perm})
def permissions_update(collection):
    """Grant or update a role's permissions on a collection."""
    authz.require(authz.collection_write(collection))
    data = request_data()
    validate(data, 'permission.json#')
    role = Role.all().filter(Role.id == data['role']).first()
    if role is None:
        raise BadRequest()
    perm = Permission.grant_collection(collection, role,
                                       data['read'], data['write'])
    db.session.commit()
    log_event(request)
    return jsonify({'status': 'ok', 'updated': perm})
def permissions_update(collection):
    """Update a role's read/write permission on a collection."""
    request.authz.require(request.authz.collection_write(collection))
    data = request_data()
    validate(data, 'permission.json#')
    role = Role.all().filter(Role.id == data['role_id']).first()
    # Resolve the collection id into a model instance under a new name
    # instead of reassigning the parameter.
    coll = Collection.by_id(collection)
    if role is None or coll is None:
        raise BadRequest()
    # Permissions may only target roles visible to the requester.
    request.authz.require(check_visible(role))
    perm = update_permission(role, coll, data['read'], data['write'])
    log_event(request)
    return jsonify({'status': 'ok', 'updated': perm})
def callback():
    """OAuth callback: complete the login flow and establish a session."""
    resp = oauth_provider.authorized_response()
    if resp is None or isinstance(resp, OAuthException):
        log.warning("Failed OAuth: %r", resp)
        # FIXME: notify the user, somehow.
        return redirect('/')
    session['oauth'] = resp
    session['roles'] = [Role.system(Role.SYSTEM_USER)]
    # NOTE(review): a handler of this signal is expected to set
    # session['user']; if none does, the log_event call below raises
    # KeyError — confirm a handler is always registered.
    signals.handle_oauth_session.send(provider=oauth_provider,
                                      session=session)
    db.session.commit()
    log_event(request, role_id=session['user'])
    log.info("Logged in: %r", session['user'])
    return redirect('/')
def view(document_id):
    """Return document metadata plus data/PDF download URLs."""
    doc = get_document(document_id)
    enable_cache()
    data = doc.to_dict()
    if doc.parent is not None:
        data['parent'] = doc.parent.to_dict()
    log_event(request, document_id=doc.id)
    # Prefer a direct archive URL; fall back to the streaming endpoint.
    direct_url = archive.generate_url(doc.content_hash)
    if direct_url is not None:
        data['data_url'] = direct_url
    else:
        data['data_url'] = url_for('documents_api.file',
                                   document_id=document_id)
    if doc.pdf_version:
        data['pdf_url'] = url_for('documents_api.pdf',
                                  document_id=document_id)
    return jsonify(data)
def login(provider=None):
    """Start an OAuth login flow with the given (or default) provider.

    Fix: `oauth.remote_apps.keys()[0]` only works on Python 2, where
    dict views are lists. `next(iter(...))` picks the same first key and
    is portable to Python 3.
    """
    if not provider:
        # by default use the first provider if none is requested,
        # which is a useful default if there's only one
        provider = next(iter(oauth.remote_apps.keys()))
    oauth_provider = oauth.remote_apps.get(provider)
    if not oauth_provider:
        abort(404)
    log_event(request)
    # Remember where to send the user after the OAuth round-trip.
    session['next_url'] = extract_next_url(request)
    callback_url = url_for('.callback', provider=provider)
    return oauth_provider.authorize(callback=callback_url)
def merge(id, other_id):
    """Merge entity `other_id` into entity `id`; both must be writeable."""
    _, target = get_entity(id, request.authz.WRITE)
    _, source = get_entity(other_id, request.authz.WRITE)
    try:
        target.merge(source)
    except ValueError as ve:
        raise BadRequest(ve.message)
    db.session.commit()
    log_event(request, entity_id=target.id)
    # Re-index both sides of the merge.
    update_entity(target)
    update_entity(source)
    return view(target.id)
def update(id):
    """Update an entity, constrained to collections the user can write."""
    entity = obj_or_404(Entity.by_id(id))
    check_authz(entity, authz.WRITE)
    data = request_data()
    data['id'] = entity.id
    # The entity may remain in collections it already belongs to, plus
    # any collection the user can write — nothing else.
    possible_collections = authz.collections(authz.WRITE)
    possible_collections.extend([c.id for c in entity.collections])
    collections = [c for c in get_collections(data)
                   if c.id in possible_collections]
    entity = Entity.save(data, collections, merge=arg_bool('merge'))
    # Touch each collection so dependent caches/timestamps refresh.
    for collection in entity.collections:
        collection.touch()
    db.session.commit()
    log_event(request, entity_id=entity.id)
    update_entity(entity)
    return view(entity.id)
def create():
    """Create a new entity in the given writeable collections."""
    data = request_data()
    data.pop("id", None)
    collections = get_collections(data)
    # Write access is required on every target collection.
    for coll in collections:
        authz.require(authz.collection_write(coll.id))
    try:
        entity = Entity.save(data, collections)
    except ValueError as ve:
        raise BadRequest(ve.message)
    for coll in entity.collections:
        coll.touch()
    db.session.commit()
    log_event(request, entity_id=entity.id)
    update_entity(entity)
    return view(entity.id)
def pdf(document_id):
    """Send the PDF rendition of a text document."""
    document = get_document(document_id)
    enable_cache(server_side=True)
    log_event(request, document_id=document.id)
    if document.type != Document.TYPE_TEXT:
        raise BadRequest("PDF is only available for text documents")
    pdf = document.meta.pdf
    # Prefer a direct archive URL when the backend can produce one.
    url = get_archive().generate_url(pdf)
    if url is not None:
        return redirect(url)
    try:
        local_path = get_archive().load_file(pdf)
        fh = open(local_path, 'rb')
    except Exception as ex:
        raise NotFound("Missing PDF file: %r" % ex)
    # NOTE(review): the handle is left for send_file / the WSGI layer
    # to close — confirm nothing leaks on error paths.
    return send_file(fh, mimetype=pdf.mime_type)
def update(id):
    """Update an entity, limited to collections the user may write."""
    entity = obj_or_404(Entity.by_id(id))
    check_authz(entity, authz.WRITE)
    data = request_data()
    data["id"] = entity.id
    # Allowed target collections: writeable ones plus the entity's
    # current memberships — nothing else.
    possible_collections = authz.collections(authz.WRITE)
    possible_collections.extend([c.id for c in entity.collections])
    collections = [c for c in get_collections(data)
                   if c.id in possible_collections]
    try:
        entity = Entity.save(data, collections, merge=arg_bool("merge"))
    except ValueError as ve:
        raise BadRequest(ve.message)
    # Touch each collection so dependent caches/timestamps refresh.
    for collection in entity.collections:
        collection.touch()
    db.session.commit()
    log_event(request, entity_id=entity.id)
    update_entity(entity)
    return view(entity.id)
def view(document_id):
    """Return document metadata with data/PDF download URLs."""
    doc = get_document(document_id)
    enable_cache()
    data = doc.to_dict()
    log_event(request, document_id=doc.id)
    data['data_url'] = get_archive().generate_url(doc.meta)
    if data['data_url'] is None:
        # No direct archive URL; stream through the API instead.
        data['data_url'] = url_for('documents_api.file',
                                   document_id=document_id)
    if doc.meta.is_pdf:
        data['pdf_url'] = data['data_url']
    else:
        try:
            data['pdf_url'] = get_archive().generate_url(doc.meta.pdf)
        except Exception as ex:
            # Best-effort: fall through to the API endpoint below.
            log.info('Could not generate PDF url: %r', ex)
    if data.get('pdf_url') is None:
        data['pdf_url'] = url_for('documents_api.pdf',
                                  document_id=document_id)
    return jsonify(data)
def ingest_upload(collection_id):
    """Upload one or more files into a collection and queue their ingest."""
    collection = obj_or_404(Collection.by_id(collection_id))
    authz.require(authz.collection_write(collection.id))
    log_event(request)
    try:
        # Shared metadata for all uploaded files, sent as a JSON form field.
        meta = json.loads(request.form.get("meta", "{}"))
    except Exception as ex:
        raise BadRequest(unicode(ex))
    metas = []
    for storage in request.files.values():
        # Each file gets its own copy of the shared metadata.
        file_meta = meta.copy()
        file_meta["mime_type"] = storage.mimetype
        file_meta["file_name"] = storage.filename
        validate(file_meta, "metadata.json#")
        file_meta = Metadata.from_data(file_meta)
        file_meta.crawler_id = "user_upload:%s" % request.auth_role.id
        file_meta.crawler_run = make_textid()
        sec_fn = os.path.join(get_upload_folder(),
                              secure_filename(storage.filename))
        storage.save(sec_fn)
        # move=True presumably hands the temp file over to the ingest
        # task for cleanup — confirm against ingest_file().
        ingest_file(collection.id, file_meta, sec_fn, move=True)
        metas.append(file_meta)
    return jsonify({"status": "ok", "metadata": metas})
def post_edges():
    """Run an edge query built from a POSTed form body."""
    log_event(request)
    params = get_post_multidict()
    return jsonify(EdgeQuery(get_graph(), params))
def get_edges():
    """Run an edge query built from the URL query string."""
    log_event(request)
    return jsonify(EdgeQuery(get_graph(), request.args))