Beispiel #1
0
def create():
    """Create a collection from the request payload and return its view.

    ``authz.logged_in()`` must pass. The new collection is committed before
    ``update_collection`` is called, and the request is logged before the
    response is produced by ``view``.
    """
    authz.require(authz.logged_in())
    payload = request_data()
    created = Collection.create(payload, request.auth_role)
    db.session.commit()
    update_collection(created)
    log_event(request)
    return view(created.id)
Beispiel #2
0
def delete(id):
    """Delete one alert of the current role and answer with a JSON status.

    ``request.authz.session_write()`` must pass. The alert is looked up with
    ``Alert.by_id`` restricted to the requesting role and passed through
    ``obj_or_404`` before it is deleted and the change is committed.
    """
    access = request.authz
    access.require(access.session_write())
    target = obj_or_404(Alert.by_id(id, role=access.role))
    target.delete()
    db.session.commit()
    log_event(request)
    return jsonify({'status': 'ok'})
Beispiel #3
0
def reconcile():
    """
    Reconciliation API, emulates Google Refine API.

    See: http://code.google.com/p/google-refine/wiki/ReconciliationServiceApi

    The parameters of ``request.args`` and ``request.form`` are merged. A
    ``query`` parameter is handled as a single request, a ``queries``
    parameter as a JSON object mapping keys to requests; with neither
    present the result of ``reconcile_index()`` is returned.

    Raises ``BadRequest`` when a JSON payload cannot be decoded, or when
    ``queries`` decodes to something other than a JSON object.
    """
    data = request.args.copy()
    data.update(request.form.copy())
    log_event(request)

    if 'query' in data:
        # single
        q = data.get('query')
        if q.startswith('{'):
            try:
                q = json.loads(q)
            except ValueError:
                raise BadRequest()
        else:
            # Not a JSON object: hand the merged parameters over instead.
            q = data
        return jsonify(reconcile_op(q))

    if 'queries' in data:
        # multiple requests in one query
        try:
            qs = json.loads(data.get('queries'))
        except ValueError:
            raise BadRequest()
        # Valid JSON that is not an object (a list, a number, ...) has no
        # ``items()``; treat it as a client error instead of crashing.
        if not isinstance(qs, dict):
            raise BadRequest()
        return jsonify({k: reconcile_op(q) for k, q in qs.items()})

    return reconcile_index()
Beispiel #4
0
def delete(id):
    """Queue the deletion of a collection and answer with a JSON status.

    The collection is looked up first, then ``authz.collection_write(id)``
    must pass. The deletion itself is handed to
    ``delete_collection.apply_async`` with the user queue settings.
    """
    collection = obj_or_404(Collection.by_id(id))
    authz.require(authz.collection_write(id))
    task_args = [collection.id]
    delete_collection.apply_async(task_args,
                                  queue=USER_QUEUE,
                                  routing_key=USER_ROUTING_KEY)
    log_event(request)
    return jsonify({'status': 'ok'})
Beispiel #5
0
def process(id):
    """Queue an analysis run for a collection and answer with a JSON status.

    ``request.authz.collection_write(collection)`` must pass. The work is
    handed to ``analyze_collection.apply_async`` with the user queue
    settings; this view only logs the request and confirms.
    """
    target = obj_or_404(Collection.by_id(id))
    request.authz.require(request.authz.collection_write(target))
    task_args = [target.id]
    analyze_collection.apply_async(task_args,
                                   queue=USER_QUEUE,
                                   routing_key=USER_ROUTING_KEY)
    log_event(request)
    return jsonify({'status': 'ok'})
Beispiel #6
0
def create(collection_id):
    """Create a network inside a collection and return its view.

    ``authz.collection_write`` must pass for the collection. The network is
    built from the request payload and committed before the request is
    logged.
    """
    parent = obj_or_404(Collection.by_id(collection_id))
    authz.require(authz.collection_write(parent.id))
    payload = request_data()
    created = Network.create(payload, parent, request.auth_role)
    db.session.commit()
    log_event(request)
    return view(collection_id, created.id)
Beispiel #7
0
def create():
    """Create a collection from the request payload and return its view.

    ``authz.logged_in()`` must pass. The new collection is committed before
    ``update_collection`` is called, and the request is logged before the
    response is produced by ``view``.
    """
    authz.require(authz.logged_in())
    payload = request_data()
    created = Collection.create(payload, request.auth_role)
    db.session.commit()
    update_collection(created)
    log_event(request)
    return view(created.id)
Beispiel #8
0
def ingest_upload(collection_id):
    """Accept uploaded files for a collection and hand them to ingest.

    ``request.authz.collection_write`` must pass for the collection. The
    ``meta`` form field (default ``{}``) is decoded as JSON and extended
    with crawler information; any failure in that step is reported as
    ``BadRequest``. Each uploaded file gets its own copy of the metadata,
    is validated against ``metadata.json#``, saved into ``upload_folder``
    under a sanitised name and passed to ``ingest_file``.

    Returns a JSON body with the status and the metadata of every file.
    """
    collection = obj_or_404(Collection.by_id(collection_id))
    request.authz.require(request.authz.collection_write(collection.id))
    log_event(request)
    try:
        base_meta = json.loads(request.form.get('meta', '{}'))
        base_meta['crawler_id'] = 'user_upload:%s' % request.authz.role.id
        base_meta['crawler_run'] = make_textid()

    except Exception as ex:
        raise BadRequest(unicode(ex))

    ingested = []
    for upload in request.files.values():
        raw_meta = base_meta.copy()
        raw_meta['mime_type'] = upload.mimetype
        raw_meta['file_name'] = upload.filename
        raw_meta['source_path'] = upload.filename
        validate(raw_meta, 'metadata.json#')
        file_meta = Metadata.from_data(raw_meta)
        target = os.path.join(upload_folder,
                              secure_filename(upload.filename))
        upload.save(target)
        ingest_file(collection_id, file_meta, target, move=True,
                    queue=USER_QUEUE, routing_key=USER_ROUTING_KEY)
        ingested.append(file_meta)
    return jsonify({'status': 'ok', 'metadata': ingested})
Beispiel #9
0
def password_login():
    """Provides email and password authentication.

    Reads ``email`` and ``password`` from the request payload and aborts
    with 404 when either is missing. A role with a password digest is
    checked against the supplied password; when no such role exists,
    ``Role.authenticate_using_ldap`` is tried instead.

    On success the role id and the next URL are stored in the session and
    a JSON body with the logout URL, API key and role is returned. On
    failure an ``Unauthorized`` response is returned.
    """
    data = request_data()
    email = data.get('email')
    password = data.get('password')

    if not email or not password:
        abort(404)

    log_event(request)

    q = Role.by_email(email)
    q = q.filter(Role.password_digest != None)  # noqa
    role = q.first()

    # Try a password authentication and an LDAP authentication if it is enabled
    if role:
        # Fail closed: only a truthy result counts as a match. Comparing
        # with ``is False`` would let a ``None`` (or other falsy) result
        # from ``check_password`` through as a successful login.
        if not role.check_password(password):
            return Unauthorized("Authentication has failed.")
    else:
        role = Role.authenticate_using_ldap(email, password)

    if not role:
        return Unauthorized("Authentication has failed.")

    session['user'] = role.id
    session['next_url'] = extract_next_url(request)

    return jsonify({
        'logout': url_for('.logout'),
        'api_key': role.api_key,
        'role': role
    })
Beispiel #10
0
def reconcile():
    """
    Reconciliation API, emulates Google Refine API.

    See: http://code.google.com/p/google-refine/wiki/ReconciliationServiceApi

    The parameters of ``request.args`` and ``request.form`` are merged. A
    ``query`` parameter is handled as a single request, a ``queries``
    parameter as a JSON object mapping keys to requests; with neither
    present the result of ``reconcile_index()`` is returned.

    Raises ``BadRequest`` when a JSON payload cannot be decoded, or when
    ``queries`` decodes to something other than a JSON object.
    """
    # authz.require(authz.system_read())
    data = request.args.copy()
    data.update(request.form.copy())
    log_event(request)

    if 'query' in data:
        # single
        q = data.get('query')
        if q.startswith('{'):
            try:
                q = json.loads(q)
            except ValueError:
                raise BadRequest()
        else:
            # Not a JSON object: hand the merged parameters over instead.
            q = data
        return jsonify(reconcile_op(q))

    if 'queries' in data:
        # multiple requests in one query
        try:
            qs = json.loads(data.get('queries'))
        except ValueError:
            raise BadRequest()
        # Valid JSON that is not an object (a list, a number, ...) has no
        # ``items()``; treat it as a client error instead of crashing.
        if not isinstance(qs, dict):
            raise BadRequest()
        return jsonify({k: reconcile_op(q) for k, q in qs.items()})

    return reconcile_index()
Beispiel #11
0
def ingest_upload(collection_id):
    """Accept uploaded files for a collection and ingest them as documents.

    ``request.authz.collection_write`` must pass for the collection. The
    ``meta`` form field (default ``{}``) must decode as JSON and validate
    against ``metadata.json#``; otherwise ``BadRequest`` is raised. Every
    uploaded file is saved into ``upload_folder`` under a sanitised name,
    matched to a document via ``Document.by_keys`` on its checksum, and
    passed to ``ingest_document``.

    The temporary copy of each upload is removed even when validation or
    ingest fails, so rejected requests do not leave files behind.

    Returns a JSON body with the status and the ingested documents.
    """
    collection = obj_or_404(Collection.by_id(collection_id))
    request.authz.require(request.authz.collection_write(collection.id))
    log_event(request)
    crawler_run = make_textid()

    # Reject undecodable metadata before anything is written to disk. The
    # result is not kept: each document decodes its own fresh copy below.
    try:
        json.loads(request.form.get('meta', '{}'))
    except Exception as ex:
        raise BadRequest(unicode(ex))

    documents = []
    for storage in request.files.values():
        sec_fn = os.path.join(upload_folder, secure_filename(storage.filename))
        storage.save(sec_fn)
        try:
            content_hash = checksum(sec_fn)
            document = Document.by_keys(collection=collection,
                                        content_hash=content_hash)
            document.crawler = 'user_upload:%s' % request.authz.role.id
            document.crawler_run = crawler_run
            document.mime_type = storage.mimetype
            document.file_name = storage.filename

            try:
                meta = json.loads(request.form.get('meta', '{}'))
                validate(meta, 'metadata.json#')
                document.meta.update(meta)
            except Exception as ex:
                raise BadRequest(unicode(ex))

            ingest_document(document, sec_fn, user_queue=True)
        finally:
            # Always drop the temporary upload, also on the error paths.
            os.unlink(sec_fn)
        documents.append(document)
    return jsonify({'status': 'ok', 'documents': documents})
Beispiel #12
0
def ingest_upload(collection_id):
    """Accept uploaded files for a collection and hand them to ingest.

    ``authz.collection_write`` must pass for the collection. The ``meta``
    form field (default ``{}``) is decoded as JSON; a decoding failure is
    reported as ``BadRequest``. Each uploaded file gets its own copy of the
    metadata, is validated against ``metadata.json#``, tagged with crawler
    information, saved into the upload folder under a sanitised name and
    passed to ``ingest_file`` with the user queue settings.

    Returns a JSON body with the status and the metadata of every file.
    """
    collection = obj_or_404(Collection.by_id(collection_id))
    authz.require(authz.collection_write(collection.id))
    log_event(request)
    try:
        base_meta = json.loads(request.form.get('meta', '{}'))
    except Exception as ex:
        raise BadRequest(unicode(ex))

    ingested = []
    for upload in request.files.values():
        raw_meta = base_meta.copy()
        raw_meta['mime_type'] = upload.mimetype
        raw_meta['file_name'] = upload.filename
        validate(raw_meta, 'metadata.json#')
        file_meta = Metadata.from_data(raw_meta)
        file_meta.crawler_id = 'user_upload:%s' % request.auth_role.id
        file_meta.crawler_run = make_textid()
        safe_name = secure_filename(upload.filename)
        target = os.path.join(get_upload_folder(), safe_name)
        upload.save(target)
        ingest_file(collection.id, file_meta, target, move=True,
                    queue=USER_QUEUE, routing_key=USER_ROUTING_KEY)
        ingested.append(file_meta)
    return jsonify({'status': 'ok', 'metadata': ingested})
Beispiel #13
0
def create():
    """Create an alert for the requesting role and return its view.

    ``authz.logged_in()`` must pass. The alert is built from the request
    payload and committed before the request is logged.
    """
    authz.require(authz.logged_in())
    payload = request_data()
    created = Alert.create(payload, request.auth_role)
    db.session.commit()
    log_event(request)
    return view(created.id)
Beispiel #14
0
def delete(id):
    """Delete an entity and answer with a JSON status.

    The entity is looked up, ``check_authz`` is called with
    ``authz.WRITE``, and the deletion is committed before the request is
    logged together with the entity id.
    """
    target = obj_or_404(Entity.by_id(id))
    check_authz(target, authz.WRITE)
    delete_entity(target)
    db.session.commit()
    log_event(request, entity_id=target.id)
    return jsonify({'status': 'ok'})
Beispiel #15
0
def delete(id):
    """Delete an entity and answer with a JSON status.

    The entity is looked up, ``check_authz`` is called with
    ``authz.WRITE``, and the deletion is committed before the request is
    logged together with the entity id.
    """
    target = obj_or_404(Entity.by_id(id))
    check_authz(target, authz.WRITE)
    delete_entity(target)
    db.session.commit()
    log_event(request, entity_id=target.id)
    return jsonify({'status': 'ok'})
Beispiel #16
0
def delete(id):
    """Delete one alert of the requesting role and confirm with JSON.

    ``authz.logged_in()`` must pass. The alert is looked up with
    ``Alert.by_id`` restricted to ``request.auth_role`` and passed through
    ``obj_or_404`` before it is deleted and the change is committed.
    """
    authz.require(authz.logged_in())
    target = obj_or_404(Alert.by_id(id, role=request.auth_role))
    target.delete()
    db.session.commit()
    log_event(request)
    return jsonify({'status': 'ok'})
Beispiel #17
0
def update(document_id):
    """Apply the request payload to a document and return its view.

    The document is fetched with ``action=request.authz.WRITE``. The change
    is committed and logged before ``update_document`` is called.
    """
    doc = get_document(document_id, action=request.authz.WRITE)
    payload = request_data()
    doc.update(payload)
    db.session.commit()
    log_event(request, document_id=doc.id)
    update_document(doc)
    return view(document_id)
Beispiel #18
0
def export():
    """Send the results of the current query as an Excel attachment.

    A ``QueryState`` is built from the request arguments with ``limit=0``;
    ``get_results`` is called with 50000 as its second argument and the
    resulting rows are rendered with ``make_excel`` using ``FIELDS``.
    """
    state = QueryState(request.args, request.authz, limit=0)
    log_event(request)
    rows = get_results(state, 50000)
    workbook = make_excel(rows, FIELDS)
    return send_file(workbook, mimetype=XLSX_MIME, as_attachment=True,
                     attachment_filename='export.xlsx')
Beispiel #19
0
def update(id):
    """Apply the request payload to a collection and return its view.

    ``authz.collection_write(id)`` is checked before the collection is
    looked up. The updated collection is added to the session and
    committed before the request is logged.
    """
    authz.require(authz.collection_write(id))
    target = obj_or_404(Collection.by_id(id))
    payload = request_data()
    target.update(payload)
    db.session.add(target)
    db.session.commit()
    log_event(request)
    return view(id)
Beispiel #20
0
def update(collection_id, id):
    """Apply the request payload to a network and return its view.

    The collection is looked up, ``authz.collection_write`` must pass for
    it, and the network is resolved within that collection. The request is
    logged before the session is committed.
    """
    parent = obj_or_404(Collection.by_id(collection_id))
    authz.require(authz.collection_write(collection_id))
    target = obj_or_404(Network.by_id_collection(id, parent))
    payload = request_data()
    target.update(payload)
    log_event(request)
    db.session.commit()
    return view(collection_id, target.id)
Beispiel #21
0
def export():
    """Send the results of the current document query as an Excel file.

    Only the ``query`` part of ``documents_query(request.args)`` is kept.
    The row limit is ``get_limit(default=50)`` capped at 10000.
    """
    parsed = documents_query(request.args)
    query = {'query': parsed['query']}
    log_event(request)
    limit = min(10000, get_limit(default=50))
    rows = get_results(query, limit)
    workbook = make_excel(rows, FIELDS)
    return send_file(workbook,
                     mimetype=XLSX_MIME,
                     as_attachment=True,
                     attachment_filename='export.xlsx')
Beispiel #22
0
def delete(collection_id, id):
    """Delete a network of a collection and answer with a JSON status.

    ``authz.collection_write`` must pass for the collection, and the
    network is resolved within that collection before it is deleted and
    the change is committed.
    """
    parent = obj_or_404(Collection.by_id(collection_id))
    authz.require(authz.collection_write(parent.id))
    target = obj_or_404(Network.by_id_collection(id, parent))
    target.delete()
    db.session.commit()
    log_event(request)
    return jsonify({'status': 'ok'})
Beispiel #23
0
def update(id):
    """Apply the request payload to the caller's own role.

    ``request.authz.session_write()`` must pass and the role's id must
    equal the id of the requesting role. The updated role is committed and
    returned as JSON.
    """
    target = obj_or_404(Role.by_id(id))
    access = request.authz
    access.require(access.session_write())
    is_own_role = target.id == access.role.id
    access.require(is_own_role)
    target.update(request_data())
    db.session.add(target)
    db.session.commit()
    log_event(request)
    return jsonify(target)
Beispiel #24
0
def create():
    """Create an unmanaged collection and return it as JSON.

    ``request.authz.logged_in`` is passed to ``require`` as is. The
    ``managed`` flag of the payload is forced to ``False`` before the
    collection is created for the requesting role.
    """
    access = request.authz
    access.require(access.logged_in)
    payload = request_data()
    payload['managed'] = False
    created = Collection.create(payload, access.role)
    db.session.commit()
    update_collection(created)
    log_event(request)
    return jsonify(created)
Beispiel #25
0
def update(id):
    """Apply the request payload to the caller's own role.

    ``authz.logged_in()`` must pass and the role's id must equal the id of
    ``request.auth_role``. The updated role is committed and returned as
    JSON.
    """
    target = obj_or_404(Role.by_id(id))
    authz.require(authz.logged_in())
    is_own_role = target.id == request.auth_role.id
    authz.require(is_own_role)
    target.update(request_data())
    db.session.add(target)
    db.session.commit()
    log_event(request)
    return jsonify(target)
Beispiel #26
0
def query():
    """Run a document query and return the result as JSON.

    Caching is enabled with ``vary_user=True``. When ``next_params``
    yields parameters, a ``next`` URL pointing at ``search_api.query`` is
    added to the result.
    """
    enable_cache(vary_user=True)
    state = QueryState(request.args, request.authz)
    page = documents_query(state)
    following = next_params(request.args, page)
    log_event(request)
    if following is not None:
        page['next'] = url_for('search_api.query', **following)
    return jsonify(page)
Beispiel #27
0
def update(id):
    """Apply the request payload to a collection and return its view.

    ``request.authz.collection_write(collection)`` must pass. The change
    is committed before ``update_collection`` is called and the request is
    logged.
    """
    target = obj_or_404(Collection.by_id(id))
    request.authz.require(request.authz.collection_write(target))
    payload = request_data()
    target.update(payload)
    db.session.add(target)
    db.session.commit()
    update_collection(target)
    log_event(request)
    return view(id)
Beispiel #28
0
def update_collections(document_id):
    """Replace the collections of a document with the ids in the payload.

    The payload must be a list whose items are all ``int`` instances,
    otherwise ``BadRequest`` is raised. The update is limited by passing
    ``authz.collections(authz.WRITE)`` as ``writeable``.
    """
    doc = get_document(document_id)
    ids = request_data()
    is_id_list = isinstance(ids, list) and \
        all(isinstance(item, int) for item in ids)
    if not is_id_list:
        raise BadRequest()
    writeable = authz.collections(authz.WRITE)
    doc.update_collections(ids, writeable=writeable)
    db.session.commit()
    log_event(request, document_id=doc.id)
    update_document(doc)
    return view_collections(document_id)
Beispiel #29
0
def update(document_id):
    """Apply the request payload to a document and return its view.

    ``authz.collection_write`` must pass for the document's
    ``source_collection_id``, and the update itself is limited by passing
    ``authz.collections(authz.WRITE)`` as ``writeable``.
    """
    doc = get_document(document_id)
    # This is a special requirement for documents, so
    # they cannot escalate privs:
    authz.require(authz.collection_write(doc.source_collection_id))
    payload = request_data()
    writeable = authz.collections(authz.WRITE)
    doc.update(payload, writeable=writeable)
    db.session.commit()
    log_event(request, document_id=doc.id)
    update_document(doc)
    return view(document_id)
Beispiel #30
0
def merge(id, other_id):
    """Merge entity ``other_id`` into entity ``id`` and return its view.

    Both entities are looked up and ``check_authz`` is called with
    ``authz.WRITE`` for each of them. After the commit ``update_entity``
    is called for both, and the request is logged with the first id.
    """
    primary = obj_or_404(Entity.by_id(id))
    check_authz(primary, authz.WRITE)
    secondary = obj_or_404(Entity.by_id(other_id))
    check_authz(secondary, authz.WRITE)
    primary.merge(secondary)
    db.session.commit()
    for changed in (primary, secondary):
        update_entity(changed)
    log_event(request, entity_id=primary.id)
    return view(primary.id)
Beispiel #31
0
def update_collections(document_id):
    """Replace the collections of a document with the ids in the payload.

    The payload must be a list whose items are all ``int`` instances,
    otherwise ``BadRequest`` is raised. The update is limited by passing
    ``authz.collections(authz.WRITE)`` as ``writeable``.
    """
    doc = get_document(document_id)
    ids = request_data()
    is_id_list = isinstance(ids, list) and \
        all(isinstance(item, int) for item in ids)
    if not is_id_list:
        raise BadRequest()
    writeable = authz.collections(authz.WRITE)
    doc.update_collections(ids, writeable=writeable)
    db.session.commit()
    log_event(request, document_id=doc.id)
    update_document(doc)
    return view_collections(document_id)
Beispiel #32
0
def merge(id, other_id):
    """Merge entity ``other_id`` into entity ``id`` and return its view.

    Both entities are looked up and ``check_authz`` is called with
    ``authz.WRITE`` for each of them. After the commit ``update_entity``
    is called for both, and the request is logged with the first id.
    """
    primary = obj_or_404(Entity.by_id(id))
    check_authz(primary, authz.WRITE)
    secondary = obj_or_404(Entity.by_id(other_id))
    check_authz(secondary, authz.WRITE)
    primary.merge(secondary)
    db.session.commit()
    for changed in (primary, secondary):
        update_entity(changed)
    log_event(request, entity_id=primary.id)
    return view(primary.id)
Beispiel #33
0
def update(document_id):
    """Apply the request payload to a document and return its view.

    ``authz.collection_write`` must pass for the document's
    ``source_collection_id``, and the update itself is limited by passing
    ``authz.collections(authz.WRITE)`` as ``writeable``.
    """
    doc = get_document(document_id)
    # This is a special requirement for documents, so
    # they cannot escalate privs:
    authz.require(authz.collection_write(doc.source_collection_id))
    payload = request_data()
    writeable = authz.collections(authz.WRITE)
    doc.update(payload, writeable=writeable)
    db.session.commit()
    log_event(request, document_id=doc.id)
    update_document(doc)
    return view(document_id)
Beispiel #34
0
def file(document_id):
    """Serve the source file of a document.

    When the archive can generate a URL for the document's metadata the
    client is redirected there. Otherwise the file is loaded through the
    archive and sent as an attachment, using the file name and MIME type
    from the metadata.
    """
    doc = get_document(document_id)
    enable_cache(server_side=True)
    log_event(request, document_id=doc.id)
    remote_url = get_archive().generate_url(doc.meta)
    if remote_url is not None:
        return redirect(remote_url)

    path = get_archive().load_file(doc.meta)
    handle = open(path, 'rb')
    return send_file(handle,
                     as_attachment=True,
                     attachment_filename=doc.meta.file_name,
                     mimetype=doc.meta.mime_type)
Beispiel #35
0
def query():
    """Run a paged document query and return the result as JSON.

    Caching varies by user and by ``authz.collections(authz.READ)``. The
    query gets ``size`` from ``get_limit(default=100)`` and ``from`` from
    ``get_offset()``. When ``next_params`` yields parameters, a ``next``
    URL pointing at ``search_api.query`` is added to the result.
    """
    enable_cache(vary_user=True, vary=authz.collections(authz.READ))
    search = documents_query(request.args)
    search["size"] = get_limit(default=100)
    search["from"] = get_offset()
    page = execute_documents_query(request.args, search)
    following = next_params(request.args, page)
    log_event(request)
    if following is not None:
        page["next"] = url_for("search_api.query", **following)
    return jsonify(page)
Beispiel #36
0
def create():
    """Create an entity from the request payload and return its view.

    Any ``id`` in the payload is discarded. ``authz.collection_write`` must
    pass for every collection resolved by ``get_collections``. After
    saving, each collection of the entity is touched and the change is
    committed.
    """
    payload = request_data()
    payload.pop('id', None)
    payload['collections'] = get_collections(payload)
    for target in payload['collections']:
        authz.require(authz.collection_write(target.id))
    created = Entity.save(payload)
    for owner in created.collections:
        owner.touch()
    db.session.commit()
    log_event(request, entity_id=created.id)
    update_entity(created)
    return view(created.id)
Beispiel #37
0
def query():
    """Run a paged document query and return the result as JSON.

    Caching varies by user and by ``authz.collections(authz.READ)``. The
    query gets ``size`` from ``get_limit(default=100)`` and ``from`` from
    ``get_offset()``. When ``next_params`` yields parameters, a ``next``
    URL pointing at ``search_api.query`` is added to the result.
    """
    enable_cache(vary_user=True, vary=authz.collections(authz.READ))
    search = documents_query(request.args)
    search['size'] = get_limit(default=100)
    search['from'] = get_offset()
    page = execute_documents_query(request.args, search)
    following = next_params(request.args, page)
    log_event(request)
    if following is not None:
        page['next'] = url_for('search_api.query', **following)
    return jsonify(page)
Beispiel #38
0
def query():
    """Return the graph for the request arguments as GEXF or JSON.

    The ``format`` argument is lower-cased and stripped. ``gexf`` yields an
    XML file written by ``nx.write_gexf``; anything else yields node-link
    JSON with an added ``partial`` entry taken from the graph.
    """
    graph = generate_graph(request.args)
    wanted = request.args.get('format', '').lower().strip()
    log_event(request)
    if wanted != 'gexf':
        payload = json_graph.node_link_data(graph)
        payload['partial'] = graph.partial
        return jsonify(payload)

    buf = StringIO()
    nx.write_gexf(graph, buf)
    buf.seek(0)
    return send_file(buf, mimetype='application/xml')
Beispiel #39
0
def create():
    """Create an entity from the request payload and return its view.

    Any ``id`` in the payload is discarded. ``authz.collection_write`` must
    pass for every collection resolved by ``get_collections``. After
    saving, each collection of the entity is touched and the change is
    committed.
    """
    payload = request_data()
    payload.pop('id', None)
    payload['collections'] = get_collections(payload)
    for target in payload['collections']:
        authz.require(authz.collection_write(target.id))
    created = Entity.save(payload)
    for owner in created.collections:
        owner.touch()
    db.session.commit()
    log_event(request, entity_id=created.id)
    update_entity(created)
    return view(created.id)
Beispiel #40
0
def permissions_update(collection):
    """Grant a role read/write permissions on a collection.

    ``authz.collection_write(collection)`` must pass and the payload is
    validated against ``permission.json#``. ``BadRequest`` is raised when
    no role matches the ``role`` value of the payload.

    Returns a JSON body with the status and the updated permission.
    """
    authz.require(authz.collection_write(collection))
    payload = request_data()
    validate(payload, "permission.json#")

    grantee = Role.all().filter(Role.id == payload["role"]).first()
    if grantee is None:
        raise BadRequest()

    granted = Permission.grant_collection(collection,
                                          grantee,
                                          payload["read"],
                                          payload["write"])
    db.session.commit()
    log_event(request)
    return jsonify({"status": "ok", "updated": granted})
Beispiel #41
0
def permissions_update(collection):
    """Grant a role read/write permissions on a collection.

    ``authz.collection_write(collection)`` must pass and the payload is
    validated against ``permission.json#``. ``BadRequest`` is raised when
    no role matches the ``role`` value of the payload.

    Returns a JSON body with the status and the updated permission.
    """
    authz.require(authz.collection_write(collection))
    payload = request_data()
    validate(payload, 'permission.json#')

    grantee = Role.all().filter(Role.id == payload['role']).first()
    if grantee is None:
        raise BadRequest()

    granted = Permission.grant_collection(collection,
                                          grantee,
                                          payload['read'],
                                          payload['write'])
    db.session.commit()
    log_event(request)
    return jsonify({'status': 'ok', 'updated': granted})
Beispiel #42
0
def permissions_update(collection):
    """Update the read/write permissions of a role on a collection.

    ``request.authz.collection_write(collection)`` must pass and the
    payload is validated against ``permission.json#``. ``BadRequest`` is
    raised when the role from ``role_id`` or the collection cannot be
    found; ``check_visible(role)`` must pass as well.

    Returns a JSON body with the status and the updated permission.
    """
    request.authz.require(request.authz.collection_write(collection))
    payload = request_data()
    validate(payload, 'permission.json#')

    grantee = Role.all().filter(Role.id == payload['role_id']).first()
    target = Collection.by_id(collection)
    if grantee is None or target is None:
        raise BadRequest()
    request.authz.require(check_visible(grantee))

    updated = update_permission(grantee, target,
                                payload['read'], payload['write'])
    log_event(request)
    return jsonify({'status': 'ok', 'updated': updated})
Beispiel #43
0
def file(document_id):
    """Serve the source file of a document.

    When the archive can generate a URL for the document's metadata the
    client is redirected there. Otherwise the file is loaded through the
    archive and sent as an attachment, using the file name and MIME type
    from the metadata.
    """
    doc = get_document(document_id)
    enable_cache(server_side=True)
    log_event(request, document_id=doc.id)
    remote_url = get_archive().generate_url(doc.meta)
    if remote_url is not None:
        return redirect(remote_url)

    path = get_archive().load_file(doc.meta)
    handle = open(path, 'rb')
    return send_file(handle, as_attachment=True,
                     attachment_filename=doc.meta.file_name,
                     mimetype=doc.meta.mime_type)
Beispiel #44
0
def callback():
    """Handle the OAuth provider's response and redirect to ``/``.

    A missing response or an ``OAuthException`` is logged as a warning and
    redirected without touching the session. Otherwise the response and a
    system-user role list are stored in the session, the
    ``handle_oauth_session`` signal is sent, and the login is committed
    and logged using ``session['user']``.
    """
    oauth_response = oauth_provider.authorized_response()
    failed = oauth_response is None or \
        isinstance(oauth_response, OAuthException)
    if failed:
        log.warning("Failed OAuth: %r", oauth_response)
        # FIXME: notify the user, somehow.
        return redirect('/')

    session['oauth'] = oauth_response
    session['roles'] = [Role.system(Role.SYSTEM_USER)]
    signals.handle_oauth_session.send(provider=oauth_provider,
                                      session=session)
    db.session.commit()
    log_event(request, role_id=session['user'])
    log.info("Logged in: %r", session['user'])
    return redirect('/')
Beispiel #45
0
def view(document_id):
    """Return a document as JSON, including its download URLs.

    The parent's dictionary is added when the document has a parent.
    ``data_url`` comes from the archive and falls back to the
    ``documents_api.file`` route; ``pdf_url`` is only added when the
    document has a ``pdf_version``.
    """
    doc = get_document(document_id)
    enable_cache()
    payload = doc.to_dict()
    parent = doc.parent
    if parent is not None:
        payload['parent'] = parent.to_dict()
    log_event(request, document_id=doc.id)
    data_url = archive.generate_url(doc.content_hash)
    if data_url is None:
        data_url = url_for('documents_api.file', document_id=document_id)
    payload['data_url'] = data_url
    if doc.pdf_version:
        payload['pdf_url'] = url_for('documents_api.pdf',
                                     document_id=document_id)
    return jsonify(payload)
Beispiel #46
0
def login(provider=None):
    """Start an OAuth login with the given (or the first) provider.

    :param provider: key of the provider in ``oauth.remote_apps``; when
        empty, the first registered provider is used.

    Aborts with 404 when the provider is unknown or when no provider is
    registered at all. Otherwise the request is logged, the next URL is
    stored in the session and the provider's authorize response is
    returned with the ``.callback`` URL.
    """
    if not provider:
        # by default use the first provider if none is requested,
        # which is a useful default if there's only one.
        # ``next(iter(...), None)`` does not raise when nothing is
        # registered, so that case falls through to the 404 below, and it
        # does not depend on ``keys()`` returning a list.
        provider = next(iter(oauth.remote_apps), None)

    oauth_provider = oauth.remote_apps.get(provider)
    if not oauth_provider:
        abort(404)

    log_event(request)
    session['next_url'] = extract_next_url(request)
    callback_url = url_for('.callback', provider=provider)
    return oauth_provider.authorize(callback=callback_url)
Beispiel #47
0
def merge(id, other_id):
    """Merge entity ``other_id`` into entity ``id`` and return its view.

    Both entities are fetched with ``request.authz.WRITE``. A
    ``ValueError`` from the merge is reported as ``BadRequest`` carrying
    its message. After the commit the request is logged and
    ``update_entity`` is called for both entities.
    """
    _, primary = get_entity(id, request.authz.WRITE)
    _, secondary = get_entity(other_id, request.authz.WRITE)

    try:
        primary.merge(secondary)
    except ValueError as err:
        raise BadRequest(err.message)

    db.session.commit()
    log_event(request, entity_id=primary.id)
    for changed in (primary, secondary):
        update_entity(changed)
    return view(primary.id)
Beispiel #48
0
def update(id):
    """Save the request payload onto an existing entity and return its view.

    ``check_authz`` is called with ``authz.WRITE`` for the entity. The
    payload's ``id`` is forced to the entity's id. Only collections that
    are either in ``authz.collections(authz.WRITE)`` or already attached
    to the entity are passed on to ``Entity.save``; the ``merge`` request
    flag is forwarded. Afterwards each collection of the entity is touched
    and the change is committed.
    """
    entity = obj_or_404(Entity.by_id(id))
    check_authz(entity, authz.WRITE)
    data = request_data()
    data['id'] = entity.id
    # Build a private set instead of extending the list returned by
    # ``authz.collections``: if that list is shared, extending it in place
    # would widen the caller's writable collections for later checks.
    possible_collections = set(authz.collections(authz.WRITE))
    possible_collections.update(c.id for c in entity.collections)
    collections = [c for c in get_collections(data)
                   if c.id in possible_collections]
    entity = Entity.save(data, collections, merge=arg_bool('merge'))
    for collection in entity.collections:
        collection.touch()
    db.session.commit()
    log_event(request, entity_id=entity.id)
    update_entity(entity)
    return view(entity.id)
Beispiel #49
0
def create():
    """Create an entity from the request payload and return its view.

    Any ``id`` in the payload is discarded. ``authz.collection_write`` must
    pass for every collection resolved by ``get_collections``. A
    ``ValueError`` from ``Entity.save`` is reported as ``BadRequest``
    carrying its message. After saving, each collection of the entity is
    touched and the change is committed.
    """
    payload = request_data()
    payload.pop("id", None)
    targets = get_collections(payload)
    for target in targets:
        authz.require(authz.collection_write(target.id))

    try:
        created = Entity.save(payload, targets)
    except ValueError as err:
        raise BadRequest(err.message)
    for owner in created.collections:
        owner.touch()
    db.session.commit()
    log_event(request, entity_id=created.id)
    update_entity(created)
    return view(created.id)
Beispiel #50
0
def pdf(document_id):
    """Serve the PDF version of a text document.

    ``BadRequest`` is raised for documents whose type is not
    ``Document.TYPE_TEXT``. When the archive can generate a URL for the
    PDF the client is redirected there; otherwise the file is loaded
    through the archive and sent directly. Any failure while loading or
    opening the file is reported as ``NotFound``.
    """
    doc = get_document(document_id)
    enable_cache(server_side=True)
    log_event(request, document_id=doc.id)
    if doc.type != Document.TYPE_TEXT:
        raise BadRequest("PDF is only available for text documents")
    pdf_meta = doc.meta.pdf
    remote_url = get_archive().generate_url(pdf_meta)
    if remote_url is not None:
        return redirect(remote_url)

    try:
        path = get_archive().load_file(pdf_meta)
        handle = open(path, 'rb')
    except Exception as ex:
        raise NotFound("Missing PDF file: %r" % ex)
    return send_file(handle, mimetype=pdf_meta.mime_type)
Beispiel #51
0
def update(id):
    """Save the request payload onto an existing entity and return its view.

    ``check_authz`` is called with ``authz.WRITE`` for the entity. The
    payload's ``id`` is forced to the entity's id. Only collections that
    are either in ``authz.collections(authz.WRITE)`` or already attached
    to the entity are passed on to ``Entity.save``; the ``merge`` request
    flag is forwarded. A ``ValueError`` from saving is reported as
    ``BadRequest`` carrying its message. Afterwards each collection of the
    entity is touched and the change is committed.
    """
    entity = obj_or_404(Entity.by_id(id))
    check_authz(entity, authz.WRITE)
    data = request_data()
    data["id"] = entity.id
    # Build a private set instead of extending the list returned by
    # ``authz.collections``: if that list is shared, extending it in place
    # would widen the caller's writable collections for later checks.
    possible_collections = set(authz.collections(authz.WRITE))
    possible_collections.update(c.id for c in entity.collections)
    collections = [c for c in get_collections(data) if c.id in possible_collections]
    try:
        entity = Entity.save(data, collections, merge=arg_bool("merge"))
    except ValueError as ve:
        raise BadRequest(ve.message)
    for collection in entity.collections:
        collection.touch()
    db.session.commit()
    log_event(request, entity_id=entity.id)
    update_entity(entity)
    return view(entity.id)
Beispiel #52
0
def view(document_id):
    """Return a document as JSON, including its download URLs.

    ``data_url`` comes from the archive and falls back to the
    ``documents_api.file`` route. For documents whose metadata is flagged
    ``is_pdf`` the same URL is used as ``pdf_url``; otherwise the archive
    is asked for a URL of the PDF version (failures are only logged) and
    the ``documents_api.pdf`` route is the fallback.
    """
    doc = get_document(document_id)
    enable_cache()
    payload = doc.to_dict()
    log_event(request, document_id=doc.id)

    data_url = get_archive().generate_url(doc.meta)
    if data_url is None:
        data_url = url_for('documents_api.file', document_id=document_id)
    payload['data_url'] = data_url

    if doc.meta.is_pdf:
        payload['pdf_url'] = payload['data_url']
        return jsonify(payload)

    try:
        payload['pdf_url'] = get_archive().generate_url(doc.meta.pdf)
    except Exception as ex:
        log.info('Could not generate PDF url: %r', ex)
    if payload.get('pdf_url') is None:
        payload['pdf_url'] = url_for('documents_api.pdf',
                                     document_id=document_id)
    return jsonify(payload)
Beispiel #53
0
def ingest_upload(collection_id):
    """Accept uploaded files for a collection and hand them to ingest.

    ``authz.collection_write`` must pass for the collection. The ``meta``
    form field (default ``{}``) is decoded as JSON; a decoding failure is
    reported as ``BadRequest``. Each uploaded file gets its own copy of the
    metadata, is validated against ``metadata.json#``, tagged with crawler
    information, saved into the upload folder under a sanitised name and
    passed to ``ingest_file``.

    Returns a JSON body with the status and the metadata of every file.
    """
    collection = obj_or_404(Collection.by_id(collection_id))
    authz.require(authz.collection_write(collection.id))
    log_event(request)
    try:
        base_meta = json.loads(request.form.get("meta", "{}"))
    except Exception as ex:
        raise BadRequest(unicode(ex))

    ingested = []
    for upload in request.files.values():
        raw_meta = base_meta.copy()
        raw_meta["mime_type"] = upload.mimetype
        raw_meta["file_name"] = upload.filename
        validate(raw_meta, "metadata.json#")
        file_meta = Metadata.from_data(raw_meta)
        file_meta.crawler_id = "user_upload:%s" % request.auth_role.id
        file_meta.crawler_run = make_textid()
        safe_name = secure_filename(upload.filename)
        target = os.path.join(get_upload_folder(), safe_name)
        upload.save(target)
        ingest_file(collection.id, file_meta, target, move=True)
        ingested.append(file_meta)
    return jsonify({"status": "ok", "metadata": ingested})
Beispiel #54
0
def post_edges():
    """Log the request and return an ``EdgeQuery`` over the posted data.

    The query is built from ``get_graph()`` and ``get_post_multidict()``
    and returned as JSON.
    """
    log_event(request)
    edge_query = EdgeQuery(get_graph(), get_post_multidict())
    return jsonify(edge_query)
Beispiel #55
0
def get_edges():
    """Log the request and return an ``EdgeQuery`` over the URL arguments.

    The query is built from ``get_graph()`` and ``request.args`` and
    returned as JSON.
    """
    log_event(request)
    edge_query = EdgeQuery(get_graph(), request.args)
    return jsonify(edge_query)