Ejemplo n.º 1
0
def _normalize_document_uris_window(session, window):
    """
    Normalize the document URI rows last updated inside ``window``.

    Rows whose ``updated`` value lies between ``window.start`` and
    ``window.end`` are visited in ascending ``updated`` order. For each row:

    1. Documents found for the row's URI are merged when more than one
       matches.
    2. The row is deleted if another row of the same document already has
       the same normalized claimant, normalized URI, type and content type;
       otherwise the row's normalized claimant and URI are recomputed.
    3. The session is flushed.

    :param session: database session used for all queries and writes
    :param window: object exposing ``start`` and ``end`` bounds
    """
    rows = (
        session.query(models.DocumentURI)
        .filter(models.DocumentURI.updated.between(window.start, window.end))
        .order_by(models.DocumentURI.updated.asc())
    )

    for row in rows:
        # Collapse documents that turn out to share this row's URI.
        same_uri_docs = models.Document.find_by_uris(session, [row.uri])
        if same_uri_docs.count() > 1:
            models.merge_documents(session, same_uri_docs)

        # Conditions describing a *different* row that would be identical to
        # this one once this one has been normalized.
        duplicate_criteria = (
            models.DocumentURI.id != row.id,
            models.DocumentURI.document_id == row.document_id,
            models.DocumentURI.claimant_normalized == uri.normalize(row.claimant),
            models.DocumentURI.uri_normalized == uri.normalize(row.uri),
            models.DocumentURI.type == row.type,
            models.DocumentURI.content_type == row.content_type,
        )
        duplicates = session.query(models.DocumentURI).filter(
            *duplicate_criteria)

        if duplicates.count() > 0:
            # An equivalent normalized row already exists; drop this one.
            session.delete(row)
        else:
            row._claimant_normalized = uri.normalize(row.claimant)
            row._uri_normalized = uri.normalize(row.uri)

        # Flush per row so later iterations query against the updated state.
        session.flush()
Ejemplo n.º 2
0
def _normalize_document_uris_window(session, window):
    """
    Re-normalize every document URI row updated within ``window``.

    Iterates the rows with ``updated`` between ``window.start`` and
    ``window.end``, oldest first. Each row first triggers a merge of the
    documents found for its URI when there is more than one. The row then
    either has its normalized claimant and URI recomputed or, when a
    different row of the same document already matches it on normalized
    claimant, normalized URI, type and content type, is deleted. The
    session is flushed after each row.

    :param session: database session used for all queries and writes
    :param window: object exposing ``start`` and ``end`` bounds
    """
    doc_uri_model = models.DocumentURI

    updated_in_window = (
        session.query(doc_uri_model)
        .filter(doc_uri_model.updated.between(window.start, window.end))
        .order_by(doc_uri_model.updated.asc())
    )

    for current in updated_in_window:
        matching_docs = models.Document.find_by_uris(session, [current.uri])
        if matching_docs.count() > 1:
            models.merge_documents(session, matching_docs)

        # Look for another row that is indistinguishable from this one
        # after normalization.
        twins = session.query(doc_uri_model).filter(
            doc_uri_model.id != current.id,
            doc_uri_model.document_id == current.document_id,
            doc_uri_model.claimant_normalized == uri.normalize(
                current.claimant),
            doc_uri_model.uri_normalized == uri.normalize(current.uri),
            doc_uri_model.type == current.type,
            doc_uri_model.content_type == current.content_type)
        has_twin = twins.count() > 0

        if not has_twin:
            current._claimant_normalized = uri.normalize(current.claimant)
            current._uri_normalized = uri.normalize(current.uri)
        else:
            # Keeping both rows would leave two identical normalized rows.
            session.delete(current)

        session.flush()
Ejemplo n.º 3
0
def move_uri(ctx, old, new):
    """
    Re-point annotations and document equivalence data from ``old`` to ``new``.

    The annotations and document URI rows matching ``old`` are fetched, and
    the user is shown how many of each would be affected. Unless the answer
    is exactly ``y``, ``Aborted`` is printed and the function returns
    without modifying anything.

    On confirmation this **replaces** each annotation's ``target_uri``, each
    fetched claimant row's ``claimant`` and each fetched self-claim or
    canonical row's ``uri`` with ``new``. The affected annotations are then
    reindexed, the session is flushed, documents found for ``new`` are
    merged if there is more than one, and the transaction is committed.
    """

    request = ctx.obj['bootstrap']()

    matched_annotations = _fetch_annotations(request.db, old)
    claimant_rows = _fetch_document_uri_claimants(request.db, old)
    uri_rows = _fetch_document_uri_canonical_self_claim(request.db, old)

    # Adjacent literals are joined by the parser, so this is the same
    # template text as a chain of ``+`` concatenations.
    template = (
        'Changing all annotations and document data matching:\n'
        '"{old}"\nto:\n"{new}"\n'
        'This will affect {ann_count} annotations, {doc_claimant} '
        'document uri claimants, and {doc_uri} document uri self-claims '
        'or canonical uris.\n'
        'Are you sure? [y/N]')
    summary = dict(
        old=old,
        new=new,
        ann_count=len(matched_annotations),
        doc_claimant=len(claimant_rows),
        doc_uri=len(uri_rows))
    answer = click.prompt(
        template.format(**summary), default='n', show_default=False)

    # Anything other than a literal "y" (including the default "n") aborts.
    if answer != 'y':
        print('Aborted')
        return

    for ann in matched_annotations:
        ann.target_uri = new

    for row in claimant_rows:
        row.claimant = new

    for row in uri_rows:
        row.uri = new

    if matched_annotations:
        indexer = BatchIndexer(request.db, request.es, request)
        annotation_ids = [ann.id for ann in matched_annotations]
        indexer.index(annotation_ids)

    request.db.flush()

    # Moving URIs may leave several documents claiming ``new``; fold them
    # into one before committing.
    docs_for_new = models.Document.find_by_uris(request.db, [new])
    if docs_for_new.count() > 1:
        merge_documents(request.db, docs_for_new)

    request.tm.commit()
Ejemplo n.º 4
0
Archivo: move_uri.py Proyecto: nlisgo/h
def move_uri(ctx, old, new):
    """
    Move annotations and document URI data from the ``old`` URL to ``new``.

    Steps, in order:

    1. Fetch the annotations, claimant rows and self-claim/canonical rows
       that match ``old``.
    2. Prompt with the number of records of each kind; any reply other than
       ``y`` prints ``Aborted`` and returns before anything is changed.
    3. Overwrite each annotation's ``target_uri``, each claimant row's
       ``claimant`` and each self-claim/canonical row's ``uri`` with ``new``.
    4. Reindex the changed annotations, if there are any.
    5. Flush, merge the documents found for ``new`` when more than one
       exists, and commit the transaction.
    """

    request = ctx.obj['bootstrap']()

    to_retarget = _fetch_annotations(request.db, old)
    to_reclaim = _fetch_document_uri_claimants(request.db, old)
    to_reuri = _fetch_document_uri_canonical_self_claim(request.db, old)

    prompt_text = ''.join([
        'Changing all annotations and document data matching:\n',
        '"{old}"\nto:\n"{new}"\n',
        'This will affect {ann_count} annotations, {doc_claimant} ',
        'document uri claimants, and {doc_uri} document uri self-claims ',
        'or canonical uris.\n',
        'Are you sure? [y/N]',
    ]).format(
        old=old,
        new=new,
        ann_count=len(to_retarget),
        doc_claimant=len(to_reclaim),
        doc_uri=len(to_reuri),
    )
    reply = click.prompt(prompt_text, default='n', show_default=False)

    if reply != 'y':
        # Pressing enter yields the default 'n', so the safe path is to abort.
        print('Aborted')
        return

    for item in to_retarget:
        item.target_uri = new

    for item in to_reclaim:
        item.claimant = new

    for item in to_reuri:
        item.uri = new

    if to_retarget:
        # Reindex only when at least one annotation was actually changed.
        BatchIndexer(request.db, request.es, request).index(
            [item.id for item in to_retarget])

    request.db.flush()

    merged_candidates = models.Document.find_by_uris(request.db, [new])
    if merged_candidates.count() > 1:
        merge_documents(request.db, merged_candidates)

    request.tm.commit()