Example #1
0
    def create_anno(self, row):
        """Turn one raw annotation row into a committed ``models.Annotation``.

        The row is passed through ``validate``; the ``document`` entry of the
        result is split off and handed to ``update_document_metadata``, and
        the remaining entries become the ``models.Annotation`` constructor
        arguments.  ``id``, ``created`` and ``updated`` are taken from the raw
        ``row`` and written onto the annotation after construction.  The
        annotation is then added to ``self.session``, flushed and committed.
        """
        datum = validate(row)

        doc_info = datum.pop('document')
        uri_dicts = doc_info['document_uri_dicts']
        meta_dicts = doc_info['document_meta_dicts']

        anno_id = row['id']
        target_uri = datum['target_uri']
        created, updated = row['created'], row['updated']

        annotation = models.Annotation(**datum)

        # TODO update normalization rules
        # FIXME passing created/updated doesn't quite seem right, would klobber
        document = update_document_metadata(
            self.session,
            target_uri,
            meta_dicts,
            uri_dicts,
            created=created,
            updated=updated,
        )

        print(anno_id)

        # Applied in this order, after construction, so the values from the
        # raw row win over whatever the constructor set.
        overrides = (
            ('document', document),
            ('id', anno_id),
            ('target_uri', target_uri),
            ('created', created),
            ('updated', updated),
        )
        for attr_name, attr_value in overrides:
            setattr(annotation, attr_name, attr_value)

        self.session.add(annotation)
        self.session.flush()
        self.session.commit()  # FIXME hypothesis doesn't call this
Example #2
0
def make_anno(data, dbdocs):
    """Build a ``models.Annotation`` from *data* and link it to its document.

    Args:
        data: Mapping of keyword arguments forwarded unchanged to
            ``models.Annotation``.
        dbdocs: Mapping indexed by ``uri_normalize(annotation.target_uri)``
            whose values expose an ``id`` attribute.

    Returns:
        The newly constructed annotation with ``document_id`` set.  Nothing
        is added to a session here.

    Raises:
        Whatever ``dbdocs`` raises for a missing key (``KeyError`` for a
        plain dict) when no entry exists for the normalized target URI.
    """
    # FIXME for batch the overhead here is stupid beyond belief
    annotation = models.Annotation(**data)

    # Only the document id is attached.  A per-annotation call to
    # update_document_metadata used to follow the return statement; it was
    # unreachable, referenced names not defined in this function, and was
    # noted as being super slow, so it has been dropped.
    annotation.document_id = dbdocs[uri_normalize(annotation.target_uri)].id

    return annotation
Example #3
0
 def sync_anno_stream(self, search_after=None, stop_at=None):
     """Streaming, one-anno-at-a-time version of sync.

     Args:
         search_after: Forwarded to ``self.yield_from_api`` as
             ``search_after``.
         stop_at: Forwarded to ``self.yield_from_api`` as ``stop_at``.

     Yields:
         ``(row, 'TODO')`` for every row produced by
         ``self.yield_from_api``; the second element is a placeholder until
         per-row processing is implemented.
     """
     # The caller's search_after is forwarded here; previously an undefined
     # name (last_updated) was passed and the parameter was ignored.
     for row in self.yield_from_api(search_after=search_after,
                                    stop_at=stop_at):
         # TODO: validate the row (roughly 30x slower than quickload), carry
         # over id/created/updated from the raw row (the h code being called
         # assumes these are new annos), split out the document uri/meta
         # dicts, then build the annotation and add it to self.session.
         yield row, 'TODO'
Example #4
0
    def merge_data(self, db_session, request):
        """Create three documents sharing one URI, plus two annotations each.

        All three documents carry a single ``DocumentURI`` whose ``uri`` is
        the Wikipedia main page and a single ``title`` ``DocumentMeta``; they
        differ only in claimant and URI type.  The documents are added to
        ``db_session`` and flushed before the annotations are built, because
        each annotation is constructed with its document's ``id``.  The
        annotations are added to the session but not flushed here.

        ``request`` is accepted but not used.

        Returns:
            The tuple ``(master, duplicate_1, duplicate_2)``.
        """
        main_page = "https://en.wikipedia.org/wiki/Main_Page"

        def build_doc(claimant, uri_type):
            # One URI record and one title record per document, same claimant.
            uris = [
                document.DocumentURI(
                    claimant=claimant,
                    uri=main_page,
                    type=uri_type,
                )
            ]
            meta = [
                document.DocumentMeta(
                    claimant=claimant,
                    type="title",
                    value="Wikipedia, the free encyclopedia",
                )
            ]
            return document.Document(document_uris=uris, meta=meta)

        master = build_doc(main_page, "self-claim")
        duplicate_1 = build_doc("https://m.en.wikipedia.org/wiki/Main_Page",
                                "rel-canonical")
        duplicate_2 = build_doc("https://en.wikipedia.org/wiki/Home",
                                "rel-canonical")
        docs = (master, duplicate_1, duplicate_2)

        db_session.add_all(list(docs))
        db_session.flush()

        # Two annotation owners per document, in the same order as ``docs``.
        userids_per_doc = (("luke", "alice"), ("lucy", "bob"), ("amy", "dan"))
        annotations = [
            models.Annotation(userid=userid, document_id=doc.id)
            for doc, userids in zip(docs, userids_per_doc)
            for userid in userids
        ]
        db_session.add_all(annotations)
        return docs
Example #5
0
 def annotation(self):
     """Return a ``mock.Mock`` specced on a fresh ``models.Annotation``."""
     spec_instance = models.Annotation()
     return mock.Mock(spec=spec_instance)