def create_anno(self, row):
    """Create one annotation from an API row and commit it to ``self.session``.

    The row is passed through ``validate``; the resulting datum's
    ``'document'`` entry is split off and handed to
    ``update_document_metadata`` while the remaining keys become the
    ``models.Annotation`` constructor arguments.  The ``id``, ``created`` and
    ``updated`` values are taken from the raw ``row`` and written onto the
    annotation after construction, together with the datum's ``target_uri``.

    Args:
        row: mapping with at least ``'id'``, ``'created'`` and ``'updated'``
            keys; it must also be acceptable to ``validate``.

    Returns:
        None.  The annotation is added to the session, flushed and committed.
    """
    datum = validate(row)
    doc_info = datum.pop('document')
    uri_dicts = doc_info['document_uri_dicts']
    meta_dicts = doc_info['document_meta_dicts']

    # Values read before the model is built; they are re-applied afterwards.
    anno_id = row['id']
    target_uri = datum['target_uri']
    created = row['created']
    updated = row['updated']

    annotation = models.Annotation(**datum)

    # TODO update normalization rules
    # FIXME passing created/updated here doesn't quite seem right, it would
    # clobber the existing values
    linked_document = update_document_metadata(
        self.session,
        target_uri,
        meta_dicts,
        uri_dicts,
        created=created,
        updated=updated,
    )
    print(anno_id)
    annotation.document = linked_document

    annotation.id = anno_id
    annotation.target_uri = target_uri
    annotation.created = created
    annotation.updated = updated

    session = self.session
    session.add(annotation)
    session.flush()
    session.commit()  # FIXME hypothesis doesn't call this
def make_anno(data, dbdocs):
    """Build a ``models.Annotation`` from ``data`` and attach its document id.

    The document is not created or updated here; it is looked up in
    ``dbdocs`` under the normalized form of the annotation's ``target_uri``.
    An earlier variant called ``update_document_metadata`` per annotation and
    was noted as very slow, so that path has been removed.

    Args:
        data: mapping of keyword arguments for ``models.Annotation``.
        dbdocs: mapping from ``uri_normalize(target_uri)`` to an object
            exposing an ``id`` attribute (presumably the stored document;
            verify against the caller).

    Returns:
        The newly constructed annotation with ``document_id`` set.

    Raises:
        KeyError: if the normalized target uri is missing from ``dbdocs``
            (assuming ``dbdocs`` behaves like a dict).
    """
    # FIXME for batch loads the overhead of the model constructor is very high
    annotation = models.Annotation(**data)
    normalized_uri = uri_normalize(annotation.target_uri)
    annotation.document_id = dbdocs[normalized_uri].id
    return annotation
def sync_anno_stream(self, search_after=None, stop_at=None):
    """Stream annotations from the API one row at a time.

    This is the one-annotation-at-a-time version of sync.  Persisting the
    rows is not implemented yet, so each row is only echoed back together
    with a placeholder status.

    Args:
        search_after: forwarded unchanged to ``self.yield_from_api``.
        stop_at: forwarded unchanged to ``self.yield_from_api``.

    Yields:
        ``(row, 'TODO')`` for every row produced by ``self.yield_from_api``.
    """
    # Forward the caller's ``search_after``; the previous code passed an
    # undefined ``last_updated`` name and ignored the parameter.
    for row in self.yield_from_api(search_after=search_after, stop_at=stop_at):
        yield row, 'TODO'
        # TODO: persist each row instead of only echoing it.  The earlier
        # sketch validated the row (roughly 30x slower than quickload),
        # copied id/created/updated from the row onto the datum (the h code
        # being called assumes these are new annos), split out the document
        # uri/meta dicts, and added models.Annotation objects to
        # self.session.
def merge_data(self, db_session, request):
    """Populate ``db_session`` with three documents sharing one URI.

    Each document carries a single ``DocumentURI`` whose ``uri`` is the
    Wikipedia main page and a single ``title`` ``DocumentMeta``; they differ
    only in claimant and URI type.  After the documents are flushed, two
    annotations per document are added, referencing the document by id.

    Presumably used as a test fixture for document merging (the name and the
    ``db_session``/``request`` parameters suggest pytest) - confirm against
    the enclosing class.

    Args:
        db_session: session exposing ``add_all`` and ``flush``.
        request: unused by the body.

    Returns:
        Tuple ``(master, duplicate_1, duplicate_2)`` of the three documents.
    """
    shared_uri = "https://en.wikipedia.org/wiki/Main_Page"
    title = "Wikipedia, the free encyclopedia"

    def build_document(claimant, uri_type):
        # The URI row is built before the meta row, as in the literal form.
        uri_row = document.DocumentURI(
            claimant=claimant,
            uri=shared_uri,
            type=uri_type,
        )
        meta_row = document.DocumentMeta(
            claimant=claimant,
            type="title",
            value=title,
        )
        return document.Document(document_uris=[uri_row], meta=[meta_row])

    documents = (
        build_document("https://en.wikipedia.org/wiki/Main_Page", "self-claim"),
        build_document("https://m.en.wikipedia.org/wiki/Main_Page", "rel-canonical"),
        build_document("https://en.wikipedia.org/wiki/Home", "rel-canonical"),
    )

    db_session.add_all(list(documents))
    # Flush before reading ``.id`` on the documents below.
    db_session.flush()

    userids_per_document = (
        ("luke", "alice"),
        ("lucy", "bob"),
        ("amy", "dan"),
    )
    annotations = []
    for doc, userids in zip(documents, userids_per_document):
        for userid in userids:
            annotations.append(
                models.Annotation(userid=userid, document_id=doc.id)
            )
    db_session.add_all(annotations)

    return documents
def annotation(self):
    """Return a ``mock.Mock`` whose spec is a new ``models.Annotation``."""
    spec_instance = models.Annotation()
    return mock.Mock(spec=spec_instance)