def external_list_fixture(self):
    """
    Creates the ExternalList entries used as fixture: one for the url stored
    in `self.system["body"]` and one each for the person, meeting,
    organization and paper urls stored in `self.body`, all of them with
    `old_date` as their last update.

    Should probably be moved into a json file
    """
    # The list of bodies is referenced by the system object
    ExternalList(url=self.system["body"], last_update=old_date).save()

    # The remaining lists are referenced by the body object; they are saved
    # one by one in this fixed order
    for list_name in ("person", "meeting", "organization", "paper"):
        list_url = self.body[list_name]
        ExternalList(url=list_url, last_update=old_date).save()
def fetch_list_initial(self, url: str) -> None:
    """Saves a complete external list as flattened json to the database

    Starting at `url`, every page of the list is loaded through
    `self.loader`, following the `next` link of each response until there is
    none left. Each element of a page is passed through `externalize` and the
    resulting objects are stored as `CachedObject` rows, one page at a time.
    Objects flagged as deleted and objects already stored from an earlier
    page are skipped. Finally an `ExternalList` entry for `url` is saved,
    using the time at which the fetch was started as `last_update`.
    """
    logger.info(f"Fetching List {url}")

    # Taken before the first request is made
    timestamp = timezone.now()
    next_url = url
    all_objects = set()
    while next_url:
        logger.info(f"Fetching {next_url}")
        response = self.loader.load(next_url)

        objects = set()

        for element in response["data"]:
            for i in externalize(element):
                # Only queue the object that actually passed the check. Adding
                # the whole externalized batch here would also queue the
                # deleted and the already stored objects of that element.
                if not i.data.get("deleted") and i not in all_objects:
                    objects.add(i)

        next_url = response["links"].get("next")

        # We can't have that block outside the loop due to mysql's max_allowed_packet, manifesting
        # "MySQL server has gone away" https://stackoverflow.com/a/36637118/3549270
        # We'll be able to solve this a lot better after the django 2.2 update with ignore_conflicts
        try:
            # Also avoid "MySQL server has gone away" errors due to timeouts
            # https://stackoverflow.com/a/32720475/3549270
            db.close_old_connections()
            # The test are run with sqlite, which failed here with a TransactionManagementError:
            # "An error occurred in the current transaction.
            # You can't execute queries until the end of the 'atomic' block."
            # That's why we build our own atomic block
            if settings.TESTING:
                with transaction.atomic():
                    saved_objects = CachedObject.objects.bulk_create(objects)
            else:
                saved_objects = CachedObject.objects.bulk_create(objects)
        except IntegrityError:
            # At least one url of this page is already in the database, so
            # fall back to writing the objects of this page one by one
            saved_objects = set()
            for i in objects:
                defaults = {
                    "data": i.data,
                    "to_import": True,
                    "oparl_type": i.oparl_type,
                }
                saved_objects.add(
                    CachedObject.objects.update_or_create(
                        url=i.url, defaults=defaults
                    )[0]
                )

        all_objects.update(saved_objects)

    logger.info(f"Found {len(all_objects)} objects in {url}")
    ExternalList(url=url, last_update=timestamp).save()