def work(self):
    """
    Delete expired datasets for all datasources that have an expiration set

    Goes through all datasources; for each one that has a truthy
    `expire-datasets` setting, every top-level dataset (i.e. with an empty
    `key_parent`) belonging to that datasource whose timestamp is older than
    `now - expire-datasets` is deleted. Datasources without the setting (or
    with a falsy value such as 0) never expire. The job is marked as
    finished afterwards.

    :return:
    """
    for datasource_id in self.all_modules.datasources:
        datasource = self.all_modules.datasources[datasource_id]

        # default = never expire
        max_age = datasource.get("expire-datasets", None)
        if not max_age:
            continue

        # anything with a timestamp before this moment has expired; the
        # setting is subtracted from time.time(), so it is an age in seconds
        cutoff = time.time() - max_age
        expired = self.db.fetchall(
            "SELECT key FROM datasets WHERE key_parent = '' AND parameters::json->>'datasource' = %s AND timestamp < %s",
            (datasource_id, cutoff))

        # we instantiate the dataset, because its delete() method does all
        # the work (e.g. deleting child datasets) for us
        for row in expired:
            dataset = DataSet(key=row["key"], db=self.db)
            dataset.delete()
            # log the datasource *ID*; `datasource` itself is the whole
            # configuration mapping and would be dumped into the log as-is
            self.log.info(
                "Deleting dataset %s/%s (expired per configuration)" %
                (datasource_id, dataset.key))

    self.job.finish()
# Command-line options: the key of the dataset to delete, and an optional
# flag to skip the interactive confirmation prompt.
cli.add_argument("-k", "--key", required=True, help="Dataset key to delete.")
# `type=bool` would treat *any* non-empty value (including "false" or "0") as
# True, silently skipping the confirmation. Instead, accept the flag on its
# own (`-q`) or with an explicit value (`-q true` / `-q false`) and interpret
# the value properly.
cli.add_argument(
    "-q", "--quiet",
    nargs="?",
    const=True,
    default=False,
    type=lambda value: value.strip().lower() in ("1", "true", "t", "yes", "y", "on"),
    help="Whether to skip asking for confirmation. Defaults to false.")
args = cli.parse_args()

# Deleting is destructive, so ask for confirmation unless told not to;
# anything other than an explicit "y" aborts without touching the database.
if not args.quiet:
    confirm = input(
        "This will delete the dataset and any child datasets. Are you sure? (y/n)"
    )
    if confirm.strip().lower() != "y":
        sys.exit(0)

logger = Logger()
database = Database(logger=logger, appname="delete-dataset")

# Load the dataset to delete; the constructor raises a TypeError if no
# dataset with the given key exists
try:
    parent = DataSet(key=args.key, db=database)
except TypeError:
    print("No dataset found with that key.")
    sys.exit(1)

parent.delete()
print(
    "Done. Note that running jobs for the datasets above are not stopped; you will have to wait for them to finish on their own."
)