def fillup_target(self, **filters):
    # fetch all alert summaries & alerts
    # with only a subset of the datum & jobs
    oldest_day = datetime.datetime.now() - self.time_window
    print('\nFetching data subset no older than {0}...'.format(str(oldest_day)))

    self.delete_local_data()
    alert_summaries = list(self.query_set)
    alert_summaries_len = len(alert_summaries)

    # close all database connections
    # new connections will be automatically opened in processes
    connections.close_all()

    processes_list = []
    num_workers = min(self.num_workers, alert_summaries_len)
    for idx in range(num_workers):
        start_idx = int(idx * alert_summaries_len / num_workers)
        stop_idx = int((idx + 1) * alert_summaries_len / num_workers)
        alerts = alert_summaries[start_idx:stop_idx]

        p = Process(target=self.db_worker, args=(idx + 1, alerts))
        processes_list.append(p)

    # start the processes
    for p in processes_list:
        p.start()

    # wait for the processes to finish
    for p in processes_list:
        p.join()

    self.save_local_data()
def check_table_exists(model):
    conn = connections[model._meta.app_label]
    cursor = conn.cursor()
    table_names = [table_info.name for table_info in conn.introspection.get_table_list(cursor)]
    cursor.close()
    connections.close_all()
    return model._meta.db_table in table_names
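# Usage sketch for check_table_exists above, hedged: useful when an app can
# boot before its migrations have run. `MyModel` and the cleanup call are
# hypothetical, not part of the original snippet.
from myapp.models import MyModel  # hypothetical model

if check_table_exists(MyModel):
    MyModel.objects.filter(stale=True).delete()
else:
    print("Table for MyModel missing; apply migrations first.")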
def job_update_product_details():
    call_command('update_product_details')
    create_countries()
    create_locales()

    # Django won't close db connections after call_command. Close them manually
    # to prevent errors in case the DB goes away, e.g. during a failover event.
    connections.close_all()
def convert(self):
    """
    Convert external service to gpkg.
    """
    from eventkit_cloud.tasks.task_process import TaskProcess

    conf_dict, seed_configuration, mapproxy_configuration = self.get_check_config()

    # Customizations...
    mapproxy.seed.seeder.exp_backoff = get_custom_exp_backoff(max_repeat=int(conf_dict.get('max_repeat', 5)))
    mapproxy.cache.geopackage.GeopackageCache.load_tile_metadata = load_tile_metadata

    logger.info("Beginning seeding to {0}".format(self.gpkgfile))
    try:
        auth_requests.patch_https(self.name)
        auth_requests.patch_mapproxy_opener_cache(slug=self.name)
        progress_store = get_progress_store(self.gpkgfile)
        progress_logger = CustomLogger(verbose=True, task_uid=self.task_uid, progress_store=progress_store)
        task_process = TaskProcess(task_uid=self.task_uid)
        task_process.start_process(billiard=True, target=seeder.seed,
                                   kwargs={"tasks": seed_configuration.seeds(['seed']),
                                           "concurrency": get_concurrency(conf_dict),
                                           "progress_logger": progress_logger})
        check_zoom_levels(self.gpkgfile, mapproxy_configuration)
        remove_empty_zoom_levels(self.gpkgfile)
        set_gpkg_contents_bounds(self.gpkgfile, self.layer, self.bbox)
        if task_process.exitcode != 0:
            raise Exception("The Raster Service failed to complete, please contact an administrator.")
    except Exception:
        logger.error("Export failed for url {}.".format(self.service_url))
        raise
    finally:
        connections.close_all()
    return self.gpkgfile
def get_migration_status(**options):
    # type: (**Any) -> str
    verbosity = options.get('verbosity', 1)

    for app_config in apps.get_app_configs():
        if module_has_submodule(app_config.module, "management"):
            import_module('.management', app_config.name)

    app_labels = [options['app_label']] if options.get('app_label') else None
    db = options.get('database', DEFAULT_DB_ALIAS)
    out = StringIO()
    call_command(
        'showmigrations',
        '--list',
        app_labels=app_labels,
        database=db,
        no_color=options.get('no_color', False),
        settings=options.get('settings', os.environ['DJANGO_SETTINGS_MODULE']),
        stdout=out,
        traceback=options.get('traceback', True),
        verbosity=verbosity,
    )
    connections.close_all()
    out.seek(0)
    output = out.read()
    return re.sub(r'\x1b\[(1|0)m', '', output)
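# Hedged usage of get_migration_status above: it returns Django's plain-text
# showmigrations listing with the ANSI color codes stripped, so unapplied
# migrations show up as '[ ]'. The app label is a placeholder.
status = get_migration_status(app_label='myapp')  # 'myapp' is hypothetical
if '[ ]' in status:
    print('Unapplied migrations detected.')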
def run_from_argv(self, argv):
    """
    Set up any environment changes requested (e.g., Python path
    and Django settings), then run this command. If the
    command raises a ``CommandError``, intercept it and print it sensibly
    to stderr. If the ``--traceback`` option is present or the raised
    ``Exception`` is not ``CommandError``, raise it.
    """
    self._called_from_command_line = True
    parser = self.create_parser(argv[0], argv[1])

    if self.use_argparse:
        options = parser.parse_args(argv[2:])
        cmd_options = vars(options)
        # Move positional args out of options to mimic legacy optparse
        args = cmd_options.pop('args', ())
    else:
        options, args = parser.parse_args(argv[2:])
        cmd_options = vars(options)
    handle_default_options(options)
    try:
        self.execute(*args, **cmd_options)
    except Exception as e:
        if options.traceback or not isinstance(e, CommandError):
            raise

        # SystemCheckError takes care of its own formatting.
        if isinstance(e, SystemCheckError):
            self.stderr.write(str(e), lambda x: x)
        else:
            self.stderr.write('%s: %s' % (e.__class__.__name__, e))
        sys.exit(1)
    finally:
        connections.close_all()
def _target():
    try:
        with transaction.atomic(using='locking'):
            target()
            self._start.set()
            self._end.wait(30)
    finally:
        connections.close_all()
def database_exists(database_name, **options):
    # type: (text_type, **Any) -> bool
    db = options.get('database', DEFAULT_DB_ALIAS)
    connection = connections[db]

    with connection.cursor() as cursor:
        # use a parameterized query rather than string interpolation
        cursor.execute("SELECT 1 FROM pg_database WHERE datname = %s", [database_name])
        return_value = bool(cursor.fetchone())
    connections.close_all()
    return return_value
def database_exists(database_name: Text, **options: Any) -> bool:
    db = options.get('database', DEFAULT_DB_ALIAS)
    try:
        connection = connections[db]

        with connection.cursor() as cursor:
            # use a parameterized query rather than string interpolation
            cursor.execute("SELECT 1 FROM pg_database WHERE datname = %s", [database_name])
            return_value = bool(cursor.fetchone())
        connections.close_all()
        return return_value
    except OperationalError:
        return False
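# Hedged usage of the database_exists variants above: the second also treats
# an unreachable server (OperationalError) as absent, which suits provisioning
# checks. The database name below is a placeholder.
if not database_exists('app_db'):  # 'app_db' is hypothetical
    print("Database missing; create it before starting the app.")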
def init_worker(counter):
    # type: (Synchronized) -> None
    """
    This function runs only under parallel mode. It initializes the
    individual processes which are also called workers.
    """
    global _worker_id

    with counter.get_lock():
        counter.value += 1
        _worker_id = counter.value

    # You can now use _worker_id.

    test_classes.API_KEYS = {}

    # Clear the cache
    from zerver.lib.cache import get_cache_backend
    cache = get_cache_backend(None)
    cache.clear()

    # Close all connections
    connections.close_all()

    destroy_test_databases(_worker_id)
    create_test_databases(_worker_id)

    # Every process should upload to a separate directory so that
    # race conditions can be avoided.
    settings.LOCAL_UPLOADS_DIR = '{}_{}'.format(settings.LOCAL_UPLOADS_DIR, _worker_id)

    def is_upload_avatar_url(url):
        # type: (RegexURLPattern) -> bool
        if url.regex.pattern == r'^user_avatars/(?P<path>.*)$':
            return True
        return False

    # We manually update the upload directory path in the url regex.
    from zproject import dev_urls
    found = False
    for url in dev_urls.urls:
        if is_upload_avatar_url(url):
            found = True
            new_root = os.path.join(settings.LOCAL_UPLOADS_DIR, "avatars")
            url.default_args['document_root'] = new_root

    if not found:
        print("*** Upload directory not found.")
def run_from_argv(self, argv):
    """
    Override django's run_from_argv to parse a subcommand.
    If the subcommand is present, then mangle argv and defer to the subcommand.
    """
    self._called_from_command_line = True
    parser = self.create_parser(argv[0], argv[1])

    try:
        # first find if we have a known subcommand
        known, __ = parser.parse_known_args(argv[2:])
    except SubcommandsError:
        known = None

    is_subcommand = (
        known
        and hasattr(known, "subcommand")
        and known.subcommand in self.subcommands
    )
    if is_subcommand:
        # if there is a subcommand then move it in argv to the front of other
        # params and defer to the subcommand
        del argv[argv.index(known.subcommand)]
        argv[1] = "%s %s" % (argv[1], known.subcommand)
        return self.subcommands[known.subcommand]().run_from_argv(argv)

    # continue with the normal parsing/execution
    # and make subcommand optional
    for action in parser._actions:
        if action.dest == "subcommand":
            action.option_strings = [""]

    try:
        options = parser.parse_args(argv[2:])
    except SubcommandsError as e:
        # we have to raise SystemExit here if necessary
        parser.print_usage(sys.stderr)
        return parser.exit(2, "%s\n" % e)

    cmd_options = vars(options)
    args = cmd_options.pop('args', ())
    handle_default_options(options)
    try:
        self.execute(*args, **cmd_options)
    except Exception as e:
        if options.traceback or not isinstance(e, CommandError):
            raise e
        if isinstance(e, SystemCheckError):
            self.stderr.write(str(e), lambda x: x)
        else:
            self.stderr.write('%s: %s' % (e.__class__.__name__, e))
        sys.exit(1)
    finally:
        connections.close_all()
def add_db(db_conf):
    app_label = 'al_' + str(uuid4())
    settings.DATABASES[app_label] = db_conf

    router_class_name = 'Router' + app_label.capitalize()
    setattr(
        settings,
        router_class_name,
        type(router_class_name, (Router,), dict(app_label=app_label)),
    )
    settings.DATABASE_ROUTERS.append(
        '.'.join([settings.__name__, router_class_name])
    )

    connections.close_all()
    return app_label
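# Usage sketch for add_db above (assumes the Router base class is wired into
# settings.DATABASE_ROUTERS as in the snippet). The connection settings and
# `SomeModel` are placeholders for illustration only.
alias = add_db({
    'ENGINE': 'django.db.backends.postgresql',
    'NAME': 'analytics',  # hypothetical database
    'USER': 'analytics',
    'PASSWORD': 'secret',
    'HOST': 'localhost',
    'PORT': '5432',
})
total = SomeModel.objects.using(alias).count()  # SomeModel is hypothetical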
def run(self):
    """
    Sets up the live server and databases, and then loops over handling
    http requests.
    """
    if self.connections_override:
        # Override this thread's database connections with the ones
        # provided by the main thread.
        for alias, conn in self.connections_override.items():
            connections[alias] = conn
    try:
        self.daphne = self._create_server()
        self.daphne.run()
    except Exception as e:
        self.error = e
        self.is_ready.set()
    finally:
        connections.close_all()
def spawn_process(self, process=None):
    """
    spawn a new process
    """
    if process is None:
        return False

    # start new child process
    pid = fork()

    if pid != 0:
        # parent process
        process.pid = pid
        self.PROCESSES[process.process_id] = process
        connections.close_all()
        return True

    # child process
    process.pid = getpid()
    # connection.connection.close()
    # connection.connection = None
    process.pre_init_process()
    process.init_process()
    process.run()
    sys.exit(0)
def get_messages(name, timestamp, limit, order, timeout):
    def _get_messages():
        messages = ChannelMessage.objects.filter(
            name=name,
            destroy_time__gt=timezone.now(),
            created_time__gt=datetime.datetime.utcfromtimestamp(timestamp).replace(
                tzinfo=utc, microsecond=int(timestamp * 1000000) % int(timestamp)
            ),
        ).order_by("%screated_time" % order)[:limit]

        return [{
            "content": message.content,
            "timestamp": calendar.timegm(message.created_time.utctimetuple())
            + message.created_time.microsecond / 1000000.0,
        } for message in messages]

    messages = _get_messages()
    connections.close_all()  # clean connections

    if not messages:
        sub = CHANNEL_REDIS.pubsub()
        try:
            sub.subscribe([name, ])
            sub.get_message(True, timeout=timeout)
            while True:
                data = sub.get_message(timeout=timeout)
                if not data:
                    break
                if data and data["type"] == 'message':
                    messages.append(json.loads(data["data"].decode("utf-8")))
                    break
        finally:
            sub.close()
    return messages
def worker(queue):
    while True:
        # wait for a job
        bits = queue.get()

        # Check job type and do it. There are two types of jobs: do_update
        # (which is called multiple times during worker lifetime) and close
        # (which is called before killing the process)
        if bits[0] == 'close':
            # Django makes sure that when new process/thread hits DB it gets
            # a new connection (or connections). Such connections won't be
            # usable for other threads so let's close them.
            connections.close_all()
            queue.task_done()  # mark job as done and exit the loop
            break
        elif bits[0] == 'do_update':
            func, model, start, end, total, using, start_date, end_date, verbosity, commit = bits

            unified_index = haystack_connections[using].get_unified_index()
            index = unified_index.get_index(model)
            backend = haystack_connections[using].get_backend()

            qs = index.build_queryset(start_date=start_date, end_date=end_date)
            do_update(backend, index, qs, start, end, total, verbosity=verbosity, commit=commit)
            queue.task_done()  # mark job as done
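# Sketch of the producer side for the worker above, under stated assumptions:
# a multiprocessing.JoinableQueue and the 10-field job tuple the worker
# unpacks. `MyModel` is a placeholder. Each worker gets a final 'close' job so
# it closes its inherited connections before exiting.
import multiprocessing

queue = multiprocessing.JoinableQueue()
workers = [multiprocessing.Process(target=worker, args=(queue,)) for _ in range(4)]
for w in workers:
    w.start()

queue.put(('do_update', MyModel, 0, 1000, 1000, 'default', None, None, 1, True))
for _ in workers:
    queue.put(('close',))
queue.join()  # blocks until every job was marked task_done()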
def after_call(self, worker, result, exc):
    """Close all the connections on teardown

    TODO: Autocommit??
    """
    from django.db import connections
    connections.close_all()
def handle(self, *args, **options):
    # Pick up the options
    database = options['database']
    if database not in settings.DATABASES:
        raise CommandError("No database settings known for '%s'" % database)
    continuous = options['continuous']

    # Use the test database if we are running the test suite
    if 'FREPPLE_TEST' in os.environ:
        connections[database].close()
        settings.DATABASES[database]['NAME'] = settings.DATABASES[database]['TEST']['NAME']

    # Check if a worker already exists
    if checkActive(database):
        if 'FREPPLE_TEST' not in os.environ:
            logger.info("Worker for database '%s' already active" % settings.DATABASES[database]['NAME'])
        return

    # Spawn a worker-alive thread
    WorkerAlive(database).start()

    # Process the queue
    if 'FREPPLE_TEST' not in os.environ:
        logger.info("Worker %s for database '%s' starting to process jobs" % (
            os.getpid(), settings.DATABASES[database]['NAME']
        ))
    idle_loop_done = False
    setattr(_thread_locals, 'database', database)
    while True:
        try:
            task = Task.objects.all().using(database).filter(status='Waiting').order_by('id')[0]
            idle_loop_done = False
        except:
            # No more tasks found
            if continuous:
                time.sleep(5)
                continue
            else:
                # Special case: we need to permit a single idle loop before shutting down
                # the worker. If we shut down immediately, a newly launched task could think
                # that a worker is already running - while it just shut down.
                if idle_loop_done:
                    break
                else:
                    idle_loop_done = True
                    time.sleep(5)
                    continue
        try:
            if 'FREPPLE_TEST' not in os.environ:
                logger.info("Worker %s for database '%s' starting task %d at %s" % (
                    os.getpid(), settings.DATABASES[database]['NAME'], task.id, datetime.now()
                ))
            background = False
            task.started = datetime.now()
            # Verify the command exists
            exists = False
            for commandname in get_commands():
                if commandname == task.name:
                    exists = True
                    break
            if not exists:
                # No such task exists
                logger.error('Task %s not recognized' % task.name)
                task.status = 'Failed'
                task.processid = None
                task.save(using=database)
            else:
                # Close all database connections to assure the parent and child
                # process don't share them.
                connections.close_all()
                # Spawn a new command process
                args = []
                kwargs = {'database': database, 'task': task.id, 'verbosity': 0}
                background = 'background' in task.arguments if task.arguments else False
                if task.arguments:
                    for i in shlex.split(task.arguments):
                        if '=' in i:
                            key, val = i.split('=')
                            kwargs[key.strip("--").replace('-', '_')] = val
                        else:
                            args.append(i)
                child = Process(
                    target=runCommand,
                    args=(task.name, *args),
                    kwargs=kwargs,
                    name="frepplectl %s" % task.name
                )
                child.start()
                # Normally, the child will update the processid.
                # Just to make sure, we do it also here.
                task.processid = child.pid
                task.save(update_fields=['processid'], using=database)
                # Wait for the child to finish
                child.join()
                # Read the task again from the database and update it
                task = Task.objects.all().using(database).get(pk=task.id)
                task.processid = None
                if task.status not in ('Done', 'Failed') or not task.finished or not task.started:
                    now = datetime.now()
                    if not task.started:
                        task.started = now
                    if not background:
                        if not task.finished:
                            task.finished = now
                        if task.status not in ('Done', 'Failed'):
                            task.status = 'Done'
                    task.save(using=database)
                if 'FREPPLE_TEST' not in os.environ:
                    logger.info("Worker %s for database '%s' finished task %d at %s: success" % (
                        os.getpid(), settings.DATABASES[database]['NAME'], task.id, datetime.now()
                    ))
        except Exception as e:
            # Read the task again from the database and update it
            task = Task.objects.all().using(database).get(pk=task.id)
            task.status = 'Failed'
            now = datetime.now()
            if not task.started:
                task.started = now
            task.finished = now
            task.message = str(e)
            task.save(using=database)
            if 'FREPPLE_TEST' not in os.environ:
                logger.info("Worker %s for database '%s' finished task %d at %s: failed" % (
                    os.getpid(), settings.DATABASES[database]['NAME'], task.id, datetime.now()
                ))

    # Remove the parameter again
    try:
        Parameter.objects.all().using(database).get(pk='Worker alive').delete()
    except:
        pass
    setattr(_thread_locals, 'database', None)

    # Remove log files exceeding the configured disk space allocation
    totallogs = 0
    filelist = []
    for x in os.listdir(settings.FREPPLE_LOGDIR):
        if x.endswith('.log'):
            size = 0
            creation = 0
            filename = os.path.join(settings.FREPPLE_LOGDIR, x)
            # needs try/catch because log files may still be open or being used
            # and Windows does not like it
            try:
                size = os.path.getsize(filename)
                creation = os.path.getctime(filename)
                filelist.append({'name': filename, 'size': size, 'creation': creation})
            except:
                pass
            totallogs += size
    todelete = totallogs - settings.MAXTOTALLOGFILESIZE * 1024 * 1024
    filelist.sort(key=operator.itemgetter('creation'))
    for fordeletion in filelist:
        if todelete > 0:
            try:
                os.remove(fordeletion['name'])
                todelete -= fordeletion['size']
            except:
                pass

    # Exit
    if 'FREPPLE_TEST' not in os.environ:
        logger.info("Worker %s for database '%s' finished all jobs in the queue and exits" % (
            os.getpid(), settings.DATABASES[database]['NAME']
        ))
def worker_teardown(self, worker_ctx):
    from django.db import connections
    connections.close_all()
def backup(repo, progress=None, single=False):
    """Perform a backup

    This is usually called from Repository.backup() and is tightly integrated
    with the Repository class. It lives in its own module for organizational
    reasons.

    :type repo: backathon.repository.Repository
    :param progress: A callback function that provides status updates on the
        scan
    :param single: If this parameter is true, the backup process will all
        happen in a single thread. This can help with debugging and profiling.

    The progress callable takes two parameters: the backup count and backup
    total.
    """
    if models.FSEntry.objects.using(repo.db).filter(new=True).exists():
        # This happens when a new root is added but hasn't been scanned yet.
        raise RuntimeError("You need to run a scan first")

    to_backup = models.FSEntry.objects.using(repo.db).filter(obj__isnull=True)

    # The ready_to_backup set is the set of all nodes whose children have all
    # already been backed up. In other words, these are the entries that we
    # can back up right now.
    ready_to_backup = to_backup.exclude(
        # The sub query selects the *parents* of entries that are not yet
        # backed up. Therefore, we're excluding entries whose children are
        # not yet backed up.
        id__in=to_backup.exclude(parent__isnull=True).values("parent_id")
    )

    # The two above querysets remain unevaluated. We therefore get new results
    # on each call to .exists() below. Calls to .iterator() always return new
    # results.

    backup_total = to_backup.count()
    backup_count = 0

    if single:
        executor = DummyExecutor()
    else:
        executor = concurrent.futures.ProcessPoolExecutor(
            max_workers=NUM_WORKERS,
        )
        # SQLite connections should not be forked, according to the SQLite
        # documentation. Django and/or Python may have some protections
        # from this problem, but I'm not aware of any, so I'm taking caution and
        # closing all connections before forcing the process pool to immediately
        # launch the processes by submitting a dummy task.
        connections.close_all()
        executor.submit(time.time).result()

    tasks = set()

    contexts = ExitStack()
    with contexts:
        contexts.enter_context(executor)

        # Cancel all tasks that haven't been started yet
        def on_exit():
            for t in tasks:
                t.cancel()
        contexts.callback(on_exit)

        def catch_sigint(exc_type, exc_value, traceback):
            if exc_type and issubclass(exc_type, KeyboardInterrupt):
                print()
                print("Ctrl-C caught. Finishing the current batch of "
                      "uploads, please wait...")
        contexts.push(catch_sigint)

        while to_backup.exists():
            ct = 0
            last_checkpoint = time.monotonic()

            iterator = ready_to_backup.iterator()
            for entry_batch in batcher(iterator, BATCH_SIZE):
                ct += 1

                # Assert our query is working correctly and that there are no
                # SQLite isolation problems (entries we've already backed up
                # re-appearing later in the same query)
                assert all(entry.obj_id is None for entry in entry_batch)

                tasks.add(executor.submit(backup_entry, repo, entry_batch))

                # Don't put the entire to_backup result set in the queue at
                # once, to save memory.
                # If there are too many unfinished tasks, wait for one to
                # finish.
                if len(tasks) >= NUM_WORKERS + 1 or single:
                    done, tasks = concurrent.futures.wait(
                        tasks,
                        timeout=None,
                        return_when=concurrent.futures.FIRST_COMPLETED,
                    )
                    for f in done:
                        backup_count += f.result()
                    if progress is not None:
                        progress(backup_count, backup_total)

                # SQLite won't auto-checkpoint the write-ahead log while we
                # have the query iterator still open. So we force the inner
                # loop to exit every once in a while and force a WAL
                # checkpoint to keep the WAL from growing unbounded.
                if time.monotonic() - last_checkpoint > 30:
                    # Note: closing the iterator should close the cursor
                    # within it, but I think this is relying on reference
                    # counted garbage collection.
                    # If we run into problems, we'll have to find a different
                    # strategy to run checkpoints
                    iterator.close()
                    with connections[repo.db].cursor() as cursor:
                        cursor.execute("PRAGMA wal_checkpoint=RESTART")

            # Sanity check: if we entered the outer loop but the inner loop's
            # query didn't select anything, then we're not making progress and
            # may be caught in an infinite loop. In particular, this could
            # happen if we somehow got a cycle in the FSEntry tree in the
            # database. There would be entries needing backing up, but none of
            # them have all their dependent children backed up.
            assert ct > 0

            # Collect results for the rest of the tasks. We have to do this
            # at the end of each inner loop to guarantee a correct ordering
            # to backed up entries. Items selected next loop could depend on
            # items still in process in the pool.
            # This stalls the workers but it doesn't end up costing all that
            # much time compared to time spent working.
            for f in concurrent.futures.as_completed(tasks):
                backup_count += f.result()
                if progress is not None:
                    progress(backup_count, backup_total)
            tasks.clear()

    # End of outer "while" loop, and end of the contexts ExitStack. The
    # Executor is shut down at this point.

    # Now add the Snapshot object(s) to the database representing this backup
    # run. There's one snapshot per root, but we give them all the same
    # datetime so they can still be grouped together in queries.
    now = timezone.now()
    for root in models.FSEntry.objects.using(repo.db).filter(parent__isnull=True):
        assert root.obj_id is not None
        with atomic_immediate(using=repo.db):
            ss = models.Snapshot.objects.using(repo.db).create(
                path=root.path,
                root_id=root.obj_id,
                date=now,
            )
            repo.put_snapshot(ss)

    with connections[repo.db].cursor() as cursor:
        cursor.execute("ANALYZE")
from balsam.launcher.util import get_tail, remaining_time_minutes
from balsam.core.models import BalsamJob, safe_select, PROCESSABLE_STATES
from django.conf import settings

Queue = multiprocessing.Queue
try:
    Queue().qsize()
except NotImplementedError:
    from balsam.launcher.multi_queue_fallback import MyQueue
    Queue = MyQueue
    print("No queue.qsize support: will use fallback MyQueue implementation")

SERIAL_CORES_PER_NODE = settings.SERIAL_CORES_PER_NODE
SERIAL_HYPERTHREAD_STRIDE = settings.SERIAL_HYPERTHREAD_STRIDE
logger = logging.getLogger('balsam.launcher.zmq_ensemble')
connections.close_all()


class StatusUpdater(multiprocessing.Process):
    def __init__(self):
        super().__init__()
        self.queue = Queue()

    def run(self):
        connections.close_all()
        signal.signal(signal.SIGINT, signal.SIG_IGN)
        signal.signal(signal.SIGTERM, signal.SIG_IGN)
        while True:
            first_item = self.queue.get(block=True, timeout=None)
            updates = [first_item]
            waited = False
def test_statements_pool_clear(self):
    prepared_qs = Book.objects.prepare()
    prepared_qs.execute()
    self.assertEqual(len(statements_pool), 1)
    connections.close_all()
    self.assertEqual(len(statements_pool), 0)
def multiprocessing_create_thirdparty_cache(thirdparty):
    connections.close_all()
    create_third_party_cache(thirdparty, True)
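# Hedged sketch of the intended fan-out for the helper above: the parent
# closes its own connections, then each pooled child reconnects privately
# inside multiprocessing_create_thirdparty_cache. `thirdparties` is a
# placeholder iterable.
from multiprocessing import Pool

connections.close_all()
with Pool(processes=4) as pool:
    pool.map(multiprocessing_create_thirdparty_cache, thirdparties)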
def runTask(task, database):
    task.started = datetime.now()
    # Verify the command exists
    exists = False
    for commandname in get_commands():
        if commandname == task.name:
            exists = True
            break

    if not exists:
        # No such task exists
        logger.error("Task %s not recognized" % task.name)
        task.status = "Failed"
        task.processid = None
        task.save(using=database)
    else:
        # Close all database connections to assure the parent and child
        # process don't share them.
        connections.close_all()

        # Spawn a new command process
        args = []
        kwargs = {"database": database, "task": task.id, "verbosity": 0}
        background = "background" in task.arguments if task.arguments else False
        if task.arguments:
            for i in shlex.split(task.arguments):
                if "=" in i:
                    key, val = i.split("=")
                    kwargs[key.strip("--").replace("-", "_")] = val
                else:
                    args.append(i)
        child = Process(
            target=runCommand,
            args=(task.name, *args),
            kwargs=kwargs,
            name="frepplectl %s" % task.name,
        )
        child.start()

        # Normally, the child will update the processid.
        # Just to make sure, we do it also here.
        task.processid = child.pid
        task.save(update_fields=["processid"], using=database)

        # Wait for the child to finish
        child.join()

        # Read the task again from the database and update it
        task = Task.objects.all().using(database).get(pk=task.id)
        task.processid = None
        if (task.status not in ("Done", "Failed")
                or not task.finished
                or not task.started):
            now = datetime.now()
            if not task.started:
                task.started = now
            if not background:
                if not task.finished:
                    task.finished = now
                if task.status not in ("Done", "Failed"):
                    task.status = "Done"
            task.save(using=database)
        if "FREPPLE_TEST" not in os.environ:
            logger.info(
                "Worker %s for database '%s' finished task %d at %s: success"
                % (
                    os.getpid(),
                    settings.DATABASES[database]["NAME"],
                    task.id,
                    datetime.now(),
                )
            )
def validation(request):
    dc_prefix = 'datasets'
    ref_repfix = 'ref'
    data_initial_values = [{'filters': DataFilter.objects.filter(name='FIL_ALL_VALID_RANGE'),
                            'dataset': Dataset.objects.get(short_name=val_globals.C3S),
                            }]
    ref_initial_values = {'filters': DataFilter.objects.filter(name='FIL_ALL_VALID_RANGE'),
                          'dataset': Dataset.objects.get(short_name=val_globals.ISMN),
                          }

    if request.method == "POST":
        if Settings.load().maintenance_mode:
            __logger.info('Redirecting to the validation page because the system is in maintenance mode.')
            return redirect('validation')

        # formset for data configurations for our new validation
        dc_formset = DatasetConfigurationFormSet(request.POST, prefix=dc_prefix, initial=data_initial_values)

        ## apparently, a missing management form on the formset is a reason to throw a hissy fit err...
        ## ValidationError - instead of just appending it to dc_formset.non_form_errors. Whatever...
        try:
            dc_formset.is_valid()
        except ValidationError as e:
            __logger.exception(e)
            if e.code == 'missing_management_form':
                return HttpResponseBadRequest('Not a valid request: ' + e.message)

        # form for the reference configuration
        ref_dc_form = DatasetConfigurationForm(request.POST, prefix=ref_repfix, is_reference=True,
                                               initial=ref_initial_values)
        # form for the rest of the validation parameters
        val_form = ValidationRunForm(request.POST)

        if val_form.is_valid() and dc_formset.is_valid() and ref_dc_form.is_valid():
            newrun = val_form.save(commit=False)
            newrun.user = request.user
            newrun.start_time = datetime.now(tzlocal())

            if newrun.interval_from is not None:
                # truncate time
                newrun.interval_from = datetime(year=newrun.interval_from.year,
                                                month=newrun.interval_from.month,
                                                day=newrun.interval_from.day,
                                                tzinfo=newrun.interval_from.tzinfo)
            if newrun.interval_to is not None:
                # truncate time and go to 1 sec before midnight
                newrun.interval_to = datetime(year=newrun.interval_to.year,
                                              month=newrun.interval_to.month,
                                              day=newrun.interval_to.day,
                                              hour=23,
                                              minute=59,
                                              second=59,
                                              microsecond=999999,
                                              tzinfo=newrun.interval_to.tzinfo)
            newrun.save()  # save the validation run
            run_id = newrun.id

            # attach all dataset configurations to the validation and save them
            for dc_form in dc_formset:
                dc = dc_form.save(commit=False)
                dc.validation = newrun
                dc.save()
                # save many-to-many related objects, e.g. filters.
                # If you don't do this, filters won't get saved!
                dc_form.save_m2m()

            # also attach the reference config
            ref_dc = ref_dc_form.save(commit=False)
            ref_dc.validation = newrun
            ref_dc.save()
            # save many-to-many related objects, e.g. filters.
            # If you don't do this, filters won't get saved!
            ref_dc_form.save_m2m()

            newrun.reference_configuration = ref_dc

            ## determine the scaling reference. For intercomparison, only the
            ## reference makes sense. Otherwise let the user pick.
            if ((len(dc_formset) == 1) and
                    (val_form.cleaned_data['scaling_ref'] == ValidationRun.SCALE_TO_DATA)):
                newrun.scaling_ref = dc
            else:
                newrun.scaling_ref = ref_dc

            newrun.save()

            # need to close all db connections before forking, see
            # https://stackoverflow.com/questions/8242837/django-multiprocessing-and-database-connections/10684672#10684672
            connections.close_all()

            p = Process(target=run_validation, kwargs={"validation_id": run_id})
            p.start()
            return redirect('result', result_uuid=run_id)
        else:
            __logger.error("Errors in validation form {}\n{}\n{}".format(
                val_form.errors, dc_formset.errors, ref_dc_form.errors))
    else:
        val_form = ValidationRunForm()
        dc_formset = DatasetConfigurationFormSet(prefix=dc_prefix, initial=data_initial_values)
        ref_dc_form = DatasetConfigurationForm(prefix=ref_repfix, is_reference=True, initial=ref_initial_values)

    return render(request, 'validator/validate.html',
                  {'val_form': val_form,
                   'dc_formset': dc_formset,
                   'ref_dc_form': ref_dc_form,
                   'maintenance_mode': Settings.load().maintenance_mode})
def import_business_data(self, file_format=None, business_data=None, mapping=None,
                         overwrite='append', bulk=False, create_concepts=False,
                         create_collections=False, use_multiprocessing=False):
    reader = None
    start = time()
    cursor = connection.cursor()

    try:
        if file_format == None:
            file_format = self.file_format
        if business_data == None:
            business_data = self.business_data
        if mapping == None:
            mapping = self.mapping
        if file_format == 'json':
            reader = ArchesFileReader()
            reader.import_business_data(business_data, mapping)
        elif file_format == 'jsonl':
            with open(self.file[0], 'rU') as openf:
                lines = openf.readlines()
                if use_multiprocessing is True:
                    pool = Pool(cpu_count())
                    pool.map(import_one_resource, lines)
                    connections.close_all()
                    reader = ArchesFileReader()
                else:
                    reader = ArchesFileReader()
                    for line in lines:
                        archesresource = JSONDeserializer().deserialize(line)
                        reader.import_business_data({"resources": [archesresource]})
        elif file_format == 'csv' or file_format == 'shp' or file_format == 'zip':
            if mapping != None:
                reader = CsvReader()
                reader.import_business_data(business_data=business_data, mapping=mapping,
                                            overwrite=overwrite, bulk=bulk,
                                            create_concepts=create_concepts,
                                            create_collections=create_collections)
            else:
                print '*' * 80
                print 'ERROR: No mapping file detected. Please indicate one with the \'-c\' parameter or place one in the same directory as your business data.'
                print '*' * 80
                sys.exit()

        elapsed = (time() - start)
        print 'Time to import_business_data = {0}'.format(datetime.timedelta(seconds=elapsed))

        reader.report_errors()

    finally:
        datatype_factory = DataTypeFactory()
        datatypes = DDataType.objects.all()
        for datatype in datatypes:
            datatype_instance = datatype_factory.get_instance(datatype.datatype)
            datatype_instance.after_update_all()
def handle(self, *args, **options):
    # Pick up the options
    database = options['database']
    if database not in settings.DATABASES:
        raise CommandError("No database settings known for '%s'" % database)
    continuous = options['continuous']

    # Use the test database if we are running the test suite
    if 'FREPPLE_TEST' in os.environ:
        connections[database].close()
        settings.DATABASES[database]['NAME'] = settings.DATABASES[database]['TEST']['NAME']

    # Check if a worker already exists
    if checkActive(database):
        if 'FREPPLE_TEST' not in os.environ:
            logger.info("Worker for database '%s' already active" % settings.DATABASES[database]['NAME'])
        return

    # Spawn a worker-alive thread
    WorkerAlive(database).start()

    # Process the queue
    if 'FREPPLE_TEST' not in os.environ:
        logger.info("Worker %s for database '%s' starting to process jobs" % (
            os.getpid(), settings.DATABASES[database]['NAME']
        ))
    idle_loop_done = False
    setattr(_thread_locals, 'database', database)
    while True:
        try:
            task = Task.objects.all().using(database).filter(status='Waiting').order_by('id')[0]
            idle_loop_done = False
        except:
            # No more tasks found
            if continuous:
                time.sleep(5)
                continue
            else:
                # Special case: we need to permit a single idle loop before shutting down
                # the worker. If we shut down immediately, a newly launched task could think
                # that a worker is already running - while it just shut down.
                if idle_loop_done:
                    break
                else:
                    idle_loop_done = True
                    time.sleep(5)
                    continue
        try:
            if 'FREPPLE_TEST' not in os.environ:
                logger.info("Worker %s for database '%s' starting task %d at %s" % (
                    os.getpid(), settings.DATABASES[database]['NAME'], task.id, datetime.now()
                ))
            background = False
            task.started = datetime.now()
            # Verify the command exists
            exists = False
            for commandname in get_commands():
                if commandname == task.name:
                    exists = True
                    break
            if not exists:
                # No such task exists
                logger.error('Task %s not recognized' % task.name)
                task.status = 'Failed'
                task.processid = None
                task.save(using=database)
            else:
                # Close all database connections to assure the parent and child
                # process don't share them.
                connections.close_all()
                # Spawn a new command process
                args = []
                kwargs = {'database': database, 'task': task.id, 'verbosity': 0}
                if task.arguments:
                    for i in shlex.split(task.arguments):
                        if '=' in i:
                            key, val = i.split('=')
                            kwargs[key.strip("--").replace('-', '_')] = val
                        else:
                            args.append(i)
                child = Process(
                    target=runCommand,
                    args=(task.name, *args),
                    kwargs=kwargs,
                    name="frepplectl %s" % task.name
                )
                child.start()
                background = 'background' in kwargs or '--background' in kwargs
                # Normally, the child will update the processid.
                # Just to make sure, we do it also here.
                task.processid = child.pid
                task.save(update_fields=['processid'], using=database)
                # Wait for the child to finish
                child.join()
                # Read the task again from the database and update it
                task = Task.objects.all().using(database).get(pk=task.id)
                task.processid = None
                if task.status not in ('Done', 'Failed') or not task.finished or not task.started:
                    now = datetime.now()
                    if not task.started:
                        task.started = now
                    if not background:
                        if not task.finished:
                            task.finished = now
                        if task.status not in ('Done', 'Failed'):
                            task.status = 'Done'
                    task.save(using=database)
                if 'FREPPLE_TEST' not in os.environ:
                    logger.info("Worker %s for database '%s' finished task %d at %s: success" % (
                        os.getpid(), settings.DATABASES[database]['NAME'], task.id, datetime.now()
                    ))
        except Exception as e:
            # Read the task again from the database and update it
            task = Task.objects.all().using(database).get(pk=task.id)
            task.status = 'Failed'
            now = datetime.now()
            if not task.started:
                task.started = now
            task.finished = now
            task.message = str(e)
            task.save(using=database)
            if 'FREPPLE_TEST' not in os.environ:
                logger.info("Worker %s for database '%s' finished task %d at %s: failed" % (
                    os.getpid(), settings.DATABASES[database]['NAME'], task.id, datetime.now()
                ))

    # Remove the parameter again
    try:
        Parameter.objects.all().using(database).get(pk='Worker alive').delete()
    except:
        pass
    setattr(_thread_locals, 'database', None)

    # Remove log files exceeding the configured disk space allocation
    totallogs = 0
    filelist = []
    for x in os.listdir(settings.FREPPLE_LOGDIR):
        if x.endswith('.log'):
            size = 0
            creation = 0
            filename = os.path.join(settings.FREPPLE_LOGDIR, x)
            # needs try/catch because log files may still be open or being used
            # and Windows does not like it
            try:
                size = os.path.getsize(filename)
                creation = os.path.getctime(filename)
                filelist.append({'name': filename, 'size': size, 'creation': creation})
            except:
                pass
            totallogs += size
    todelete = totallogs - settings.MAXTOTALLOGFILESIZE * 1024 * 1024
    filelist.sort(key=operator.itemgetter('creation'))
    for fordeletion in filelist:
        if todelete > 0:
            try:
                os.remove(fordeletion['name'])
                todelete -= fordeletion['size']
            except:
                pass

    # Exit
    if 'FREPPLE_TEST' not in os.environ:
        logger.info("Worker %s for database '%s' finished all jobs in the queue and exits" % (
            os.getpid(), settings.DATABASES[database]['NAME']
        ))
def close_thread_connection():
    # Close the connection that was created by @database_sync_to_async.
    # Assumes we're running in the same thread that ran the database stuff.
    connections.close_all()
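# Usage sketch under Channels assumptions: database_sync_to_async runs ORM
# work on asgiref's thread-sensitive executor thread, so routing the closer
# through the same wrapper reaches the thread that opened the connection.
# `fetch_count` is a hypothetical synchronous ORM helper.
from channels.db import database_sync_to_async

async def handler():
    count = await database_sync_to_async(fetch_count)()
    await database_sync_to_async(close_thread_connection)()
    return count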
def on_done(future):
    # Because each thread has a db connection, we call close_all() when the
    # thread is terminated. This is needed because the threads are not managed
    # by django here but by us.
    connections.close_all()
def on_finish(self):
    connections.close_all()
def import_business_data(
    self,
    file_format=None,
    business_data=None,
    mapping=None,
    overwrite="append",
    bulk=False,
    create_concepts=False,
    create_collections=False,
    use_multiprocessing=False,
    prevent_indexing=False,
    transaction_id=None,
):
    reader = None
    start = time()
    cursor = connection.cursor()

    try:
        if file_format is None:
            file_format = self.file_format
        if business_data is None:
            business_data = self.business_data
        if mapping is None:
            mapping = self.mapping
        if file_format == "json":
            reader = ArchesFileReader()
            reader.import_business_data(
                business_data,
                mapping=mapping,
                overwrite=overwrite,
                prevent_indexing=prevent_indexing,
                transaction_id=transaction_id,
            )
        elif file_format == "jsonl":
            with open(self.file[0], "rU") as openf:
                lines = openf.readlines()
                if use_multiprocessing is True:
                    from functools import partial
                    pool = Pool(cpu_count())
                    # Pool.map() does not forward extra keyword arguments to
                    # the mapped function (the original passed prevent_indexing
                    # straight to map, which raises TypeError); bind it instead.
                    pool.map(partial(import_one_resource, prevent_indexing=prevent_indexing), lines)
                    connections.close_all()
                    reader = ArchesFileReader()
                else:
                    reader = ArchesFileReader()
                    for line in lines:
                        archesresource = JSONDeserializer().deserialize(line)
                        reader.import_business_data(
                            {"resources": [archesresource]},
                            overwrite=overwrite,
                            prevent_indexing=prevent_indexing,
                            transaction_id=transaction_id,
                        )
        elif file_format == "csv" or file_format == "shp" or file_format == "zip":
            if mapping is not None:
                reader = CsvReader()
                reader.import_business_data(
                    business_data=business_data,
                    mapping=mapping,
                    overwrite=overwrite,
                    bulk=bulk,
                    create_concepts=create_concepts,
                    create_collections=create_collections,
                    prevent_indexing=prevent_indexing,
                    transaction_id=transaction_id,
                )
            else:
                print("*" * 80)
                print(
                    f"ERROR: No mapping file detected for {self.file[0]}. Please indicate one \
with the '-c' parameter or place one in the same directory as your business data."
                )
                print("*" * 80)

        elapsed = time() - start
        print("Time to import_business_data = {0}".format(datetime.timedelta(seconds=elapsed)))

        if reader is not None:
            reader.report_errors()

    finally:
        # cleans up the ResourceXResource table, adding any graph_id values
        # that were unavailable during package/csv load
        for res_x_res in ResourceXResource.objects.filter(resourceinstanceto_graphid__isnull=True):
            # wrapping in a try allows for graceful handling of corrupted data
            try:
                res_x_res.resourceinstanceto_graphid = res_x_res.resourceinstanceidto.graph
            except:
                pass
            res_x_res.save()
        datatype_factory = DataTypeFactory()
        datatypes = DDataType.objects.all()
        for datatype in datatypes:
            try:
                datatype_instance = datatype_factory.get_instance(datatype.datatype)
                datatype_instance.after_update_all()
            except BrokenPipeError as e:
                logger = logging.getLogger(__name__)
                logger.info("Celery not working: tasks unavailable during import.")
def _close_connections(self):
    # Used for mocking in tests.
    connections.close_all()
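# Sketch of the mocking pattern the comment above hints at (the test and the
# `runner` fixture are assumptions): patching the seam lets a test assert the
# teardown happened without touching real connections.
from unittest import mock

def test_run_closes_connections(runner):
    with mock.patch.object(runner, '_close_connections') as closer:
        runner.run()  # hypothetical method that ends by calling _close_connections()
        closer.assert_called_once()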
def job_syncgreenhouse():
    call_command('sync_greenhouse')

    # Django won't close db connections after call_command. Close them manually
    # to prevent errors in case the DB goes away, e.g. during a failover event.
    connections.close_all()
def process_message(self, peer, mailfrom, rcpttos, data, **kwargs):
    # get a new db connection in case the old one has timed out:
    connections.close_all()

    result = _process_message(peer[0], mailfrom, rcpttos[0], data)
    self.stdout.write(result)
def become_daemon(**kwargs):
    # close all connections before forking, to avoid SQLite corruption:
    # https://www.sqlite.org/howtocorrupt.html#_carrying_an_open_database_connection_across_a_fork_
    connections.close_all()
    _become_daemon_function(**kwargs)
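# Hedged usage: close connections, daemonize, and let Django lazily reopen a
# connection on the first query inside the daemon. The kwargs follow the old
# django.utils.daemonize.become_daemon signature and are an assumption here,
# as is run_forever().
become_daemon(our_home_dir='/', umask=0o022)
run_forever()  # hypothetical main loop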
def cleanup():
    logger.info('closing all django database connections for this process')
    connections.close_all()
def handle(self, *args, **options):
    """ """
    processes = options['processes']
    begin = dt.now()
    try:
        review_ids = {
            2008: [], 2009: [], 2010: [], 2011: [], 2012: [],
            2013: [], 2014: [], 2015: [], 2016: []
        }
        print("REVIEWS:")
        for i in review_ids.keys():
            review_ids[i] = list(qs.query_by_year(i, 'review', ids=True))
            print("\t{0}: {1}".format(str(i), str(len(review_ids[i]))))
        connections.close_all()

        comment_ids = {
            2008: [], 2009: [], 2010: [], 2011: [], 2012: [],
            2013: [], 2014: [], 2015: [], 2016: []
        }
        message_ids = {
            2008: [], 2009: [], 2010: [], 2011: [], 2012: [],
            2013: [], 2014: [], 2015: [], 2016: []
        }
        for year, ids in review_ids.items():
            comment_ids[year] = list(qs.query_by_year(year, 'comment', ids=True))
            connections.close_all()
            message_ids[year] = list(qs.query_by_year(year, 'message', ids=True))
            connections.close_all()

        print("COMMENTS:")
        for k, v in comment_ids.items():
            print("\t{0}: {1}".format(str(k), str(len(v))))
        print("MESSAGES:")
        for k, v in message_ids.items():
            print("\t{0}: {1}".format(str(k), str(len(v))))

        comment_sentences_ids = {
            2008: [], 2009: [], 2010: [], 2011: [], 2012: [],
            2013: [], 2014: [], 2015: [], 2016: []
        }
        message_sentences_ids = {
            2008: [], 2009: [], 2010: [], 2011: [], 2012: [],
            2013: [], 2014: [], 2015: [], 2016: []
        }
        print("COMMENT_SENTENCES:")
        for year, ids in comment_ids.items():
            comments = Comment.objects.filter(id__in=ids)
            connections.close_all()
            for c in comments:
                comment_sentences_ids[year] += list(c.sentences.values_list('id'))
            print("\t{0}: {1}".format(str(year), str(len(comment_sentences_ids[year]))))
        # for year, ids in comment_ids.items():
        #     comment_sentences_ids[year] = list(CommentSentences.objects.filter(comment_id__in=ids).values_list('sentence_id', flat=True))
        #     connections.close_all()
        #     print("\t{0}: {1}".format(str(year), str(len(comment_sentences_ids[year]))))

        print("MESSAGE_SENTENCES:")
        for year, ids in message_ids.items():
            messages = Message.objects.filter(id__in=ids)
            connections.close_all()
            for m in messages:
                message_sentences_ids[year] += list(m.sentences.values_list('id'))
            print("\t{0}: {1}".format(str(year), str(len(message_sentences_ids[year]))))
        # for year, ids, in message_ids.items():
        #     message_sentences_ids[year] = list(MessageSentences.objects.filter(message_id__in=ids).values_list('sentence_id', flat=True))
        #     connections.close_all()
        #     print("\t{0}: {1}".format(str(year), str(len(message_sentences_ids[year]))))

        sentences = list(qs.query_all('sentence', ids=False).values_list('id', 'text'))
        connections.close_all()

        orphans = {
            2008: [], 2009: [], 2010: [], 2011: [], 2012: [],
            2013: [], 2014: [], 2015: [], 2016: []
        }
        duplicates = {
            2008: [], 2009: [], 2010: [], 2011: [], 2012: [],
            2013: [], 2014: [], 2015: [], 2016: []
        }
        for sentence in sentences:
            for year in review_ids.keys():
                print("YEAR: {0}".format(str(year)))
                if sentence[0] not in comment_sentences_ids[year] and sentence[0] not in message_sentences_ids[year]:
                    orphans[year].append(sentence[0])
                elif sentence[0] in comment_sentences_ids[year] and sentence[0] in message_sentences_ids[year]:
                    duplicates[year].append(sentence[0])

        print("================")
        print("ORPHANS:")
        for year, ids in orphans.items():
            print("\t{0}: {1}".format(str(year), str(len(ids))))
        print("DUPLICATES:")
        for year, ids in duplicates.items():
            print("\t{0}: {1}".format(str(year), str(len(ids))))
        connections.close_all()
    except KeyboardInterrupt:
        logger.warning('Attempting to abort...')
    finally:
        logger.info('Time: {:.2f} minutes.'.format(helpers.get_elapsed(begin, dt.now())))
def run_on_schema(
    schema_name,
    executor_codename,
    command,
    function_name=None,
    args=None,
    kwargs=None,
    pass_schema_in_kwargs=False,
    fork_db=False,
):
    if args is None:
        args = []
    if kwargs is None:
        kwargs = {}

    if not isinstance(command, BaseCommand):
        # Parallel executor needs to pass command 'type' instead of 'instance'
        # Therefore, no customizations for the command can be done, nor using
        # custom stdout, stderr
        command = command()

    command.stdout = kwargs.pop("stdout", command.stdout)
    if not isinstance(command.stdout, OutputWrapper):
        command.stdout = OutputWrapper(command.stdout)

    command.stderr = kwargs.pop("stderr", command.stderr)
    if not isinstance(command.stderr, OutputWrapper):
        command.stderr = OutputWrapper(command.stderr)

    # Since we are prepending every output with the schema_name and executor,
    # we need to determine whether we need to do so based on the last ending
    # used to write. If the last write didn't end in '\n' then we don't do the
    # prefixing in order to keep the output looking good.
    class StyleFunc:
        last_message = None

        def __call__(self, message):
            last_message = self.last_message
            self.last_message = message
            if last_message is None or last_message.endswith("\n"):
                return "[%s:%s] %s" % (
                    command.style.NOTICE(executor_codename),
                    command.style.NOTICE(schema_name),
                    message,
                )
            return message

    command.stdout.style_func = StyleFunc()
    command.stderr.style_func = StyleFunc()

    if fork_db:
        connections.close_all()

    if schema_name in settings.TENANTS:
        domains = settings.TENANTS[schema_name].get("DOMAINS", [])
        schema = SchemaDescriptor.create(schema_name=schema_name, domain_url=domains[0] if domains else None)
    elif schema_name == get_clone_reference():
        schema = SchemaDescriptor.create(schema_name=schema_name)
    else:
        TenantModel = get_tenant_model()
        schema = TenantModel.objects.get(schema_name=schema_name)

    activate(schema)

    if pass_schema_in_kwargs:
        kwargs.update({"schema_name": schema_name})

    if function_name == "special:call_command":
        call_command(command, *args, **kwargs)
    elif function_name == "special:run_from_argv":
        command.run_from_argv(args)
    else:
        getattr(command, function_name)(*args, **kwargs)

    if fork_db:
        transaction.commit()
        connection.close()

    return schema_name
# -*- coding: utf-8 -*-
def background_send_notifications_to_users(event: Event):
    connections.close_all()
    LOG.info('Notification: backgrounded: about to send event "%s"', event)
    p = Process(target=send_notifications_to_users, args=(event,))
    p.start()
    return p
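# Usage sketch from a view (assumptions: `Event` rows are picklable and the
# URLconf passes event_id). Closing connections before fork keeps the child
# from inheriting, and later corrupting, the parent's database sockets.
from django.http import HttpResponse

def notify_view(request, event_id):
    event = Event.objects.get(pk=event_id)
    background_send_notifications_to_users(event)  # returns the started Process
    return HttpResponse(status=202)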
def connwrap(p, *a, **k):
    try:
        return p(*a, **k)
    finally:
        connections.close_all()
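# Sketch of why the wrapper above exists, assuming pool workers: routing each
# task through connwrap closes whatever connection the task opened, so a
# long-lived worker process never accumulates stale connections. `count_books`
# and `Book` are placeholders.
from multiprocessing import Pool

def count_books():
    return Book.objects.count()

with Pool(4) as pool:
    total = pool.apply(connwrap, (count_books,))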
def convert(self):
    """
    Convert external service to gpkg.
    """
    from ..tasks.task_process import TaskProcess
    from .geopackage import remove_empty_zoom_levels

    if self.config:
        conf_dict = yaml.load(self.config)
    else:
        conf_dict = create_conf_from_url(self.service_url)

    if not conf_dict.get('grids'):
        conf_dict['grids'] = {
            'geodetic': {'srs': 'EPSG:4326', 'tile_size': [256, 256], 'origin': 'nw'},
            'webmercator': {'srs': 'EPSG:3857', 'tile_size': [256, 256], 'origin': 'nw'}
        }

    # If user provides a cache setup then use that and substitute in the
    # geopackage file for the placeholder.
    conf_dict['caches'] = conf_dict.get('caches', {})
    try:
        conf_dict['caches']['cache']['cache']['filename'] = self.gpkgfile
    except KeyError:
        conf_dict['caches']['cache'] = get_cache_template(
            ["{0}_{1}".format(self.layer, self.service_type)],
            [grids for grids in conf_dict.get('grids')],
            self.gpkgfile)

    # Prevent the service from failing if source has missing tiles.
    for source in conf_dict.get('sources'):
        if 'wmts' in source:
            conf_dict['sources'][source]['transparent'] = True
            conf_dict['sources'][source]['on_error'] = {
                "other": {"response": "transparent", "cache": False}
            }

    # disable SSL cert checks
    if getattr(settings, "DISABLE_SSL_VERIFICATION", False):
        conf_dict['globals'] = {'http': {'ssl_no_cert_checks': True}}

    # Add autoconfiguration to base_config
    # default = load_default_config()
    mapproxy_config = load_default_config()
    load_config(mapproxy_config, config_dict=conf_dict)

    # Create a configuration object
    mapproxy_configuration = ProxyConfiguration(mapproxy_config, seed=seed, renderd=None)

    # As of Mapproxy 1.9.x, datasource files covering a small area cause a
    # bbox error. Note: the original compared bbox[0] to bbox[2] twice; the
    # second test presumably meant the y extent (bbox[1] vs bbox[3]).
    if isclose(self.bbox[0], self.bbox[2], rel_tol=0.01) or isclose(self.bbox[1], self.bbox[3], rel_tol=0.01):
        logger.warn('Using bbox instead of selection, because the area is too small')
        self.selection = None

    seed_dict = get_seed_template(bbox=self.bbox, level_from=self.level_from,
                                  level_to=self.level_to, coverage_file=self.selection)

    # Create a seed configuration object
    seed_configuration = SeedingConfiguration(seed_dict, mapproxy_conf=mapproxy_configuration)

    logger.info("Beginning seeding to {0}".format(self.gpkgfile))
    logger.error(mapproxy_config)
    try:
        check_service(conf_dict)
        progress_logger = CustomLogger(verbose=True, task_uid=self.task_uid)
        task_process = TaskProcess(task_uid=self.task_uid)
        task_process.start_process(billiard=True, target=seeder.seed,
                                   kwargs={"tasks": seed_configuration.seeds(['seed']),
                                           "concurrency": int(getattr(settings, 'MAPPROXY_CONCURRENCY', 1)),
                                           "progress_logger": progress_logger})
        remove_empty_zoom_levels(self.gpkgfile)
    except Exception as e:
        logger.error("Export failed for url {}.".format(self.service_url))
        errors, informal_only = validate_options(mapproxy_config)
        if not informal_only:
            logger.error("MapProxy configuration failed.")
            logger.error("Using Configuration:")
            logger.error(mapproxy_config)
        errors, informal_only = validate_seed_conf(seed_dict)
        if not informal_only:
            logger.error("Mapproxy Seed failed.")
            logger.error("Using Seed Configuration:")
            logger.error(seed_dict)
            raise SeedConfigurationError('MapProxy seed configuration error - {}'.format(', '.join(errors)))
        raise e
    finally:
        connections.close_all()
    return self.gpkgfile
def run(self):
    while True:
        try:
            # clear all jobs
            # self.scheduler.remove_all_jobs()
            log_common.warn('*********** Refreshing scheduler **********')
            redis_jobs = self.scheduler.get_jobs()
            redis_job_ids = [rj.id for rj in redis_jobs]
            db_job_ids = []

            script_models = CrawlScript.objects.filter(is_deleted=0, is_disable=0)
            for script_model in script_models:
                node_list = []
                if not script_model.hosts or script_model.hosts == '[]':
                    project = CrawlProject.objects.get(id=script_model.project_id)
                    task = CrawlTask.objects.get(id=project.task_id)
                    for node_id in json.loads(task.node_ids):
                        node = CrawlNode.objects.get(id=node_id)
                        node_list.append('{}:{}'.format(node.node_ip, node.node_port))
                else:
                    node_list = eval(script_model.hosts)

                json_args = []
                if script_model.args:
                    json_args = eval(script_model.args)
                for json_arg in json_args:
                    script_args = json_arg["args"]
                    script_triggers = json_arg["trigger"]
                    fix_type = json_arg["fix_type"]
                    try:
                        if script_triggers:
                            # backfill logic
                            if fix_type in (1, 2, 3):
                                run_date = json_arg['fix_date']
                                mix = "{}-{}".format(json.dumps(script_triggers), json.dumps(script_args))
                                job_id = "fix-{}-{}".format(str(script_model.id), md5(mix))
                                log_common.warn('Adding backfill job: {}'.format(script_model.id))
                                # run immediately for testing
                                # schedule_fix_data(node_list, script_model.project_name, script_model.name, script_model.id, script_args, job_id, fix_type)
                                # normal logic
                                db_job_ids.append(job_id)
                                if (datetime.datetime.strptime(run_date, '%Y-%m-%d %H:%M:%S') >= datetime.datetime.now()
                                        and job_id not in redis_job_ids):
                                    self.scheduler.add_job(schedule_fix_data, 'date', run_date=run_date, id=job_id,
                                                           args=[node_list, script_model.project_name,
                                                                 script_model.name, script_model.id,
                                                                 script_args, job_id, fix_type],
                                                           misfire_grace_time=60)
                            else:
                                # dynamic arguments
                                if json_arg.get('dynamic_value'):
                                    sql = json_arg.get('dynamic_value')
                                    result = db_kit.fetch_all_to_json(sql)
                                    for r in result:
                                        script_args['dynamic_value'] = r
                                        log_common.warn('>>>> scheduling with dynamically split args {}, args: {}'.format(
                                            script_model.name, script_args))
                                        mix = "{}-{}".format(json.dumps(script_triggers), json.dumps(script_args))
                                        job_id = "{}-{}".format(str(script_model.id), md5(mix))
                                        log_common.warn("args#{}".format(job_id))
                                        crawl_redis.set("args#{}".format(job_id), json.dumps(script_args))
                                        # log_common.warn('Adding scheduled job: {}'.format(script_model.id))
                                        db_job_ids.append(job_id)
                                        if job_id not in redis_job_ids:
                                            self.scheduler.add_job(work_func, trigger="cron", **script_triggers,
                                                                   id=job_id,
                                                                   args=[node_list, script_model.project_name,
                                                                         script_model.name, job_id],
                                                                   misfire_grace_time=60)
                                else:
                                    mix = "{}-{}".format(json.dumps(script_triggers), json.dumps(script_args))
                                    job_id = "{}-{}".format(str(script_model.id), md5(mix))
                                    crawl_redis.set("args#{}".format(job_id), json.dumps(script_args))
                                    log_common.warn('Adding scheduled job: {}'.format(script_model.id))
                                    db_job_ids.append(job_id)
                                    if job_id not in redis_job_ids:
                                        self.scheduler.add_job(work_func, trigger="cron", **script_triggers,
                                                               id=job_id,
                                                               args=[node_list, script_model.project_name,
                                                                     script_model.name, job_id],
                                                               misfire_grace_time=60)
                    except Exception as e:
                        log_common.warn(">>>> failed to add job: ", e)
                        continue

            # remove jobs that exist in redis but not in the database
            c_ids = [i for i in redis_job_ids if i not in db_job_ids]
            for c_id in c_ids:
                self.scheduler.remove_job(c_id)
                log_common.warn('Removing stale job: {}'.format(c_id))
            db_job_ids.clear()
        except Exception as ex:
            log_common.warn(ex)
            continue
        finally:
            connections.close_all()
            time.sleep(7 * 60)
def handle(self, *args, **options):
    """ """
    processes = options['processes']
    year = options['year']
    begin = dt.now()
    try:
        info('loaddb Command')
        info(' Years: {}'.format(settings.YEARS))
        if year != 0:
            settings.YEARS = [year]

        loader = loaders.BugLoader(settings, processes)
        count = loader.load()
        info(' {:,} bugs loaded'.format(count))

        loader = loaders.VulnerabilityLoader(settings, processes)
        count = loader.load()
        info(' {:,} vulnerabilities loaded'.format(count))

        loader = loaders.ReviewLoader(settings, processes)
        count = loader.load()
        info(' {:,} reviews loaded'.format(count))

        tagger = taggers.MissedVulnerabilityTagger(settings, processes)
        count = tagger.tag()
        info(' {:,} reviews missed a vulnerability'.format(count))

        if year != 0:
            ids = qs.query_by_year(year, 'review', True)
        else:
            ids = qs.query_all('review', True)
        connections.close_all()  # Hack

        # Comments
        loader = loaders.CommentLoader(settings, processes, ids)
        count = loader.load()
        info(' {:,} comments loaded'.format(count))
        connections.close_all()  # Hack

        loader = loaders.SentenceCommentLoader(settings, processes, ids)
        count = loader.load()
        info(' {:,} sentences loaded'.format(count))
        connections.close_all()  # Hack

        tagger = taggers.UsefulCommentTagger(settings, processes, ids)
        count = tagger.tag()
        info(' {:,} comments were useful'.format(count))

        # Messages
        connections.close_all()  # Hack
        loader = loaders.MessageLoader(settings, processes, ids)
        count = loader.load()
        info(' {:,} messages loaded'.format(count))
        connections.close_all()  # Hack

        loader = loaders.SentenceMessageLoader(settings, processes, ids)
        count = loader.load()
        info(' {:,} sentences loaded'.format(count))
        connections.close_all()  # Hack

        # Tokens
        loader = loaders.TokenLoader(settings, processes, ids)
        count = loader.load()
        info(' {:,} tokens loaded'.format(count))

        with connection.cursor() as cursor:
            cursor.execute('REFRESH MATERIALIZED VIEW {};'.format('vw_review_token'))
            cursor.execute('REFRESH MATERIALIZED VIEW {};'.format('vw_review_lemma'))
    except KeyboardInterrupt:  # pragma: no cover
        warning('Attempting to abort.')
    finally:
        info('Time: {:.2f} mins'.format(get_elapsed(begin, dt.now())))
def new_test_client():
    yield get_test_client
    connections.close_all()
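# Hedged reading of the generator above: it is shaped like a pytest fixture
# that yields the get_test_client factory and closes all connections on
# teardown. The registration and test below are assumptions for illustration.
import pytest

test_client = pytest.fixture(new_test_client)

def test_smoke(test_client):
    assert test_client is not None  # fixture value is the yielded factory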
def job_done(self, future):
    connections.close_all()
def multiprocessing_create_service_cache(service):
    connections.close_all()
    create_service_cache(service, True)
def _call_command_thread(self, options):
    stdout = self.call_command(**options)
    connections.close_all()
    return stdout
def _process_harvest_response(self, next_response: bytes) -> int:
    """ Processes the harvest response content

    While the last response is being processed, the next one is already
    loaded to decrease run time

    Args:
        next_response (bytes): The response as bytes
    Returns:
        number_found_entries (int): The amount of found metadata records
        in this response
    """
    xml_response = xml_helper.parse_xml(next_response)
    if xml_response is None:
        csw_logger.error(
            "Response is no valid xml. catalogue: {}, startPosition: {}, maxRecords: {}".format(
                self.metadata.title,
                self.start_position,
                self.max_records_per_request
            )
        )
        # Abort!
        self.start_position = 0
        return

    md_metadata_entries = xml_helper.try_get_element_from_xml(
        "//" + GENERIC_NAMESPACE_TEMPLATE.format("MD_Metadata"),
        xml_response
    ) or []
    next_record_position = int(
        xml_helper.try_get_attribute_from_xml_element(
            xml_response,
            "nextRecord",
            "//" + GENERIC_NAMESPACE_TEMPLATE.format("SearchResults"),
        )
    )
    self.start_position = next_record_position

    # Fetch found identifiers in parent process, so self.deleted_metadata
    # can be edited easily
    for md_identifier in md_metadata_entries:
        id = xml_helper.try_get_text_from_xml_element(
            md_identifier,
            ".//" + GENERIC_NAMESPACE_TEMPLATE.format("fileIdentifier")
            + "/" + GENERIC_NAMESPACE_TEMPLATE.format("CharacterString")
        )
        try:
            self.deleted_metadata.remove(id)
        except KeyError:
            pass

    # Delete response to free memory
    del xml_response

    # Process response via multiple processes
    t_start = time()
    num_processes = int(cpu_count() / 2)
    num_processes = num_processes if num_processes >= 1 else 1
    index_step = int(len(md_metadata_entries) / num_processes)
    start_index = 0
    end_index = 0
    self.resource_list = md_metadata_entries
    process_list = []
    for i in range(0, num_processes):
        if index_step < 1:
            end_index = -1
        else:
            end_index += index_step
        p = Process(target=self._create_metadata_from_md_metadata, args=(start_index, end_index))
        start_index += index_step
        process_list.append(p)
    # Close all connections to force each process to create a new one for itself
    connections.close_all()
    execute_threads(process_list)

    csw_logger.debug(
        "Harvesting '{}': runtime for {} metadata parsing: {}s ####".format(
            self.metadata.title,
            self.max_records_per_request,
            time() - t_start
        )
    )
    return len(md_metadata_entries)
def import_business_data(
    self,
    file_format=None,
    business_data=None,
    mapping=None,
    overwrite="append",
    bulk=False,
    create_concepts=False,
    create_collections=False,
    use_multiprocessing=False,
):
    reader = None
    start = time()
    cursor = connection.cursor()

    try:
        if file_format is None:
            file_format = self.file_format
        if business_data is None:
            business_data = self.business_data
        if mapping is None:
            mapping = self.mapping
        if file_format == "json":
            reader = ArchesFileReader()
            reader.import_business_data(business_data, mapping)
        elif file_format == "jsonl":
            with open(self.file[0], "rU") as openf:
                lines = openf.readlines()
                if use_multiprocessing is True:
                    pool = Pool(cpu_count())
                    pool.map(import_one_resource, lines)
                    connections.close_all()
                    reader = ArchesFileReader()
                else:
                    reader = ArchesFileReader()
                    for line in lines:
                        archesresource = JSONDeserializer().deserialize(line)
                        reader.import_business_data({"resources": [archesresource]})
        elif file_format == "csv" or file_format == "shp" or file_format == "zip":
            if mapping is not None:
                reader = CsvReader()
                reader.import_business_data(
                    business_data=business_data,
                    mapping=mapping,
                    overwrite=overwrite,
                    bulk=bulk,
                    create_concepts=create_concepts,
                    create_collections=create_collections,
                )
            else:
                print("*" * 80)
                print(
                    "ERROR: No mapping file detected. Please indicate one \
with the '-c' parameter or place one in the same directory as your business data."
                )
                print("*" * 80)
                sys.exit()

        elapsed = time() - start
        print("Time to import_business_data = {0}".format(datetime.timedelta(seconds=elapsed)))

        reader.report_errors()

    finally:
        datatype_factory = DataTypeFactory()
        datatypes = DDataType.objects.all()
        for datatype in datatypes:
            try:
                datatype_instance = datatype_factory.get_instance(datatype.datatype)
                datatype_instance.after_update_all()
            except BrokenPipeError as e:
                logger = logging.getLogger(__name__)
                logger.info("Celery not working: tasks unavailable during import.")
def setup_databases(self):
    res = super(PublicPrivateNoseTestSuiteRunner, self).setup_databases()
    connections.databases['root'] = copy(connections.databases['default'])
    connections.close_all()
    return res