Example #1
    def fillup_target(self, **filters):
        # fetch all alert summaries & alerts
        # with only a subset of the datum & jobs
        oldest_day = datetime.datetime.now() - self.time_window
        print('\nFetching data subset no older than {0}...'.format(str(oldest_day)))

        self.delete_local_data()
        alert_summaries = list(self.query_set)
        alert_summaries_len = len(alert_summaries)

        # close all database connections
        # new connections will be automatically opened in processes
        connections.close_all()

        processes_list = []
        num_workers = min(self.num_workers, alert_summaries_len)
        for idx in range(num_workers):
            start_idx = int(idx*alert_summaries_len/num_workers)
            stop_idx = int((idx+1)*alert_summaries_len/num_workers)

            alerts = alert_summaries[start_idx:stop_idx]
            p = Process(target=self.db_worker, args=(idx+1, alerts))
            processes_list.append(p)

        # start the processes
        for p in processes_list:
            p.start()

        # wait for all processes to finish
        for p in processes_list:
            p.join()

        self.save_local_data()
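
The core of Example #1 is the close-before-fork pattern: the parent slices the work, closes every Django connection so no database socket is shared with the children, and each child opens fresh connections on its first query. A minimal, self-contained sketch of the same pattern (handle_chunk and the even chunking are illustrative assumptions, not code from the project above):

import math
from multiprocessing import Process

from django.db import connections


def handle_chunk(worker_id, items):
    # Runs in the child; Django opens new connections lazily on first query.
    print('worker {0} received {1} items'.format(worker_id, len(items)))


def fan_out(items, num_workers=4):
    # Hypothetical helper: spread `items` over at most num_workers processes.
    num_workers = max(1, min(num_workers, len(items)))
    chunk = math.ceil(len(items) / num_workers)

    # Close inherited connections *before* forking; children reconnect on demand.
    connections.close_all()

    procs = [
        Process(target=handle_chunk, args=(idx + 1, items[idx * chunk:(idx + 1) * chunk]))
        for idx in range(num_workers)
    ]
    for p in procs:
        p.start()
    for p in procs:
        p.join()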
Example #2
def check_table_exists(model):
    conn = connections[model._meta.app_label]
    cursor = conn.cursor()
    table_names = [table_info.name for table_info in conn.introspection.get_table_list(cursor)]
    cursor.close()
    connections.close_all()
    return model._meta.db_table in table_names
Example #3
def job_update_product_details():
    call_command('update_product_details')
    create_countries()
    create_locales()
    # Django won't close db connections after call_command. Close them manually
    # to prevent errors in case the DB goes away, e.g. during a failover event.
    connections.close_all()
Example #4
    def convert(self):
        """
        Convert external service to gpkg.
        """

        from eventkit_cloud.tasks.task_process import TaskProcess
        conf_dict, seed_configuration, mapproxy_configuration = self.get_check_config()
        #  Customizations...
        mapproxy.seed.seeder.exp_backoff = get_custom_exp_backoff(max_repeat=int(conf_dict.get('max_repeat', 5)))
        mapproxy.cache.geopackage.GeopackageCache.load_tile_metadata = load_tile_metadata
        logger.info("Beginning seeding to {0}".format(self.gpkgfile))
        try:
            auth_requests.patch_https(self.name)
            auth_requests.patch_mapproxy_opener_cache(slug=self.name)

            progress_store = get_progress_store(self.gpkgfile)
            progress_logger = CustomLogger(verbose=True, task_uid=self.task_uid, progress_store=progress_store)

            task_process = TaskProcess(task_uid=self.task_uid)
            task_process.start_process(billiard=True, target=seeder.seed,
                                       kwargs={"tasks": seed_configuration.seeds(['seed']),
                                               "concurrency": get_concurrency(conf_dict),
                                               "progress_logger": progress_logger})
            check_zoom_levels(self.gpkgfile, mapproxy_configuration)
            remove_empty_zoom_levels(self.gpkgfile)
            set_gpkg_contents_bounds(self.gpkgfile, self.layer, self.bbox)
            if task_process.exitcode != 0:
                raise Exception("The Raster Service failed to complete, please contact an administrator.")
        except Exception:
            logger.error("Export failed for url {}.".format(self.service_url))
            raise
        finally:
            connections.close_all()
        return self.gpkgfile
Example #5
def get_migration_status(**options):
    # type: (**Any) -> str
    verbosity = options.get('verbosity', 1)

    for app_config in apps.get_app_configs():
        if module_has_submodule(app_config.module, "management"):
            import_module('.management', app_config.name)

    app_labels = [options['app_label']] if options.get('app_label') else None
    db = options.get('database', DEFAULT_DB_ALIAS)
    out = StringIO()
    call_command(
        'showmigrations',
        '--list',
        app_labels=app_labels,
        database=db,
        no_color=options.get('no_color', False),
        settings=options.get('settings', os.environ['DJANGO_SETTINGS_MODULE']),
        stdout=out,
        traceback=options.get('traceback', True),
        verbosity=verbosity,
    )
    connections.close_all()
    out.seek(0)
    output = out.read()
    return re.sub('\x1b\\[(1|0)m', '', output)
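
get_migration_status() in Example #5 just captures the output of showmigrations --list with the ANSI colour codes stripped. A hedged usage sketch (the '[ ]' marker check relies on showmigrations' plain-text output, and the app label handling is an assumption):

def has_unapplied_migrations(app_label=None):
    # showmigrations prints '[X]' for applied and '[ ]' for unapplied migrations.
    options = {'app_label': app_label} if app_label else {}
    return '[ ]' in get_migration_status(**options)


if has_unapplied_migrations():
    print('There are unapplied migrations; run `manage.py migrate` first.')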
Example #6
File: base.py Project: keemy/django
    def run_from_argv(self, argv):
        """
        Set up any environment changes requested (e.g., Python path
        and Django settings), then run this command. If the
        command raises a ``CommandError``, intercept it and print it sensibly
        to stderr. If the ``--traceback`` option is present or the raised
        ``Exception`` is not ``CommandError``, raise it.
        """
        self._called_from_command_line = True
        parser = self.create_parser(argv[0], argv[1])

        if self.use_argparse:
            options = parser.parse_args(argv[2:])
            cmd_options = vars(options)
            # Move positional args out of options to mimic legacy optparse
            args = cmd_options.pop('args', ())
        else:
            options, args = parser.parse_args(argv[2:])
            cmd_options = vars(options)
        handle_default_options(options)
        try:
            self.execute(*args, **cmd_options)
        except Exception as e:
            if options.traceback or not isinstance(e, CommandError):
                raise

            # SystemCheckError takes care of its own formatting.
            if isinstance(e, SystemCheckError):
                self.stderr.write(str(e), lambda x: x)
            else:
                self.stderr.write('%s: %s' % (e.__class__.__name__, e))
            sys.exit(1)
        finally:
            connections.close_all()
Example #7
 def _target():
     try:
         with transaction.atomic(using='locking'):
             target()
             self._start.set()
             self._end.wait(30)
     finally:
         connections.close_all()
Example #8
def database_exists(database_name, **options):
    # type: (text_type, **Any) -> bool
    db = options.get('database', DEFAULT_DB_ALIAS)
    connection = connections[db]

    with connection.cursor() as cursor:
        cursor.execute("SELECT 1 from pg_database WHERE datname='{}';".format(database_name))
        return_value = bool(cursor.fetchone())
    connections.close_all()
    return return_value
Example #9
def database_exists(database_name: Text, **options: Any) -> bool:
    db = options.get('database', DEFAULT_DB_ALIAS)
    try:
        connection = connections[db]

        with connection.cursor() as cursor:
            cursor.execute("SELECT 1 from pg_database WHERE datname='{}';".format(database_name))
            return_value = bool(cursor.fetchone())
        connections.close_all()
        return return_value
    except OperationalError:
        return False
Example #10
def init_worker(counter):
    # type: (Synchronized) -> None
    """
    This function runs only under parallel mode. It initializes the
    individual processes which are also called workers.
    """
    global _worker_id

    with counter.get_lock():
        counter.value += 1
        _worker_id = counter.value

    """
    You can now use _worker_id.
    """

    test_classes.API_KEYS = {}

    # Clear the cache
    from zerver.lib.cache import get_cache_backend
    cache = get_cache_backend(None)
    cache.clear()

    # Close all connections
    connections.close_all()

    destroy_test_databases(_worker_id)
    create_test_databases(_worker_id)

    # Every process should upload to a separate directory so that
    # race conditions can be avoided.
    settings.LOCAL_UPLOADS_DIR = '{}_{}'.format(settings.LOCAL_UPLOADS_DIR,
                                                _worker_id)

    def is_upload_avatar_url(url):
        # type: (RegexURLPattern) -> bool
        if url.regex.pattern == r'^user_avatars/(?P<path>.*)$':
            return True
        return False

    # We manually update the upload directory path in the url regex.
    from zproject import dev_urls
    found = False
    for url in dev_urls.urls:
        if is_upload_avatar_url(url):
            found = True
            new_root = os.path.join(settings.LOCAL_UPLOADS_DIR, "avatars")
            url.default_args['document_root'] = new_root

    if not found:
        print("*** Upload directory not found.")
Example #11
 def run_from_argv(self, argv):
     """
     Override django's run_from_argv to parse a subcommand.
     If the subcommand is present, then mangle argv and defer to the
     subcommand
     """
     self._called_from_command_line = True
     parser = self.create_parser(argv[0], argv[1])
     try:
         # first find if we have a known subcommand
         known, __ = parser.parse_known_args(argv[2:])
     except SubcommandsError:
         known = None
     is_subcommand = (
         known
         and hasattr(known, "subcommand")
         and known.subcommand in self.subcommands)
     if is_subcommand:
         # if there is a subcommand then move it in argv to the front of other
         # params and defer to the subcommand
         del argv[argv.index(known.subcommand)]
         argv[1] = "%s %s" % (argv[1], known.subcommand)
         return self.subcommands[known.subcommand]().run_from_argv(argv)
     # continue with the normal parsing/execution
     # and make subcommand optional
     for action in parser._actions:
         if action.dest == "subcommand":
             action.option_strings = [""]
     try:
         options = parser.parse_args(argv[2:])
     except SubcommandsError as e:
         # we have to raise SystemExit here if necessary
         parser.print_usage(sys.stderr)
         return parser.exit(2, "%s\n" % e)
     cmd_options = vars(options)
     args = cmd_options.pop('args', ())
     handle_default_options(options)
     try:
         self.execute(*args, **cmd_options)
     except Exception as e:
         if options.traceback or not isinstance(e, CommandError):
             raise e
         if isinstance(e, SystemCheckError):
             self.stderr.write(str(e), lambda x: x)
         else:
             self.stderr.write('%s: %s' % (e.__class__.__name__, e))
         sys.exit(1)
     finally:
         connections.close_all()
Example #12
def add_db(db_conf):
    app_label = 'al_' + str(uuid4())

    settings.DATABASES[app_label] = db_conf

    router_class_name = 'Router' + app_label.capitalize()
    setattr(
        settings,
        router_class_name,
        type(router_class_name, (Router,), dict(app_label=app_label)),
    )
    settings.DATABASE_ROUTERS.append(
        '.'.join([settings.__name__, router_class_name])
    )
    connections.close_all()
    return app_label
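
Example #12 registers a database and a matching router at runtime; connections.close_all() is called so any cached connections are dropped and the next query picks up the modified settings.DATABASES. A hypothetical usage sketch (the in-memory SQLite configuration is an assumption):

# Register a throwaway in-memory database; models under the returned
# app label are routed to it by the generated Router subclass.
app_label = add_db({
    'ENGINE': 'django.db.backends.sqlite3',
    'NAME': ':memory:',
})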
Example #13
 def run(self):
     """
     Sets up the live server and databases, and then loops over handling
     http requests.
     """
     if self.connections_override:
         # Override this thread's database connections with the ones
         # provided by the main thread.
         for alias, conn in self.connections_override.items():
             connections[alias] = conn
     try:
         self.daphne = self._create_server()
         self.daphne.run()
     except Exception as e:
         self.error = e
         self.is_ready.set()
     finally:
         connections.close_all()
Example #14
 def spawn_process(self, process=None):
     """
     spawn a new process
     """
     if process is None:
         return False
     # start new child process
     pid = fork()
     if pid != 0:
         # parent process
         process.pid = pid
         self.PROCESSES[process.process_id] = process
         connections.close_all()
         return True
     # child process
     process.pid = getpid()
     # connection.connection.close()
     # connection.connection = None
     process.pre_init_process()
     process.init_process()
     process.run()
     sys.exit(0)
Example #15
def get_messages(name, timestamp, limit, order, timeout):
    def _get_messages():

        messages = ChannelMessage.objects.filter(
            name=name,
            destroy_time__gt=timezone.now(),
            created_time__gt=datetime.datetime.utcfromtimestamp(
                timestamp
            ).replace(
                tzinfo=utc,
                microsecond=int(timestamp * 1000000) % int(timestamp)
            ),
        ).order_by("%screated_time" % order)[:limit]
        return [{
            "content": message.content,
            "timestamp": calendar.timegm(
                message.created_time.utctimetuple()
            ) + message.created_time.microsecond / 1000000.0,
        } for message in messages]
    messages = _get_messages()
    connections.close_all()  # clean connections
    if not messages:
        sub = CHANNEL_REDIS.pubsub()
        try:
            sub.subscribe([name, ])
            sub.get_message(True, timeout=timeout)
            while True:
                data = sub.get_message(timeout=timeout)
                if not data:
                    break
                if data and data["type"] == 'message':
                    messages.append(json.loads(data["data"].decode("utf-8")))
                    break
        finally:
            sub.close()
    return messages
Example #16
def worker(queue):
    while True:
        # wait for a job
        bits = queue.get()

        # Check job type and do it. There are two types of jobs: do_update (which is called multiple times
        # during worker lifetime) and close (which is called before killing the process)
        if bits[0] == 'close':
            # Django makes sure that when new process/thread hits DB it gets a new connection
            # (or connections). Such connections won't be usable for other threads so let's close them.
            connections.close_all()
            queue.task_done()  # mark job as done and exit the loop
            break
        elif bits[0] == 'do_update':
            func, model, start, end, total, using, start_date, end_date, verbosity, commit = bits

            unified_index = haystack_connections[using].get_unified_index()
            index = unified_index.get_index(model)
            backend = haystack_connections[using].get_backend()

            qs = index.build_queryset(start_date=start_date, end_date=end_date)
            do_update(backend, index, qs, start, end, total, verbosity=verbosity, commit=commit)

        queue.task_done()  # mark job as done
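
Example #16's worker pulls jobs off a queue until it receives a 'close' job, closing its own connections before exiting. A hypothetical producer-side sketch (MyModel and the Haystack setup are assumptions; the tuple layout mirrors the unpacking in the worker above):

from multiprocessing import JoinableQueue, Process

from django.db import connections

queue = JoinableQueue()
workers = [Process(target=worker, args=(queue,)) for _ in range(4)]

# Close the parent's connections before forking so the children don't inherit them.
connections.close_all()
for w in workers:
    w.start()

# One 'do_update' job per batch; fields mirror the tuple the worker unpacks.
queue.put(('do_update', MyModel, 0, 1000, 1000, 'default',
           None, None, 1, True))

# Finally tell every worker to close its connections and exit.
for _ in workers:
    queue.put(('close',))
queue.join()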
Example #17
 def after_call(self, worker, result, exc):
     """Close all the connections on teardown
     TODO: Autocommit??
     """
     from django.db import connections
     connections.close_all()
Example #18
  def handle(self, *args, **options):

    # Pick up the options
    database = options['database']
    if database not in settings.DATABASES:
      raise CommandError("No database settings known for '%s'" % database )
    continuous = options['continuous']

    # Use the test database if we are running the test suite
    if 'FREPPLE_TEST' in os.environ:
      connections[database].close()
      settings.DATABASES[database]['NAME'] = settings.DATABASES[database]['TEST']['NAME']

    # Check if a worker already exists
    if checkActive(database):
      if 'FREPPLE_TEST' not in os.environ:
        logger.info("Worker for database '%s' already active" % settings.DATABASES[database]['NAME'])
      return

    # Spawn a worker-alive thread
    WorkerAlive(database).start()

    # Process the queue
    if 'FREPPLE_TEST' not in os.environ:
      logger.info("Worker %s for database '%s' starting to process jobs" % (
        os.getpid(), settings.DATABASES[database]['NAME']
        ))
    idle_loop_done = False
    setattr(_thread_locals, 'database', database)
    while True:
      try:
        task = Task.objects.all().using(database).filter(status='Waiting').order_by('id')[0]
        idle_loop_done = False
      except:
        # No more tasks found
        if continuous:
          time.sleep(5)
          continue
        else:
          # Special case: we need to permit a single idle loop before shutting down
          # the worker. If we shut down immediately, a newly launched task could think
          # that a worker is already running - while it just shut down.
          if idle_loop_done:
            break
          else:
            idle_loop_done = True
            time.sleep(5)
            continue
      try:
        if 'FREPPLE_TEST' not in os.environ:
          logger.info("Worker %s for database '%s' starting task %d at %s" % (
            os.getpid(), settings.DATABASES[database]['NAME'], task.id, datetime.now()
            ))
        background = False
        task.started = datetime.now()
        # Verify the command exists
        exists = False
        for commandname in get_commands():
          if commandname == task.name:
            exists = True
            break

        if not exists:
          # No such task exists
          logger.error('Task %s not recognized' % task.name)
          task.status = 'Failed'
          task.processid = None
          task.save(using=database)
        else:
          # Close all database connections to assure the parent and child
          # process don't share them.
          connections.close_all()
          # Spawn a new command process
          args = []
          kwargs = {
            'database': database,
            'task': task.id,
            'verbosity': 0
            }
          background = 'background' in task.arguments if task.arguments else False
          if task.arguments:
            for i in shlex.split(task.arguments):
              if '=' in i:
                key, val = i.split('=')
                kwargs[key.strip("--").replace('-', '_')] = val
              else:
                args.append(i)
          child = Process(
            target=runCommand,
            args=(task.name, *args),
            kwargs=kwargs,
            name="frepplectl %s" % task.name
            )
          child.start()

          # Normally, the child will update the processid.
          # Just to make sure, we do it also here.
          task.processid = child.pid
          task.save(update_fields=['processid'], using=database)

          # Wait for the child to finish
          child.join()

          # Read the task again from the database and update it
          task = Task.objects.all().using(database).get(pk=task.id)
          task.processid = None
          if task.status not in ('Done', 'Failed') or not task.finished or not task.started:
            now = datetime.now()
            if not task.started:
              task.started = now
            if not background:
              if not task.finished:
                task.finished = now
              if task.status not in ('Done', 'Failed'):
                task.status = 'Done'
            task.save(using=database)
          if 'FREPPLE_TEST' not in os.environ:
            logger.info("Worker %s for database '%s' finished task %d at %s: success" % (
              os.getpid(), settings.DATABASES[database]['NAME'], task.id, datetime.now()
              ))
      except Exception as e:
        # Read the task again from the database and update.
        task = Task.objects.all().using(database).get(pk=task.id)
        task.status = 'Failed'
        now = datetime.now()
        if not task.started:
          task.started = now
        task.finished = now
        task.message = str(e)
        task.save(using=database)
        if 'FREPPLE_TEST' not in os.environ:
          logger.info("Worker %s for database '%s' finished task %d at %s: failed" % (
            os.getpid(), settings.DATABASES[database]['NAME'], task.id, datetime.now()
            ))
    # Remove the parameter again
    try:
      Parameter.objects.all().using(database).get(pk='Worker alive').delete()
    except:
      pass
    setattr(_thread_locals, 'database', None)

    # Remove log files exceeding the configured disk space allocation
    totallogs = 0
    filelist = []
    for x in os.listdir(settings.FREPPLE_LOGDIR):
      if x.endswith('.log'):
        size = 0
        creation = 0
        filename = os.path.join(settings.FREPPLE_LOGDIR, x)
        # needs try/catch because log files may still be open or being used and Windows does not like it
        try:
          size = os.path.getsize(filename)
          creation = os.path.getctime(filename)
          filelist.append( {'name': filename, 'size': size, 'creation': creation} )
        except:
          pass
        totallogs += size
    todelete = totallogs - settings.MAXTOTALLOGFILESIZE * 1024 * 1024
    filelist.sort(key=operator.itemgetter('creation'))
    for fordeletion in filelist:
      if todelete > 0:
        try:
          os.remove(fordeletion['name'])
          todelete -= fordeletion['size']
        except:
          pass

    # Exit
    if 'FREPPLE_TEST' not in os.environ:
      logger.info("Worker %s for database '%s' finished all jobs in the queue and exits" % (
        os.getpid(), settings.DATABASES[database]['NAME']
        ))
Example #19
    def worker_teardown(self, worker_ctx):
        from django.db import connections

        connections.close_all()
Example #20
def backup(repo, progress=None, single=False):
    """Perform a backup

    This is usually called from Repository.backup() and is tightly integrated
    with the Repository class. It lives in its own module for organizational
    reasons.

    :type repo: backathon.repository.Repository
    :param progress: A callback function that provides status updates on the
        scan
    :param single: If this parameter is true, the backup process will all
        happen in a single thread. This can help with debugging and profiling.

    The progress callable takes two parameters: the backup count and backup
    total.
    """
    if models.FSEntry.objects.using(repo.db).filter(new=True).exists():
        # This happens when a new root is added but hasn't been scanned yet.
        raise RuntimeError("You need to run a scan first")

    to_backup = models.FSEntry.objects.using(repo.db).filter(obj__isnull=True)

    # The ready_to_backup set is the set of all nodes whose children have all
    # already been backed up. In other words, these are the entries that we
    # can back up right now.
    ready_to_backup = to_backup.exclude(
        # The sub query selects the *parents* of entries that are not yet
        # backed up. Therefore, we're excluding entries whose children are
        # not yet backed up.
        id__in=to_backup.exclude(parent__isnull=True).values("parent_id"))

    # The two above querysets remain unevaluated. We therefore get new results
    # on each call to .exists() below. Calls to .iterator() always return new
    # results.

    backup_total = to_backup.count()
    backup_count = 0

    if single:
        executor = DummyExecutor()
    else:
        executor = concurrent.futures.ProcessPoolExecutor(
            max_workers=NUM_WORKERS, )
        # SQLite connections should not be forked, according to the SQLite
        # documentation. Django and/or Python may have some protections
        # from this problem, but I'm not aware of any, so I'm taking caution and
        # closing all connections before forcing the process pool to immediately
        # launch the processes by submitting a dummy task.
        connections.close_all()
        executor.submit(time.time).result()

    tasks = set()

    contexts = ExitStack()
    with contexts:
        contexts.enter_context(executor)

        # Cancel all tasks that haven't been started yet
        def on_exit():
            for t in tasks:
                t.cancel()

        contexts.callback(on_exit)

        def catch_sigint(exc_type, exc_value, traceback):
            if exc_type and issubclass(exc_type, KeyboardInterrupt):
                print()
                print("Ctrl-C caught. Finishing the current batch of "
                      "uploads, please wait...")

        contexts.push(catch_sigint)

        while to_backup.exists():

            ct = 0
            last_checkpoint = time.monotonic()

            iterator = ready_to_backup.iterator()
            for entry_batch in batcher(iterator, BATCH_SIZE):
                ct += 1

                # Assert our query is working correctly and that there are no
                # SQLite isolation problems (entries we've already backed up
                # re-appearing later in the same query)
                assert all(entry.obj_id is None for entry in entry_batch)

                tasks.add(executor.submit(backup_entry, repo, entry_batch))

                # Don't put the entire to_backup result set in the queue at
                # once, to save memory.
                # If there are too many unfinished tasks, wait for one to
                # finish.
                if len(tasks) >= NUM_WORKERS + 1 or single:
                    done, tasks = concurrent.futures.wait(
                        tasks,
                        timeout=None,
                        return_when=concurrent.futures.FIRST_COMPLETED,
                    )

                    for f in done:
                        backup_count += f.result()
                        if progress is not None:
                            progress(backup_count, backup_total)

                # SQLite won't auto-checkpoint the write-ahead log while we
                # have the query iterator still open. So we force the inner
                # loop to exit every once in a while and force a WAL
                # checkpoint to keep the WAL from growing unbounded.
                if time.monotonic() - last_checkpoint > 30:
                    # Note: closing the iterator should close the cursor
                    # within it, but I think this is relying on reference
                    # counted garbage collection.
                    # If we run into problems, we'll have to find a different
                    # strategy to run checkpoints
                    iterator.close()
                    with connections[repo.db].cursor() as cursor:
                        cursor.execute("PRAGMA wal_checkpoint=RESTART")

            # Sanity check: if we entered the outer loop but the inner loop's
            # query didn't select anything, then we're not making progress and
            # may be caught in an infinite loop. In particular, this could happen
            # if we somehow got a cycle in the FSEntry tree in the database.
            # There would be entries needing backing up, but none of them have
            # all their dependent children backed up.
            assert ct > 0

            # Collect results for the rest of the tasks. We have to do this
            # at the end of each inner loop to guarantee a correct ordering
            # to backed up entries. Items selected next loop could depend on
            # items still in process in the pool.
            # This stalls the workers but it doesn't end up costing all that
            # much time compared to time spent working.
            for f in concurrent.futures.as_completed(tasks):
                backup_count += f.result()
                if progress is not None:
                    progress(backup_count, backup_total)
            tasks.clear()

    # End of outer "while" loop, and end of the contexts ExitStack. The
    # Executor is shut down at this point.

    # Now add the Snapshot object(s) to the database representing this backup
    # run. There's one snapshot per root, but we give them all the same datetime
    # so they can still be grouped together in queries.
    now = timezone.now()
    for root in models.FSEntry.objects.using(
            repo.db).filter(parent__isnull=True):
        assert root.obj_id is not None
        with atomic_immediate(using=repo.db):
            ss = models.Snapshot.objects.using(repo.db).create(
                path=root.path,
                root_id=root.obj_id,
                date=now,
            )
            repo.put_snapshot(ss)

    with connections[repo.db].cursor() as cursor:
        cursor.execute("ANALYZE")
Example #21
from balsam.launcher.util import get_tail, remaining_time_minutes
from balsam.core.models import BalsamJob, safe_select, PROCESSABLE_STATES
from django.conf import settings

Queue = multiprocessing.Queue
try:
    Queue().qsize()
except NotImplementedError:
    from balsam.launcher.multi_queue_fallback import MyQueue
    Queue = MyQueue
    print("No queue.qsize support: will use fallback MyQueue implementation")

SERIAL_CORES_PER_NODE = settings.SERIAL_CORES_PER_NODE
SERIAL_HYPERTHREAD_STRIDE = settings.SERIAL_HYPERTHREAD_STRIDE
logger = logging.getLogger('balsam.launcher.zmq_ensemble')
connections.close_all()


class StatusUpdater(multiprocessing.Process):
    def __init__(self):
        super().__init__()
        self.queue = Queue()

    def run(self):
        connections.close_all()
        signal.signal(signal.SIGINT, signal.SIG_IGN)
        signal.signal(signal.SIGTERM, signal.SIG_IGN)
        while True:
            first_item = self.queue.get(block=True, timeout=None)
            updates = [first_item]
            waited = False
Example #22
 def test_statements_pool_clear(self):
     prepared_qs = Book.objects.prepare()
     prepared_qs.execute()
     self.assertEqual(len(statements_pool), 1)
     connections.close_all()
     self.assertEqual(len(statements_pool), 0)
Example #23
def multiprocessing_create_thirdparty_cache(thirdparty):
    connections.close_all()
    create_third_party_cache(thirdparty, True)
Example #24
def runTask(task, database):
    task.started = datetime.now()
    # Verify the command exists
    exists = False
    for commandname in get_commands():
        if commandname == task.name:
            exists = True
            break

    if not exists:
        # No such task exists
        logger.error("Task %s not recognized" % task.name)
        task.status = "Failed"
        task.processid = None
        task.save(using=database)
    else:
        # Close all database connections to assure the parent and child
        # process don't share them.
        connections.close_all()
        # Spawn a new command process
        args = []
        kwargs = {"database": database, "task": task.id, "verbosity": 0}
        background = "background" in task.arguments if task.arguments else False
        if task.arguments:
            for i in shlex.split(task.arguments):
                if "=" in i:
                    key, val = i.split("=")
                    kwargs[key.strip("--").replace("-", "_")] = val
                else:
                    args.append(i)
        child = Process(
            target=runCommand,
            args=(task.name, *args),
            kwargs=kwargs,
            name="frepplectl %s" % task.name,
        )
        child.start()

        # Normally, the child will update the processid.
        # Just to make sure, we do it also here.
        task.processid = child.pid
        task.save(update_fields=["processid"], using=database)

        # Wait for the child to finish
        child.join()

        # Read the task again from the database and update it
        task = Task.objects.all().using(database).get(pk=task.id)
        task.processid = None
        if (task.status not in ("Done", "Failed") or not task.finished
                or not task.started):
            now = datetime.now()
            if not task.started:
                task.started = now
            if not background:
                if not task.finished:
                    task.finished = now
                if task.status not in ("Done", "Failed"):
                    task.status = "Done"
            task.save(using=database)
        if "FREPPLE_TEST" not in os.environ:
            logger.info(
                "Worker %s for database '%s' finished task %d at %s: success" %
                (
                    os.getpid(),
                    settings.DATABASES[database]["NAME"],
                    task.id,
                    datetime.now(),
                ))
Example #25
def validation(request):
    dc_prefix = 'datasets'
    ref_repfix = 'ref'
    data_initial_values = [{'filters': DataFilter.objects.filter(name='FIL_ALL_VALID_RANGE'),
                            'dataset': Dataset.objects.get(short_name=val_globals.C3S), }]

    ref_initial_values = {'filters': DataFilter.objects.filter(name='FIL_ALL_VALID_RANGE'),
                          'dataset': Dataset.objects.get(short_name=val_globals.ISMN), }

    if request.method == "POST":
        if Settings.load().maintenance_mode:
            __logger.info('Redirecting to the validation page because the system is in maintenance mode.')
            return redirect('validation')

        # formset for data configurations for our new validation
        dc_formset = DatasetConfigurationFormSet(request.POST, prefix=dc_prefix, initial=data_initial_values)

        ## apparently, a missing management form on the formset is a reason to throw a hissy fit err...
        ## ValidationError - instead of just appending it to dc_formset.non_form_errors. Whatever...
        try:
            dc_formset.is_valid()
        except ValidationError as e:
            __logger.exception(e)
            if e.code == 'missing_management_form':
                return HttpResponseBadRequest('Not a valid request: ' + e.message)

        # form for the reference configuration
        ref_dc_form = DatasetConfigurationForm(request.POST, prefix=ref_repfix, is_reference=True, initial=ref_initial_values)
        # form for the rest of the validation parameters
        val_form = ValidationRunForm(request.POST)
        if val_form.is_valid() and dc_formset.is_valid() and ref_dc_form.is_valid():
            newrun = val_form.save(commit=False)
            newrun.user = request.user
            newrun.start_time = datetime.now(tzlocal())

            if newrun.interval_from is not None:
                # truncate time
                newrun.interval_from = datetime(year=newrun.interval_from.year,
                                                month=newrun.interval_from.month,
                                                day=newrun.interval_from.day,
                                                tzinfo=newrun.interval_from.tzinfo)
            if newrun.interval_to is not None:
                # truncate time and go to 1 sec before midnight
                newrun.interval_to = datetime(year=newrun.interval_to.year,
                                                month=newrun.interval_to.month,
                                                day=newrun.interval_to.day,
                                                hour=23,
                                                minute=59,
                                                second=59,
                                                microsecond=999999,
                                                tzinfo=newrun.interval_to.tzinfo)
            newrun.save() # save the validation run
            run_id = newrun.id

            # attach all dataset configurations to the validation and save them
            for dc_form in dc_formset:
                dc = dc_form.save(commit=False)
                dc.validation = newrun
                dc.save()
                dc_form.save_m2m() # save many-to-many related objects, e.g. filters. If you don't do this, filters won't get saved!

            # also attach the reference config
            ref_dc = ref_dc_form.save(commit=False)
            ref_dc.validation = newrun
            ref_dc.save()
            ref_dc_form.save_m2m() # save many-to-many related objects, e.g. filters. If you don't do this, filters won't get saved!

            newrun.reference_configuration = ref_dc

            ## determine the scaling reference. For intercomparison, only the reference makes sense. Otherwise let the user pick.
            if ((len(dc_formset) == 1) and
                (val_form.cleaned_data['scaling_ref'] == ValidationRun.SCALE_TO_DATA)):
                newrun.scaling_ref = dc
            else:
                newrun.scaling_ref = ref_dc

            newrun.save()

            # need to close all db connections before forking, see
            # https://stackoverflow.com/questions/8242837/django-multiprocessing-and-database-connections/10684672#10684672
            connections.close_all()

            p = Process(target=run_validation, kwargs={"validation_id": run_id})
            p.start()

            return redirect('result', result_uuid=run_id)
        else:
            __logger.error("Errors in validation form {}\n{}\n{}".format(val_form.errors, dc_formset.errors, ref_dc_form.errors))
    else:
        val_form = ValidationRunForm()
        dc_formset = DatasetConfigurationFormSet(prefix=dc_prefix, initial=data_initial_values)
        ref_dc_form = DatasetConfigurationForm(prefix=ref_repfix, is_reference=True, initial=ref_initial_values)
        # ref_dc_form.


    return render(request, 'validator/validate.html', {'val_form': val_form, 'dc_formset': dc_formset, 'ref_dc_form': ref_dc_form, 'maintenance_mode':Settings.load().maintenance_mode})
Example #26
    def import_business_data(self,
                             file_format=None,
                             business_data=None,
                             mapping=None,
                             overwrite='append',
                             bulk=False,
                             create_concepts=False,
                             create_collections=False,
                             use_multiprocessing=False):
        reader = None
        start = time()
        cursor = connection.cursor()

        try:
            if file_format == None:
                file_format = self.file_format
            if business_data == None:
                business_data = self.business_data
            if mapping == None:
                mapping = self.mapping
            if file_format == 'json':
                reader = ArchesFileReader()
                reader.import_business_data(business_data, mapping)
            elif file_format == 'jsonl':
                with open(self.file[0], 'rU') as openf:
                    lines = openf.readlines()
                    if use_multiprocessing is True:
                        pool = Pool(cpu_count())
                        pool.map(import_one_resource, lines)
                        connections.close_all()
                        reader = ArchesFileReader()
                    else:
                        reader = ArchesFileReader()
                        for line in lines:
                            archesresource = JSONDeserializer().deserialize(
                                line)
                            reader.import_business_data(
                                {"resources": [archesresource]})
            elif file_format == 'csv' or file_format == 'shp' or file_format == 'zip':
                if mapping != None:
                    reader = CsvReader()
                    reader.import_business_data(
                        business_data=business_data,
                        mapping=mapping,
                        overwrite=overwrite,
                        bulk=bulk,
                        create_concepts=create_concepts,
                        create_collections=create_collections)
                else:
                    print('*' * 80)
                    print('ERROR: No mapping file detected. Please indicate one with the \'-c\' parameter or place one in the same directory as your business data.')
                    print('*' * 80)
                    sys.exit()

            elapsed = (time() - start)
            print('Time to import_business_data = {0}'.format(
                datetime.timedelta(seconds=elapsed)))

            reader.report_errors()

        finally:
            datatype_factory = DataTypeFactory()
            datatypes = DDataType.objects.all()
            for datatype in datatypes:
                datatype_instance = datatype_factory.get_instance(
                    datatype.datatype)
                datatype_instance.after_update_all()
Example #27
    def handle(self, *args, **options):

        # Pick up the options
        database = options['database']
        if database not in settings.DATABASES:
            raise CommandError("No database settings known for '%s'" %
                               database)
        continuous = options['continuous']

        # Use the test database if we are running the test suite
        if 'FREPPLE_TEST' in os.environ:
            connections[database].close()
            settings.DATABASES[database]['NAME'] = settings.DATABASES[
                database]['TEST']['NAME']

        # Check if a worker already exists
        if checkActive(database):
            if 'FREPPLE_TEST' not in os.environ:
                logger.info("Worker for database '%s' already active" %
                            settings.DATABASES[database]['NAME'])
            return

        # Spawn a worker-alive thread
        WorkerAlive(database).start()

        # Process the queue
        if 'FREPPLE_TEST' not in os.environ:
            logger.info(
                "Worker %s for database '%s' starting to process jobs" %
                (os.getpid(), settings.DATABASES[database]['NAME']))
        idle_loop_done = False
        setattr(_thread_locals, 'database', database)
        while True:
            try:
                task = Task.objects.all().using(database).filter(
                    status='Waiting').order_by('id')[0]
                idle_loop_done = False
            except:
                # No more tasks found
                if continuous:
                    time.sleep(5)
                    continue
                else:
                    # Special case: we need to permit a single idle loop before shutting down
                    # the worker. If we shut down immediately, a newly launched task could think
                    # that a worker is already running - while it just shut down.
                    if idle_loop_done:
                        break
                    else:
                        idle_loop_done = True
                        time.sleep(5)
                        continue
            try:
                if 'FREPPLE_TEST' not in os.environ:
                    logger.info(
                        "Worker %s for database '%s' starting task %d at %s" %
                        (os.getpid(), settings.DATABASES[database]['NAME'],
                         task.id, datetime.now()))
                background = False
                task.started = datetime.now()
                # Verify the command exists
                exists = False
                for commandname in get_commands():
                    if commandname == task.name:
                        exists = True
                        break

                if not exists:
                    # No such task exists
                    logger.error('Task %s not recognized' % task.name)
                    task.status = 'Failed'
                    task.processid = None
                    task.save(using=database)
                else:
                    # Close all database connections to assure the parent and child
                    # process don't share them.
                    connections.close_all()
                    # Spawn a new command process
                    args = []
                    kwargs = {
                        'database': database,
                        'task': task.id,
                        'verbosity': 0
                    }
                    if task.arguments:
                        for i in shlex.split(task.arguments):
                            if '=' in i:
                                key, val = i.split('=')
                                kwargs[key.strip("--").replace('-', '_')] = val
                            else:
                                args.append(i)
                    child = Process(target=runCommand,
                                    args=(task.name, *args),
                                    kwargs=kwargs,
                                    name="frepplectl %s" % task.name)
                    child.start()
                    background = 'background' in kwargs or '--background' in kwargs

                    # Normally, the child will update the processid.
                    # Just to make sure, we do it also here.
                    task.processid = child.pid
                    task.save(update_fields=['processid'], using=database)

                    # Wait for the child to finish
                    child.join()

                    # Read the task again from the database and update it
                    task = Task.objects.all().using(database).get(pk=task.id)
                    task.processid = None
                    if task.status not in (
                            'Done',
                            'Failed') or not task.finished or not task.started:
                        now = datetime.now()
                        if not task.started:
                            task.started = now
                        if not background:
                            if not task.finished:
                                task.finished = now
                            if task.status not in ('Done', 'Failed'):
                                task.status = 'Done'
                        task.save(using=database)
                    if 'FREPPLE_TEST' not in os.environ:
                        logger.info(
                            "Worker %s for database '%s' finished task %d at %s: success"
                            %
                            (os.getpid(), settings.DATABASES[database]['NAME'],
                             task.id, datetime.now()))
            except Exception as e:
                # Read the task again from the database and update.
                task = Task.objects.all().using(database).get(pk=task.id)
                task.status = 'Failed'
                now = datetime.now()
                if not task.started:
                    task.started = now
                task.finished = now
                task.message = str(e)
                task.save(using=database)
                if 'FREPPLE_TEST' not in os.environ:
                    logger.info(
                        "Worker %s for database '%s' finished task %d at %s: failed"
                        % (os.getpid(), settings.DATABASES[database]['NAME'],
                           task.id, datetime.now()))
        # Remove the parameter again
        try:
            Parameter.objects.all().using(database).get(
                pk='Worker alive').delete()
        except:
            pass
        setattr(_thread_locals, 'database', None)

        # Remove log files exceeding the configured disk space allocation
        totallogs = 0
        filelist = []
        for x in os.listdir(settings.FREPPLE_LOGDIR):
            if x.endswith('.log'):
                size = 0
                creation = 0
                filename = os.path.join(settings.FREPPLE_LOGDIR, x)
                # needs try/catch because log files may still be open or being used and Windows does not like it
                try:
                    size = os.path.getsize(filename)
                    creation = os.path.getctime(filename)
                    filelist.append({
                        'name': filename,
                        'size': size,
                        'creation': creation
                    })
                except:
                    pass
                totallogs += size
        todelete = totallogs - settings.MAXTOTALLOGFILESIZE * 1024 * 1024
        filelist.sort(key=operator.itemgetter('creation'))
        for fordeletion in filelist:
            if todelete > 0:
                try:
                    os.remove(fordeletion['name'])
                    todelete -= fordeletion['size']
                except:
                    pass

        # Exit
        if 'FREPPLE_TEST' not in os.environ:
            logger.info(
                "Worker %s for database '%s' finished all jobs in the queue and exits"
                % (os.getpid(), settings.DATABASES[database]['NAME']))
Example #28
 def close_thread_connection():
     # Close the connection that was created by
     # @database_sync_to_async.  Assumes we're running in the same
     # thread that ran the database stuff.
     connections.close_all()
Example #29
 def on_done(future):
     # Because each thread has a db connection, we call close_all() when the thread is
     # terminated. This is needed because the threads are not managed by django here but
     # by us.
     connections.close_all()
Example #30
 def on_finish(self):
     connections.close_all()
Example #31
    def import_business_data(
        self,
        file_format=None,
        business_data=None,
        mapping=None,
        overwrite="append",
        bulk=False,
        create_concepts=False,
        create_collections=False,
        use_multiprocessing=False,
        prevent_indexing=False,
        transaction_id=None,
    ):
        reader = None
        start = time()
        cursor = connection.cursor()

        try:
            if file_format is None:
                file_format = self.file_format
            if business_data is None:
                business_data = self.business_data
            if mapping is None:
                mapping = self.mapping
            if file_format == "json":
                reader = ArchesFileReader()
                reader.import_business_data(business_data,
                                            mapping=mapping,
                                            overwrite=overwrite,
                                            prevent_indexing=prevent_indexing,
                                            transaction_id=transaction_id)
            elif file_format == "jsonl":
                with open(self.file[0], "rU") as openf:
                    lines = openf.readlines()
                    if use_multiprocessing is True:
                        pool = Pool(cpu_count())
                        pool.map(import_one_resource,
                                 lines,
                                 prevent_indexing=prevent_indexing)
                        connections.close_all()
                        reader = ArchesFileReader()
                    else:
                        reader = ArchesFileReader()
                        for line in lines:
                            archesresource = JSONDeserializer().deserialize(
                                line)
                            reader.import_business_data(
                                {"resources": [archesresource]},
                                overwrite=overwrite,
                                prevent_indexing=prevent_indexing,
                                transaction_id=transaction_id,
                            )
            elif file_format == "csv" or file_format == "shp" or file_format == "zip":
                if mapping is not None:
                    reader = CsvReader()
                    reader.import_business_data(
                        business_data=business_data,
                        mapping=mapping,
                        overwrite=overwrite,
                        bulk=bulk,
                        create_concepts=create_concepts,
                        create_collections=create_collections,
                        prevent_indexing=prevent_indexing,
                        transaction_id=transaction_id,
                    )
                else:
                    print("*" * 80)
                    print(
                        f"ERROR: No mapping file detected for {self.file[0]}. Please indicate one \
                        with the '-c' parameter or place one in the same directory as your business data."
                    )
                    print("*" * 80)

            elapsed = time() - start
            print("Time to import_business_data = {0}".format(
                datetime.timedelta(seconds=elapsed)))

            if reader is not None:
                reader.report_errors()

        finally:
            # cleans up the ResourceXResource table, adding any graph_id values that were unavailable during package/csv load
            for res_x_res in ResourceXResource.objects.filter(
                    resourceinstanceto_graphid__isnull=True):
                # wrapping in a try allows for graceful handling of corrupted data
                try:
                    res_x_res.resourceinstanceto_graphid = res_x_res.resourceinstanceidto.graph
                except:
                    pass

                res_x_res.save()

            datatype_factory = DataTypeFactory()
            datatypes = DDataType.objects.all()
            for datatype in datatypes:
                try:
                    datatype_instance = datatype_factory.get_instance(
                        datatype.datatype)
                    datatype_instance.after_update_all()
                except BrokenPipeError as e:
                    logger = logging.getLogger(__name__)
                    logger.info(
                        "Celery not working: tasks unavailable during import.")
Example #32
 def _close_connections(self):
     # Used for mocking in tests.
     connections.close_all()
Example #33
def job_syncgreenhouse():
    call_command('sync_greenhouse')
    # Django won't close db connections after call_command. Close them manually
    # to prevent errors in case the DB goes away, e.g. during a failover event.
    connections.close_all()
Example #34
    def process_message(self, peer, mailfrom, rcpttos, data, **kwargs):
        # get a new db connection in case the old one has timed out:
        connections.close_all()

        result = _process_message(peer[0], mailfrom, rcpttos[0], data)
        self.stdout.write(result)
Example #35
def become_daemon(**kwargs):
    # close all connections before forking, to avoid SQLite corruption:
    # https://www.sqlite.org/howtocorrupt.html#_carrying_an_open_database_connection_across_a_fork_
    connections.close_all()
    _become_daemon_function(**kwargs)
Example #36
def cleanup():
    logger.info('closing all django database connections for this process')
    connections.close_all()
Example #37
    def handle(self, *args, **options):
        """

        """
        processes = options['processes']
        begin = dt.now()
        try:
            review_ids = {
                    2008: [], 2009: [], 2010: [], 2011: [], 2012: [], 2013: [],
                    2014: [], 2015: [], 2016: []
                }
            print("REVIEWS:")
            for i in review_ids.keys():
                review_ids[i] = list(qs.query_by_year(i, 'review', ids=True))
                print("\t{0}: {1}".format(str(i), str(len(review_ids[i]))))
                connections.close_all()


            comment_ids = {
                    2008: [], 2009: [], 2010: [], 2011: [], 2012: [], 2013: [],
                    2014: [], 2015: [], 2016: []
                }
            message_ids = {
                    2008: [], 2009: [], 2010: [], 2011: [], 2012: [], 2013: [],
                    2014: [], 2015: [], 2016: []
                }
            for year, ids in review_ids.items():
                comment_ids[year] = list(qs.query_by_year(year, 'comment', ids=True))
                connections.close_all()
                message_ids[year] = list(qs.query_by_year(year, 'message', ids=True))
                connections.close_all()

            print("COMMENTS:")
            for k, v in comment_ids.items():
                print("\t{0}: {1}".format(str(k), str(len(v))))

            print("MESSAGES:")
            for k, v in message_ids.items():
                print("\t{0}: {1}".format(str(k), str(len(v))))

            comment_sentences_ids = {
                    2008: [], 2009: [], 2010: [], 2011: [], 2012: [], 2013: [],
                    2014: [], 2015: [], 2016: []
                }
            message_sentences_ids = {
                    2008: [], 2009: [], 2010: [], 2011: [], 2012: [], 2013: [],
                    2014: [], 2015: [], 2016: []
                }

            print("COMMENT_SENTENCES:")
            for year, ids in comment_ids.items():
                comments = Comment.objects.filter(id__in=ids)
                connections.close_all()
                for c in comments:
                    comment_sentences_ids[year] += list(c.sentences.values_list('id'))
                print("\t{0}: {1}".format(str(year), str(len(comment_sentences_ids[year]))))
#            for year, ids in comment_ids.items():
#                comment_sentences_ids[year] = list(CommentSentences.objects.filter(comment_id__in=ids).values_list('sentence_id', flat=True))
#                connections.close_all()
#                print("\t{0}: {1}".format(str(year), str(len(comment_sentences_ids[year]))))

            print("MESSAGE_SENTENCES:")
            for year, ids in message_ids.items():
                messages = Message.objects.filter(id__in=ids)
                connections.close_all()
                for m in messages:
                    message_sentences_ids[year] += list(m.sentences.values_list('id', flat=True))
                print("\t{0}: {1}".format(str(year), str(len(message_sentences_ids[year]))))
#            for year, ids, in message_ids.items():
#                message_sentences_ids[year] = list(MessageSentences.objects.filter(message_id__in=ids).values_list('sentence_id', flat=True))
#                connections.close_all()
#                print("\t{0}: {1}".format(str(year), str(len(message_sentences_ids[year]))))

            sentences = list(qs.query_all('sentence', ids=False).values_list('id', 'text'))
            connections.close_all()

            orphans = {
                    2008: [], 2009: [], 2010: [], 2011: [], 2012: [], 2013: [],
                    2014: [], 2015: [], 2016: []
                }
            duplicates = {
                    2008: [], 2009: [], 2010: [], 2011: [], 2012: [], 2013: [],
                    2014: [], 2015: [], 2016: []
                }
            for sentence in sentences:
                for year in review_ids.keys():
                    print("YEAR: {0}".format(str(year)))
                    if sentence[0] not in comment_sentences_ids[year] and sentence[0] not in message_sentences_ids[year]:
                        orphans[year].append(sentence[0])
                    elif sentence[0] in comment_sentences_ids[year] and sentence[0] in message_sentences_ids[year]:
                        duplicates[year].append(sentence[0])

            print("================")
            print("ORPHANS:")
            for year, ids in orphans.items():
                print("\t{0}: {1}".format(str(year), str(len(ids))))

            print("DUPLICATES:")
            for year, ids in duplicates.items():
                print("\t{0}: {1}".format(str(year), str(len(ids))))

            connections.close_all()

        except KeyboardInterrupt:
            logger.warning('Attempting to abort...')
        finally:
            logger.info('Time: {:.2f} minutes.'
                .format(helpers.get_elapsed(begin, dt.now())))
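The membership tests above scan Python lists once per sentence and year; converting the id lists to sets makes each check O(1). A minimal sketch of that variant (the helper name find_orphans_and_duplicates is hypothetical and not part of the original command):

def find_orphans_and_duplicates(sentences, comment_sentences_ids, message_sentences_ids):
    # sentences: iterable of (id, text); the two dicts map year -> list of sentence ids.
    comment_sets = {year: set(ids) for year, ids in comment_sentences_ids.items()}
    message_sets = {year: set(ids) for year, ids in message_sentences_ids.items()}
    orphans = {year: [] for year in comment_sets}
    duplicates = {year: [] for year in comment_sets}
    for sentence_id, _text in sentences:
        for year in comment_sets:
            in_comments = sentence_id in comment_sets[year]
            in_messages = sentence_id in message_sets[year]
            if not in_comments and not in_messages:
                orphans[year].append(sentence_id)
            elif in_comments and in_messages:
                duplicates[year].append(sentence_id)
    return orphans, duplicates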
Example #38
0
def run_on_schema(
    schema_name,
    executor_codename,
    command,
    function_name=None,
    args=None,
    kwargs=None,
    pass_schema_in_kwargs=False,
    fork_db=False,
):
    if args is None:
        args = []
    if kwargs is None:
        kwargs = {}

    if not isinstance(command, BaseCommand):
        # The parallel executor passes the command 'type' instead of an 'instance',
        # so no customizations can be applied to the command here, including custom stdout/stderr.
        command = command()

    command.stdout = kwargs.pop("stdout", command.stdout)
    if not isinstance(command.stdout, OutputWrapper):
        command.stdout = OutputWrapper(command.stdout)

    command.stderr = kwargs.pop("stderr", command.stderr)
    if not isinstance(command.stderr, OutputWrapper):
        command.stderr = OutputWrapper(command.stderr)

    # Since every output line is prefixed with the executor and schema_name, check how the previous
    # write ended: if it did not end with '\n', skip the prefix so that continuation lines keep
    # reading naturally.
    class StyleFunc:
        last_message = None

        def __call__(self, message):
            last_message = self.last_message
            self.last_message = message
            if last_message is None or last_message.endswith("\n"):
                return "[%s:%s] %s" % (
                    command.style.NOTICE(executor_codename),
                    command.style.NOTICE(schema_name),
                    message,
                )
            return message

    command.stdout.style_func = StyleFunc()
    command.stderr.style_func = StyleFunc()

    if fork_db:
        connections.close_all()

    if schema_name in settings.TENANTS:
        domains = settings.TENANTS[schema_name].get("DOMAINS", [])
        schema = SchemaDescriptor.create(
            schema_name=schema_name,
            domain_url=domains[0] if domains else None)
    elif schema_name == get_clone_reference():
        schema = SchemaDescriptor.create(schema_name=schema_name)
    else:
        TenantModel = get_tenant_model()
        schema = TenantModel.objects.get(schema_name=schema_name)

    activate(schema)

    if pass_schema_in_kwargs:
        kwargs.update({"schema_name": schema_name})

    if function_name == "special:call_command":
        call_command(command, *args, **kwargs)
    elif function_name == "special:run_from_argv":
        command.run_from_argv(args)
    else:
        getattr(command, function_name)(*args, **kwargs)

    if fork_db:
        transaction.commit()
        connection.close()

    return schema_name
Example #40
0
def background_send_notifications_to_users(event: Event):
    connections.close_all()
    LOG.info('Notification: backgrounded: about to send event "%s"', event)
    p = Process(target=send_notifications_to_users, args=(event, ))
    p.start()
    return p
Example #41
0
def connwrap(p, *a, **k):
    try:
        return p(*a, **k)
    finally:
        connections.close_all()
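A minimal usage sketch for connwrap (the thread pool and the fetch_stats callable are assumptions, not part of the original): every worker closes its Django connections even when the wrapped call raises.

from multiprocessing.pool import ThreadPool

def fetch_stats(year):
    # Placeholder for any ORM-heavy callable.
    return year

if __name__ == '__main__':
    with ThreadPool(4) as pool:
        results = pool.starmap(connwrap, [(fetch_stats, year) for year in (2014, 2015, 2016)])
        print(results)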
Example #42
0
    def convert(self, ):
        """
        Convert external service to gpkg.
        """

        from ..tasks.task_process import TaskProcess
        from .geopackage import remove_empty_zoom_levels

        if self.config:
            conf_dict = yaml.load(self.config)
        else:
            conf_dict = create_conf_from_url(self.service_url)

        if not conf_dict.get('grids'):
            conf_dict['grids'] = {
                'geodetic': {
                    'srs': 'EPSG:4326',
                    'tile_size': [256, 256],
                    'origin': 'nw'
                },
                'webmercator': {
                    'srs': 'EPSG:3857',
                    'tile_size': [256, 256],
                    'origin': 'nw'
                }
            }

        # If user provides a cache setup then use that and substitute in the geopackage file for the placeholder.
        conf_dict['caches'] = conf_dict.get('caches', {})
        try:
            conf_dict['caches']['cache']['cache']['filename'] = self.gpkgfile
        except KeyError:
            conf_dict['caches']['cache'] = get_cache_template(
                ["{0}_{1}".format(self.layer, self.service_type)],
                [grids for grids in conf_dict.get('grids')], self.gpkgfile)

        # Prevent the service from failing if source has missing tiles.
        for source in conf_dict.get('sources'):
            if 'wmts' in source:
                conf_dict['sources'][source]['transparent'] = True
                conf_dict['sources'][source]['on_error'] = {
                    "other": {
                        "response": "transparent",
                        "cache": False
                    }
                }

        # disable SSL cert checks
        if getattr(settings, "DISABLE_SSL_VERIFICATION", False):
            conf_dict['globals'] = {'http': {'ssl_no_cert_checks': True}}

        # Add autoconfiguration to base_config
        # default = load_default_config()
        mapproxy_config = load_default_config()
        load_config(mapproxy_config, config_dict=conf_dict)

        # Create a configuration object
        mapproxy_configuration = ProxyConfiguration(mapproxy_config,
                                                    seed=seed,
                                                    renderd=None)

        # As of MapProxy 1.9.x, datasource files covering a small area cause a bbox error.
        if isclose(self.bbox[0], self.bbox[2], rel_tol=0.01) or isclose(
                self.bbox[1], self.bbox[3], rel_tol=0.01):
            logger.warning(
                'Using bbox instead of selection, because the area is too small'
            )
            self.selection = None

        seed_dict = get_seed_template(bbox=self.bbox,
                                      level_from=self.level_from,
                                      level_to=self.level_to,
                                      coverage_file=self.selection)

        # Create a seed configuration object
        seed_configuration = SeedingConfiguration(
            seed_dict, mapproxy_conf=mapproxy_configuration)

        logger.info("Beginning seeding to {0}".format(self.gpkgfile))
        logger.error(mapproxy_config)
        try:
            check_service(conf_dict)
            progress_logger = CustomLogger(verbose=True,
                                           task_uid=self.task_uid)
            task_process = TaskProcess(task_uid=self.task_uid)
            task_process.start_process(
                billiard=True,
                target=seeder.seed,
                kwargs={
                    "tasks":
                    seed_configuration.seeds(['seed']),
                    "concurrency":
                    int(getattr(settings, 'MAPPROXY_CONCURRENCY', 1)),
                    "progress_logger":
                    progress_logger
                })
            remove_empty_zoom_levels(self.gpkgfile)
        except Exception as e:
            logger.error("Export failed for url {}.".format(self.service_url))
            errors, informal_only = validate_options(mapproxy_config)
            if not informal_only:
                logger.error("MapProxy configuration failed.")
                logger.error("Using Configuration:")
                logger.error(mapproxy_config)
            errors, informal_only = validate_seed_conf(seed_dict)
            if not informal_only:
                logger.error("Mapproxy Seed failed.")
                logger.error("Using Seed Configuration:")
                logger.error(seed_dict)
                raise SeedConfigurationError(
                    'MapProxy seed configuration error  - {}'.format(
                        ', '.join(errors)))
            raise e
        finally:
            connections.close_all()
        return self.gpkgfile
Example #43
0
    def run(self):
        while True:
            try:
                # Clear all existing jobs
                # self.scheduler.remove_all_jobs()
                log_common.warn('*********** Refreshing scheduler **********')
                redis_jobs = self.scheduler.get_jobs()
                redis_job_ids = [rj.id for rj in redis_jobs]
                db_job_ids = []

                script_models = CrawlScript.objects.filter(is_deleted=0, is_disable=0)
                for script_model in script_models:
                    node_list = []
                    if not script_model.hosts or script_model.hosts == '[]':
                        project = CrawlProject.objects.get(id=script_model.project_id)
                        task = CrawlTask.objects.get(id=project.task_id)
                        for node_id in json.loads(task.node_ids):
                            node = CrawlNode.objects.get(id=node_id)
                            node_list.append('{}:{}'.format(node.node_ip, node.node_port))
                    else:
                        node_list = eval(script_model.hosts)
                    json_args = []
                    if script_model.args:
                        json_args = eval(script_model.args)
                    for json_arg in json_args:
                        script_args = json_arg["args"]
                        script_triggers = json_arg["trigger"]
                        fix_type = json_arg["fix_type"]

                        try:
                            if script_triggers:
                                # Backfill logic for missing data
                                if fix_type in (1, 2, 3):
                                    run_date = json_arg['fix_date']
                                    mix = "{}-{}".format(json.dumps(script_triggers), json.dumps(script_args))
                                    job_id = "fix-{}-{}".format(str(script_model.id), md5(mix))
                                    log_common.warn('Adding backfill scheduling job: {}'.format(script_model.id))
                                    # For immediate testing:
                                    # schedule_fix_data(node_list, script_model.project_name, script_model.name, script_model.id, script_args, job_id, fix_type)

                                    # Normal path
                                    db_job_ids.append(job_id)
                                    if datetime.datetime.strptime(run_date, '%Y-%m-%d %H:%M:%S') >= datetime.datetime.now() and job_id not in redis_job_ids:
                                        self.scheduler.add_job(schedule_fix_data,
                                                               'date',
                                                               run_date=run_date,
                                                               id=job_id,
                                                               args=[node_list, script_model.project_name,
                                                                     script_model.name, script_model.id,
                                                                     script_args, job_id, fix_type],
                                                               misfire_grace_time=60)
                                else:
                                    # Dynamic parameters
                                    if json_arg.get('dynamic_value'):
                                        sql = json_arg.get('dynamic_value')
                                        result = db_kit.fetch_all_to_json(sql)
                                        for r in result:
                                            script_args['dynamic_value'] = r
                                            log_common.warn('>>>> Scheduling with dynamically split parameters {}, args: {}'.format(script_model.name, script_args))
                                            mix = "{}-{}".format(json.dumps(script_triggers), json.dumps(script_args))
                                            job_id = "{}-{}".format(str(script_model.id), md5(mix))
                                            log_common.warn("args#{}".format(job_id))
                                            crawl_redis.set("args#{}".format(job_id), json.dumps(script_args))
                                            # log_common.warn('Adding scheduling job: {}'.format(script_model.id))
                                            db_job_ids.append(job_id)
                                            if job_id not in redis_job_ids:
                                                self.scheduler.add_job(work_func,
                                                                       trigger="cron",
                                                                       **script_triggers,
                                                                       id=job_id,
                                                                       args=[node_list, script_model.project_name,
                                                                             script_model.name, job_id],
                                                                       misfire_grace_time=60)
                                    else:
                                        mix = "{}-{}".format(json.dumps(script_triggers), json.dumps(script_args))
                                        job_id = "{}-{}".format(str(script_model.id), md5(mix))
                                        crawl_redis.set("args#{}".format(job_id), json.dumps(script_args))
                                        log_common.warn('Adding scheduling job: {}'.format(script_model.id))
                                        db_job_ids.append(job_id)
                                        if job_id not in redis_job_ids:
                                            self.scheduler.add_job(work_func,
                                                                   trigger="cron",
                                                                   **script_triggers,
                                                                   id=job_id,
                                                                   args=[node_list, script_model.project_name,
                                                                         script_model.name, job_id],
                                                                   misfire_grace_time=60)
                        except Exception as e:
                            log_common.warn(">>>> 添加报错任务报错: ", e)
                            continue

                c_ids = [i for i in redis_job_ids if i not in db_job_ids]
                for c_id in c_ids:
                    self.scheduler.remove_job(c_id)
                    log_common.warn('Removing stale job: {}'.format(c_id))
                db_job_ids.clear()
            except Exception as ex:
                log_common.warn(ex)
                continue
            finally:
                connections.close_all()
                time.sleep(7 * 60)
Example #44
0
    def handle(self, *args, **options):
        """

        """
        processes = options['processes']
        year = options['year']
        begin = dt.now()
        try:
            info('loaddb Command')
            info('  Years: {}'.format(settings.YEARS))

            if year != 0:
                settings.YEARS = [year]

            loader = loaders.BugLoader(settings, processes)
            count = loader.load()
            info('  {:,} bugs loaded'.format(count))

            loader = loaders.VulnerabilityLoader(settings, processes)
            count = loader.load()
            info('  {:,} vulnerabilities loaded'.format(count))

            loader = loaders.ReviewLoader(settings, processes)
            count = loader.load()
            info('  {:,} reviews loaded'.format(count))

            tagger = taggers.MissedVulnerabilityTagger(settings, processes)
            count = tagger.tag()
            info('  {:,} reviews missed a vulnerability'.format(count))

            if year != 0:
                ids = qs.query_by_year(year, 'review', True)
            else:
                ids = qs.query_all('review', True)
            connections.close_all()  # Hack

            # Comments
            loader = loaders.CommentLoader(settings, processes, ids)
            count = loader.load()
            info('  {:,} comments loaded'.format(count))
            connections.close_all()  # Hack
            loader = loaders.SentenceCommentLoader(settings, processes, ids)
            count = loader.load()
            info('  {:,} sentences loaded'.format(count))
            connections.close_all()  # Hack

            tagger = taggers.UsefulCommentTagger(settings, processes, ids)
            count = tagger.tag()
            info('  {:,} comments were useful'.format(count))

            # Messages
            connections.close_all()  # Hack
            loader = loaders.MessageLoader(settings, processes, ids)
            count = loader.load()
            info('  {:,} messages loaded'.format(count))
            connections.close_all()  # Hack
            loader = loaders.SentenceMessageLoader(settings, processes, ids)
            count = loader.load()
            info('  {:,} sentences loaded'.format(count))
            connections.close_all()  # Hack

            # Tokens
            loader = loaders.TokenLoader(settings, processes, ids)
            count = loader.load()
            info('  {:,} tokens loaded'.format(count))

            with connection.cursor() as cursor:
                cursor.execute(
                    'REFRESH MATERIALIZED VIEW {};'.format('vw_review_token'))
                cursor.execute(
                    'REFRESH MATERIALIZED VIEW {};'.format('vw_review_lemma'))
        except KeyboardInterrupt:  # pragma: no cover
            warning('Attempting to abort.')
        finally:
            info('Time: {:.2f} mins'.format(get_elapsed(begin, dt.now())))
Example #45
0
def new_test_client():
    yield get_test_client
    connections.close_all()
Example #46
0
    def job_done(self, future):
        connections.close_all()
Example #48
0
def multiprocessing_create_service_cache(service):
    connections.close_all()
    create_service_cache(service, True)
Example #49
0
    def _call_command_thread(self, options):
        stdout = self.call_command(**options)
        connections.close_all()
        return stdout
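A minimal sketch of how _call_command_thread and the earlier job_done callback might be wired together (the ThreadPoolExecutor driver below is an assumption; only the two methods appear in the source):

from concurrent.futures import ThreadPoolExecutor

def run_commands(self, option_sets):
    # Each worker thread gets its own DB connections, which
    # _call_command_thread closes before the future resolves.
    with ThreadPoolExecutor(max_workers=4) as executor:
        futures = [executor.submit(self._call_command_thread, opts)
                   for opts in option_sets]
        for future in futures:
            future.add_done_callback(self.job_done)
    return [future.result() for future in futures]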
Example #50
0
    def _process_harvest_response(self, next_response: bytes) -> int:
        """ Processes the harvest response content

        While the last response is being processed, the next one is already loaded to decrease run time

        Args:
            next_response (bytes): The harvest response as bytes
        Returns:
             number_found_entries (int): The number of metadata records found in this response
        """
        xml_response = xml_helper.parse_xml(next_response)
        if xml_response is None:
            csw_logger.error(
                "Response is no valid xml. catalogue: {}, startPosition: {}, maxRecords: {}"
                .format(self.metadata.title, self.start_position,
                        self.max_records_per_request))
            # Abort!
            self.start_position = 0
            return

        md_metadata_entries = xml_helper.try_get_element_from_xml(
            "//" + GENERIC_NAMESPACE_TEMPLATE.format("MD_Metadata"),
            xml_response) or []
        next_record_position = int(
            xml_helper.try_get_attribute_from_xml_element(
                xml_response,
                "nextRecord",
                "//" + GENERIC_NAMESPACE_TEMPLATE.format("SearchResults"),
            ))
        self.start_position = next_record_position

        # Fetch found identifiers in parent process, so self.deleted_metadata can be edited easily
        for md_identifier in md_metadata_entries:
            id = xml_helper.try_get_text_from_xml_element(
                md_identifier,
                ".//" + GENERIC_NAMESPACE_TEMPLATE.format("fileIdentifier") +
                "/" + GENERIC_NAMESPACE_TEMPLATE.format("CharacterString"))
            try:
                self.deleted_metadata.remove(id)
            except KeyError:
                pass

        # Delete response to free memory
        del xml_response

        # Process response via multiple processes
        t_start = time()
        num_processes = int(cpu_count() / 2)
        num_processes = num_processes if num_processes >= 1 else 1
        index_step = int(len(md_metadata_entries) / num_processes)
        start_index = 0
        end_index = 0
        self.resource_list = md_metadata_entries
        process_list = []
        for i in range(0, num_processes):
            if index_step < 1:
                end_index = -1
            else:
                end_index += index_step
            p = Process(target=self._create_metadata_from_md_metadata,
                        args=(start_index, end_index))
            start_index += index_step
            process_list.append(p)
        # Close all connections to force each process to create a new one for itself
        connections.close_all()
        execute_threads(process_list)

        csw_logger.debug(
            "Harvesting '{}': runtime for {} metadata parsing: {}s ####".
            format(self.metadata.title, self.max_records_per_request,
                   time() - t_start))
        return len(md_metadata_entries)
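The start/end index arithmetic above can be easier to follow as an explicit chunking helper; a minimal sketch (hypothetical; unlike the loop above it hands the remainder of the integer division to the last chunk):

def chunk_bounds(total, num_processes):
    # Yield (start, end) slice bounds for splitting `total` items across processes.
    step = total // num_processes
    for i in range(num_processes):
        start = i * step
        end = total if i == num_processes - 1 else (i + 1) * step
        yield start, end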
Example #51
0
    def import_business_data(
        self,
        file_format=None,
        business_data=None,
        mapping=None,
        overwrite="append",
        bulk=False,
        create_concepts=False,
        create_collections=False,
        use_multiprocessing=False,
    ):
        reader = None
        start = time()
        cursor = connection.cursor()

        try:
            if file_format is None:
                file_format = self.file_format
            if business_data is None:
                business_data = self.business_data
            if mapping is None:
                mapping = self.mapping
            if file_format == "json":
                reader = ArchesFileReader()
                reader.import_business_data(business_data, mapping)
            elif file_format == "jsonl":
                with open(self.file[0], "rU") as openf:
                    lines = openf.readlines()
                    if use_multiprocessing is True:
                        pool = Pool(cpu_count())
                        pool.map(import_one_resource, lines)
                        connections.close_all()
                        reader = ArchesFileReader()
                    else:
                        reader = ArchesFileReader()
                        for line in lines:
                            archesresource = JSONDeserializer().deserialize(line)
                            reader.import_business_data({"resources": [archesresource]})
            elif file_format == "csv" or file_format == "shp" or file_format == "zip":
                if mapping is not None:
                    reader = CsvReader()
                    reader.import_business_data(
                        business_data=business_data,
                        mapping=mapping,
                        overwrite=overwrite,
                        bulk=bulk,
                        create_concepts=create_concepts,
                        create_collections=create_collections,
                    )
                else:
                    print("*" * 80)
                    print(
                        "ERROR: No mapping file detected. Please indicate one \
                        with the '-c' parameter or place one in the same directory as your business data."
                    )
                    print("*" * 80)
                    sys.exit()

            elapsed = time() - start
            print("Time to import_business_data = {0}".format(datetime.timedelta(seconds=elapsed)))

            reader.report_errors()

        finally:
            datatype_factory = DataTypeFactory()
            datatypes = DDataType.objects.all()
            for datatype in datatypes:
                try:
                    datatype_instance = datatype_factory.get_instance(datatype.datatype)
                    datatype_instance.after_update_all()
                except BrokenPipeError as e:
                    logger = logging.getLogger(__name__)
                    logger.info("Celery not working: tasks unavailable during import.")
Example #52
0
    def setup_databases(self):
        res = super(PublicPrivateNoseTestSuiteRunner, self).setup_databases()
        connections.databases['root'] = copy(connections.databases['default'])
        connections.close_all()
        return res
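A minimal follow-up sketch (the test body is hypothetical): once the extra 'root' alias is registered, a test can reach the same database through an independent connection.

from django.db import connections

def test_can_query_via_root_alias():
    with connections['root'].cursor() as cursor:
        cursor.execute('SELECT 1')
        assert cursor.fetchone()[0] == 1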