def execute(self, heroku):
    """Start the server, load the zip file into the database, then loop
    until terminated with <control>-c.
    """
    db.init_db(drop_all=True)
    self.out.log(
        "Ingesting dataset from {}...".format(os.path.basename(self.zip_path))
    )
    data.ingest_zip(self.zip_path)
    base_url = get_base_url()
    self.out.log("Server is running on {}. Press Ctrl+C to exit.".format(base_url))

    if self.exp_config.get("replay"):
        self.out.log("Launching the experiment...")
        time.sleep(4)
        _handle_launch_data("{}/launch".format(base_url), error=self.out.error)
        heroku.monitor(listener=self.notify)

    # Just run until interrupted:
    while self.keep_running():
        time.sleep(1)
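# A minimal sketch of how the "run until interrupted" loop above can be
# terminated: keep_running() is assumed to consult a flag that a SIGINT
# (Ctrl+C) handler clears. The class and wiring below are illustrative
# assumptions, not part of this excerpt.
import signal
import time


class StopOnInterrupt:
    """Clears its running flag when the process receives SIGINT."""

    def __init__(self):
        self._running = True
        signal.signal(signal.SIGINT, self._stop)

    def _stop(self, signum, frame):
        self._running = False

    def keep_running(self):
        return self._running


# Equivalent of the loop above: blocks until Ctrl+C flips the flag.
runner = StopOnInterrupt()
while runner.keep_running():
    time.sleep(1)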
def restore_state_from_replay(
    self, app_id, session, zip_path=None, **configuration_options
):
    # We need to fake dallinger_experiment to point at the current experiment
    module = sys.modules[type(self).__module__]

    if sys.modules.get("dallinger_experiment", module) != module:
        logger.warning("dallinger_experiment is already set, updating")
    sys.modules["dallinger_experiment"] = module

    # Load the configuration system and globals
    config = get_config()
    # Manually load extra parameters and ignore errors
    try:
        from dallinger_experiment.experiment import extra_parameters

        try:
            extra_parameters()
            extra_parameters.loaded = True
        except KeyError:
            pass
    except ImportError:
        pass
    config.load()
    self.app_id = self.original_app_id = app_id
    self.session = session
    self.exp_config = config

    # The replay index is initialised to 1970 as that is guaranteed
    # to be before any experiment Info objects
    self._replay_time_index = datetime.datetime(1970, 1, 1, 1, 1, 1)

    # Create a second database session so we can load the full history
    # of the experiment to be replayed and selectively import events
    # into the main database
    specific_db_url = db_url + "-import-" + app_id
    import_engine = create_engine(specific_db_url)
    try:
        # Clear the temporary storage and import it
        init_db(drop_all=True, bind=import_engine)
    except Exception:
        create_db_engine = create_engine(db_url)
        conn = create_db_engine.connect()
        conn.execute("COMMIT;")
        conn.execute(
            'CREATE DATABASE "{}"'.format(specific_db_url.rsplit("/", 1)[1])
        )
        conn.close()
        import_engine = create_engine(specific_db_url)
        init_db(drop_all=True, bind=import_engine)

    self.import_session = scoped_session(
        sessionmaker(autocommit=False, autoflush=True, bind=import_engine)
    )

    # Find the real data for this experiment
    if zip_path is None:
        zip_path = find_experiment_export(app_id)
    if zip_path is None:
        msg = 'Dataset export for app id "{}" could not be found.'
        raise IOError(msg.format(app_id))

    print("Ingesting dataset from {}...".format(os.path.basename(zip_path)))
    ingest_zip(zip_path, engine=import_engine)
    self._replay_range = tuple(
        self.import_session.query(
            func.min(Info.creation_time), func.max(Info.creation_time)
        )
    )[0]

    # We apply the configuration options we were given and yield
    # the scrubber function into the context manager, so within the
    # with experiment.restore_state_from_replay(...): block the configuration
    # options are correctly set
    with config.override(configuration_options, strict=True):
        self.replay_start()
        yield Scrubber(self, session=self.import_session)
        self.replay_finish()

    # Clear up global state
    self.import_session.rollback()
    self.import_session.close()
    session.rollback()
    session.close()
    # Remove marker preventing experiment config variables being reloaded
    try:
        del module.extra_parameters.loaded
    except AttributeError:
        pass
    config._reset(register_defaults=True)
    del sys.modules["dallinger_experiment"]
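# Usage sketch for the method above. Its own comments confirm it is consumed
# as a context manager ("with experiment.restore_state_from_replay(...):"),
# and it yields a Scrubber that is called with a datetime to advance the
# replay. The experiment subclass, app id, and timestamp below are
# illustrative assumptions, not taken from this excerpt.
import datetime

from dallinger.db import session
from dallinger_experiment.experiment import MyExperiment  # hypothetical subclass

exp = MyExperiment(session)
with exp.restore_state_from_replay("a1b2c3", session) as scrubber:
    # Advance the replayed state; scrubbing backwards is not supported.
    scrubber(datetime.datetime(2018, 1, 1, 12, 0, 0))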
def restore_state_from_replay(self, app_id, session, zip_path=None,
                              **configuration_options):
    # We need to fake dallinger_experiment to point at the current experiment
    module = sys.modules[type(self).__module__]
    if sys.modules.get('dallinger_experiment', module) != module:
        raise RuntimeError('dallinger_experiment is already set')
    sys.modules['dallinger_experiment'] = module

    # Load the configuration system and globals
    config = get_config()
    config.register_extra_parameters()
    config.load()
    self.app_id = self.original_app_id = app_id
    self.session = session
    self.exp_config = config

    # The replay index is initialised to 1970 as that is guaranteed
    # to be before any experiment Info objects
    self._replay_time_index = datetime.datetime(1970, 1, 1, 1, 1, 1)

    # Create a second database session so we can load the full history
    # of the experiment to be replayed and selectively import events
    # into the main database
    import_engine = create_engine(
        "postgresql://*****:*****@localhost/dallinger-import")
    import_session = scoped_session(
        sessionmaker(autocommit=False, autoflush=True, bind=import_engine))

    # Find the real data for this experiment
    if zip_path is None:
        zip_path = find_experiment_export(app_id)
    if zip_path is None:
        msg = u'Dataset export for app id "{}" could not be found.'
        raise IOError(msg.format(app_id))

    # Clear the temporary storage and import it
    init_db(drop_all=True, bind=import_engine)
    print("Ingesting dataset from {}...".format(
        os.path.basename(zip_path)))
    ingest_zip(zip_path, engine=import_engine)

    def go_to(time):
        """Scrub to a point in the experiment replay, given by time
        which is a datetime object.
        """
        if self._replay_time_index > time:
            # We do not support going back in time
            raise NotImplementedError
        events = self.events_for_replay(session=import_session)
        for event in events:
            if event.creation_time <= self._replay_time_index:
                # Skip events we've already handled
                continue
            if event.creation_time > time:
                # Stop once we get future events
                break
            self.replay_event(event)
        self._replay_time_index = time
        # Override app_id to allow exports to be created that don't
        # overwrite the original dataset
        self.app_id = "{}_{}".format(self.original_app_id, time.isoformat())

    # We apply the configuration options we were given and yield
    # the scrubber function into the context manager, so within the
    # with experiment.restore_state_from_replay(...): block the configuration
    # options are correctly set
    with config.override(configuration_options, strict=True):
        yield go_to

    # Clear up global state
    import_session.close()
    config._reset(register_defaults=True)
    del sys.modules['dallinger_experiment']
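# Usage sketch for this earlier variant, which yields the go_to() function
# itself rather than a Scrubber object (the subclass, app id, and timestamp
# below are illustrative assumptions).
import datetime

from dallinger.db import session
from dallinger_experiment.experiment import MyExperiment  # hypothetical subclass

exp = MyExperiment(session)
with exp.restore_state_from_replay("a1b2c3", session) as go_to:
    go_to(datetime.datetime(2018, 1, 1, 12, 0, 0))
    # go_to() also rewrote exp.app_id to "a1b2c3_2018-01-01T12:00:00", so an
    # export taken at this point would not overwrite the original dataset.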