Example #1
    def test_encode_chunk_key(self):
        """Test encoding an object key"""
        b = BossBackend(self.example_config_data)
        b.setup(self.api_token)

        params = {"collection": 1,
                  "experiment": 2,
                  "channel": 3,
                  "resolution": 0,
                  "x_index": 5,
                  "y_index": 6,
                  "z_index": 1,
                  "t_index": 0,
                  "num_tiles": 16,
                  }

        proj = [str(params['collection']), str(params['experiment']), str(params['channel'])]
        key = b.encode_chunk_key(params['num_tiles'], proj,
                                 params['resolution'],
                                 params['x_index'],
                                 params['y_index'],
                                 params['z_index'],
                                 params['t_index'],
                                 )

        assert key == six.u("77ff984241a0d6aa443d8724a816866d&16&1&2&3&0&5&6&1&0")
Example #2
    def test_encode_tile_key(self):
        """Test encoding an object key"""
        b = BossBackend(self.example_config_data)
        b.setup(self.api_token)

        params = {"collection": 1,
                  "experiment": 2,
                  "channel": 3,
                  "resolution": 0,
                  "x_index": 5,
                  "y_index": 6,
                  "z_index": 1,
                  "t_index": 0,
                  }

        proj = [str(params['collection']), str(params['experiment']), str(params['channel'])]
        key = b.encode_tile_key(proj,
                                params['resolution'],
                                params['x_index'],
                                params['y_index'],
                                params['z_index'],
                                params['t_index'],
                                )

        assert key == six.u("03ca58a12ec662954ac12e06517d4269&1&2&3&0&5&6&1&0")
Example #3
    def test_create(self):
        """Test creating an ingest job - mock server response"""
        b = BossBackend(self.example_config_data)
        b.setup(self.api_token)

        id = b.create(self.example_config_data)

        assert id == 23
Example #4
def create_messages(args):
    """Create all of the tile messages to be enqueued

    Args:
        args (dict): Same arguments as populate_upload_queue()

    Yields:
        str: JSON string for a single tile upload message
    """

    tile_size = lambda v: args[v + "_tile_size"]
    range_ = lambda v: range(args[v + '_start'], args[v + '_stop'], tile_size(v))

    # DP NOTE: configuration is not actually used by encode_*_key method
    backend = BossBackend(None)

    for t in range_('t'):
        for z in range_('z'):
            for y in range_('y'):
                for x in range_('x'):
                    chunk_x = int(x/tile_size('x'))
                    chunk_y = int(y/tile_size('y'))
                    chunk_z = int(z/tile_size('z'))

                    num_of_tiles = min(tile_size('z'), args['z_stop'] - z)

                    chunk_key = backend.encode_chunk_key(num_of_tiles,
                                                         args['project_info'],
                                                         args['resolution'],
                                                         chunk_x,
                                                         chunk_y,
                                                         chunk_z,
                                                         t)

                    for tile in range(z, z + num_of_tiles):
                        tile_key = backend.encode_tile_key(args['project_info'],
                                                           args['resolution'],
                                                           chunk_x,
                                                           chunk_y,
                                                           tile,
                                                           t)

                        msg = {
                            'job_id': args['job_id'],
                            'upload_queue_arn': args['upload_queue'],
                            'ingest_queue_arn': args['ingest_queue'],
                            'chunk_key': chunk_key,
                            'tile_key': tile_key,
                        }

                        yield json.dumps(msg)
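Since create_messages() is a generator of JSON strings, a driver has to drain it onto the upload queue. A minimal sketch of such a driver, assuming args['upload_queue'] can be resolved to an SQS queue URL (the actual populate_upload_queue() may batch differently):

import boto3

def enqueue_messages(queue_url, messages):
    """Drain a message generator into SQS in batches of 10 (the SQS batch limit)."""
    sqs = boto3.client('sqs')
    batch = []
    for i, body in enumerate(messages):
        batch.append({'Id': str(i % 10), 'MessageBody': body})
        if len(batch) == 10:
            sqs.send_message_batch(QueueUrl=queue_url, Entries=batch)
            batch = []
    if batch:
        sqs.send_message_batch(QueueUrl=queue_url, Entries=batch)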
Example #5
    def test_get_task(self):
        """Test getting a task from the upload queue"""
        b = BossBackend(self.example_config_data)
        b.setup(self.api_token)

        # Make sure queue is empty.
        sqs = boto3.resource('sqs')
        queue = sqs.Queue(self.upload_queue_url)
        queue.purge()

        # Put some stuff on the task queue
        self.setup_helper.add_tasks(self.aws_creds["access_key"],
                                    self.aws_creds['secret_key'],
                                    self.upload_queue_url, b)

        # Join and get a task
        b.join(23)
        msg_id, rx_handle, msg_body = b.get_task()

        assert isinstance(msg_id, str)
        assert isinstance(rx_handle, str)
        assert msg_body == self.setup_helper.test_msg[0]

        msg_id, rx_handle, msg_body = b.get_task()
        assert isinstance(msg_id, str)
        assert isinstance(rx_handle, str)
        assert msg_body == self.setup_helper.test_msg[1]
Example #6
def _create_messages(args, index_csv):
    """
    Create all of the tile messages to be enqueued.

    Args:
        args (dict): Same arguments as patch_upload_queue().
        index_csv (str): CSV file with tile info.

    Yields:
        str: JSON string for a single tile upload message
    """

    # DP NOTE: configuration is not actually used by encode_*_key method
    backend = BossBackend(None)

    chunk_key_list = []
    with open(index_csv, "rt") as data:
        lines = data.readlines()
        for line in lines:
            parts = line.split(",")
            chunk_key_list.append(parts[0])

    for base_chunk_key in chunk_key_list:
        parts = backend.decode_chunk_key(base_chunk_key)
        chunk_x = parts['x_index']
        chunk_y = parts['y_index']
        chunk_z = parts['z_index']
        t = parts['t_index']

        num_of_tiles = parts['num_tiles']

        chunk_key = base_chunk_key
        z_start = chunk_z * args['z_chunk_size']

        for tile in range(z_start, z_start + num_of_tiles):
            tile_key = backend.encode_tile_key(args['project_info'],
                                               args['resolution'], chunk_x,
                                               chunk_y, tile, t)

            msg = {
                'job_id': args['job_id'],
                'upload_queue_arn': args['upload_queue'],
                'ingest_queue_arn': args['ingest_queue'],
                'chunk_key': chunk_key,
                'tile_key': tile_key,
            }

            yield json.dumps(msg)
Example #7
    def invoke_ingest_lambda(self, ingest_job, num_invokes=1):
        """Method to trigger extra lambda functions to make sure all the ingest jobs that are actually fully populated
        kick through

        Args:
            ingest_job: Ingest job object
            num_invokes(int): number of invocations to fire

        Returns:

        """
        bosskey = ingest_job.collection + CONNECTER + ingest_job.experiment + CONNECTER + ingest_job.channel
        lookup_key = (LookUpKey.get_lookup_key(bosskey)).lookup_key
        [col_id, exp_id, ch_id] = lookup_key.split('&')
        project_info = [col_id, exp_id, ch_id]
        fake_chunk_key = (BossBackend(self.config)).encode_chunk_key(
            16, project_info, ingest_job.resolution, 0, 0, 0, 0)

        event = {
            "ingest_job": ingest_job.id,
            "chunk_key": fake_chunk_key,
            "lambda-name": "ingest"
        }

        # Invoke Ingest lambda functions
        lambda_client = boto3.client('lambda',
                                     region_name=bossutils.aws.get_region())
        for _ in range(0, num_invokes):
            lambda_client.invoke(FunctionName=INGEST_LAMBDA,
                                 InvocationType='Event',
                                 Payload=json.dumps(event).encode())
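Because the invocation type is 'Event', invoke() returns as soon as AWS accepts the request rather than waiting for the lambda to finish. A hedged sketch of how a caller could verify acceptance (AWS returns HTTP 202 for asynchronous invokes):

response = lambda_client.invoke(FunctionName=INGEST_LAMBDA,
                                InvocationType='Event',
                                Payload=json.dumps(event).encode())
# 202 Accepted indicates the async invocation was queued successfully.
assert response['StatusCode'] == 202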
Example #8
    def test_join(self):
        """Test joining an existing ingest job - mock server response"""
        b = BossBackend(self.example_config_data)
        b.setup(self.api_token)

        status, creds, queue_url, tile_bucket, params, tile_count = b.join(23)

        assert b.queue.url == self.queue_url
        assert status == 1
        assert creds == self.aws_creds
        assert queue_url == self.queue_url
        assert tile_bucket == self.tile_bucket_name
        assert tile_count == 500
        assert 'KVIO_SETTINGS' in params
        assert 'OBJECTIO_CONFIG' in params
        assert 'STATEIO_CONFIG' in params
        assert 'ingest_queue' in params
Example #9
    def check_tiles(self, chunk_key, tiles):
        """
        Check the chunk's tile map for missing tiles.  If any are missing,
        generate the proper stringified JSON for putting those missing tiles
        back in the tile upload queue.

        Args:
            chunk_key (str): Identifies chunk of tiles.
            tiles (list): Tile keys already uploaded for the chunk.
        Yields:
            (str): JSON string for sending to SQS tile upload queue.
        """
        # Only using encode|decode_*_key methods, so don't need to provide a
        # config.
        ingest_backend = BossBackend(None)
        chunk_key_parts = ingest_backend.decode_chunk_key(chunk_key)
        chunk_x = chunk_key_parts['x_index']
        chunk_y = chunk_key_parts['y_index']
        chunk_z = chunk_key_parts['z_index']
        t = chunk_key_parts['t_index']
        num_tiles = chunk_key_parts['num_tiles']
        z_start = chunk_z * self.job['z_chunk_size']

        for tile_z in range(z_start, z_start + num_tiles):
            # First arg is a list of [collection, experiment, channel] ids.
            tile_key = ingest_backend.encode_tile_key(self._get_project_info(),
                                                      self.job['resolution'],
                                                      chunk_x, chunk_y, tile_z,
                                                      t)
            if tile_key in tiles:
                continue
            msg = {
                'job_id': self.job['task_id'],
                'upload_queue_arn': self.job['upload_queue'],
                'ingest_queue_arn': self.job['ingest_queue'],
                'chunk_key': chunk_key,
                'tile_key': tile_key
            }
            log.info(
                f'Re-enqueuing tile: {tile_key} belonging to chunk: {chunk_key}'
            )

            yield json.dumps(msg)
Example #10
    def test_decode_tile_key(self):
        """Test encoding an object key"""
        b = BossBackend(self.example_config_data)
        b.setup(self.api_token)

        params = {"collection": 1,
                  "experiment": 2,
                  "channel": 3,
                  "resolution": 0,
                  "x_index": 5,
                  "y_index": 6,
                  "z_index": 1,
                  "t_index": 0,
                  }

        proj = [str(params['collection']), str(params['experiment']), str(params['channel'])]
        key = b.encode_tile_key(proj,
                                params['resolution'],
                                params['x_index'],
                                params['y_index'],
                                params['z_index'],
                                params['t_index'],
                                )

        parts = b.decode_tile_key(key)

        assert parts["collection"] == params['collection']
        assert parts["experiment"] == params['experiment']
        assert parts["channel"] == params['channel']
        assert parts["resolution"] == params['resolution']
        assert parts["x_index"] == params['x_index']
        assert parts["y_index"] == params['y_index']
        assert parts["z_index"] == params['z_index']
        assert parts["t_index"] == params['t_index']
Example #11
    def test_setup_upload_queue(self):
        """Test connecting the backend to the upload queue"""
        b = BossBackend(self.example_config_data)
        b.setup(self.api_token)

        b.setup_upload_queue(self.aws_creds, self.queue_url)

        assert b.queue.url == self.queue_url
Example #12
    def test_setup_queues(self):
        """Test connecting the backend to the upload and tile index queues"""
        b = BossBackend(self.example_config_data)
        b.setup(self.api_token)

        b.setup_queues(self.aws_creds, self.upload_queue_url,
                       self.tile_index_queue_url)

        assert b.upload_queue.url == self.upload_queue_url
        assert b.tile_index_queue.url == self.tile_index_queue_url
Example #13
    def test_delete_task(self):
        """Test deleting a task from the upload queue"""
        b = BossBackend(self.example_config_data)
        b.setup(self.api_token)

        # Make sure queue is empty.
        sqs = boto3.resource('sqs')
        queue = sqs.Queue(self.upload_queue_url)
        queue.purge()

        # Put some stuff on the task queue
        self.setup_helper.add_tasks(self.aws_creds["access_key"],
                                    self.aws_creds['secret_key'],
                                    self.upload_queue_url, b)

        # Join and get a task
        b.join(23)
        msg_id, rx_handle, msg_body = b.get_task()

        assert b.delete_task(msg_id, rx_handle)
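Together, get_task() and delete_task() implement the usual SQS receive/acknowledge cycle. A hypothetical worker loop combining them (the empty-queue sentinel returned by get_task() is an assumption; the real backend may signal exhaustion differently):

def drain_queue_sketch(backend):
    """Hypothetical worker loop: receive, process, then acknowledge."""
    while True:
        task = backend.get_task()
        if not task:
            break  # assumed empty-queue sentinel
        msg_id, rx_handle, msg_body = task
        # ... upload the tile described by msg_body ...
        # Deleting with the receipt handle acknowledges the message so
        # SQS does not redeliver it after the visibility timeout.
        backend.delete_task(msg_id, rx_handle)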
Example #14
    def test_create_messages(self):
        """Test that _create_messages() yields one message per tile per chunk"""
        job_id = 8
        num_tiles = 16
        exp_upload_queue = 'upload-test-queue'
        exp_ingest_queue = 'ingest-test-queue'
        args = {
            'job_id': job_id,
            'project_info': [5, 3, 2],  # Collection/Experiment/Channel ids
            'z_chunk_size': 16,
            'resolution': 0,
            'upload_queue': exp_upload_queue,
            'ingest_queue': exp_ingest_queue
        }

        backend = BossBackend(None)
        x1, y1, z1 = (0, 0, 0)
        x2, y2, z2 = (1024, 1024, 16)
        res = 0
        t = 2
        chunk_key1 = backend.encode_chunk_key(num_tiles, args['project_info'],
                                              res, x1, y1, z1, t)
        chunk_key2 = backend.encode_chunk_key(num_tiles, args['project_info'],
                                              res, x2, y2, z2, t)
        with NamedTemporaryFile(mode='wt', suffix='.csv',
                                delete=False) as output_csv:
            csv_file = output_csv.name
            output_csv.write('{},{},{}\n'.format(chunk_key1, job_id, num_tiles))
            output_csv.write('{},{},{}\n'.format(chunk_key2, job_id, num_tiles))

        try:
            actual = _create_messages(args, csv_file)
            for i, raw_msg in enumerate(actual):
                msg = json.loads(raw_msg)
                self.assertEqual(job_id, msg['job_id'])
                self.assertEqual(exp_upload_queue, msg['upload_queue_arn'])
                self.assertEqual(exp_ingest_queue, msg['ingest_queue_arn'])
                if i < 16:
                    self.assertEqual(chunk_key1, msg['chunk_key'])
                    exp_tile_key = backend.encode_tile_key(
                        args['project_info'], res, x1, y1, z1 + i, t)
                    self.assertEqual(exp_tile_key, msg['tile_key'])
                elif i < 32:
                    self.assertEqual(chunk_key2, msg['chunk_key'])
                    exp_tile_key = backend.encode_tile_key(
                        args['project_info'], res, x2, y2, z2 + i - 16, t)
                    self.assertEqual(exp_tile_key, msg['tile_key'])
                else:
                    self.fail('Too many messages returned')

        finally:
            os.remove(csv_file)
Example #15
    def test_setup(self):
        """Method to test setup instance"""
        b = BossBackend(self.example_config_data)
        b.setup(self.api_token)

        assert b.host == "https://api.theboss.io"
Example #16
    def test_check_tiles(self):
        """Test that check_tiles() re-enqueues only the missing tiles"""
        table = 'foo'
        db_host = 'bar'
        job = {
            'collection': 90,
            'experiment': 141,
            'channel': 985,
            'task_id': 1082,
            'resolution': 0,
            'z_chunk_size': 16,
            'upload_queue': 'foo',
            'ingest_queue': 'bar',
            'ingest_type': TILE_INGEST,
        }

        chunk_x = 94
        chunk_y = 40
        chunk_z = int(2128 / job['z_chunk_size'])

        # Simulate the tile map in the Dynamo tile index.
        tile_map = {
            '51b03272fb38672209b1891a5b6e20b8&90&141&985&0&94&40&2128&0': {
                'N': '1'
            },
            '1743c07e50c1a642e357d6738cab13e9&90&141&985&0&94&40&2129&0': {
                'N': '1'
            },
            'de1a408932bcfc3d36b5b4a7607d0964&90&141&985&0&94&40&2130&0': {
                'N': '1'
            },
            '12223001b0bc18b3a8cb3463b09552d8&90&141&985&0&94&40&2131&0': {
                'N': '1'
            },
            '82ae0d1f7775f29e30491036fd0335e2&90&141&985&0&94&40&2132&0': {
                'N': '1'
            },
            '4b0c2f87566cbcdffb9b89ee873c0949&90&141&985&0&94&40&2134&0': {
                'N': '1'
            },
            '6f13405cda1b04907b9ed3f9795eb9fb&90&141&985&0&94&40&2135&0': {
                'N': '1'
            },
            'ab42d5b88996f9a0076c51f13f082173&90&141&985&0&94&40&2136&0': {
                'N': '1'
            },
            'fb3bdfff8982a8250a9f21c30098d2e5&90&141&985&0&94&40&2137&0': {
                'N': '1'
            },
            '2853c92ea467cf0e3f571bba11a63473&90&141&985&0&94&40&2138&0': {
                'N': '1'
            },
            '6700434c27e4fb23de45fe0df9f2162f&90&141&985&0&94&40&2139&0': {
                'N': '1'
            },
            '2fb7f8223dae87bf8d9c6bd73f7e3d2a&90&141&985&0&94&40&2141&0': {
                'N': '1'
            },
            '6483f9efb8da500d010e1ea70b956ebe&90&141&985&0&94&40&2142&0': {
                'N': '1'
            },
            'b8f59755d83f8501e387fb15329ee7ee&90&141&985&0&94&40&2143&0': {
                'N': '1'
            },
        }

        # The tile map is missing tiles with z=2133 and 2140.
        missing_tiles = [
            '16a888bbb0457cb8e6bfce882b74afff&90&141&985&0&94&40&2133&0',
            '4a1c5be8336960d75fb4c4366544a9d3&90&141&985&0&94&40&2140&0'
        ]

        cs = ChunkScanner(self.dynamo, self.sqs, table, db_host, job,
                          self.resource, self.x_size, self.y_size,
                          self.kvio_settings, self.stateio_config,
                          self.objectio_config)
        backend = BossBackend(None)
        chunk_key = backend.encode_chunk_key(16, cs._get_project_info(),
                                             job['resolution'], chunk_x,
                                             chunk_y, chunk_z)

        msg_template = {
            'job_id': job['task_id'],
            'upload_queue_arn': 'foo',
            'ingest_queue_arn': 'bar',
            'chunk_key': chunk_key
        }

        # Expect messages for tile upload queue for tiles with z=2133 and 2140.
        exp = [
            json.dumps(dict(msg_template, tile_key=tile))
            for tile in missing_tiles
        ]
        actual = [tile for tile in cs.check_tiles(chunk_key, tile_map)]
        self.assertCountEqual(exp, actual)
Example #17
def main(configuration=None, parser_args=None):
    """Client UI main

    Args:
        configuration(ingestclient.core.config.Configuration): A pre-loaded configuration instance
        parser_args(argparse.Namespace): Pre-parsed command line arguments

    Returns:
        None
    """
    parser = get_parser()
    if parser_args is None:
        args = parser.parse_args()
    else:
        args = parser_args

    # Get the version
    if args.version:
        check_version()
        return

    # Make sure you have a config file
    if args.config_file is None and configuration is None:
        if args.cancel:
            # If no config is provided and you are deleting, the client defaults to the production Boss stack
            boss_backend_params = {
                "client": {
                    "backend": {
                        "name": "boss",
                        "class": "BossBackend",
                        "host": "api.theboss.io",
                        "protocol": "https"
                    }
                }
            }
            backend = BossBackend(boss_backend_params)
            backend.setup(args.api_token)

            # Trying to cancel
            if args.job_id is None:
                parser.print_usage()
                print("Error: You must provide an ingest job ID to cancel")
                sys.exit(1)

            if not get_confirmation(
                    "Are you sure you want to cancel ingest job {}? ".format(
                        args.job_id), args.force):
                print("Command ignored. Job not cancelled")
                sys.exit(0)

            backend.cancel(args.job_id)
            print("Ingest job {} successfully cancelled.".format(args.job_id))
            sys.exit(0)
        else:
            # Not deleting, so you need a config file
            parser.print_usage()
            print("Error: Ingest Job Configuration File is required")
            sys.exit(1)

    # Setup logging
    log_level = logging.getLevelName(args.log_level.upper())
    if not args.log_file:
        # Using default log path
        log_path = os.path.expanduser("~/.boss-ingest")
        log_file = os.path.join(
            log_path, 'ingest_log{}_pid{}.log'.format(
                datetime.datetime.now().strftime("%Y%m%d-%H%M%S"),
                os.getpid()))
        # Make sure the logs dir exists if using the default log path
        if not os.path.exists(log_path):
            os.makedirs(log_path)
    else:
        log_file = args.log_file

    logging.basicConfig(level=log_level,
                        format='%(asctime)s %(levelname)-8s %(message)s',
                        datefmt='%m-%d %H:%M',
                        filename=log_file,
                        filemode='a')
    logging.getLogger('ingest-client').addHandler(
        logging.StreamHandler(sys.stdout))

    # Create an engine instance
    try:
        engine = Engine(config_file=args.config_file,
                        backend_api_token=args.api_token,
                        ingest_job_id=args.job_id,
                        configuration=configuration)
    except ConfigFileError as err:
        print("ERROR: {}".format(err))
        sys.exit(1)

    if args.cancel:
        # Trying to cancel
        if args.job_id is None:
            parser.print_usage()
            print("Error: You must provide an ingest job ID to cancel")
            sys.exit(1)

        if not get_confirmation(
                "Are you sure you want to cancel ingest job {}? ".format(
                    args.job_id), args.force):
            print("Command ignored. Job not cancelled")
            sys.exit(0)

        always_log_info("Attempting to cancel Ingest Job {}.".format(
            args.job_id))
        engine.cancel()
        always_log_info("Ingest job {} successfully cancelled.".format(
            args.job_id))
        sys.exit(0)

    else:
        # Trying to create or join an ingest
        if args.job_id is None:
            # Creating a new session - make sure the user wants to do this.
            print_estimated_job(config_file=args.config_file,
                                configuration=configuration)
            print("\n")
            if not get_confirmation(
                    "Would you like to create a NEW ingest job?", args.force):
                # Don't want to create a new job
                print("Exiting")
                sys.exit(0)
        else:
            # Resuming a session - make sure the user wants to do this.
            if not get_confirmation(
                    "Are you sure you want to resume ingest job {}?".format(
                        args.job_id), args.force):
                # Don't want to resume
                print("Exiting")
                sys.exit(0)

    # Setup engine instance.  Prompt user to confirm things if needed
    question_msgs = engine.setup()
    if question_msgs:
        for msg in question_msgs:
            if not get_confirmation(msg, args.force):
                print("Ingest job cancelled")
                sys.exit(0)

    if args.job_id is None:
        # Create job
        engine.create_job()
        always_log_info("Successfully Created Ingest Job ID: {}".format(
            engine.ingest_job_id))
        always_log_info("Note: You need this ID to continue this job later!")

        if not get_confirmation("\nDo you want to start uploading now?",
                                args.force):
            print(
                "OK - Your job is waiting for you. You can resume by providing Ingest Job ID '{}' to the client"
                .format(engine.ingest_job_id))
            sys.exit(0)

        # Join job
        engine.join()

    else:
        # Join job
        engine.join()

    # Create worker processes
    workers = []
    for i in range(args.processes_nb):
        new_pipe = mp.Pipe(False)
        new_process = mp.Process(target=worker_process_run,
                                 args=(args.api_token, engine.ingest_job_id,
                                       new_pipe[0]),
                                 kwargs={
                                     'config_file': args.config_file,
                                     'configuration': configuration
                                 })
        workers.append((new_process, new_pipe[1]))
        new_process.start()

        # Sleep to slowly ramp up load on lambda
        time.sleep(.5)

    # Start the main process engine
    start_time = time.time()
    should_run = True
    job_complete = False
    while should_run:
        try:
            engine.monitor(workers)
            # run will end if no more jobs are available, join other processes
            should_run = False
            job_complete = True
        except KeyboardInterrupt:
            # Make sure they want to stop this client
            while True:
                quit_uploading = input(
                    "Are you sure you want to quit uploading? (y/n)")
                if quit_uploading.lower() == "y":
                    always_log_info("Stopping upload engine.")
                    should_run = False
                    break
                elif quit_uploading.lower() == "n":
                    print("Continuing...")
                    break
                else:
                    print("Enter 'y' or 'n' for 'yes' or 'no'")

            # notify the worker processes that they should stop execution
            for _, worker_pipe in workers:
                worker_pipe.send(should_run)

    always_log_info("Waiting for worker processes to close...\n")
    time.sleep(1)  # Make sure workers have cleaned up
    for worker_process, worker_pipe in workers:
        worker_process.join()
        worker_pipe.close()

    if job_complete:
        # If auto-complete, mark the job as complete and cleanup
        always_log_info("All upload tasks completed in {:.2f} minutes.".format(
            (time.time() - start_time) / 60))
        if not args.manual_complete:
            always_log_info(
                " - Marking Ingest Job as complete and cleaning up. Please wait."
            )
            engine.complete()
            always_log_info(" - Cleanup Done")
        else:
            always_log_info(
                " - Auto-complete disabled. This ingest job will remain in the 'Uploading' state until you manually mark it as complete"
            )
    else:
        always_log_info("Client exiting")
        always_log_info("Run time: {:.2f} minutes.".format(
            (time.time() - start_time) / 60))
Example #18
    def generate_upload_tasks(self, job_id=None):
        """
        Generate upload tasks for the ingest job. This creates one task for each tile that has to be uploaded to the
        ingest queue.

        Args:
            job_id: Id of the ingest job. If omitted, the current ingest job is used

        Returns:
            None
        Raises:
            BossError : if there is no valid ingest job

        """

        if job_id is None and self.job is None:
            raise BossError(
                "Unable to generate upload tasks for the ingest service. Please specify a ingest job",
                ErrorCodes.UNABLE_TO_VALIDATE)
        elif job_id:
            # Using the job id to get the job
            try:
                ingest_job = IngestJob.objects.get(id=job_id)
            except IngestJob.DoesNotExist:
                raise BossError(
                    "Ingest job with id {} does not exist".format(job_id),
                    ErrorCodes.RESOURCE_NOT_FOUND)
        else:
            ingest_job = self.job

        # Generate upload tasks for the ingest job
        # Get the project information
        bosskey = ingest_job.collection + CONNECTER + ingest_job.experiment + CONNECTER + ingest_job.channel
        lookup_key = (LookUpKey.get_lookup_key(bosskey)).lookup_key
        [col_id, exp_id, ch_id] = lookup_key.split('&')
        project_info = [col_id, exp_id, ch_id]

        # Batch messages and write to file
        base_file_name = 'tasks_' + lookup_key + '_' + str(ingest_job.id)
        self.file_index = 0

        # open file
        f = io.StringIO()
        header = {
            'job_id': ingest_job.id,
            'upload_queue_url': ingest_job.upload_queue,
            'ingest_queue_url': ingest_job.ingest_queue
        }
        f.write(json.dumps(header))
        f.write('\n')
        num_msg_per_file = 0

        for time_step in range(ingest_job.t_start, ingest_job.t_stop, 1):
            # For each time step, compute the chunks and tile keys

            for z in range(ingest_job.z_start, ingest_job.z_stop, 16):
                for y in range(ingest_job.y_start, ingest_job.y_stop,
                               ingest_job.tile_size_y):
                    for x in range(ingest_job.x_start, ingest_job.x_stop,
                                   ingest_job.tile_size_x):

                        # compute the chunk indices
                        chunk_x = int(x / ingest_job.tile_size_x)
                        chunk_y = int(y / ingest_job.tile_size_y)
                        chunk_z = int(z / 16)

                        # Compute the number of tiles in the chunk
                        if ingest_job.z_stop - z >= 16:
                            num_of_tiles = 16
                        else:
                            num_of_tiles = ingest_job.z_stop - z

                        # Generate the chunk key
                        chunk_key = (BossBackend(
                            self.config)).encode_chunk_key(
                                num_of_tiles, project_info,
                                ingest_job.resolution, chunk_x, chunk_y,
                                chunk_z, time_step)

                        self.num_of_chunks += 1

                        # get the tiles keys for this chunk
                        for tile in range(z, z + num_of_tiles):
                            # get the tile key
                            tile_key = (BossBackend(
                                self.config)).encode_tile_key(
                                    project_info, ingest_job.resolution,
                                    chunk_x, chunk_y, tile, time_step)
                            self.count_of_tiles += 1

                            # Generate the upload task msg
                            msg = chunk_key + ',' + tile_key + '\n'
                            f.write(msg)
                            num_msg_per_file += 1

                            # Once the file holds MAX_NUM_MSG_PER_FILE messages,
                            # upload it and start a new file.
                            if num_msg_per_file == MAX_NUM_MSG_PER_FILE:
                                fname = base_file_name + '_' + str(
                                    self.file_index + 1) + '.txt'
                                self.upload_task_file(fname, f.getvalue())
                                self.file_index += 1
                                f.close()

                                f = io.StringIO()
                                header = {
                                    'job_id': ingest_job.id,
                                    'upload_queue_url':
                                    ingest_job.upload_queue,
                                    'ingest_queue_url': ingest_job.ingest_queue
                                }
                                f.write(json.dumps(header))
                                f.write('\n')
                                num_msg_per_file = 0

        # Edge case: the last file may hold fewer than MAX_NUM_MSG_PER_FILE messages
        if num_msg_per_file != 0:
            fname = base_file_name + '_' + str(self.file_index + 1) + '.txt'
            self.upload_task_file(fname, f.getvalue())
            f.close()
            self.file_index += 1
            num_msg_per_file = 0

        # Update status
        self.job.tile_count = self.count_of_tiles
        self.job.save()
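The tile and file counts follow directly from the loop bounds above: one tile per z slice per (x, y) chunk per time step, batched MAX_NUM_MSG_PER_FILE messages per file. A hedged helper that predicts both counts, assuming the fixed z chunk size of 16 used in the loops:

import math

def predict_task_counts(ingest_job, max_msg_per_file):
    """Sketch: mirror the nested loops above to count tiles and task files."""
    t_steps = ingest_job.t_stop - ingest_job.t_start
    z_tiles = ingest_job.z_stop - ingest_job.z_start  # one tile per z slice
    y_chunks = math.ceil((ingest_job.y_stop - ingest_job.y_start) / ingest_job.tile_size_y)
    x_chunks = math.ceil((ingest_job.x_stop - ingest_job.x_start) / ingest_job.tile_size_x)
    total_tiles = t_steps * z_tiles * y_chunks * x_chunks
    return total_tiles, math.ceil(total_tiles / max_msg_per_file)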
Example #19
def main():
    parser = argparse.ArgumentParser(description="Client for facilitating large-scale data ingest",
                                     formatter_class=argparse.RawDescriptionHelpFormatter,
                                     epilog="Visit https://docs.theBoss.io for more details")

    parser.add_argument("--api-token", "-a",
                        default=None,
                        help="Token for API authentication. If not provided and ndio is configured those credentials will automatically be used.")
    parser.add_argument("--job-id", "-j",
                        default=None,
                        help="ID of the ingest job if joining an existing ingest job")
    parser.add_argument("--log-file", "-l",
                        default=None,
                        help="Absolute path to the logfile to use")
    parser.add_argument("--log-level", "-v",
                        default="warning",
                        help="Log level to use: critical, error, warning, info, debug")
    parser.add_argument("--version",
                        action="store_true",
                        default=False,
                        help="Get the package version")
    parser.add_argument("--cancel", "-c",
                        action="store_true",
                        default=None,
                        help="Flag indicating if you'd like to cancel (and remove) an ingest job. This will not delete data already ingested, but will prevent continuing this ingest job.")
    parser.add_argument("--force", "-f",
                        action="store_true",
                        default=False,
                        help="Flag indicating if you'd like ignore all confirmation prompts.")
    parser.add_argument("--processes_nb", "-p", type=int,
                        default=1,
                        help="The number of client processes that will upload the images of the ingest job.")
    parser.add_argument("config_file", nargs='?', help="Path to the ingest job configuration file")

    args = parser.parse_args()

    # Get the version
    if args.version:
        check_version()
        return

    # Make sure you have a config file
    if args.config_file is None:
        if args.cancel:
            # If no config is provided and you are deleting, the client defaults to the production Boss stack
            boss_backend_params = {"client": {
                "backend": {
                    "name": "boss",
                    "class": "BossBackend",
                    "host": "api.theboss.io",
                    "protocol": "https"}}}
            backend = BossBackend(boss_backend_params)
            backend.setup(args.api_token)

            # Trying to cancel
            if args.job_id is None:
                parser.print_usage()
                print("Error: You must provide an ingest job ID to cancel")
                sys.exit(1)

            if not get_confirmation("Are you sure you want to cancel ingest job {}? ".format(args.job_id), args.force):
                print("Command ignored. Job not cancelled")
                sys.exit(0)

            backend.cancel(args.job_id)
            print("Ingest job {} successfully cancelled.".format(args.job_id))
            sys.exit(0)
        else:
            # Not deleting, so you need a config file
            parser.print_usage()
            print("Error: Ingest Job Configuration File is required")
            sys.exit(1)

    # Setup logging
    log_level = logging.getLevelName(args.log_level.upper())
    if not args.log_file:
        # Using default log path
        log_path = os.path.expanduser("~/.boss-ingest")
        log_file = os.path.join(log_path,
                                'ingest_log{}_pid{}.log'.format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"),
                                                                os.getpid()))
        # Make sure the logs dir exists if using the default log path
        if not os.path.exists(log_path):
            os.makedirs(log_path)
    else:
        log_file = args.log_file

    logging.basicConfig(level=log_level,
                        format='%(asctime)s %(levelname)-8s %(message)s',
                        datefmt='%m-%d %H:%M',
                        filename=log_file,
                        filemode='a')
    logging.getLogger('ingest-client').addHandler(logging.StreamHandler(sys.stdout))

    # Create an engine instance
    try:
        engine = Engine(args.config_file, args.api_token, args.job_id)
    except ConfigFileError as err:
        print("ERROR: {}".format(err))
        sys.exit(1)

    if args.cancel:
        # Trying to cancel
        if args.job_id is None:
            parser.print_usage()
            print("Error: You must provide an ingest job ID to cancel")
            sys.exit(1)

        if not get_confirmation("Are you sure you want to cancel ingest job {}? ".format(args.job_id), args.force):
            print("Command ignored. Job not cancelled")
            sys.exit(0)

        always_log_info("Attempting to cancel Ingest Job {}.".format(args.job_id))
        engine.cancel()
        always_log_info("Ingest job {} successfully cancelled.".format(args.job_id))
        sys.exit(0)

    else:
        # Trying to create or join an ingest
        if args.job_id is None:
            # Creating a new session - make sure the user wants to do this.
            print_estimated_job(args.config_file)
            print("\n")
            if not get_confirmation("Would you like to create a NEW ingest job?", args.force):
                # Don't want to create a new job
                print("Exiting")
                sys.exit(0)
        else:
            # Resuming a session - make sure the user wants to do this.
            if not get_confirmation("Are you sure you want to resume ingest job {}?".format(args.job_id), args.force):
                # Don't want to resume
                print("Exiting")
                sys.exit(0)

    # Setup engine instance.  Prompt user to confirm things if needed
    question_msgs = engine.setup()
    if question_msgs:
        for msg in question_msgs:
            if not get_confirmation(msg, args.force):
                print("Ingest job cancelled")
                sys.exit(0)

    if args.job_id is None:
        # Create job
        engine.create_job()
        always_log_info("Successfully Created Ingest Job ID: {}".format(engine.ingest_job_id))
        always_log_info("Note: You need this ID to continue this job later!")

        if not get_confirmation("\nDo you want to start uploading now?", args.force):
            print("OK - Your job is waiting for you. You can resume by providing Ingest Job ID '{}' to the client".format(engine.ingest_job_id))
            sys.exit(0)

        # Join job
        engine.join()

    else:
        # Join job
        engine.join()

    # Create worker processes
    workers = []
    for i in range(args.processes_nb):
        new_pipe = mp.Pipe(False)
        new_process = mp.Process(target=worker_process_run, args=(args.config_file, args.api_token,
                                                                  engine.ingest_job_id, new_pipe[0]))
        workers.append((new_process, new_pipe[1]))
        new_process.start()

        # Sleep to slowly ramp up load on lambda
        time.sleep(.25)

    # Start the main process engine
    start_time = time.time()
    should_run = True
    job_complete = False
    while should_run:
        try:
            engine.monitor(workers)
            # run will end if no more jobs are available, join other processes
            should_run = False
            job_complete = True
        except KeyboardInterrupt:
            # Make sure they want to stop this client
            while True:
                quit_uploading = input("Are you sure you want to quit uploading? (y/n)")
                if quit_uploading.lower() == "y":
                    always_log_info("Stopping upload engine.")
                    should_run = False
                    break
                elif quit_uploading.lower() == "n":
                    print("Continuing...")
                    break
                else:
                    print("Enter 'y' or 'n' for 'yes' or 'no'")

            # notify the worker processes that they should stop execution
            for _, worker_pipe in workers:
                worker_pipe.send(should_run)

    always_log_info("Waiting for worker processes to close...")
    time.sleep(1)  # Make sure workers have cleaned up
    for worker_process, worker_pipe in workers:
        worker_process.join()
        worker_pipe.close()

    if job_complete:
        always_log_info("Job Complete - No more tasks remaining.")
        always_log_info("Upload finished after {} minutes.".format((time.time() - start_time) / 60))
    else:
        always_log_info("Client exiting")
        always_log_info("Run time: {} minutes.".format((time.time() - start_time) / 60))
Example #20
    def test_delete(self):
        """Test deleting an existing ingest job - mock server response"""
        b = BossBackend(self.example_config_data)
        b.setup(self.api_token)

        b.cancel(23)
Example #21
def main(configuration=None, parser_args=None):
    """Client UI main

    Args:
        configuration(ingestclient.core.config.Configuration): A pre-loaded configuration instance
        parser_args(argparse.Namespace): Pre-parsed command line arguments

    Returns:
        None
    """
    parser = get_parser()
    if parser_args is None:
        args = parser.parse_args()
    else:
        args = parser_args

    # Get the version
    if args.version:
        check_version()
        return

    # Make sure you have a config file
    if args.config_file is None and configuration is None:
        if args.cancel:
            # If no config is provided and you are deleting, the client defaults to the production Boss stack
            boss_backend_params = {
                "client": {
                    "backend": {
                        "name": "boss",
                        "class": "BossBackend",
                        "host": "api.theboss.io",
                        "protocol": "https"
                    }
                }
            }
            backend = BossBackend(boss_backend_params)
            backend.setup(args.api_token)

            # Trying to cancel
            if args.job_id is None:
                parser.print_usage()
                print("Error: You must provide an ingest job ID to cancel")
                sys.exit(1)

            if not get_confirmation(
                    "Are you sure you want to cancel ingest job {}? ".format(
                        args.job_id), args.force):
                print("Command ignored. Job not cancelled")
                sys.exit(0)

            backend.cancel(args.job_id)
            print("Ingest job {} successfully cancelled.".format(args.job_id))
            sys.exit(0)
        else:
            # Not deleting, so you need a config file
            parser.print_usage()
            print("Error: Ingest Job Configuration File is required")
            sys.exit(1)

    # Setup logging
    log_level = logging.getLevelName(args.log_level.upper())
    if not args.log_file:
        # Using default log path
        log_path = os.path.expanduser("~/.boss-ingest")
        log_file = os.path.join(
            log_path, 'ingest_log{}_pid{}.log'.format(
                datetime.datetime.now().strftime("%Y%m%d-%H%M%S"),
                os.getpid()))
        # Make sure the logs dir exists if using the default log path
        if not os.path.exists(log_path):
            os.makedirs(log_path)
    else:
        log_file = args.log_file

    logging.basicConfig(level=log_level,
                        format='%(asctime)s %(levelname)-8s %(message)s',
                        datefmt='%m-%d %H:%M',
                        filename=log_file,
                        filemode='a')
    logging.getLogger('ingest-client').addHandler(
        logging.StreamHandler(sys.stdout))

    # Create an engine instance
    try:
        engine = Engine(config_file=args.config_file,
                        backend_api_token=args.api_token,
                        ingest_job_id=args.job_id,
                        configuration=configuration)
    except ConfigFileError as err:
        print("ERROR: {}".format(err))
        sys.exit(1)

    if args.cancel:
        # Trying to cancel
        if args.job_id is None:
            parser.print_usage()
            print("Error: You must provide an ingest job ID to cancel")
            sys.exit(1)

        if not get_confirmation(
                "Are you sure you want to cancel ingest job {}? ".format(
                    args.job_id), args.force):
            print("Command ignored. Job not cancelled")
            sys.exit(0)

        always_log_info("Attempting to cancel Ingest Job {}.".format(
            args.job_id))
        engine.cancel()
        always_log_info("Ingest job {} successfully cancelled.".format(
            args.job_id))
        sys.exit(0)

    else:
        # Trying to create or join an ingest
        if args.job_id is None:
            # Creating a new session - make sure the user wants to do this.
            print_estimated_job(config_file=args.config_file,
                                configuration=configuration)
            print("\n")
            if not get_confirmation(
                    "Would you like to create a NEW ingest job?", args.force):
                # Don't want to create a new job
                print("Exiting")
                sys.exit(0)
        else:
            # Resuming a session - make sure the user wants to do this.
            if not get_confirmation(
                    "Are you sure you want to resume ingest job {}?".format(
                        args.job_id), args.force):
                # Don't want to resume
                print("Exiting")
                sys.exit(0)

    # Setup engine instance.  Prompt user to confirm things if needed
    question_msgs = engine.setup()
    if question_msgs:
        for msg in question_msgs:
            if not get_confirmation(msg, args.force):
                print("Ingest job cancelled")
                sys.exit(0)

    if args.job_id is None:
        # Create job
        engine.create_job()
        always_log_info("Successfully Created Ingest Job ID: {}".format(
            engine.ingest_job_id))
        always_log_info("Note: You need this ID to continue this job later!")

        if not get_confirmation("\nDo you want to start uploading now?",
                                args.force):
            print(
                "OK - Your job is waiting for you. You can resume by providing Ingest Job ID '{}' to the client"
                .format(engine.ingest_job_id))
            sys.exit(0)

    # Join job
    engine.join()

    start_time = time.time()
    while upload(engine, args, configuration, start_time):
        pass