Example #1
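This example builds the CLI for a gene-cell table script: two positional arguments (an S3 input path and an output file) plus flags for skipping log downloads, doing a dry run, and enabling debug logging. Note `add_help=False` together with an explicit `-h/--help` action, which moves the help flag into the "other options" group.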
import argparse

# get_logger and gene_cell_table are helpers defined elsewhere in the
# czbiohub/utilities package this snippet was taken from


def main():
    parser = argparse.ArgumentParser(
        description=(
            "Construct the gene-cell table for an experiment\n"
            "e.g. gene_cell_table"
            " s3://bucket-name/path/to/results path/to/output.csv"
        ),
        epilog="See https://github.com/czbiohub/utilities for more examples",
        add_help=False,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # basic usage
    basic_group = parser.add_argument_group("basic arguments")
    basic_group.add_argument("s3_input_path", help="Location of data on S3")
    basic_group.add_argument(
        "output_file", help="File to save the output, e.g. my_gc_table[.csv,.h5ad]"
    )

    # other arguments
    other_group = parser.add_argument_group("other options")
    other_group.add_argument(
        "--no_log", action="store_true", help="Don't try to download log files"
    )
    other_group.add_argument(
        "--dryrun", action="store_true", help="Don't actually download any files"
    )
    other_group.add_argument(
        "--debug", action="store_true", help="Set logging to debug level"
    )
    other_group.add_argument(
        "-h", "--help", action="help", help="Show this help message and exit"
    )

    args = parser.parse_args()

    main_logger, _lf, _fh = get_logger(__name__, args.debug, args.dryrun)

    gene_cell_table(args, main_logger, args.dryrun)
Example #2
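This example is the template the other scripts follow: `main` parses arguments and shells out through `log_command`, while the `__main__` block logs any exception with its traceback and, in the `finally` clause, uploads the log file to S3 before exiting.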

import subprocess

# get_logger, get_parser, log_command, and S3_LOG_DIR are defined in the
# surrounding czbiohub/utilities module


def main(logger):
    # get the argument parser and parse args
    parser = get_parser()
    args = parser.parse_args()

    # use the logger
    logger.info("Attempting to echo the message...")

    # run a subprocess and log the attempt; log_command returns a truthy
    # value on failure (see Example #5)
    failed = log_command(logger, "echo {}".format(args.message), shell=True)
    if failed:
        logger.error("echo command failed")


if __name__ == "__main__":
    mainlogger, log_file, file_handler = get_logger(__name__)

    try:
        main(mainlogger)
    except:  # deliberately broad: log every failure before re-raising
        mainlogger.info("An exception occurred", exc_info=True)
        raise
    finally:
        # upload the log file no matter what. You can remove this if you don't
        # want to accumulate logs
        if log_file:
            log_cmd = "aws s3 cp --quiet {} {}".format(log_file, S3_LOG_DIR)
            mainlogger.info(log_cmd)

            file_handler.close()
            subprocess.check_output(log_cmd, shell=True)
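`log_command` itself isn't shown on this page. A minimal sketch consistent with how these examples call it (a string or list command, `shell=True` passed through, a truthy return value on failure) might look like this; the real helper lives in czbiohub/utilities:

import subprocess


def log_command(logger, command, **kwargs):
    # Hypothetical sketch, not the actual utilities implementation.
    # Logs the command, streams its output to the logger, and returns
    # True if the command exited nonzero (truthy == failure).
    if isinstance(command, (list, tuple)):
        command = " ".join(command)
    logger.info(command)

    proc = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
        **kwargs,
    )
    for line in proc.stdout:
        logger.debug(line.rstrip())

    return proc.wait() != 0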
Example #3
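A fragment from the middle of a demux script: chunks of CBCL files are fanned out to a multiprocessing pool via `imap_unordered`, and once the pool drains, a queue-based logging thread is shut down with a 'STOP' sentinel. `pool`, `read_processor`, `rep_n`, and the `cbcl_*` lists are set up earlier in the full script.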
    # using imap_unordered to (maybe) keep memory usage low in the main thread
    try:
        logger.debug('starting demux')
        # each read_processor call gets one tuple from this zip: CBCL files,
        # the matching filters, a worker index (cycled), the worker count,
        # and a numbered output path
        for i, _ in enumerate(
                pool.imap_unordered(
                    read_processor,
                    zip(
                        rep_n(cbcl_file_lists[lane, part]
                              for lane, part in lane_parts),
                        rep_n(cbcl_filter_lists[lane][part]
                              for lane, part in lane_parts),
                        itertools.cycle(range(args.n_threads)),
                        itertools.repeat(args.n_threads),
                        map(output_file.format, itertools.count())))):
            if i % 100 == 0:
                logger.info(f'{i}')
    finally:
        pool.close()
        pool.join()

    log_queue.put('STOP')
    log_thread.join()

    logger.info('done!')


if __name__ == "__main__":
    mainlogger, log_file, file_handler = ut_log.get_logger('read_extraction')

    main(mainlogger)
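The `log_queue`/`log_thread` pair is created earlier in the full script; the 'STOP' sentinel pattern it uses is standard. A minimal sketch with hypothetical names (the real script, feeding a process pool, would need a multiprocessing-safe queue such as `multiprocessing.Manager().Queue()`):

import queue
import threading


def start_log_thread(logger):
    # Hypothetical sketch of the sentinel pattern used above: workers put
    # strings on the queue, and 'STOP' tells the thread to exit.
    log_queue = queue.Queue()

    def worker():
        # iter(get, sentinel) calls get() until it returns 'STOP'
        for msg in iter(log_queue.get, 'STOP'):
            logger.info(msg)

    log_thread = threading.Thread(target=worker, daemon=True)
    log_thread.start()
    return log_queue, log_thread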
Example #4
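The most involved example: `evros` is a launcher that wraps `aegea batch submit`. It imports the requested script as a module, lets that module supply default resource requirements via `get_default_requirements`, validates the leftover arguments against the script's own parser, and then assembles and runs the aegea command, parsing the job ID out of the output.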
import argparse
import importlib
import json
import re
import subprocess

# ut_log and resource_range are helpers from czbiohub/utilities
# (a sketch of resource_range follows this example)


def main():
    parser = argparse.ArgumentParser(
        prog="evros",
        description=("Run batch jobs on AWS\n"
                     "e.g. evros [options] demux.bcl2fastq [script args...]"),
        epilog="See https://github.com/czbiohub/utilities for more examples",
        add_help=False,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # basic usage
    basic_group = parser.add_argument_group("basic arguments")
    basic_group.add_argument(
        "script_name",
        help="Local path of the script to run, e.g. demux.bcl2fastq")
    basic_group.add_argument(
        "script_args",
        nargs=argparse.REMAINDER,
        help="Arguments for the script (everything after script_name)",
    )

    # instance requirements
    instance_group = parser.add_argument_group("customize the instance")
    image_group = instance_group.add_mutually_exclusive_group()
    image_group.add_argument(
        "--ecr-image",
        metavar="ECR",
        default="demuxer",
        help="ECR image to use for the job",
    )

    instance_group.add_argument("--queue",
                                default="aegea_batch",
                                help="Queue to submit the job")
    instance_group.add_argument(
        "--vcpus",
        type=resource_range("vcpus", 1, 64),
        help="Number of vCPUs needed, e.g. 16",
    )
    instance_group.add_argument(
        "--memory",
        type=resource_range("memory", 0, 256000),
        help="Amount of memory needed, in MB, e.g. 16000",
    )
    instance_group.add_argument(
        "--storage",
        type=resource_range("storage", 500, 16000),
        help="Request additional storage, in GiB (min 500)",
    )
    instance_group.add_argument(
        "--ulimits",
        metavar="U",
        default=None,
        nargs="+",
        help="Change instance ulimits, e.g. nofile:1000000",
    )
    instance_group.add_argument(
        "--environment",
        metavar="ENV",
        default=None,
        nargs="+",
        help="Set environment variables",
    )

    # other arguments
    other_group = parser.add_argument_group("other options")
    other_group.add_argument(
        "--dryrun",
        action="store_true",
        help="Print the command but don't launch the job",
    )
    other_group.add_argument("--branch",
                             default="master",
                             help="branch of utilities repo to use")
    other_group.add_argument("-d",
                             "--debug",
                             action="store_true",
                             help="Set logging to debug level")
    other_group.add_argument("-h",
                             "--help",
                             action="help",
                             help="show this help message and exit")

    args = parser.parse_args()

    logger = ut_log.get_logger(__name__, args.debug, args.dryrun)[0]

    logger.debug("Importing script as a module")
    if not args.script_name.startswith("."):
        args.script_name = f".{args.script_name}"
    script_module = importlib.import_module(args.script_name, "utilities")

    logger.debug("Checking for script default requirements")

    if hasattr(script_module, "get_default_requirements"):
        script_reqs = script_module.get_default_requirements()
        logger.debug(
            f"{args.script_name} defines default requirements: {script_reqs}")
        args = parser.parse_args(namespace=script_reqs)
    else:
        logger.warning(
            f"{args.script_name} does not define default requirements")

    logger.debug("Testing script args")

    if hasattr(script_module, "get_parser"):
        script_parser = script_module.get_parser()
        try:
            script_parser.parse_args(args.script_args)
        except:  # broad on purpose: argparse raises SystemExit on bad args
            logger.error(
                f"{args.script_name} failed with the given arg string\n\t{args.script_args}"
            )
            raise
    else:
        raise NotImplementedError(
            f"{args.script_name} must have a 'get_parser' method to test args")

    logger.debug("Script parsed args successfully")

    job_command = "; ".join((
        "PATH=$HOME/anaconda/bin:$PATH",
        "cd utilities",
        "git pull",
        f"git checkout {args.branch}",
        "python setup.py install",
        f"python -m utilities.{args.script_name} {' '.join(args.script_args)}",
    ))

    aegea_command = [
        "aegea",
        "batch",
        "submit",
        "--queue",
        args.queue,
        "--vcpus",
        str(args.vcpus),
        "--memory",
        str(args.memory),
        "--ecr-image",
        args.ecr_image,
    ]

    if args.storage:
        aegea_command.extend(["--storage", f"/mnt={args.storage}"])

    if args.ulimits:
        aegea_command.extend(["--ulimits", " ".join(args.ulimits)])

    if args.environment:
        aegea_command.extend(["--environment", " ".join(args.environment)])

    aegea_command.extend(["--command", f"'{job_command}'"])

    logger.info(f"executing command:\n\t{' '.join(aegea_command)}")
    if not args.dryrun:
        output = subprocess.check_output(" ".join(aegea_command), shell=True)
        try:
            output = json.loads(output)["jobId"]
            logger.info(f"Launched job with jobId: {output}")
        except (json.decoder.JSONDecodeError, KeyError):
            job_id_m = re.search(r'"jobId": "([\w\-]{36})"', output.decode())
            if job_id_m:
                logger.info(f"Launched job with jobId: {job_id_m.group(1)}")
            else:
                logger.info(output)
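`resource_range` isn't defined in this snippet; judging by its call sites, it is a factory that returns an argparse type function enforcing a numeric range. A hedged sketch:

import argparse


def resource_range(name, min_val, max_val):
    # Hypothetical sketch of the validator factory used above.
    def checker(value):
        value = int(value)
        if not min_val <= value <= max_val:
            raise argparse.ArgumentTypeError(
                "{} must be between {} and {}".format(name, min_val, max_val)
            )
        return value

    return checker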
Example #5
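Another fragment, from the end of a bcl2fastq demux run: it locates the generated HTML reports, copies them back to S3 with a bounded retry loop, and finally kills a background process started earlier in the full script.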
    # Move the bcl2fastq reports back to S3. The shell glob picks out the
    # run-specific Reports/html/<flowcell>/all/all/all directory.
    reports_path = subprocess.check_output(
        "ls -d {}".format(
            os.path.join(output_path, "Reports", "html", "*", "all", "all",
                         "all")),
        shell=True,
    ).rstrip()
    command = [
        "aws",
        "s3",
        "cp",
        "--quiet",
        reports_path.decode(),
        os.path.join(args.s3_report_dir, args.exp_id),
        "--recursive",
    ]
    # for/else retry loop: break out on success (log_command returns a
    # truthy value on failure); the else clause runs only if every one of
    # the S3_RETRY attempts failed
    for i in range(S3_RETRY):
        if not log_command(logger, command, shell=True):
            break
        logger.info("retrying cp reports")
    else:
        raise RuntimeError("couldn't cp reports")

    # p is a background subprocess started earlier in the full script
    p.kill()


if __name__ == "__main__":
    mainlogger, log_file, file_handler = get_logger(__name__, debug=True)
    main(mainlogger)