def get_arguments():
    """Parse the command line and resolve the output directory.

    Declares the ``--json-file``, ``--fasta``, ``--out-folder``,
    ``--cells-per-file`` and ``--log-level`` options, parses ``sys.argv``
    and creates the output directory if it does not exist yet.

    Returns
    -------
    Namespace
        The parsed arguments. ``output_folder`` is always a path object:
        either the folder given by the user or, when none was given, a
        folder next to the JSON file named after the file's stem.

    """
    usage_example = (
        "Example Command:\n"
        "--json-file=data/run1.B1phi1.i1.seqwish.w100.json "
        "--cells-per-file=5000 "
        "--fasta=data/run1.B1phi1.i1.seqwish.fasta")
    cli = argparse.ArgumentParser(
        description=usage_example,
        formatter_class=argparse.RawTextHelpFormatter)

    cli.add_argument('-j', '--json-file',
                     dest='json_file',
                     required=True,
                     help='input JSON file')

    cli.add_argument('-f', '--fasta',
                     dest='fasta',
                     help='Optional: Fasta file containing the pangenome '
                     'sequence generated by odgi for this Graph.')

    cli.add_argument('-o', '--out-folder',
                     dest='output_folder',
                     help='output folder')

    cli.add_argument(
        '-c', '--cells-per-file',
        dest='cells_per_file',
        type=int,
        default=5000,
        help='Tip: Adjust this number to get chunk files output close to 2MB. '
        'Number of cells per file (#bins per file = #cells / #paths)')

    cli.add_argument('-l', '--log-level',
                     choices=('DEBUG', 'INFO', 'WARNING', 'ERROR'),
                     default='DEBUG',
                     help='level of logging verbosity. DEBUG is most verbose')

    parsed = cli.parse_args()

    if parsed.output_folder:
        out_dir = osPath(parsed.output_folder)
    else:
        # No folder given: use a directory with the same name as the json
        json_path = osPath(parsed.json_file)
        out_dir = json_path.parent / json_path.stem
    parsed.output_folder = out_dir
    os.makedirs(out_dir, exist_ok=True)

    return parsed
def write_json_files(json_file, schematic: PangenomeSchematic):
    """Write every partition of ``schematic`` as its own file plus an index.

    The schematic is split using the module-level ``args.cells_per_file``.
    Files go to ``args.output_folder`` when it is set, otherwise to the
    directory that contains ``json_file``. After all partitions are written,
    the schematic writes its index file into the same folder.
    """
    partitions, bin2file_mapping = schematic.split(args.cells_per_file)

    default_folder = osPath(json_file).parent
    folder = osPath(args.output_folder) if args.output_folder else default_folder
    os.makedirs(folder, exist_ok=True)  # make directory for all files

    for chunk in partitions:
        target = folder / chunk.filename
        with target.open('w') as out_file:
            out_file.write(chunk.json_dump())
        print("Saved results to", target)

    schematic.write_index_file(folder, bin2file_mapping)
def setup_logging():
    """Attach a timestamped log-file handler to the root logger.

    Reads the module-level ``args``. The log file is
    ``<output_folder>/log.<timestamp>`` when an output folder is set
    (the folder is created if needed); otherwise it is the JSON input path
    with its suffix replaced by ``.log.<timestamp>``.
    """
    log_base = osPath(args.json_file).with_suffix('.log')
    out_dir = args.output_folder
    if out_dir:
        log_base = osPath(out_dir) / 'log'
        os.makedirs(out_dir, exist_ok=True)

    # Same layout as before: YYYYMMDD-HH:MM:SS
    stamp = datetime.now().strftime('%Y%m%d-%H:%M:%S')
    log_path = f"{log_base}.{stamp}"

    file_handler = logging.FileHandler(log_path)
    file_handler.setLevel(args.log_level)
    formatter = logging.Formatter(
        matrixcomponent.LOGGING_FORMAT_STR,
        datefmt=matrixcomponent.LOGGING_DATE_FORMAT)
    file_handler.setFormatter(formatter)

    root_logger = logging.getLogger()
    root_logger.addHandler(file_handler)
def main():
    """Entry point: parse arguments, set up logging and process the input.

    Stores the parsed arguments in the module-level ``args`` (other functions
    in this module read it), then processes one JSON file, or every file
    matching ``<prefix>*.json`` when ``--json-file`` ends with ``*``. Each
    file is parsed, segmented and written below
    ``<output_folder>/<bin_width>``.
    """
    global args
    args = get_arguments()
    setup_logging()

    if args.parallel_cores > 0:
        chunk_size = args.parallel_cores
    else:
        # os.cpu_count() returns None when the count cannot be determined;
        # fall back to one worker so that ``chunk_size * 2`` below stays valid.
        chunk_size = os.cpu_count() or 1

    parallel = Parallel(n_jobs=chunk_size, prefer="processes")

    if args.json_file.endswith("*"):
        files = glob(args.json_file + '.json')
        print("===Input Files Found===\n", '\n'.join(files))
        if not files:
            # Make the "nothing to do" case visible instead of exiting silently.
            LOGGER.warning(
                f'no input files match {args.json_file}.json, nothing to do')
    else:
        files = [args.json_file]

    for json_file in files:
        LOGGER.info(f'reading {osPath(json_file)}...\n')
        paths, pangenome_length, bin_width = JSONparser.parse(
            json_file, chunk_size * 2, parallel)  # give 2x jobs to do
        schematic = segment_matrix(paths, bin_width, args.cells_per_file,
                                   pangenome_length, args.no_adjacent_links,
                                   parallel)

        # this one spits out json and optionally other output files (fasta, ttl)
        # one sub-folder per bin width, named after the bin width itself
        folder_path = osPath(args.output_folder).joinpath(str(bin_width))
        write_files(folder_path, args.fasta, schematic, args.no_adjacent_links)

        LOGGER.info("Finished processing the file " + json_file)
Example #5
0
def resource_directory(_context, name, directory):
    """Register ``directory`` under ``name`` on the application root.

    When ``directory`` is truthy it is passed through ``_context.path`` and
    must be an existing directory, otherwise ``ConfigurationError`` is
    raised. Nothing is registered if ``name`` is already present in the
    root.
    """
    if directory:
        resolved = osPath(_context.path(directory))
        if not resolved.is_dir():
            raise ConfigurationError('No such directory', resolved)
        directory = resolved

    app_root = getUtility(IApplication, 'root')
    if name in app_root:
        return

    # imported here to prevent a circular import
    from plone.server.content import StaticDirectory
    app_root[name] = StaticDirectory(directory)
Example #6
0
def resource_directory(_context, name, directory):
    """Expose a directory as a static resource named ``name``.

    A truthy ``directory`` is passed through ``_context.path`` and checked
    to be an existing directory (``ConfigurationError`` otherwise). The
    resource is only added when the application root has no entry called
    ``name`` yet.
    """
    if directory:
        dir_path = osPath(_context.path(directory))
        if not dir_path.is_dir():
            raise ConfigurationError('No such directory', dir_path)
        directory = dir_path

    root = getUtility(IApplication, 'root')
    already_registered = name in root
    if not already_registered:
        # local import: prevents a circular import
        from plone.server.content import StaticDirectory
        root[name] = StaticDirectory(directory)
def get_arguments(argv=None):
    """Create the command line interface and return the command line arguments

    Parameters
    ----------
    argv : list of str, optional
        Argument strings to parse. When omitted, ``sys.argv[1:]`` is parsed,
        as before.

    Returns
    -------
    Namespace
        The command line arguments. ``output_folder`` is always a path
        object and the directory exists on return; ``parallel_cores`` is
        always a positive integer.

    Notes
    -----
    Exits with status 1 when no ``--out-folder`` is given and ``--json-file``
    neither ends with ``*`` nor with ``.json``.

    """
    # os.cpu_count() returns None when the count cannot be determined; use a
    # single core in that case so the comparisons below never see None.
    available_cores = os.cpu_count() or 1

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter,
        description="Example Command:\n"
        "--json-file=data/run1.B1phi1.i1.seqwish.w100.json --cells-per-file=5000 --fasta=data/run1.B1phi1.i1.seqwish.fasta"
    )

    parser.add_argument('-j',
                        '--json-file',
                        dest='json_file',
                        required=True,
                        help='input JSON file')

    parser.add_argument(
        '-f',
        '--fasta',
        dest='fasta',
        help=
        'Optional: Fasta file containing the pangenome sequence generated by '
        'odgi for this Graph.')

    parser.add_argument('-o',
                        '--out-folder',
                        dest='output_folder',
                        help='output folder')

    parser.add_argument(
        '-c',
        '--cells-per-file',
        dest='cells_per_file',
        default=5000,
        type=int,
        help='Tip: Adjust this number to get chunk files output close to 2MB. '
        'Number of cells per file (#bins per file = #cells / #paths)')

    parser.add_argument(
        '-l',
        '--log-level',
        default='DEBUG',
        choices=('DEBUG', 'INFO', 'WARNING', 'ERROR'),
        help='level of logging verbosity. DEBUG is most verbose')

    parser.add_argument(
        '-p',
        '--parallel-cores',
        dest='parallel_cores',
        default=available_cores,
        type=int,
        help='Tip: do not set this one to more than available CPU cores)')

    parser.add_argument(
        '-nal',
        '--no-adjacent-links',
        dest='no_adjacent_links',
        default=False,
        action='store_true',
        help='Switches off the add_adjacent_connector_column() routine)')

    args = parser.parse_args(argv)

    # file path logic for single or list of files with wildcard *
    if args.output_folder:
        args.output_folder = osPath(args.output_folder)
    elif args.json_file.endswith("*"):
        # directory is the user provided prefix; wrapped in a path object so
        # every branch yields the same type and a bare "*" maps to "."
        # instead of the empty string (which os.makedirs rejects)
        args.output_folder = osPath(args.json_file[:-1])
    elif args.json_file.endswith(".json"):
        # single json file: directory with the same name as the json
        json_path = osPath(args.json_file)
        args.output_folder = json_path.parent.joinpath(json_path.stem)
    else:
        print(
            "Please provide an --out-folder or end --json-file= prefix with a *",
            file=sys.stderr)
        # sys.exit rather than the site-provided exit() builtin, which is
        # not guaranteed to exist (e.g. under ``python -S``)
        sys.exit(1)
    os.makedirs(args.output_folder, exist_ok=True)

    # zero or negative means "use all available cores"
    if args.parallel_cores <= 0:
        args.parallel_cores = available_cores

    return args