def get_arguments():
    """Build the command line parser, parse the command line and return it.

    When ``--out-folder`` is not supplied, the output folder defaults to a
    directory named after the JSON file (its stem) located next to that
    file. The output folder is created if it does not exist yet.

    Returns
    -------
    Namespace
        The parsed command line arguments; ``output_folder`` is an
        ``osPath`` instance.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter,
        description="Example Command:\n"
        "--json-file=data/run1.B1phi1.i1.seqwish.w100.json --cells-per-file=5000 --fasta=data/run1.B1phi1.i1.seqwish.fasta"
    )

    # (flags, keyword arguments) for every option, in registration order.
    option_specs = (
        (('-j', '--json-file'),
         dict(dest='json_file', required=True, help='input JSON file')),
        (('-f', '--fasta'),
         dict(dest='fasta',
              help='Optional: Fasta file containing the pangenome sequence generated by '
              'odgi for this Graph.')),
        (('-o', '--out-folder'),
         dict(dest='output_folder', help='output folder')),
        (('-c', '--cells-per-file'),
         dict(dest='cells_per_file',
              default=5000,
              type=int,
              help='Tip: Adjust this number to get chunk files output close to 2MB. '
              'Number of cells per file (#bins per file = #cells / #paths)')),
        (('-l', '--log-level'),
         dict(default='DEBUG',
              choices=('DEBUG', 'INFO', 'WARNING', 'ERROR'),
              help='level of logging verbosity. DEBUG is most verbose')),
    )
    for flags, options in option_specs:
        cli.add_argument(*flags, **options)

    parsed = cli.parse_args()

    if parsed.output_folder:
        parsed.output_folder = osPath(parsed.output_folder)
    else:
        # directory with the same name as the json
        source = osPath(parsed.json_file)
        parsed.output_folder = source.parent.joinpath(source.stem)

    os.makedirs(parsed.output_folder, exist_ok=True)
    return parsed
def write_json_files(json_file, schematic: PangenomeSchematic):
    """Write every partition of ``schematic`` to its own file, then the index.

    The schematic is split using the module-level ``args.cells_per_file``.
    Files go to ``args.output_folder`` when it is set, otherwise to the
    directory containing ``json_file``.

    Parameters
    ----------
    json_file
        Path of the input JSON file; only its parent directory is used.
    schematic : PangenomeSchematic
        Object providing ``split()`` and ``write_index_file()``.
    """
    partitions, bin2file_mapping = schematic.split(args.cells_per_file)

    target_dir = osPath(json_file).parent
    if args.output_folder:
        target_dir = osPath(args.output_folder)
    os.makedirs(target_dir, exist_ok=True)  # make directory for all files

    for chunk in partitions:
        chunk_path = target_dir.joinpath(chunk.filename)
        with chunk_path.open('w') as handle:
            handle.write(chunk.json_dump())
        print("Saved results to", chunk_path)

    schematic.write_index_file(target_dir, bin2file_mapping)
def setup_logging():
    """Add a timestamped log file handler to the root logger.

    The log file is ``<output_folder>/log.<timestamp>`` when the module-level
    ``args.output_folder`` is set (the folder is created if needed),
    otherwise ``<json_file with a .log suffix>.<timestamp>``. The handler
    uses ``args.log_level`` and the format strings from ``matrixcomponent``.
    """
    log_base = osPath(args.json_file).with_suffix('.log')
    if args.output_folder:
        log_base = osPath(args.output_folder).joinpath('log')
        os.makedirs(args.output_folder, exist_ok=True)

    # Timestamp layout: YYYYMMDD-HH:MM:SS
    stamp = datetime.now().strftime('%Y%m%d-%H:%M:%S')
    log_path = f"{log_base}.{stamp}"

    formatter = logging.Formatter(matrixcomponent.LOGGING_FORMAT_STR,
                                  datefmt=matrixcomponent.LOGGING_DATE_FORMAT)
    file_handler = logging.FileHandler(log_path)
    file_handler.setLevel(args.log_level)
    file_handler.setFormatter(formatter)
    logging.getLogger().addHandler(file_handler)
def main():
    """Entry point: parse arguments, set up logging and process the input(s).

    Stores the parsed arguments in the module-level ``args``. If
    ``--json-file`` ends with ``*`` it is treated as a prefix and expanded
    with ``glob(prefix* + '.json')``; otherwise it is used as a single file.
    For each file the JSON is parsed, segmented, and the results are written
    to ``<output_folder>/<bin_width>``.
    """
    global args
    args = get_arguments()
    setup_logging()

    # Worker count: the requested number of cores, or all CPUs if not positive.
    chunk_size = args.parallel_cores if args.parallel_cores > 0 else os.cpu_count()
    parallel = Parallel(n_jobs=chunk_size, prefer="processes")

    if not args.json_file.endswith("*"):
        files = [args.json_file]
    else:
        files = glob(args.json_file + '.json')
        print("===Input Files Found===\n", '\n'.join(files))

    for json_file in files:
        LOGGER.info(f'reading {osPath(json_file)}...\n')
        # give 2x jobs to do
        paths, pangenome_length, bin_width = JSONparser.parse(
            json_file, chunk_size * 2, parallel)
        schematic = segment_matrix(paths, bin_width, args.cells_per_file,
                                   pangenome_length, args.no_adjacent_links,
                                   parallel)

        # this one spits out json and optionally other output files (fasta, ttl)
        folder_path = osPath(args.output_folder).joinpath(str(bin_width))  # full path
        write_files(folder_path, args.fasta, schematic,
                    args.no_adjacent_links)

        LOGGER.info("Finished processing the file " + json_file)
def resource_directory(_context, name, directory):
    """Register ``directory`` as a static directory under ``name``.

    A truthy ``directory`` is resolved through ``_context.path()`` and must
    be an existing directory. The entry is stored on the ``IApplication``
    utility named ``'root'`` only if ``name`` is not already present there.

    Raises
    ------
    ConfigurationError
        If the resolved path is not a directory.
    """
    if directory:
        resolved = osPath(_context.path(directory))
        if not resolved.is_dir():
            raise ConfigurationError('No such directory', resolved)
        directory = resolved

    app_root = getUtility(IApplication, 'root')
    if name in app_root:
        return

    # prevent circular import
    from plone.server.content import StaticDirectory
    app_root[name] = StaticDirectory(directory)
def resource_directory(_context, name, directory):
    """Expose a directory as ``root[name]`` unless that name is already taken.

    When ``directory`` is truthy it is first resolved via
    ``_context.path()`` and checked to be a directory; a failed check raises
    ``ConfigurationError``. ``root`` is the ``IApplication`` utility named
    ``'root'``.
    """
    if directory:
        candidate = osPath(_context.path(directory))
        if candidate.is_dir():
            directory = candidate
        else:
            raise ConfigurationError('No such directory', candidate)

    registry = getUtility(IApplication, 'root')
    already_registered = name in registry
    if not already_registered:
        # prevent circular import
        from plone.server.content import StaticDirectory
        registry[name] = StaticDirectory(directory)
def get_arguments():
    """Create the command line interface and return the command line arguments

    The output folder is resolved as follows and is always created:

    * ``--out-folder`` given: that folder.
    * ``--json-file`` ends with ``*``: the prefix before the ``*``.
    * ``--json-file`` ends with ``.json``: a folder named after the file's
      stem, next to the file.
    * anything else: an error is printed to stderr and the process exits
      with status 1.

    A non-positive ``--parallel-cores`` value is replaced by the CPU count.

    Returns
    -------
    Namespace
        The command line arguments; ``output_folder`` is always an
        ``osPath`` instance and ``parallel_cores`` is a positive integer.

    Raises
    ------
    SystemExit
        With code 1 when no output folder can be derived from the arguments.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter,
        description="Example Command:\n"
        "--json-file=data/run1.B1phi1.i1.seqwish.w100.json --cells-per-file=5000 --fasta=data/run1.B1phi1.i1.seqwish.fasta"
    )
    parser.add_argument('-j',
                        '--json-file',
                        dest='json_file',
                        required=True,
                        help='input JSON file')
    parser.add_argument(
        '-f',
        '--fasta',
        dest='fasta',
        help=
        'Optional: Fasta file containing the pangenome sequence generated by '
        'odgi for this Graph.')
    parser.add_argument('-o',
                        '--out-folder',
                        dest='output_folder',
                        help='output folder')
    parser.add_argument(
        '-c',
        '--cells-per-file',
        dest='cells_per_file',
        default=5000,
        type=int,
        help='Tip: Adjust this number to get chunk files output close to 2MB. '
        'Number of cells per file (#bins per file = #cells / #paths)')
    parser.add_argument(
        '-l',
        '--log-level',
        default='DEBUG',
        choices=('DEBUG', 'INFO', 'WARNING', 'ERROR'),
        help='level of logging verbosity. DEBUG is most verbose')
    parser.add_argument(
        '-p',
        '--parallel-cores',
        dest='parallel_cores',
        default=os.cpu_count(),
        type=int,
        help='Tip: do not set this one to more than available CPU cores)')
    parser.add_argument(
        '-nal',
        '--no-adjacent-links',
        dest='no_adjacent_links',
        default=False,
        action='store_true',
        help='Switches off the add_adjacent_connector_column() routine)')

    args = parser.parse_args()

    # file path logic for single or list of files with wildcard *
    if args.output_folder:
        args.output_folder = osPath(args.output_folder)
    elif args.json_file.endswith("*"):
        # directory is user provided prefix; wrapping it in osPath keeps the
        # type consistent with the other branches and turns an empty prefix
        # (bare "*") into the current directory instead of "".
        args.output_folder = osPath(args.json_file[:-1])
    elif args.json_file.endswith(".json"):
        # single json file
        source = osPath(args.json_file)
        args.output_folder = source.parent.joinpath(source.stem)
    else:
        print(
            "Please provide an --out-folder or end --json-file= prefix with a *",
            file=sys.stderr)
        # sys.exit instead of the site-provided exit(), which is not
        # guaranteed to exist (e.g. python -S, frozen applications).
        sys.exit(1)

    os.makedirs(args.output_folder, exist_ok=True)

    # os.cpu_count() may return None, so guard both the comparison and the
    # fallback value.
    if not args.parallel_cores or args.parallel_cores <= 0:
        args.parallel_cores = os.cpu_count() or 1

    return args