def main(clargs):
    """Validate the FastQ input directory, ensure the output directory exists,
    then hand off to the read-mapping step."""
    fastq_dir = clargs.fastq_directory
    output_dir = clargs.output_directory
    if not os.path.isdir(fastq_dir):
        error.fail("The given fastq directory does not exist.")
    if not os.path.isdir(output_dir):
        # Create the output directory (and any missing parents) on demand.
        os.makedirs(output_dir)
    readmap.main(clargs)
def main(clargs):
    """
    Record experiment-wide metadata so later commands need fewer arguments
    and the experiment itself is documented.

    Determines the alignment channel (auto-selected when there is only one,
    taken from the command line when given, otherwise requested interactively)
    and persists it along with the rest of the metadata.
    """
    channels = initialize.determine_channel_names(clargs.image_directory)
    requested = clargs.alignment_channel
    if len(channels) == 1:
        # Only one channel exists, so there is nothing to choose.
        alignment_channel = channels[0]
    else:
        alignment_channel = requested
    if not requested and len(channels) > 1:
        # Multiple channels and no explicit choice: ask the user.
        alignment_channel = initialize.request_alignment_channel(channels)
    if requested and requested not in channels:
        error.fail("The given alignment channel ('%s') does not exist in the image data. Available channels: %s" % (requested, ", ".join(channels)))
    log.debug("Initializing with alignment channel: %s" % alignment_channel)
    initialize.save_metadata(clargs, alignment_channel)
def load_metadata(image_directory):
    """
    Load the experiment metadata (YAML) previously written by 'champ init'.

    Exits via fail() when the directory was never initialized (no metadata
    file) or when the metadata file exists but cannot be parsed.
    """
    filename = get_existing_metadata_filename(image_directory)
    try:
        with open(filename) as fh:
            # NOTE(review): yaml.load without an explicit Loader can construct
            # arbitrary Python objects. The metadata file is written by this
            # tool itself, but consider yaml.safe_load if it could ever be
            # user-supplied.
            return yaml.load(fh)
    except IOError:
        fail("The image directory you provided (%s) has not been initialized. We need you to provide metadata with"
             "the 'champ init' command first." % str(image_directory))
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt are
        # not swallowed. Anything else means the file is corrupt/unparsable.
        fail("Something is wrong with the metadata file in the image directory. Try rerunning 'champ init'.", 2)
def get_end_tiles(cluster_strategies, rotation_adjustment, h5_filenames, alignment_channel, snr, metadata, sequencing_chip, fia):
    """
    Find the sequencing tiles that correspond to the left and right edges of
    the imaged area, trying each clustering strategy until both are found.

    Returns the end-tile mapping built by build_end_tiles(); exits via
    error.fail() when no strategy locates either end.
    """
    right_end_tiles = {}
    left_end_tiles = {}
    for cluster_strategy in cluster_strategies:
        with h5py.File(h5_filenames[0]) as first_file:
            grid = GridImages(first_file, alignment_channel)
            # no reason to use all cores yet, since we're IO bound?
            num_processes = len(h5_filenames)
            pool = multiprocessing.Pool(num_processes)
            log.info("Checking Columns")
            base_column_checker = functools.partial(
                check_column_for_alignment, cluster_strategy,
                rotation_adjustment, alignment_channel, snr, sequencing_chip,
                metadata['microns_per_pixel'], fia)
            log.info("Left Tiles: %s Right Tiles: %s" % (sequencing_chip.left_side_tiles, sequencing_chip.right_side_tiles))
            log.info("------------------Searching Left End Tile-------------------")
            left_end_tiles = dict(
                find_bounds(pool, h5_filenames, base_column_checker,
                            grid.columns, sequencing_chip.left_side_tiles,
                            sequencing_chip.cluster_size))
            log.info("Left End Tiles: %s " % left_end_tiles)
            log.info("******************Searching Right End Tile******************")
            # NOTE(review): the left search uses left_side_tiles but this uses
            # miseq_tiles rather than right_side_tiles (which the other
            # get_end_tiles variant uses) — confirm this asymmetry is intended.
            right_end_tiles = dict(
                find_bounds(pool, h5_filenames, base_column_checker,
                            reversed(grid.columns), sequencing_chip.miseq_tiles,
                            sequencing_chip.cluster_size))
            log.info("Right End Tiles: %s" % right_end_tiles)
            pool.close()
            pool.join()
            if left_end_tiles and right_end_tiles:
                break
    if not left_end_tiles and not right_end_tiles:
        error.fail("End tiles could not be found! Try adjusting the rotation or look at the raw images.")
    # Bug fix: the format arguments must be a tuple. The original
    # `% left_end_tiles, right_end_tiles` applied % to the dict alone
    # (TypeError: not enough arguments) and passed right_end_tiles as a
    # stray positional argument to log.info.
    log.info("Left End Tiles: %s and Right End Tiles: %s" % (left_end_tiles, right_end_tiles))
    default_left_tile, default_left_column, default_left_correlation = decide_default_tiles_and_columns(left_end_tiles)
    default_right_tile, default_right_column, default_right_correlation = decide_default_tiles_and_columns(right_end_tiles)
    end_tiles = build_end_tiles(h5_filenames, sequencing_chip, left_end_tiles,
                                default_left_tile, default_left_correlation,
                                right_end_tiles, default_right_tile,
                                default_left_column, default_right_column,
                                default_right_correlation)
    return end_tiles
def get_end_tiles(cluster_strategies, rotation_adjustment, h5_filenames, alignment_channel, snr, metadata, sequencing_chip, fia, floor_alignment):
    """
    Locate the sequencing tiles at the left and right edges of the imaged
    area, trying each clustering strategy in turn until both ends are found.

    floor_alignment selects floor (True) vs. ceiling (False) alignment and is
    forwarded to the column checker as a 0/1 flag.
    """
    left_end_tiles = {}
    right_end_tiles = {}
    # The checker expects an integer flag rather than a bool.
    floor = 1 if floor_alignment else 0
    for strategy in cluster_strategies:
        with h5py.File(h5_filenames[0]) as first_file:
            image_grid = GridImages(first_file, alignment_channel)
            # One worker per HDF5 file; this stage is IO-bound, so using
            # every core would not help.
            pool = multiprocessing.Pool(len(h5_filenames))
            column_checker = functools.partial(
                check_column_for_alignment, strategy, rotation_adjustment,
                alignment_channel, snr, sequencing_chip,
                metadata['microns_per_pixel'], fia, floor)
            left_end_tiles = dict(
                find_bounds(pool, h5_filenames, column_checker,
                            image_grid.columns,
                            sequencing_chip.left_side_tiles))
            right_end_tiles = dict(
                find_bounds(pool, h5_filenames, column_checker,
                            reversed(image_grid.columns),
                            sequencing_chip.right_side_tiles))
            pool.close()
            pool.join()
            if left_end_tiles and right_end_tiles:
                break
    if not left_end_tiles and not right_end_tiles:
        error.fail("End tiles could not be found! Try adjusting the rotation or look at the raw images.")
    default_left_tile, default_left_column = decide_default_tiles_and_columns(left_end_tiles)
    default_right_tile, default_right_column = decide_default_tiles_and_columns(right_end_tiles)
    return build_end_tiles(h5_filenames, sequencing_chip, left_end_tiles,
                           default_left_tile, right_end_tiles,
                           default_right_tile, default_left_column,
                           default_right_column)
def determine_process_files(layers, date, chipID, parent_dir):
    """
    Collect the experiment directories under parent_dir that should be processed.

    A directory qualifies when its underscore-separated name contains both
    `date` and `chipID`; when layers == 'total', '...histogram' directories
    are excluded, otherwise the name must also contain `layers`. Every
    qualifying directory must contain a 'C55_images' subfolder.

    Returns the joined paths of the qualifying directories; exits via fail()
    when none qualify or when one lacks a C55_images folder.
    """
    if layers == 'total':
        dirs = [
            os.path.join(parent_dir, name) for name in os.listdir(parent_dir)
            # Bug fix: isdir() must be given the joined path; the bare name
            # was previously checked relative to the current working directory.
            if os.path.isdir(os.path.join(parent_dir, name))
            and chipID in name.split('_') and date in name.split('_')
            and not name.endswith('histogram')
        ]
    else:
        dirs = [
            os.path.join(parent_dir, name) for name in os.listdir(parent_dir)
            if os.path.isdir(os.path.join(parent_dir, name))
            and layers in name.split('_') and chipID in name.split('_')
            and date in name.split('_')
        ]
    if len(dirs) == 0:
        fail("There is no folders to analyze!")
    for items in dirs:
        # Bug fix: `items` is already parent_dir/name; the original
        # os.path.join(parent_dir, items) double-joined the path whenever
        # parent_dir was relative.
        candidates = os.listdir(items)
        if 'C55_images' not in candidates:
            fail("Could not find C55_images folder in {}!".format(items))
    return dirs
def main(clargs):
    """
    Drive the NCB analysis pipeline: validate arguments, locate the
    experiment directories and read-name files, link intensities to
    sequences, and optionally run the downstream analysis/histogram steps.
    """
    log.debug("Checking data analysis files.")
    PhiX_intensity_stats = []
    if clargs.experimental_date is None:
        error.fail("Please specify the experimental date you want to analyze!")
    if clargs.alignment_layer is None:
        error.fail("Please select a layer you want to align, or both layers!")
    process_files = ncb.determine_process_files(clargs.alignment_layer,
                                                clargs.experimental_date,
                                                clargs.chip_id,
                                                clargs.parent_directory)
    # 'total' means both surfaces of the chip; otherwise just the named layer.
    if clargs.alignment_layer == 'total':
        layer_suffixes = ['ceiling', 'floor']
    else:
        layer_suffixes = [clargs.alignment_layer]
    read_names_files = [
        os.path.join(clargs.read_names_directory,
                     'read_names_of_all_seq_{}_{}.txt'.format(clargs.chip_id, suffix))
        for suffix in layer_suffixes
    ]
    for path in read_names_files:
        if not os.path.exists(path):
            error.fail("Please link the read_names_files with sequences first!")
    ncb.result_seq(process_files, read_names_files)
    image.division(process_files, clargs.flipud, clargs.fliplr,
                   clargs.rotation_adjustment)
    image.uneven_correction(process_files, clargs.chip_id, clargs.kernel_path,
                            clargs.lib_seq_len)
    ncb.intensity_seq_link(process_files, PhiX_intensity_stats)
    ncb.PhiX_background_eval(process_files, PhiX_intensity_stats)
    if not clargs.analysis:
        return
    seq = lib_seq_analysis.anal(process_files, clargs.parent_directory,
                                clargs.chip_id, clargs.experimental_date,
                                clargs.iteration, clargs.alignment_layer)
    if clargs.histogram:
        lib_seq_analysis.make_histogram(seq, clargs.experimental_date,
                                        clargs.parent_directory,
                                        clargs.alignment_layer, clargs.chip_id)
def main(clargs):
    """
    Align all image data for an experiment: preprocess images if needed,
    align the phiX fiducial channel against the sequencing tiles, then
    precision-align every protein channel using the phiX result as a
    starting point. Progress is checkpointed in an on-disk cache so the
    command can be re-run and resume where it left off.

    NOTE(review): `cluster_strategies` is not defined in this function —
    presumably a module-level constant; verify it is in scope.
    """
    metadata = initialize.load_metadata(clargs.image_directory)
    cache = initialize.load_cache(clargs.image_directory)
    if not cache['preprocessed']:
        # One-time image preprocessing; the cache records completion.
        preprocess(clargs.image_directory, cache)
    h5_filenames = load_filenames(clargs.image_directory)
    if len(h5_filenames) == 0:
        error.fail("There were no HDF5 files to process. You must have deleted or moved them after preprocessing them.")
    # Bundle all read-name file locations for this experiment.
    path_info = PathInfo(clargs.image_directory, metadata['mapped_reads'],
                         metadata['perfect_target_name'],
                         metadata['alternate_fiducial_reads'],
                         metadata['alternate_perfect_target_reads_filename'],
                         metadata['alternate_good_target_reads_filename'])
    # Ensure we have the directories where output will be written
    align.make_output_directories(h5_filenames, path_info)
    log.debug("Loading tile data.")
    # chip.load returns a chip class; instantiate it with port orientation.
    sequencing_chip = chip.load(metadata['chip_type'])(metadata['ports_on_right'])
    alignment_tile_data = align.load_read_names(path_info.aligning_read_names_filepath)
    perfect_tile_data = align.load_read_names(path_info.perfect_read_names)
    on_target_tile_data = align.load_read_names(path_info.on_target_read_names)
    all_tile_data = align.load_read_names(path_info.all_read_names_filepath)
    log.debug("Tile data loaded.")
    # We use one process per concentration. We could theoretically speed this up since our machine
    # has significantly more cores than the typical number of concentration points, but since it
    # usually finds a result in the first image or two, it's not going to deliver any practical benefits
    log.debug("Loading FastQImageAligner")
    fia = fastqimagealigner.FastqImageAligner(clargs.microns_per_pixel)
    fia.load_reads(alignment_tile_data)
    log.debug("Loaded %s points" % sum([len(v) for v in alignment_tile_data.values()]))
    log.debug("FastQImageAligner loaded.")
    if 'end_tiles' not in cache:
        # Expensive search for the tiles at the edges of the imaged area;
        # cache the result so reruns skip it.
        end_tiles = align.get_end_tiles(cluster_strategies,
                                        clargs.rotation_adjustment,
                                        h5_filenames,
                                        metadata['alignment_channel'],
                                        clargs.snr, metadata,
                                        sequencing_chip, fia)
        cache['end_tiles'] = end_tiles
        initialize.save_cache(clargs.image_directory, cache)
    else:
        log.debug("End tiles already calculated.")
        end_tiles = cache['end_tiles']
    # Free memory from the tile search before the heavy alignment passes.
    gc.collect()
    if not cache['phix_aligned']:
        # Align the phiX fiducial channel once per clustering strategy.
        for cluster_strategy in cluster_strategies:
            align.run(cluster_strategy, clargs.rotation_adjustment,
                      h5_filenames, path_info, clargs.snr, clargs.min_hits,
                      fia, end_tiles, metadata['alignment_channel'],
                      all_tile_data, metadata, clargs.make_pdfs,
                      sequencing_chip, clargs.process_limit)
        cache['phix_aligned'] = True
        initialize.save_cache(clargs.image_directory, cache)
    else:
        log.debug("Phix already aligned.")
    if clargs.fiducial_only:
        # the user doesn't want us to align the protein channels
        exit(0)
    gc.collect()
    protein_channels = [channel for channel in projectinfo.load_channels(clargs.image_directory)
                        if channel != metadata['alignment_channel']]
    if protein_channels:
        log.debug("Protein channels found: %s" % ", ".join(protein_channels))
    else:
        # protein is in phix channel, hopefully?
        log.warn("No protein channels detected. Assuming protein is in phiX channel: %s" % [metadata['alignment_channel']])
        protein_channels = [metadata['alignment_channel']]
    for channel_name in protein_channels:
        # Attempt to precision align protein channels using the phix channel alignment as a starting point.
        # Not all experiments have "on target" or "perfect target" reads - that only applies to CRISPR systems
        # (at the time of this writing anyway)
        for cluster_strategy in cluster_strategies:
            gc.collect()
            if on_target_tile_data:
                channel_combo = channel_name + "_on_target"
                combo_align(cluster_strategy, h5_filenames, channel_combo,
                            channel_name, path_info, on_target_tile_data,
                            all_tile_data, metadata, cache, clargs)
            gc.collect()
            if perfect_tile_data:
                channel_combo = channel_name + "_perfect_target"
                combo_align(cluster_strategy, h5_filenames, channel_combo,
                            channel_name, path_info, perfect_tile_data,
                            all_tile_data, metadata, cache, clargs)
            gc.collect()