Example #1
0
def main(clargs):
    """Validate the working directories, then run the read mapper.

    The fastq directory must already exist; a missing one is reported through
    ``error.fail``. The output directory is created when it is absent. The
    parsed arguments are then handed unchanged to ``readmap.main``.
    """
    fastq_directory_found = os.path.isdir(clargs.fastq_directory)
    if not fastq_directory_found:
        error.fail("The given fastq directory does not exist.")

    output_directory = clargs.output_directory
    if not os.path.isdir(output_directory):
        os.makedirs(output_directory)

    readmap.main(clargs)
def main(clargs):
    """ Record the experiment details that every later analysis relies on.

        Saving them once means subsequent commands need fewer arguments, and the saved metadata doubles as
        documentation of the experiment. """
    channels = initialize.determine_channel_names(clargs.image_directory)
    requested_channel = clargs.alignment_channel
    channel_count = len(channels)

    if channel_count == 1:
        # with a single channel there is nothing to choose between
        alignment_channel = channels[0]
    elif channel_count > 1 and not requested_channel:
        # several candidates and no preference given on the command line
        alignment_channel = initialize.request_alignment_channel(channels)
    else:
        alignment_channel = requested_channel

    # an explicitly requested channel must be one of the channels found in the image data
    if requested_channel and requested_channel not in channels:
        error.fail("The given alignment channel ('%s') does not exist in the image data. Available channels: %s" % (requested_channel, ", ".join(channels)))

    log.debug("Initializing with alignment channel: %s" % alignment_channel)
    initialize.save_metadata(clargs, alignment_channel)
Example #3
0
def load_metadata(image_directory):
    """Load the experiment metadata stored for an image directory.

    Args:
        image_directory: directory whose metadata file should be read.

    Returns:
        The object parsed from the YAML metadata file.

    Failure handling:
        ``fail`` is called with a hint to run 'champ init' when the file cannot be opened, and with a
        second argument of 2 when anything else goes wrong while reading or parsing the file.
    """
    filename = get_existing_metadata_filename(image_directory)
    try:
        with open(filename) as fh:
            # NOTE(review): yaml.load without an explicit Loader can construct arbitrary Python objects.
            # If the metadata only ever holds plain types, yaml.safe_load would be the safer call - confirm
            # against whatever writes this file before switching.
            return yaml.load(fh)
    except IOError:
        fail("The image directory you provided (%s) has not been initialized. We need you to provide metadata with "
             "the 'champ init' command first." % str(image_directory))
    except Exception:
        # Exception rather than a bare except, so Ctrl-C and interpreter exit are not reported as a bad file
        fail("Something is wrong with the metadata file in the image directory. Try rerunning 'champ init'.", 2)
Example #4
0
def get_end_tiles(cluster_strategies, rotation_adjustment, h5_filenames,
                  alignment_channel, snr, metadata, sequencing_chip, fia):
    """Search the image columns for the tiles at the left and right ends of the chip.

    Each cluster strategy is tried in turn; the loop stops at the first strategy
    for which both the left and the right search return something.

    Args:
        cluster_strategies: iterable of strategies, tried in order.
        rotation_adjustment: forwarded to ``check_column_for_alignment``.
        h5_filenames: HDF5 image files. The first one is opened to build the
            image grid, and one worker process is started per file.
        alignment_channel: channel used to build the grid; also forwarded to
            ``check_column_for_alignment``.
        snr: forwarded to ``check_column_for_alignment``.
        metadata: mapping that provides ``'microns_per_pixel'``.
        sequencing_chip: provides ``left_side_tiles``, ``miseq_tiles`` and
            ``cluster_size``.
        fia: forwarded to ``check_column_for_alignment``.

    Returns:
        Whatever ``build_end_tiles`` produces from the search results.
    """
    right_end_tiles = {}
    left_end_tiles = {}
    for cluster_strategy in cluster_strategies:
        with h5py.File(h5_filenames[0]) as first_file:
            grid = GridImages(first_file, alignment_channel)
            # no reason to use all cores yet, since we're IO bound?
            num_processes = len(h5_filenames)
            pool = multiprocessing.Pool(num_processes)
            try:
                log.info("Checking Columns")
                base_column_checker = functools.partial(
                    check_column_for_alignment, cluster_strategy,
                    rotation_adjustment, alignment_channel, snr,
                    sequencing_chip, metadata['microns_per_pixel'], fia)
                log.info(
                    "------------------Searching Left End Tile-------------------")
                left_end_tiles = dict(
                    find_bounds(pool, h5_filenames, base_column_checker,
                                grid.columns, sequencing_chip.left_side_tiles,
                                sequencing_chip.cluster_size))
                log.info("Left End Tiles: %s " % left_end_tiles)
                log.info(
                    "******************Searching Right End Tile******************")
                # NOTE(review): the right-hand search is given miseq_tiles while the
                # left-hand search gets left_side_tiles - confirm this is intended.
                right_end_tiles = dict(
                    find_bounds(pool, h5_filenames, base_column_checker,
                                reversed(grid.columns),
                                sequencing_chip.miseq_tiles,
                                sequencing_chip.cluster_size))
                log.info("Right End Tiles:  %s" % right_end_tiles)
            finally:
                # shut the workers down even when a search raises, so they are not leaked
                pool.close()
                pool.join()
            if left_end_tiles and right_end_tiles:
                break
    if not left_end_tiles and not right_end_tiles:
        error.fail(
            "End tiles could not be found! Try adjusting the rotation or look at the raw images."
        )
        # NOTE(review): presumably error.fail terminates the process, in which case
        # this line never runs - confirm. The arguments are now passed as one tuple;
        # previously the format call itself raised a TypeError.
        log.info("Left End Tiles: %s and Right End Tiles: %s" %
                 (left_end_tiles, right_end_tiles))
    # NOTE(review): when only one side was found, execution continues with an empty
    # dict for the other side - verify decide_default_tiles_and_columns copes with that.
    default_left_tile, default_left_column, default_left_correlation = decide_default_tiles_and_columns(
        left_end_tiles)
    default_right_tile, default_right_column, default_right_correlation = decide_default_tiles_and_columns(
        right_end_tiles)
    end_tiles = build_end_tiles(h5_filenames, sequencing_chip, left_end_tiles,
                                default_left_tile, default_left_correlation,
                                right_end_tiles, default_right_tile,
                                default_left_column, default_right_column,
                                default_right_correlation)
    return end_tiles
Example #5
0
def get_end_tiles(cluster_strategies, rotation_adjustment, h5_filenames,
                  alignment_channel, snr, metadata, sequencing_chip, fia,
                  floor_alignment):
    """Search the image columns for the tiles at the left and right ends of the chip.

    Each cluster strategy is tried in turn; the loop stops at the first strategy
    for which both the left and the right search return something.

    Args:
        cluster_strategies: iterable of strategies, tried in order.
        rotation_adjustment: forwarded to ``check_column_for_alignment``.
        h5_filenames: HDF5 image files. The first one is opened to build the
            image grid, and one worker process is started per file.
        alignment_channel: channel used to build the grid; also forwarded to
            ``check_column_for_alignment``.
        snr: forwarded to ``check_column_for_alignment``.
        metadata: mapping that provides ``'microns_per_pixel'``.
        sequencing_chip: provides ``left_side_tiles`` and ``right_side_tiles``.
        fia: forwarded to ``check_column_for_alignment``.
        floor_alignment: truthy/falsy switch, forwarded to the column checker
            as the integer 1 or 0.

    Returns:
        Whatever ``build_end_tiles`` produces from the search results.
    """
    right_end_tiles = {}
    left_end_tiles = {}
    # the column checker receives the flag as an int; it does not change per strategy
    floor = 1 if floor_alignment else 0
    for cluster_strategy in cluster_strategies:
        with h5py.File(h5_filenames[0]) as first_file:
            grid = GridImages(first_file, alignment_channel)
            # no reason to use all cores yet, since we're IO bound?
            num_processes = len(h5_filenames)
            pool = multiprocessing.Pool(num_processes)
            try:
                base_column_checker = functools.partial(
                    check_column_for_alignment, cluster_strategy,
                    rotation_adjustment, alignment_channel, snr,
                    sequencing_chip, metadata['microns_per_pixel'], fia,
                    floor)
                left_end_tiles = dict(
                    find_bounds(pool, h5_filenames, base_column_checker,
                                grid.columns,
                                sequencing_chip.left_side_tiles))
                right_end_tiles = dict(
                    find_bounds(pool, h5_filenames, base_column_checker,
                                reversed(grid.columns),
                                sequencing_chip.right_side_tiles))
            finally:
                # shut the workers down even when a search raises, so they are not leaked
                pool.close()
                pool.join()
            if left_end_tiles and right_end_tiles:
                break
    if not left_end_tiles and not right_end_tiles:
        error.fail(
            "End tiles could not be found! Try adjusting the rotation or look at the raw images."
        )
    # NOTE(review): when only one side was found, execution continues with an empty
    # dict for the other side - verify decide_default_tiles_and_columns copes with that.
    default_left_tile, default_left_column = decide_default_tiles_and_columns(
        left_end_tiles)
    default_right_tile, default_right_column = decide_default_tiles_and_columns(
        right_end_tiles)
    end_tiles = build_end_tiles(h5_filenames, sequencing_chip, left_end_tiles,
                                default_left_tile, right_end_tiles,
                                default_right_tile, default_left_column,
                                default_right_column)
    return end_tiles
Example #6
0
def determine_process_files(layers, date, chipID, parent_dir):
    """Collect the sub-directories of ``parent_dir`` that should be processed.

    Directory names are split on ``'_'``; a directory is selected when both
    ``chipID`` and ``date`` appear among the resulting tokens and, additionally:

    * ``layers == 'total'``: the name does not end with ``'histogram'``;
    * otherwise: ``layers`` is also one of the tokens.

    Every selected directory must contain an entry named ``C55_images``.

    Args:
        layers: ``'total'`` or the layer token to look for.
        date: date token to look for.
        chipID: chip identifier token to look for.
        parent_dir: directory whose immediate children are examined.

    Returns:
        List of selected directories, each joined onto ``parent_dir``.

    Failure handling:
        ``fail`` is called when nothing is selected or when a selected
        directory has no ``C55_images`` entry.
    """

    def is_wanted(name):
        tokens = name.split('_')
        if chipID not in tokens or date not in tokens:
            return False
        if layers == 'total':
            return not name.endswith('histogram')
        return layers in tokens

    dirs = []
    for name in os.listdir(parent_dir):
        path = os.path.join(parent_dir, name)
        # test the joined path: the bare name would be resolved against the
        # current working directory rather than parent_dir
        if os.path.isdir(path) and is_wanted(name):
            dirs.append(path)

    if not dirs:
        fail("There is no folders to analyze!")
    for directory in dirs:
        # entries already include parent_dir, so they are listed as they are
        if 'C55_images' not in os.listdir(directory):
            fail("Could not find C55_images folder in {}!".format(directory))
    return dirs
Example #7
0
def main(clargs):
    """Run the sequence/intensity pipeline for one chip and experimental date.

    The arguments are validated first, then the directories to process are
    determined and the read-name files are checked for existence. After that
    the steps run in a fixed order: ``ncb.result_seq``, ``image.division``,
    ``image.uneven_correction``, ``ncb.intensity_seq_link`` and
    ``ncb.PhiX_background_eval``. The library sequence analysis runs when
    ``clargs.analysis`` is set, and the histogram is drawn from its result
    when ``clargs.histogram`` is set as well.

    Args:
        clargs: parsed command line arguments.

    Failure handling:
        ``error.fail`` is called when the date or the layer is missing, when
        a histogram is requested without the analysis, or when a read-name
        file does not exist.
    """
    log.debug("Checking data analysis files.")
    # shared between the two PhiX steps below; presumably filled in by
    # intensity_seq_link and read by PhiX_background_eval - TODO confirm
    PhiX_intensity_stats = []

    if clargs.experimental_date is None:
        error.fail("Please specify the experimental date you want to analyze!")
    if clargs.alignment_layer is None:
        error.fail("Please select a layer you want to align, or both layers!")
    if clargs.histogram and not clargs.analysis:
        # the histogram is drawn from the analysis result; report this up front
        # instead of hitting an unbound variable after all the processing
        error.fail(
            "The histogram is built from the analysis results. Please enable the analysis as well!"
        )
    process_files = ncb.determine_process_files(clargs.alignment_layer,
                                                clargs.experimental_date,
                                                clargs.chip_id,
                                                clargs.parent_directory)
    if clargs.alignment_layer == 'total':
        read_names_files = [
            os.path.join(
                clargs.read_names_directory,
                'read_names_of_all_seq_{}_ceiling.txt'.format(clargs.chip_id)),
            os.path.join(
                clargs.read_names_directory,
                'read_names_of_all_seq_{}_floor.txt'.format(clargs.chip_id))
        ]
    else:
        read_names_files = [
            os.path.join(
                clargs.read_names_directory,
                'read_names_of_all_seq_{}_{}.txt'.format(
                    clargs.chip_id, clargs.alignment_layer))
        ]
    for read_names_file in read_names_files:
        if not os.path.exists(read_names_file):
            error.fail(
                "Please link the read_names_files with sequences first!")

    ncb.result_seq(process_files, read_names_files)
    image.division(process_files, clargs.flipud, clargs.fliplr,
                   clargs.rotation_adjustment)
    image.uneven_correction(process_files, clargs.chip_id, clargs.kernel_path,
                            clargs.lib_seq_len)
    ncb.intensity_seq_link(process_files, PhiX_intensity_stats)
    ncb.PhiX_background_eval(process_files, PhiX_intensity_stats)
    if clargs.analysis:
        seq = lib_seq_analysis.anal(process_files, clargs.parent_directory,
                                    clargs.chip_id, clargs.experimental_date,
                                    clargs.iteration, clargs.alignment_layer)
        if clargs.histogram:
            lib_seq_analysis.make_histogram(seq, clargs.experimental_date,
                                            clargs.parent_directory,
                                            clargs.alignment_layer,
                                            clargs.chip_id)
Example #8
0
def main(clargs):
    """Align the alignment channel and then every protein channel of an experiment.

    Steps, in order:
      1. Load the metadata and cache for ``clargs.image_directory`` and run ``preprocess`` when the cache
         says it has not been done yet.
      2. Load the HDF5 filenames (failing when there are none), build the ``PathInfo`` and create the
         output directories.
      3. Load the read names and a ``FastqImageAligner`` holding the alignment reads.
      4. Find the end tiles, or reuse the ones stored in the cache.
      5. Run ``align.run`` for each cluster strategy unless the cache marks the phiX alignment as done.
      6. Unless ``clargs.fiducial_only`` is set (in which case the process exits with status 0), run
         ``combo_align`` per protein channel and cluster strategy for the on-target and perfect-target reads.

    Args:
        clargs: parsed command line arguments.
    """
    import sys  # local import: only needed for the early exit below

    metadata = initialize.load_metadata(clargs.image_directory)
    cache = initialize.load_cache(clargs.image_directory)
    if not cache['preprocessed']:
        preprocess(clargs.image_directory, cache)

    h5_filenames = load_filenames(clargs.image_directory)
    if len(h5_filenames) == 0:
        error.fail(
            "There were no HDF5 files to process. You must have deleted or moved them after preprocessing them."
        )

    path_info = PathInfo(clargs.image_directory, metadata['mapped_reads'],
                         metadata['perfect_target_name'],
                         metadata['alternate_fiducial_reads'],
                         metadata['alternate_perfect_target_reads_filename'],
                         metadata['alternate_good_target_reads_filename'])
    # Ensure we have the directories where output will be written
    align.make_output_directories(h5_filenames, path_info)

    log.debug("Loading tile data.")
    sequencing_chip = chip.load(metadata['chip_type'])(
        metadata['ports_on_right'])

    alignment_tile_data = align.load_read_names(
        path_info.aligning_read_names_filepath)
    perfect_tile_data = align.load_read_names(path_info.perfect_read_names)
    on_target_tile_data = align.load_read_names(path_info.on_target_read_names)
    all_tile_data = align.load_read_names(path_info.all_read_names_filepath)
    log.debug("Tile data loaded.")

    # We use one process per concentration. We could theoretically speed this up since our machine
    # has significantly more cores than the typical number of concentration points, but since it
    # usually finds a result in the first image or two, it's not going to deliver any practical benefits
    log.debug("Loading FastQImageAligner")
    fia = fastqimagealigner.FastqImageAligner(clargs.microns_per_pixel)
    fia.load_reads(alignment_tile_data)
    log.debug("Loaded %s points" %
              sum(len(v) for v in alignment_tile_data.values()))
    log.debug("FastQImageAligner loaded.")

    if 'end_tiles' not in cache:
        end_tiles = align.get_end_tiles(cluster_strategies,
                                        clargs.rotation_adjustment,
                                        h5_filenames,
                                        metadata['alignment_channel'],
                                        clargs.snr, metadata, sequencing_chip,
                                        fia)
        # store the result so a rerun can skip the search
        cache['end_tiles'] = end_tiles
        initialize.save_cache(clargs.image_directory, cache)
    else:
        log.debug("End tiles already calculated.")
        end_tiles = cache['end_tiles']
    gc.collect()

    if not cache['phix_aligned']:
        for cluster_strategy in cluster_strategies:
            align.run(cluster_strategy, clargs.rotation_adjustment,
                      h5_filenames, path_info, clargs.snr, clargs.min_hits,
                      fia, end_tiles, metadata['alignment_channel'],
                      all_tile_data, metadata, clargs.make_pdfs,
                      sequencing_chip, clargs.process_limit)
            # the cache is updated and saved after every strategy
            cache['phix_aligned'] = True
            initialize.save_cache(clargs.image_directory, cache)
    else:
        # pairs with the cache check above; it used to hang off the for loop and
        # therefore fired right after aligning instead of when skipping
        log.debug("Phix already aligned.")

    if clargs.fiducial_only:
        # the user doesn't want us to align the protein channels
        sys.exit(0)

    gc.collect()
    protein_channels = [
        channel
        for channel in projectinfo.load_channels(clargs.image_directory)
        if channel != metadata['alignment_channel']
    ]
    if protein_channels:
        log.debug("Protein channels found: %s" % ", ".join(protein_channels))
    else:
        # protein is in phix channel, hopefully?
        log.warn(
            "No protein channels detected. Assuming protein is in phiX channel: %s"
            % [metadata['alignment_channel']])
        protein_channels = [metadata['alignment_channel']]

    for channel_name in protein_channels:
        # Attempt to precision align protein channels using the phix channel alignment as a starting point.
        # Not all experiments have "on target" or "perfect target" reads - that only applies to CRISPR systems
        # (at the time of this writing anyway)
        for cluster_strategy in cluster_strategies:
            gc.collect()
            if on_target_tile_data:
                channel_combo = channel_name + "_on_target"
                combo_align(cluster_strategy, h5_filenames, channel_combo,
                            channel_name, path_info, on_target_tile_data,
                            all_tile_data, metadata, cache, clargs)
            gc.collect()
            if perfect_tile_data:
                channel_combo = channel_name + "_perfect_target"
                combo_align(cluster_strategy, h5_filenames, channel_combo,
                            channel_name, path_info, perfect_tile_data,
                            all_tile_data, metadata, cache, clargs)
            gc.collect()