Ejemplo n.º 1
0
    def add_decompressed_fastq(self, sample_obj: models.Sample) -> bool:
        """Register the decompressed FASTQ files of a sample in Housekeeper.

        Returns False when no latest version is found for the sample or when
        ``crunchy_api`` reports that the SPRING decompression of one of the
        compression objects is not done. Returns True otherwise, including
        when no SPRING paths were found at all (only a warning is logged).
        """
        sample_id = sample_obj.internal_id
        LOG.info("Adds FASTQ to Housekeeper for %s", sample_id)

        latest_version = self.get_latest_version(sample_id)
        if not latest_version:
            LOG.warning("Could not find version obj for %s", sample_id)
            return False

        compression_objs: List[CompressionData] = files.get_spring_paths(latest_version)
        if not compression_objs:
            # Only a warning: the loop below is then a no-op and True is returned
            LOG.warning("Could not find any spring paths for %s", sample_id)

        for compression_obj in compression_objs:
            decompression_done = self.crunchy_api.is_spring_decompression_done(compression_obj)
            if not decompression_done:
                LOG.info("SPRING to FASTQ decompression not finished %s", sample_id)
                return False

            fastq_pair = (compression_obj.fastq_first, compression_obj.fastq_second)
            # Skip the pair as soon as one of its files is reported to be in the version
            already_in_version = any(
                files.is_file_in_version(version_obj=latest_version, path=fastq_path)
                for fastq_path in fastq_pair
            )
            if already_in_version:
                LOG.warning("FASTQ files already exists in Housekeeper")
                continue

            LOG.info("Adding decompressed FASTQ files to Housekeeper for sample %s ", sample_id)
            self.add_fastq_hk(
                sample_obj=sample_obj,
                fastq_first=fastq_pair[0],
                fastq_second=fastq_pair[1],
            )

        return True
Ejemplo n.º 2
0
    def decompress_spring(self, sample_id: str) -> bool:
        """Start SPRING to FASTQ decompression for every SPRING archive of a sample.

        The compression objects are taken from the latest version of the sample.
        For each of them, in order:

            - ``crunchy_api`` is asked whether decompression is possible; if it is
              not, the function stops and returns False
            - ``crunchy_api.spring_to_fastq`` is called for the archive
            - ``update_metadata_date`` is called on the SPRING metadata file

        Returns False when no latest version is found or when decompression is
        not possible for one of the archives, True otherwise.
        """
        latest_version = self.get_latest_version(sample_id)
        if not latest_version:
            return False

        for spring_archive in files.get_spring_paths(latest_version):
            can_decompress = self.crunchy_api.is_spring_decompression_possible(spring_archive)
            if not can_decompress:
                # Archives handled in earlier iterations have already been submitted
                LOG.info("SPRING to FASTQ decompression not possible for %s", sample_id)
                return False

            LOG.info(
                "Decompressing %s to FASTQ format for sample %s ",
                spring_archive.spring_path,
                sample_id,
            )
            self.crunchy_api.spring_to_fastq(spring_archive, sample_id=sample_id)
            update_metadata_date(spring_archive.spring_metadata_path)

        return True
Ejemplo n.º 3
0
 def check_fastq_links(self, case_id: str) -> None:
     """Check if all fastq files are linked in housekeeper

     For every sample linked to the case, both FASTQ paths of each compression
     object are looked up among the files tagged "fastq" in the sample's latest
     version. For every path that is missing,
     ``compress_api.add_decompressed_fastq`` is called for the sample. A warning
     is logged for a compression object when any of those calls returns a
     falsy result.
     """
     case_obj: models.Family = self.store.family(case_id)
     for link in case_obj.links:
         sample_id = link.sample.internal_id
         version_obj: hk_models.Version = self.compress_api.get_latest_version(
             sample_id)
         fastq_files: Dict[Path, hk_models.File] = files.get_hk_files_dict(
             tags=["fastq"], version_obj=version_obj)
         compression_objs: List[CompressionData] = files.get_spring_paths(
             version_obj)
         for compression_obj in compression_objs:
             result = True
             fastq_paths = (compression_obj.fastq_first,
                            compression_obj.fastq_second)
             for fastq_path in fastq_paths:
                 if fastq_path in fastq_files:
                     LOG.info("%s from sample %s is already in housekeeper",
                              fastq_path, sample_id)
                     continue
                 # Log the path that is actually missing (first or second read file)
                 LOG.info("Adding %s to sample %s in housekeeper",
                          fastq_path, sample_id)
                 added: bool = self.compress_api.add_decompressed_fastq(
                     sample_obj=link.sample)
                 # Accumulate, so a failure for the first file is not hidden by
                 # a successful call for the second one
                 result = result and added
             if not result:
                 LOG.warning("Files where not added to fastq!")
0
 def get_compression_objects(self, case_id: str) -> List[CompressionData]:
     """Collect the compression objects of every sample linked to a case.

     For each link of the case, the latest version of the sample is fetched via
     ``compress_api`` and its SPRING paths are appended to the result in link order.
     """
     family: models.Family = self.store.family(case_id)
     # Lazy generator: each version is fetched right before its SPRING paths are read
     latest_versions = (
         self.compress_api.get_latest_version(link.sample.internal_id)
         for link in family.links
     )
     return [
         compression_obj
         for version in latest_versions
         for compression_obj in files.get_spring_paths(version)
     ]
Ejemplo n.º 5
0
def correct_spring_paths(hk_api: HousekeeperAPI,
                         bundle_name: str = None,
                         dry_run: bool = False) -> None:
    """Move misplaced spring archives to the paths recorded in housekeeper.

    Intended as a one-off fix. When symlinked fastq files were sent for compression, the
    spring archive was created next to the symlink targets instead of at the location
    specified in housekeeper. For every version returned by ``get_versions``, each spring
    path that does not exist is looked up in the directory given by ``get_true_dir``; if
    both the spring file and its ``.json`` metadata file are found there, they are moved
    to the housekeeper paths.

    With ``dry_run`` set, the intended moves are only logged.
    """
    for version_obj in get_versions(hk_api=hk_api, bundle_name=bundle_name):
        nr_spring_objs = 0
        for compression_obj in get_spring_paths(version_obj):
            nr_spring_objs += 1

            hk_spring_path = compression_obj.spring_path
            if hk_spring_path.exists():
                # Nothing to fix when the file is already at the housekeeper path
                continue

            hk_metadata_path = compression_obj.spring_metadata_path
            # Directory where the spring files actually live
            actual_dir = get_true_dir(hk_spring_path.parent)
            if not actual_dir:
                LOG.info("Could not find location of spring files")
                continue

            actual_spring_path = actual_dir / hk_spring_path.name
            # Metadata file: same name with the last suffix dropped and ".json" applied
            actual_metadata_path = actual_spring_path.with_suffix("").with_suffix(".json")
            if not actual_spring_path.exists() or not actual_metadata_path.exists():
                LOG.info(
                    "Could not find spring and/or spring metadata files, skipping"
                )
                continue

            LOG.info(
                "Moving existing spring file (and config) %s to hk bundle path %s",
                actual_spring_path,
                hk_spring_path,
            )
            if dry_run:
                continue

            # The housekeeper spring path is known not to exist at this point
            actual_spring_path.replace(hk_spring_path)
            actual_metadata_path.replace(hk_metadata_path)

        if nr_spring_objs == 0:
            LOG.debug("Could not find any spring files")