Beispiel #1
0
    def run_is_complete(self):
        """
        Check that every sample expected to have CNV output has its markers.

        A sample is skipped (not checked for markers) when either:

        * its name contains one of ``self.ntc_patterns`` (no-template
          control), or
        * its HsMetrics file reports fewer than 2 million total reads,
          because CNVKit will not run for such a sample.

        Returns:
            bool: ``True`` if every non-skipped sample has at least one file
            matching each glob in ``self.run_complete_markers`` inside its
            directory under ``self.results_dir``. ``False`` if a marker is
            missing, or if a non-NTC sample has no HsMetrics file yet (the
            read count cannot be determined, so the run is treated as not
            complete rather than raising ``IndexError``).

        Raises:
            TypeError, ValueError: if the parsed metrics do not provide a
            ``total_reads`` value convertible to ``int`` (assumes the parser
            returns a dict-like object -- confirm against ``parsers``).
        """

        results_path = Path(self.results_dir)

        # CNVKit will not run if number of reads is less than 2 million
        min_reads_for_cnvkit = 2_000_000

        for sample in self.sample_names:

            # Decide on NTC status first: it needs no file access, and an NTC
            # without an HsMetrics file must not break the check.
            if any(ntc in sample for ntc in self.ntc_patterns):
                continue

            sample_dir = results_path.joinpath(sample)

            # get the total number of reads in the sample
            hs_metrics_files = list(
                sample_dir.glob(f'*{sample}*_HsMetrics.txt'))

            if not hs_metrics_files:
                # Metrics not written yet, so the sample cannot be complete.
                return False

            parsed_hs_metrics_data = parsers.parse_hs_metrics_file(
                hs_metrics_files[0])
            total_reads = int(parsed_hs_metrics_data.get('total_reads'))

            if total_reads < min_reads_for_cnvkit:
                continue

            for marker in self.run_complete_markers:

                # One match is enough; stop globbing at the first hit.
                if next(sample_dir.glob(marker), None) is None:
                    return False

        return True
Beispiel #2
0
	def get_hs_metrics(self):
		"""
		Collect the parsed HsMetrics data for every sample in the run.

		For each name in ``self.sample_names`` the first file matching
		``post_processing/results/metrics/*<sample>_hs_metrics.txt`` under
		``self.results_dir`` is handed to ``parsers.parse_hs_metrics_file``.

		Returns:
			dict: sample name -> value returned by the parser for that sample.

		Raises:
			IndexError: if no metrics file matches for a sample.
		"""

		metrics_by_sample = {}
		run_root = Path(self.results_dir)

		for name in self.sample_names:

			pattern = f'post_processing/results/metrics/*{name}_hs_metrics.txt'

			# Only the first match is used; an empty match list raises IndexError.
			first_match = [*run_root.glob(pattern)][0]

			metrics_by_sample[name] = parsers.parse_hs_metrics_file(first_match)

		return metrics_by_sample
Beispiel #3
0
    def get_hs_metrics(self):
        """
        Collect the parsed HsMetrics data for every sample in the run.

        Each sample is looked up in its own directory
        ``<results_dir>/<sample>`` using the glob
        ``*<sample>*_HsMetrics.txt``; the first match is passed to
        ``parsers.parse_hs_metrics_file``.

        Returns:
            dict: sample name -> value returned by the parser for that sample.

        Raises:
            IndexError: if a sample directory holds no matching metrics file.
        """

        base_dir = Path(self.results_dir)
        collected = {}

        for sample_name in self.sample_names:

            candidates = list(
                (base_dir / sample_name).glob(f'*{sample_name}*_HsMetrics.txt'))

            # Only the first match is used; no match raises IndexError here.
            metrics_path = candidates[0]

            collected[sample_name] = parsers.parse_hs_metrics_file(metrics_path)

        return collected
Beispiel #4
0
    def run_is_valid(self):
        """
        Check that a run-level pipeline finished correctly.

        Three groups of glob patterns are checked under ``self.results_dir``:

        * ``self.run_sample_expected_files`` -- each must match exactly one
          file inside the directory of every checked sample.
        * ``self.run_expected_files`` -- each must match exactly one file at
          run level.
        * ``self.run_not_expected_files`` -- none may match anything; a match
          indicates the pipeline has not finished correctly (e.g.
          intermediate files left behind).

        A sample is skipped for the per-sample check when its name contains
        one of ``self.ntc_patterns``, or when its HsMetrics file reports
        fewer than 2 million total reads (CNVKit will not run for it).

        Returns:
            bool: ``True`` if all checks pass. ``False`` if any check fails,
            or if a non-NTC sample has no HsMetrics file (its read count
            cannot be determined, so the run is treated as invalid rather
            than raising ``IndexError``).

        Raises:
            TypeError, ValueError: if the parsed metrics do not provide a
            ``total_reads`` value convertible to ``int`` (assumes the parser
            returns a dict-like object -- confirm against ``parsers``).
        """

        results_path = Path(self.results_dir)

        # CNVKit will not run for the sample if number of reads is less than
        # 2 million
        min_reads_for_cnvkit = 2_000_000

        for sample in self.sample_names:

            # Decide on NTC status first: it needs no file access, and an NTC
            # without an HsMetrics file must not break the check.
            if any(ntc in sample for ntc in self.ntc_patterns):
                continue

            sample_dir = results_path.joinpath(sample)

            # get the total number of reads in the sample
            hs_metrics_files = list(
                sample_dir.glob(f'*{sample}*_HsMetrics.txt'))

            if not hs_metrics_files:
                # Without metrics the sample cannot be assessed; not valid.
                return False

            parsed_hs_metrics_data = parsers.parse_hs_metrics_file(
                hs_metrics_files[0])
            total_reads = int(parsed_hs_metrics_data.get('total_reads'))

            if total_reads < min_reads_for_cnvkit:
                continue

            for file in self.run_sample_expected_files:

                # Exactly one match is required: zero means missing output,
                # more than one means an ambiguous result.
                if len(list(sample_dir.glob(file))) != 1:
                    return False

        for file in self.run_expected_files:

            if len(list(results_path.glob(file))) != 1:
                return False

        # check files we do not want to be there are not there
        for file in self.run_not_expected_files:

            if next(results_path.glob(file), None) is not None:
                return False

        return True