Beispiel #1
0
    def __init__(self, base_dir):
        """Locate the RTA QC summary files under *base_dir* and parse them.

        The summary folder is ``<base_dir>/Data/reports/Summary``. One
        ``read<N>.xml`` file is expected in it for every key returned by
        ``IlluminaConfiguration(base_dir).reads()``. Once all files have
        been found, ``readSummaries()`` is called.

        Raises:
            AssertionError: if the summary folder or one of the expected
                ``read<N>.xml`` files does not exist.
        """
        self._dir = base_dir
        self._configuration = IlluminaConfiguration(base_dir)
        self._metrics_path = os.path.join(base_dir, "Data", "reports",
                                          "Summary")
        # Raised explicitly instead of via ``assert`` so the check is not
        # stripped when Python runs with -O; the exception type and message
        # stay the same for existing callers.
        if not os.path.exists(self._metrics_path):
            raise AssertionError(
                "The RTA QC metrics folder %s does not exist"
                % self._metrics_path)

        # Check that the readN.xml qc metrics files exist
        self._metric_files = []
        for read in self._configuration.reads().keys():
            qc_file = os.path.join(self._metrics_path, "read%s.xml" % read)
            if not os.path.exists(qc_file):
                raise AssertionError(
                    "The RTA QC metrics file %s does not exist" % qc_file)
            self._metric_files.append(qc_file)

        # Parse the XML files
        self.readSummaries()
Beispiel #2
0
 def __init__(self, base_dir):
     """Locate the RTA QC summary files under *base_dir* and parse them.

     The summary folder is ``<base_dir>/Data/reports/Summary``. One
     ``read<N>.xml`` file is expected in it for every key returned by
     ``IlluminaConfiguration(base_dir).reads()``. Once all files have
     been found, ``readSummaries()`` is called.

     Raises:
         AssertionError: if the summary folder or one of the expected
             ``read<N>.xml`` files does not exist.
     """
     self._dir = base_dir
     self._configuration = IlluminaConfiguration(base_dir)
     self._metrics_path = os.path.join(base_dir, "Data", "reports", "Summary")
     # Raised explicitly instead of via ``assert`` so the check is not
     # stripped when Python runs with -O; the exception type and message
     # stay the same for existing callers.
     if not os.path.exists(self._metrics_path):
         raise AssertionError(
             "The RTA QC metrics folder %s does not exist"
             % self._metrics_path)

     # Check that the readN.xml qc metrics files exist
     self._metric_files = []
     for read in self._configuration.reads().keys():
         qc_file = os.path.join(self._metrics_path, "read%s.xml" % read)
         if not os.path.exists(qc_file):
             raise AssertionError(
                 "The RTA QC metrics file %s does not exist" % qc_file)
         self._metric_files.append(qc_file)

     # Parse the XML files
     self.readSummaries()
Beispiel #3
0
class RTAQCMetrics:
    """Per-lane QC metrics read from the RTA ``read<N>.xml`` summary files.

    The files are looked up in ``<base_dir>/Data/reports/Summary``. Each
    parsed XML root is stored in ``self._qc_roots`` under the value of its
    ``Read`` attribute, and metrics are read from the attributes of its
    ``Lane`` child elements.
    """

    def __init__(self, base_dir):
        """Locate the RTA QC summary files under *base_dir* and parse them.

        One ``read<N>.xml`` file is expected for every key returned by
        ``IlluminaConfiguration(base_dir).reads()``.

        Raises:
            AssertionError: if the summary folder or one of the expected
                ``read<N>.xml`` files does not exist.
        """
        self._dir = base_dir
        self._configuration = IlluminaConfiguration(base_dir)
        self._metrics_path = os.path.join(base_dir, "Data", "reports", "Summary")
        # Raised explicitly instead of via ``assert`` so the check is not
        # stripped when Python runs with -O; the exception type and message
        # stay the same for existing callers.
        if not os.path.exists(self._metrics_path):
            raise AssertionError(
                "The RTA QC metrics folder %s does not exist"
                % self._metrics_path)

        # Check that the readN.xml qc metrics files exist
        self._metric_files = []
        for read in self._configuration.reads().keys():
            qc_file = os.path.join(self._metrics_path, "read%s.xml" % read)
            if not os.path.exists(qc_file):
                raise AssertionError(
                    "The RTA QC metrics file %s does not exist" % qc_file)
            self._metric_files.append(qc_file)

        # Parse the XML files
        self.readSummaries()

    @staticmethod
    def metrics():
        """Return the metric table used by ``getQCstats()``.

        Each row is ``[result_key, lane_xml_attribute, clu_dens]``; the
        third item is passed on as the ``clu_dens`` flag of
        ``getLaneMetric()``. A new list is built on every call.
        """
        return [
            ['error_rate', 'ErrRatePhiX', False],
            ['error_rate_sd', 'ErrRatePhiXSD', False],
            ['raw_cluster_dens', 'ClustersRaw', True],
            ['raw_cluster_dens_sd', 'ClustersRawSD', True],
            ['prc_cluster_pf', 'PrcPFClusters', False],
            ['prc_cluster_pf_sd', 'PrcPFClustersSD', False],
            ['pf_cluster_dens', 'ClustersPF', True],
            ['pf_cluster_dens_sd', 'ClustersPFSD', True],
            ['phasing', 'Phasing', False],
            ['prephasing', 'Prephasing', False],
            ['prc_aligned', 'PrcAlign', False],
            ['prc_aligned_sd', 'PrcAlignSD', False],
        ]

    def configuration(self):
        """Return the ``IlluminaConfiguration`` created in ``__init__``."""
        return self._configuration

    # getQCstats() is probably the method you usually want to call
    def getQCstats(self):
        """Return every metric from ``metrics()`` for all reads and lanes.

        The result maps each metric's result key to the dictionary
        returned by ``getAllLaneMetrics()`` for that metric.
        """
        qc_stats = {}
        for name, attribute, clu_dens in self.metrics():
            qc_stats[name] = self.getAllLaneMetrics(attribute, clu_dens)
        return qc_stats

    def readSummaries(self):
        """Parse every file in ``self._metric_files`` into ``self._qc_roots``.

        Roots are keyed by their ``Read`` attribute, with ``"0"`` used when
        the attribute is absent.
        """
        self._qc_roots = {}
        for qc_file in self._metric_files:
            root = ET.parse(qc_file).getroot()
            if root is None:
                continue
            self._qc_roots[root.get("Read", "0")] = root

    def getAllSingleLaneMetric(self, metric, lane, clu_dens=False):
        """Return *metric* for one *lane* across all parsed reads.

        The result maps ``"read<Read>"`` to the value returned by
        ``getSingleLaneMetric()`` for that read.
        """
        metrics = {}
        for read, root in self._qc_roots.items():
            metrics["read%s" % read] = self.getSingleLaneMetric(
                root, metric, lane, clu_dens)
        return metrics

    def getSingleLaneMetric(self, root, metric, lane, clu_dens=False):
        """Return *metric* for a single *lane* of one parsed summary *root*.

        *lane* may be given as an int or a string.

        Raises:
            KeyError: if *root* has no ``Lane`` element whose ``key``
                equals ``str(lane)``.
        """
        lane_values = self.getLaneMetric(root, metric, clu_dens, lane)
        # getLaneMetric() keys its result by the string "key" attribute, so
        # the lookup has to use the string form as well; indexing with an
        # int lane would raise KeyError even when the lane is present.
        return lane_values[str(lane)]

    def getAllLaneMetrics(self, metric, clu_dens):
        """Return *metric* for all lanes across all parsed reads.

        The result maps ``"read<Read>"`` to the per-lane dictionary
        returned by ``getLaneMetric()`` for that read.
        """
        metrics = {}
        for read, root in self._qc_roots.items():
            metrics["read%s" % read] = self.getLaneMetric(root, metric, clu_dens)
        return metrics

    def getLaneMetric(self, root, metric, clu_dens, lane=None):
        """Read the *metric* attribute from the ``Lane`` children of *root*.

        Args:
            root: parsed summary root element.
            metric: name of the ``Lane`` attribute to read.
            clu_dens: when true, each value is multiplied by the root's
                ``densityRatio`` attribute, divided by 1000, rounded to an
                integer and returned as a string with a ``'K'`` suffix.
            lane: when not None, only the lane whose ``key`` attribute
                equals ``str(lane)`` is included.

        Returns:
            A dict mapping each lane's ``key`` attribute (a string) to a
            float, or to the formatted string when *clu_dens* is true.

        Raises:
            TypeError: if a needed attribute is missing, because
                ``float(None)`` is then evaluated.
        """
        dens_ratio = None
        if clu_dens:
            dens_ratio = float(root.get("densityRatio"))
        wanted = None if lane is None else str(lane)

        values = {}
        for lane_elem in root.findall("Lane"):
            key = lane_elem.get("key")
            if wanted is not None and key != wanted:
                continue
            val = float(lane_elem.get(metric))
            if clu_dens:
                values[key] = str(int(round((dens_ratio * val) / 1000))) + 'K'
            else:
                values[key] = val
        return values
Beispiel #4
0
class RTAQCMetrics:
    """Per-lane QC metrics read from the RTA ``read<N>.xml`` summary files.

    The files are looked up in ``<base_dir>/Data/reports/Summary``. Each
    parsed XML root is stored in ``self._qc_roots`` under the value of its
    ``Read`` attribute, and metrics are read from the attributes of its
    ``Lane`` child elements.
    """

    def __init__(self, base_dir):
        """Locate the RTA QC summary files under *base_dir* and parse them.

        One ``read<N>.xml`` file is expected for every key returned by
        ``IlluminaConfiguration(base_dir).reads()``.

        Raises:
            AssertionError: if the summary folder or one of the expected
                ``read<N>.xml`` files does not exist.
        """
        self._dir = base_dir
        self._configuration = IlluminaConfiguration(base_dir)
        self._metrics_path = os.path.join(base_dir, "Data", "reports",
                                          "Summary")
        # Raised explicitly instead of via ``assert`` so the check is not
        # stripped when Python runs with -O; the exception type and message
        # stay the same for existing callers.
        if not os.path.exists(self._metrics_path):
            raise AssertionError(
                "The RTA QC metrics folder %s does not exist"
                % self._metrics_path)

        # Check that the readN.xml qc metrics files exist
        self._metric_files = []
        for read in self._configuration.reads().keys():
            qc_file = os.path.join(self._metrics_path, "read%s.xml" % read)
            if not os.path.exists(qc_file):
                raise AssertionError(
                    "The RTA QC metrics file %s does not exist" % qc_file)
            self._metric_files.append(qc_file)

        # Parse the XML files
        self.readSummaries()

    @staticmethod
    def metrics():
        """Return the metric table used by ``getQCstats()``.

        Each row is ``[result_key, lane_xml_attribute, clu_dens]``; the
        third item is passed on as the ``clu_dens`` flag of
        ``getLaneMetric()``. A new list is built on every call.
        """
        return [['error_rate', 'ErrRatePhiX', False],
                ['error_rate_sd', 'ErrRatePhiXSD', False],
                ['raw_cluster_dens', 'ClustersRaw', True],
                ['raw_cluster_dens_sd', 'ClustersRawSD', True],
                ['prc_cluster_pf', 'PrcPFClusters', False],
                ['prc_cluster_pf_sd', 'PrcPFClustersSD', False],
                ['pf_cluster_dens', 'ClustersPF', True],
                ['pf_cluster_dens_sd', 'ClustersPFSD', True],
                ['phasing', 'Phasing', False],
                ['prephasing', 'Prephasing', False],
                ['prc_aligned', 'PrcAlign', False],
                ['prc_aligned_sd', 'PrcAlignSD', False]]

    def configuration(self):
        """Return the ``IlluminaConfiguration`` created in ``__init__``."""
        return self._configuration

    # getQCstats() is probably the method you usually want to call
    def getQCstats(self):
        """Return every metric from ``metrics()`` for all reads and lanes.

        The result maps each metric's result key to the dictionary
        returned by ``getAllLaneMetrics()`` for that metric.
        """
        qc_stats = {}
        for name, attribute, clu_dens in self.metrics():
            qc_stats[name] = self.getAllLaneMetrics(attribute, clu_dens)
        return qc_stats

    def readSummaries(self):
        """Parse every file in ``self._metric_files`` into ``self._qc_roots``.

        Roots are keyed by their ``Read`` attribute, with ``"0"`` used when
        the attribute is absent.
        """
        self._qc_roots = {}
        for qc_file in self._metric_files:
            root = ET.parse(qc_file).getroot()
            if root is None:
                continue
            self._qc_roots[root.get("Read", "0")] = root

    def getAllSingleLaneMetric(self, metric, lane, clu_dens=False):
        """Return *metric* for one *lane* across all parsed reads.

        The result maps ``"read<Read>"`` to the value returned by
        ``getSingleLaneMetric()`` for that read.
        """
        metrics = {}
        for read, root in self._qc_roots.items():
            metrics["read%s" % read] = self.getSingleLaneMetric(
                root, metric, lane, clu_dens)
        return metrics

    def getSingleLaneMetric(self, root, metric, lane, clu_dens=False):
        """Return *metric* for a single *lane* of one parsed summary *root*.

        *lane* may be given as an int or a string.

        Raises:
            KeyError: if *root* has no ``Lane`` element whose ``key``
                equals ``str(lane)``.
        """
        lane_values = self.getLaneMetric(root, metric, clu_dens, lane)
        # getLaneMetric() keys its result by the string "key" attribute, so
        # the lookup has to use the string form as well; indexing with an
        # int lane would raise KeyError even when the lane is present.
        return lane_values[str(lane)]

    def getAllLaneMetrics(self, metric, clu_dens):
        """Return *metric* for all lanes across all parsed reads.

        The result maps ``"read<Read>"`` to the per-lane dictionary
        returned by ``getLaneMetric()`` for that read.
        """
        metrics = {}
        for read, root in self._qc_roots.items():
            metrics["read%s" % read] = self.getLaneMetric(
                root, metric, clu_dens)
        return metrics

    def getLaneMetric(self, root, metric, clu_dens, lane=None):
        """Read the *metric* attribute from the ``Lane`` children of *root*.

        Args:
            root: parsed summary root element.
            metric: name of the ``Lane`` attribute to read.
            clu_dens: when true, each value is multiplied by the root's
                ``densityRatio`` attribute, divided by 1000, rounded to an
                integer and returned as a string with a ``'K'`` suffix.
            lane: when not None, only the lane whose ``key`` attribute
                equals ``str(lane)`` is included.

        Returns:
            A dict mapping each lane's ``key`` attribute (a string) to a
            float, or to the formatted string when *clu_dens* is true.

        Raises:
            TypeError: if a needed attribute is missing, because
                ``float(None)`` is then evaluated.
        """
        dens_ratio = None
        if clu_dens:
            dens_ratio = float(root.get("densityRatio"))
        wanted = None if lane is None else str(lane)

        values = {}
        for lane_elem in root.findall("Lane"):
            key = lane_elem.get("key")
            if wanted is not None and key != wanted:
                continue
            val = float(lane_elem.get(metric))
            if clu_dens:
                values[key] = str(int(round((dens_ratio * val) / 1000))) + 'K'
            else:
                values[key] = val
        return values