예제 #1
0
    def loadMixedFiles(self, rawdata_files, aligned_pg_files, fileType):
        """ Load files that contain raw data files and aligned peakgroup files.

        Since no mapping is present here, we need to infer it from the data.
        Basically, we try to map the column align_runid to the filenames of the
        input .chrom.mzML hoping that the user did not change the filenames.

        The inferred mapping is used to initialize a SwathRunCollection whose
        runs are stored in ``self.runs``. The aligned peakgroup files are read
        afterwards unless fileType is "simple" or "traml".

        Parameters
        ----------
        rawdata_files : list of str
            List of paths to chrom.mzML files
        aligned_pg_files : list of str
            List of paths to output files of the FeatureAligner
        fileType : str
            Description of the type of file the metadata file (e.g. simple,
            traml, openswath). The value "sqmass" makes the chromatograms be
            loaded through ``initialize_from_sql_map`` instead of
            ``initialize_from_chromatograms``.
        """

        # Single-argument print(...) produces the same output whether print is
        # a statement (Python 2) or a function (Python 3).
        print("Input contained no mapping of run_id to the chromatograms.")
        print("Try to infer mapping for filetype %s - if this fails, please provide a yaml input." % fileType)

        # Passed in empty; inferMapping is expected to populate them in place
        # (nothing is taken from its return value).
        precursors_mapping = {}
        sequences_mapping = {}
        protein_mapping = {}
        mapping = {}
        inferMapping(rawdata_files,
                     aligned_pg_files,
                     mapping,
                     precursors_mapping,
                     sequences_mapping,
                     protein_mapping,
                     fileType=fileType)
        print("Found the following mapping: mapping %s" % (mapping,))

        # Read the chromatograms
        swathfiles = SwathRunCollection()
        if fileType == "sqmass":
            swathfiles.initialize_from_sql_map(mapping, rawdata_files,
                                               precursors_mapping,
                                               sequences_mapping,
                                               protein_mapping)
        elif self.only_show_quantified:
            # The extra mappings are only handed over when
            # self.only_show_quantified is set.
            swathfiles.initialize_from_chromatograms(mapping,
                                                     precursors_mapping,
                                                     sequences_mapping,
                                                     protein_mapping)
        else:
            swathfiles.initialize_from_chromatograms(mapping)
        self.runs = list(swathfiles.getSwathFiles())

        if fileType not in ("simple", "traml"):
            self._read_peakgroup_files(aligned_pg_files, swathfiles)

        print("Find in total a collection of %s runs." % len(
            swathfiles.getRunIds()))
예제 #2
0
    def loadFiles(self, filenames):
        """
        Read chromatogram-only input (no peakgroup information) into self.runs.

        Args:
            filenames(list of str): Paths of the files holding the chromatograms
        """

        run_collection = SwathRunCollection()
        run_collection.initialize_from_files(filenames)
        # Materialize whatever getSwathFiles() yields into a plain list.
        self.runs = list(run_collection.getSwathFiles())
예제 #3
0
    def _loadFiles_with_peakgroups(self, RawData, aligned_pg_files):
        """ Load the chromatograms described by RawData plus peakgroup data.

        The resulting runs are stored in ``self.runs``. Afterwards
        ``self._read_trafo`` is tried first; the aligned peakgroup files are
        only read if that raises an IOError.

        Parameters
        ----------
        RawData : iterable of dict
            One entry per run. Each entry is read for the key "id" and for
            "directory"; if any entry lacks "directory", the key
            "chromatograms" is used for all entries instead.
        aligned_pg_files : list of str
            Peakgroup files handed to ``self._read_peakgroup_files`` when
            ``self._read_trafo`` fails with an IOError
        """

        # Read the chromatograms
        swathfiles = SwathRunCollection()

        # Guard only the key lookup: a KeyError raised *inside* the loader
        # should surface instead of being mistaken for a missing "directory"
        # key and silently triggering the chromatogram fallback.
        try:
            directories = {d["id"]: d["directory"] for d in RawData}
        except KeyError:
            directories = None

        if directories is not None:
            swathfiles.initialize_from_directories(directories)
        else:
            swathfiles.initialize_from_chromatograms(
                {d["id"]: d["chromatograms"] for d in RawData})
        self.runs = list(swathfiles.getSwathFiles())
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print("Find in total a collection of %s runs." % len(swathfiles.getRunIds()))

        try:
            self._read_trafo(RawData)
        except IOError:
            self._read_peakgroup_files(aligned_pg_files, swathfiles)
예제 #4
0
    def loadSqMassFiles(self, filenames):
        """
        Read chromatograms via SwathRunCollection.initialize_from_sql into self.runs.

        Args:
            filenames: Passed unchanged to ``initialize_from_sql``
        """

        sql_collection = SwathRunCollection()
        sql_collection.initialize_from_sql(filenames)
        # Materialize whatever getSwathFiles() yields into a plain list.
        self.runs = list(sql_collection.getSwathFiles())