Ejemplo n.º 1
0
    def test_get_peptide(self):
        """Look up precursor groups by key and iterate over a Run.

        Five mock precursor groups are stored under the keys "0".."4". The
        test checks that key "2" resolves to the group whose ``id_`` is 2,
        that an unknown key yields None, and that iterating the run produces
        objects whose ``id_`` values are 0..4.
        """
        r = Run([], {}, "run1", "file1.txt", filename="file1.csv", aligned_filename="file1.tsv")
        r.all_precursor_groups_ = {str(i): MockPrecursorGroup(i) for i in range(5)}
        self.assertEqual(r.getPrecursorGroup("2").id_, 2)
        self.assertIsNone(r.getPrecursorGroup("9_dummy"))

        ids = sorted([p.id_ for p in r])
        # sorted() returns a list, and on Python 3 a list never compares equal
        # to a range object, so the expected value must be a list as well.
        self.assertEqual(ids, list(range(5)))
Ejemplo n.º 2
0
 def test_get_best_peaks(self):
     """Expect get_best_peaks() to give five "42" entries for five mock groups."""
     run = Run([], {}, "run1", "file1.txt",
               filename="file1.csv", aligned_filename="file1.tsv")
     # One mock precursor group per key "0".."4".
     run.all_precursor_groups_ = {
         str(idx): MockPrecursorGroup(idx) for idx in range(5)
     }
     best_peaks = run.get_best_peaks()
     self.assertEqual(best_peaks, ["42"] * 5)
Ejemplo n.º 3
0
 def test_createRun(self):
     """Build a Run and read back its id and both filenames via the getters."""
     run = Run([], {}, "run1", "file1.txt",
               filename="file1.csv", aligned_filename="file1.tsv")
     # Always passes; it is only reached when the constructor did not raise.
     self.assertTrue(True)

     run_id = run.get_id()
     self.assertEqual(run_id, "run1")
     openswath_name = run.get_openswath_filename()
     self.assertEqual(openswath_name, "file1.csv")
     aligned_name = run.get_aligned_filename()
     self.assertEqual(aligned_name, "file1.tsv")
Ejemplo n.º 4
0
    def test_get_peptide(self):
        """Exercise precursor-group lookup by key and iteration over a Run."""
        run = Run([], {}, "run1", "file1.txt",
                  filename="file1.csv", aligned_filename="file1.tsv")

        # Register five mock groups under the string keys "0".."4".
        groups = {}
        for idx in range(5):
            groups[str(idx)] = MockPrecursorGroup(idx)
        run.all_precursor_groups_ = groups

        # A known key resolves to its group, an unknown key to None.
        found = run.getPrecursorGroup("2")
        self.assertEqual(found.id_, 2)
        missing = run.getPrecursorGroup("9_dummy")
        self.assertIsNone(missing)

        # Iterating the run must expose every registered group.
        ids = [group.id_ for group in run]
        ids.sort()
        self.assertEqual(ids, list(range(5)))
 def test_MSfileRunMapping(self):
     """Check mapper.MSfileRunMapping for two chromatogram files and three runs.

     Three runs are passed in but only two chromatogram files. For the two
     MS file names that are asserted on, the mapping entry must hold the
     chromatogram path at index 0 and a run with the expected id at index 1.
     """
     from msproteomicstoolslib.data_structures.Run import Run

     osw_path = os.path.join(self.datadir_DIAlign, 'merged.osw')
     chrom_a = os.path.join(
         self.datadir_DIAlign,
         'hroest_K120808_Strep10%PlasmaBiolRepl1_R03_SW_filt.chrom.mzML')
     chrom_b = os.path.join(
         self.datadir_DIAlign,
         'hroest_K120809_Strep0%PlasmaBiolRepl2_R04_SW_filt.chrom.mzML')

     ms_a = 'data/raw/hroest_K120808_Strep10%PlasmaBiolRepl1_R03_SW_filt.mzML.gz'
     ms_b = 'data/raw/hroest_K120809_Strep0%PlasmaBiolRepl2_R04_SW_filt.mzML.gz'
     ms_c = 'data/raw/hroest_K120809_Strep10%PlasmaBiolRepl2_R04_SW_filt.mzML.gz'

     # (run id, MS file name) for every run; the MS file name is used for
     # both filename arguments of the Run constructor.
     run_specs = [
         (125704171604355508, ms_a),
         (6752973645981403097, ms_b),
         (2234664662238281994, ms_c),
     ]
     runs = []
     for run_id, ms_file in run_specs:
         runs.append(
             Run([], {}, run_id, osw_path, ms_file, ms_file, useCython=False))

     mapping = mapper.MSfileRunMapping([chrom_a, chrom_b], runs)

     # (MS file key, expected chromatogram file, expected run id)
     expectations = (
         (ms_a, chrom_a, 125704171604355508),
         (ms_b, chrom_b, 6752973645981403097),
     )
     for ms_file, chrom_file, run_id in expectations:
         self.assertEqual(mapping[ms_file][0], chrom_file)
         self.assertEqual(mapping[ms_file][1].get_id(), run_id)
 def setUp(self):
     """Prepare data paths, two Run objects and an MS-file mapping fixture.

     Sets ``dirname``, ``topdir``, ``datadir``, ``datadir_DIAlign``,
     ``chromFile0``, ``chromFile2``, ``runs`` and ``MStoFeature`` on the
     test instance.
     """
     here = os.path.dirname(os.path.abspath(__file__))
     self.dirname = here
     # Two levels above this file, then down into test/data/DIAlign.
     self.topdir = os.path.join(here, "..", "..")
     self.datadir = os.path.join(self.topdir, "test", "data")
     self.datadir_DIAlign = os.path.join(self.datadir, "DIAlign")

     osw_path = os.path.join(self.datadir_DIAlign, 'merged.osw')
     self.chromFile0 = os.path.join(
         self.datadir_DIAlign,
         'hroest_K120808_Strep10%PlasmaBiolRepl1_R03_SW_filt.chrom.mzML')
     self.chromFile2 = os.path.join(
         self.datadir_DIAlign,
         'hroest_K120809_Strep10%PlasmaBiolRepl2_R04_SW_filt.chrom.mzML')

     ms_file0 = 'data/raw/hroest_K120808_Strep10%PlasmaBiolRepl1_R03_SW_filt.mzML.gz'
     ms_file2 = 'data/raw/hroest_K120809_Strep10%PlasmaBiolRepl2_R04_SW_filt.mzML.gz'
     first_run = Run([], {}, 125704171604355508, osw_path,
                     ms_file0, ms_file0, useCython=False)
     second_run = Run([], {}, 2234664662238281994, osw_path,
                      ms_file2, ms_file2, useCython=False)
     self.runs = [first_run, second_run]

     # MS file name -> (chromatogram file, run)
     self.MStoFeature = {
         ms_file0: (self.chromFile0, first_run),
         ms_file2: (self.chromFile2, second_run),
     }
Ejemplo n.º 7
0
 def test_updateRetentionTime(self):
     """Check chromAlign.updateRetentionTime on a single stored peak group.

     A peak group tuple whose third field is 1.47 is added to a precursor.
     After the update with the two time arrays below, the stored tuple must
     carry 21.5 in that field and a trailing None.
     """
     run = Run([], {}, 125704171604355508, 'merged.osw',
               'file.mzML.gz', 'file.mzML.gz', useCython=False)
     precursor = Precursor(self.trgr_id, run)
     run.addPrecursor(precursor, self.peptide_group_label)

     stored = run.getPrecursor(self.peptide_group_label, self.trgr_id)
     stored.add_peakgroup_tpl((364283, 0.001, 1.47, 3000),
                              self.trgr_id,
                              cluster_id=-1)

     # Both arrays have NaN at the first and last position.
     reference_times = np.array(
         [np.nan, 21.1, 21.2, 21.3, 21.35, 21.4, 21.5, 21.55, 21.6, 21.7,
          21.8, np.nan])
     experiment_times = np.array(
         [np.nan, 1.1, 1.2, 1.3, 1.35, 1.4, 1.5, 1.55, 1.6, 1.7, 1.8,
          np.nan])

     chromAlign.updateRetentionTime(run, self.peptide_group_label,
                                    self.trgr_id, reference_times,
                                    experiment_times)

     updated = run.getPrecursor(self.peptide_group_label, self.trgr_id)
     expected_peakgroups = [(364283, 0.001, 21.5, 3000, None)]
     self.assertEqual(updated.peakgroups_, expected_peakgroups)
Ejemplo n.º 8
0
    def test_getMapping(self):
        """Check reader.getMapping for one feature file and two chromatogram files.

        The result is indexed by the feature file path; its first two entries
        are checked for type, id and the filenames reported by their getters.
        """
        osw_path = os.path.join(self.datadir_DIAlign, 'merged.osw')
        chrom_names = (
            'hroest_K120808_Strep10%PlasmaBiolRepl1_R03_SW_filt.chrom.mzML',
            'hroest_K120809_Strep0%PlasmaBiolRepl2_R04_SW_filt.chrom.mzML',
        )
        chromatogram_files = [
            os.path.join(self.datadir_DIAlign, name) for name in chrom_names
        ]
        mapping = reader.getMapping(chromatogram_files, [osw_path])

        ms_first = 'data/raw/hroest_K120808_Strep10%PlasmaBiolRepl1_R03_SW_filt.mzML.gz'
        ms_second = 'data/raw/hroest_K120809_Strep0%PlasmaBiolRepl2_R04_SW_filt.mzML.gz'
        # Reference objects that spell out the expected mapping content,
        # i.e. mapping == {osw_path: [run0, run1]}; they are not compared
        # against directly.
        run0 = Run([], {}, 125704171604355508, osw_path,
                   ms_first, ms_first, useCython=False)
        run1 = Run([], {}, 6752973645981403097, osw_path,
                   ms_second, ms_second, useCython=False)

        self.assertIsInstance(mapping[osw_path][0], Run)
        self.assertEqual(mapping[osw_path][0].get_openswath_filename(),
                         ms_first)
        self.assertEqual(mapping[osw_path][0].get_id(), 125704171604355508)
        self.assertEqual(mapping[osw_path][1].get_original_filename(),
                         osw_path)
        self.assertEqual(mapping[osw_path][1].get_aligned_filename(),
                         ms_second)
Ejemplo n.º 9
0
    def test_getRunfromFeatureFile(self):
        """Check reader.getRunfromFeatureFile on the merged.osw test file.

        The result is indexed by the feature file path; its first three
        entries are checked for type, id and the filenames reported by their
        getters.
        """
        osw_path = os.path.join(self.datadir_DIAlign, 'merged.osw')
        ms_files = (
            'data/raw/hroest_K120808_Strep10%PlasmaBiolRepl1_R03_SW_filt.mzML.gz',
            'data/raw/hroest_K120809_Strep0%PlasmaBiolRepl2_R04_SW_filt.mzML.gz',
            'data/raw/hroest_K120809_Strep10%PlasmaBiolRepl2_R04_SW_filt.mzML.gz',
        )
        # Reference objects that spell out the expected result, i.e.
        # fileMapping == {osw_path: [run0, run1, run2]}; they are not
        # compared against directly.
        run0 = Run([], {}, 125704171604355508, osw_path,
                   ms_files[0], ms_files[0], useCython=False)
        run1 = Run([], {}, 6752973645981403097, osw_path,
                   ms_files[1], ms_files[1], useCython=False)
        run2 = Run([], {}, 2234664662238281994, osw_path,
                   ms_files[2], ms_files[2], useCython=False)

        file_mapping = reader.getRunfromFeatureFile([osw_path])

        self.assertIsInstance(file_mapping[osw_path][0], Run)
        self.assertEqual(file_mapping[osw_path][0].get_openswath_filename(),
                         ms_files[0])
        self.assertEqual(file_mapping[osw_path][1].get_id(),
                         6752973645981403097)
        self.assertEqual(file_mapping[osw_path][2].get_original_filename(),
                         osw_path)
        self.assertEqual(file_mapping[osw_path][2].get_aligned_filename(),
                         ms_files[2])
Ejemplo n.º 10
0
    def parse_file(self, filename, runs, useCython):
        """
        Parse a whole OSW file (which may contain data from multiple runs)

        One Run is created for every row of the RUN table, appended to
        ``runs`` and then handed to ``self._parse_file`` together with the
        open database connection.

        Args:
            filename(str) : path of the OSW (SQLite) file to read
            runs(list) : output list, every Run created here is appended to it
            useCython(bool) : forwarded to the Run constructor

        Returns:
            int : sum of the values returned by ``self._parse_file``
        """

        import sqlite3
        from contextlib import closing

        # closing() releases the database handle even when the query or the
        # per-run parsing raises, so the connection is never leaked.
        with closing(sqlite3.connect(filename)) as conn:
            # Fetch all runs up front so the cursor is exhausted before the
            # connection is reused by _parse_file.
            query = """SELECT ID, FILENAME FROM RUN"""
            run_rows = conn.cursor().execute(query).fetchall()

            nrows = 0
            for runid, ms_filename in run_rows:
                # The FILENAME column is used for both filename arguments.
                current_run = Run([], {},
                                  runid,
                                  filename,
                                  ms_filename,
                                  ms_filename,
                                  useCython=useCython)
                runs.append(current_run)
                nrows += self._parse_file(filename, current_run, runid, conn)
        return nrows
def getRunfromFeatureFile(featureFiles, useCython=False):
    """
    Return a dictionary with each feature file as key and a list of its Run
    objects as value.

    One Run is created per row of the RUN table of every feature file. If
    reading a file raises ``sqlite3.Error``, a message is printed and the
    file keeps whatever runs were collected before the error (an empty list
    when the query itself failed).

    Args:
        featureFiles(list(str)) : paths of the feature (OSW / SQLite) files
        useCython(bool) : forwarded to the Run constructor

    Returns:
        dict : feature file path -> list of Run objects, for example
            ``{"merged.osw": [run0, run1, run2]}`` for
            ``featureFiles = ["merged.osw"]``
    """

    import sqlite3
    MSfile_featureFile_mapping = {}
    for filename in featureFiles:
        file_runs = []
        MSfile_featureFile_mapping[filename] = file_runs
        conn = sqlite3.connect(filename)
        try:
            # Retrieve and then iterate over all available runs
            query = """SELECT ID, FILENAME FROM RUN"""
            results = conn.cursor().execute(query).fetchall()
            for (run_id, MS_file) in results:
                # The FILENAME column is used for both filename arguments.
                file_runs.append(Run([], {},
                                     run_id,
                                     filename,
                                     MS_file,
                                     MS_file,
                                     useCython=useCython))
        except sqlite3.Error as e:
            print("An error occured in reading file " + str(filename) + ", ",
                  e.args[0])
        finally:
            # Single close on every path: success, sqlite error, or any
            # other exception propagating out of the loop.
            conn.close()
    return MSfile_featureFile_mapping
    def parse_files(self, read_exp_RT=True, verbosity=10):
        """Parse the input file(s) (CSV).

        Args:
            read_exp_RT(bool) : to read the real, experimental retention time
                (default behavior) or the delta iRT should be used instead.
            verbosity(int) : progress is written to stdout when this is
                10 or larger.

        Returns:
            runs(list(SWATHScoringReader.Run))

        A single CSV file might contain more than one run and thus to create
        unique run ids, we number the runs as xx_yy where xx is the run found
        in the current file and yy is the current file number. However, if an
        alignment has already been performed and each run has already obtained
        a unique run id, we can directly use the previous alignment id.

        Files whose name ends in ``.gz`` are read through gzip. Rows rejected
        by ``self.readfilter`` are counted as skipped; all other rows are
        passed to ``self.parse_row``.
        """

        import csv
        import sys

        stdout = sys.stdout
        print("Parsing input files")
        skipped = 0
        read = 0
        runs = []
        # run id -> Run, so each row finds its run without scanning the list
        runs_by_id = {}
        for file_nr, f in enumerate(self.infiles):
            if verbosity >= 10:
                stdout.write("\rReading %s" % str(f))
                stdout.flush()
            if f.endswith('.gz'):
                import gzip
                # csv.reader needs text on Python 3 and byte strings on
                # Python 2, so pick the gzip mode accordingly.
                gzip_mode = 'rt' if sys.version_info[0] >= 3 else 'rb'
                filehandler = gzip.open(f, gzip_mode)
            else:
                filehandler = open(f)

            # Make sure the input file is closed even if a row fails to parse.
            try:
                reader = csv.reader(filehandler, delimiter="\t")
                header = next(reader)
                # column name -> column index
                header_dict = {}
                for i, n in enumerate(header):
                    header_dict[n] = i
                if verbosity >= 10:
                    stdout.write("\rReading file %s" % (str(f)))
                    stdout.flush()

                # Check if runs are already aligned (only one input file and
                # correct header)
                already_aligned = (len(self.infiles) == 1
                                   and self.aligned_run_id_name in header_dict)

                for this_row in reader:
                    if already_aligned:
                        runid = this_row[header_dict[self.aligned_run_id_name]]
                    else:
                        runnr = this_row[header_dict[self.run_id_name]]
                        runid = runnr + "_" + str(file_nr)

                    current_run = runs_by_id.get(runid)
                    # check if we have a new run
                    if current_run is None:
                        orig_fname = None
                        aligned_fname = None
                        if "align_origfilename" in header_dict:
                            aligned_fname = this_row[header_dict["align_origfilename"]]
                        if "filename" in header_dict:
                            orig_fname = this_row[header_dict["filename"]]
                        current_run = Run(header, header_dict, runid, f,
                                          orig_fname, aligned_fname)
                        runs.append(current_run)
                        runs_by_id[runid] = current_run
                        # Single formatted string so the output is the same
                        # under the print statement and the print function.
                        print("%s maps to  %s" % (current_run, orig_fname))

                    if not self.readfilter(this_row, current_run.header_dict):
                        skipped += 1
                        continue

                    read += 1
                    # Unfortunately, since we are using csv, tell() will not
                    # work on the file handle here.
                    self.parse_row(current_run, this_row, read_exp_RT)
            finally:
                filehandler.close()

        # Here we check that each run indeed has a unique id
        assert len(set([r.get_id() for r in runs])) == len(runs)
        if verbosity >= 10:
            stdout.write("\r\r\n")  # clean up
        print("Found %s runs, read %s lines and skipped %s lines" % (len(runs), read, skipped))
        return runs
Ejemplo n.º 13
0
 def test_get_best_peaks(self):
     """Five mock precursor groups should give five "42" best peaks."""
     run = Run([], {}, "run1", "file1.txt",
               filename="file1.csv", aligned_filename="file1.tsv")

     # Register the mock groups under the string keys "0".."4".
     mock_groups = {}
     for number in range(5):
         mock_groups[str(number)] = MockPrecursorGroup(number)
     run.all_precursor_groups_ = mock_groups

     observed = run.get_best_peaks()
     self.assertEqual(observed, ["42", "42", "42", "42", "42"])
Ejemplo n.º 14
0
 def test_createRun(self):
     """Construct a Run and verify the values returned by its getters."""
     run = Run([], {}, "run1", "file1.txt",
               filename="file1.csv", aligned_filename="file1.tsv")
     # Always passes; it is only reached when the constructor did not raise.
     self.assertTrue(True)

     # (getter name, expected return value), checked in this order
     getter_expectations = (
         ("get_id", "run1"),
         ("get_openswath_filename", "file1.csv"),
         ("get_aligned_filename", "file1.tsv"),
     )
     for getter_name, expected in getter_expectations:
         self.assertEqual(getattr(run, getter_name)(), expected)